这是NumPy的方法-
def intervaled_cumsum(a, trigger_val=1, start_val=0, invalid_specifier=-1):
    """Count positions since the most recent occurrence of ``trigger_val``.

    Every element equal to ``trigger_val`` restarts the count at
    ``start_val``; each following element increases it by one until the
    next trigger.  Elements located before the first trigger are set to
    ``invalid_specifier``.

    Parameters
    ----------
    a : array_like
        Input values.  Converted with ``np.asarray`` so plain sequences are
        accepted; the input is treated as flat (``a.size`` elements).
    trigger_val : scalar, optional
        Value that restarts the running count.
    start_val : int, optional
        Count assigned at each trigger position.
    invalid_specifier : scalar, optional
        Filler for positions that precede the first trigger.

    Returns
    -------
    numpy.ndarray
        1-D array with ``a.size`` elements.  When no element equals
        ``trigger_val`` the whole array is filled with
        ``invalid_specifier``.
    """
    a = np.asarray(a)
    out = np.ones(a.size, dtype=int)
    idx = np.flatnonzero(a == trigger_val)
    if idx.size == 0:
        return np.full(a.size, invalid_specifier)

    # Build per-position increments so that one cumulative sum yields the
    # answer: +1 everywhere, and at each trigger a negative jump that brings
    # the running total back to start_val.
    out[idx[0]] = -idx[0] + 1
    # Must come after the line above: when the first trigger is at index 0
    # this overrides it and seeds the running total with start_val.
    out[0] = start_val
    out[idx[1:]] = idx[:-1] - idx[1:] + 1
    np.cumsum(out, out=out)

    # No trigger has been seen yet for anything before the first one.
    out[:idx[0]] = invalid_specifier
    return out
下面是几个在数组数据上的运行示例,展示了不同触发值(trigger_val)和起始值(start_val)组合下的用法:
In [120]: a
Out[120]: array([0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0])

In [121]: p1 = intervaled_cumsum(a, trigger_val=1, start_val=0)
     ...: p2 = intervaled_cumsum(a, trigger_val=1, start_val=1)
     ...: p3 = intervaled_cumsum(a, trigger_val=0, start_val=0)
     ...: p4 = intervaled_cumsum(a, trigger_val=0, start_val=1)
     ...:

In [122]: np.vstack(( a, p1, p2, p3, p4 ))
Out[122]:
array([[ 0,  1,  1,  1,  0,  0,  1,  0,  0,  1,  1,  1,  1,  1,  0],
       [-1,  0,  0,  0,  1,  2,  0,  1,  2,  0,  0,  0,  0,  0,  1],
       [-1,  1,  1,  1,  2,  3,  1,  2,  3,  1,  1,  1,  1,  1,  2],
       [ 0,  1,  2,  3,  0,  0,  1,  0,  0,  1,  2,  3,  4,  5,  0],
       [ 1,  2,  3,  4,  1,  1,  2,  1,  1,  2,  3,  4,  5,  6,  1]])
用它来解决我们的情况:
# For every row, store how many rows have passed since the most recent row
# where Value == 1; rows before the first such row get -1 (these are the
# default trigger_val / invalid_specifier of intervaled_cumsum).
df['Last_Occurence'] = intervaled_cumsum(df.Value.values)
样本输出-
In [181]: df
Out[181]:
       Date  Value  Last_Occurence
0  01/01/17      0              -1
1  01/02/17      0              -1
2  01/03/17      1               0
3  01/04/17      0               1
4  01/05/17      0               2
5  01/06/17      0               3
6  01/07/17      1               0
7  01/08/17      0               1
8  01/09/17      0               2
运行时测试
方法-
# @Scott Boston's soln
def pandas_groupby(df):
    """Return a copy of ``df`` with a ``Last_Occurance`` column.

    Rows are grouped by the running count of truthy ``Value`` entries, so a
    new group starts at every truthy ``Value``; the new column is the
    position of each row inside its group.  Rows whose running sum of
    ``Value`` is still 0 (for 0/1 data: everything before the first 1) are
    set to NaN.

    ``df`` itself is not modified; ``DataFrame.assign`` returns a new frame.
    """
    # False wherever the cumulative sum is 0, True elsewhere.
    mask = df.Value.cumsum().replace(0, False).astype(bool)
    group_ids = df.Value.astype(bool).cumsum()
    # NOTE: the column name is spelled 'Last_Occurance' here, whereas
    # numpy_based writes 'Last_Occurence'; both spellings are kept as-is.
    return df.assign(
        Last_Occurance=df.groupby(group_ids).cumcount().where(mask)
    )


# Proposed in this post
def numpy_based(df):
    """Add a ``Last_Occurence`` column to ``df`` in place.

    The column is computed by ``intervaled_cumsum`` (with its default
    arguments) on the raw ``Value`` array.  Nothing is returned.
    """
    df['Last_Occurence'] = intervaled_cumsum(df.Value.values)
时间-
In [33]: df = pd.DataFrame((np.random.rand(10000000)>0.7).astype(int), columns=[['Value']])

In [34]: %timeit pandas_groupby(df)
1 loops, best of 3: 1.06 s per loop

In [35]: %timeit numpy_based(df)
10 loops, best of 3: 103 ms per loop

In [36]: df = pd.DataFrame((np.random.rand(100000000)>0.7).astype(int), columns=[['Value']])

In [37]: %timeit pandas_groupby(df)
1 loops, best of 3: 11.1 s per loop

In [38]: %timeit numpy_based(df)
1 loops, best of 3: 1.03 s per loop



