Another way using np.where + np.diff to identify the split locations:
# Split a2 right before every False -> True step (a 0 is prepended so a run
# starting at index 0 is detected too).  The first piece lies before the first
# run start, so it is dropped; summing a piece counts its True entries.
out = [
    piece.sum()
    for piece in np.split(
        a2,
        np.where(np.diff(a2.astype(int), prepend=0) == 1)[0],
    )[1:]
]
np.split is slow, so we can replace it with zip in a list comprehension and walk over an array of indices. Also, instead of sum, we could index the array and use len:
# Indices where a2 steps from False to True, i.e. where each run of True starts
# (a 0 is prepended so a run starting at index 0 is detected too).
idx = np.where(np.diff(a2.astype(int), prepend=0) == 1)[0]
# Pair every run start with the next start; the last run is closed by len(a2).
# When a2 holds no True at all, idx is empty, the zip yields nothing and out is
# [] (indexing idx[-1] would raise IndexError in that case).
# a2[i:j][a2[i:j]] keeps only the True entries of the slice, so len() counts them.
out = [len(a2[i:j][a2[i:j]]) for i, j in zip(idx, np.append(idx[1:], len(a2)))]
Output:
[1, 3]
Performance comparison:
import perfplot
import numpy as np
import itertools
import random
def diff_where_split_sum(a2):
    """Return the length of each run of consecutive True values in ``a2``.

    Run starts are the positions where the integer-cast array steps up from
    0 to 1; a 0 is prepended so that a run beginning at index 0 is found too.
    The array is split at those starts and every piece is summed, which
    counts the True entries it contains.

    Assumes ``a2`` is a 1-D boolean NumPy array (that is what the benchmark
    setup in this file builds).
    """
    as_int = a2.astype(int)
    rising = np.diff(as_int, prepend=0) == 1
    run_starts = np.where(rising)[0]
    pieces = np.split(a2, run_starts)

    run_lengths = []
    # pieces[0] is whatever precedes the first run start, so it is skipped.
    for piece in pieces[1:]:
        run_lengths.append(piece.sum())
    return run_lengths
def flatnonzero_split_if_sum(a2):
    """Return the length of each run of consecutive True values in ``a2``.

    The array is split at every index where ``a2`` is False, so each piece
    holds at most one run of True values (preceded by the False it was split
    on).  Pieces whose sum is zero contain no True and are discarded.

    Assumes ``a2`` is a 1-D boolean NumPy array, since ``~a2`` is used as a
    logical negation.
    """
    false_positions = np.flatnonzero(~a2)
    chunks = np.split(a2, false_positions)

    run_lengths = []
    for chunk in chunks:
        # The sum is evaluated once for the filter and again for the result.
        if chunk.sum() > 0:
            run_lengths.append(chunk.sum())
    return run_lengths
def groupby_if_sum(a2):
    """Return the length of each run of consecutive truthy values in ``a2``.

    ``itertools.groupby`` yields one group per stretch of equal consecutive
    values; only the groups whose key is truthy are counted.
    """
    run_lengths = []
    for value, run in itertools.groupby(a2):
        if not value:
            continue
        # The group is a one-shot iterator, so count its items by exhausting it.
        run_lengths.append(sum(1 for _ in run))
    return run_lengths
def diff_where_slice_index_len(a2):
    """Return the length of each run of consecutive True values in ``a2``.

    Run starts are the positions where the integer-cast array steps up from
    0 to 1 (a 0 is prepended so a run beginning at index 0 is found too).
    Each run start is paired with the next one, the last run being closed by
    ``len(a2)``.  The slice between the two is masked with itself to keep
    only its True entries, whose count is the run length.

    Returns an empty list when ``a2`` contains no True value, including when
    it is empty.

    Assumes ``a2`` is a 1-D boolean NumPy array, since each slice is used as
    a boolean mask on itself.
    """
    idx = np.where(np.diff(a2.astype(int), prepend=0) == 1)[0]
    # With no run at all, idx is empty and the zip below yields nothing, so
    # the result is [] rather than an IndexError from indexing idx[-1].
    stops = np.append(idx[1:], len(a2))
    return [
        len(segment[segment])
        for segment in (a2[start:stop] for start, stop in zip(idx, stops))
    ]
# Time the four run-length implementations on boolean arrays of growing size
# and plot the results.
perfplot.show(
    # A random 10-element True/False pattern repeated n times (length 10 * n).
    setup=lambda n: np.array(random.choices([True, False], k=10) * n),
    # Kernels and labels are listed in the same order.
    kernels=[
        diff_where_split_sum,
        flatnonzero_split_if_sum,
        groupby_if_sum,
        diff_where_slice_index_len,
    ],
    labels=['diff_where_split_sum', 'flatnonzero_split_if_sum',
            'groupby_if_sum', 'diff_where_slice_index_len'],
    n_range=[2 ** k for k in range(20)],
    # Cross-check that every kernel returns the same run lengths.
    equality_check=np.allclose,
    xlabel='~len(arr)',
)
