If you define
# Boolean Series: True where 'code' holds a value, False where it is missing.
code_notnull = pd.notnull(df['code'])
Then you can identify the start of each new group using
# True where 'code' is not null but the previous row's 'code' is null.
# fill_value=False makes the (non-existent) row before the first one count
# as null, so a non-null first row is also flagged as a group start.
mask = code_notnull & ~(code_notnull.shift(1, fill_value=False))
0 True
1 False
2 False
3 False
4 True
...
You can then define group numbers using
# Running count of group starts: the count increases by one at each True in
# mask, so all rows up to the next start share the same number.
group_num = mask.cumsum()
0 1
1 1
2 1
3 1
4 2
...
and then group by group_num:
import numpy as np
import pandas as pd
nan = np.nan

# Sample data, one column per entry (17 rows each).
df = pd.DataFrame({
    'code': [
        1111.0, 1111.0, nan, nan, 2222.0, 2222.0, nan, nan, nan, 3333.0, nan,
        1111.0, 1111.0, nan, nan, nan, 5555.0,
    ],
    'item01': [
        'a', 'b', 'c', 'd', 'b', 'b', 'c', 'd', 'e', 'd', 'b', 'c', 'b', 'c',
        'd', 'e', 'a',
    ],
    'item02': [
        123.0, nan, nan, nan, 123.0, nan, nan, nan, nan, 123.0, nan, 123.0,
        nan, nan, nan, nan, nan,
    ],
    'item03': [
        234.0, nan, nan, nan, 234.0, nan, nan, nan, nan, 234.0, nan, 234.0,
        nan, nan, nan, nan, nan,
    ],
    'item04': [
        345.0, nan, nan, nan, 345.0, nan, nan, nan, nan, 345.0, nan, 345.0,
        nan, nan, nan, nan, nan,
    ],
    'item05': [
        440.0, nan, nan, nan, 456.0, nan, nan, nan, nan, 456.0, nan, 456.0,
        nan, nan, nan, nan, nan,
    ],
})

# A group starts on every row whose 'code' is non-null while the row before
# it is null; the first row has no predecessor and is treated as following
# a null (fill_value=False).
code_notnull = pd.notnull(df['code'])
mask = code_notnull & ~code_notnull.shift(1, fill_value=False)

# Number the groups 1, 2, 3, ... by counting the starts seen so far.
group_num = mask.cumsum()

# Forward-fill every NaN. This runs on the whole frame before grouping, so a
# value can be carried over from an earlier group as well.
df = df.ffill()

grouped = df.groupby(group_num)

# Take the first row of each group, then replace item01 with the
# concatenation of all item01 strings in that group.
result = grouped.first().assign(item01=grouped['item01'].sum())
print(result)
yields
code item01 item02 item03 item04 item05
code
1 1111.0 abcd 123.0 234.0 345.0 440.0
2 2222.0 bbcde 123.0 234.0 345.0 456.0
3 3333.0 db 123.0 234.0 345.0 456.0
4 1111.0 cbcde 123.0 234.0 345.0 456.0
5 5555.0 a 123.0 234.0 345.0 456.0
Note that above I assumed your strings in item01 do not begin and end with single quotation marks.
If they do, you could remove them with
# Keep everything except the first and last character of each string,
# i.e. drop the surrounding single quotes.
df['item01'] = df['item01'].str[1:-1]
and then proceed as above. For example:
import numpy as np
import pandas as pd
nan = np.nan

# Same sample data as before, except that every item01 string is wrapped in
# literal single-quote characters.
df = pd.DataFrame({
    'code': [
        1111.0, 1111.0, nan, nan, 2222.0, 2222.0, nan, nan, nan, 3333.0, nan,
        1111.0, 1111.0, nan, nan, nan, 5555.0,
    ],
    'item01': [
        "'a'", "'b'", "'c'", "'d'", "'b'", "'b'", "'c'", "'d'", "'e'", "'d'",
        "'b'", "'c'", "'b'", "'c'", "'d'", "'e'", "'a'",
    ],
    'item02': [
        123.0, nan, nan, nan, 123.0, nan, nan, nan, nan, 123.0, nan, 123.0,
        nan, nan, nan, nan, nan,
    ],
    'item03': [
        234.0, nan, nan, nan, 234.0, nan, nan, nan, nan, 234.0, nan, 234.0,
        nan, nan, nan, nan, nan,
    ],
    'item04': [
        345.0, nan, nan, nan, 345.0, nan, nan, nan, nan, 345.0, nan, 345.0,
        nan, nan, nan, nan, nan,
    ],
    'item05': [
        440.0, nan, nan, nan, 456.0, nan, nan, nan, nan, 456.0, nan, 456.0,
        nan, nan, nan, nan, nan,
    ],
})

# Slice off the first and last character of each string (the quotes).
df['item01'] = df['item01'].str[1:-1]
print(df)
yields (single quotes in df['item01'] have been removed)
code item01 item02 item03 item04 item05
0 1111.0 a 123.0 234.0 345.0 440.0
1 1111.0 b NaN NaN NaN NaN
2 NaN c NaN NaN NaN NaN
3 NaN d NaN NaN NaN NaN
...
If you want to add single quotes back to the final result, you could use:
# Prepend and append a single-quote character to every item01 string.
result['item01'] = "'" + result['item01'] + "'"