You can use groupby with custom function f, which use shift and combine_first:
def f(x):
#print x
x['Secondname'] = x['Name'].shift(1).combine_first(x['Name'].shift(-1))
return x
print df.groupby('ID').apply(f)
ID Name Rate Secondname
0 1 A 65.5 NaN
1 2 B 67.3 C
2 2 C 78.8 B
3 3 D 65.0 NaN
4 4 E 45.3 NaN
5 5 F 52.0 G
6 5 G 66.0 F
7 6 H 34.0 NaN
8 7 I 2.0 NaN
You can avoid groupby and find duplicated, then fill helper columns by loc with column Name, then shift and combine_first and last drop helper columns:
print df.duplicated('ID', keep='first')
0 False
1 False
2 True
3 False
4 False
5 False
6 True
7 False
8 False
dtype: bool
print df.duplicated('ID', keep='last')
0 False
1 True
2 False
3 False
4 False
5 True
6 False
7 False
8 False
dtype: bool
df.loc[ df.duplicated('ID', keep='first'), 'first'] = df['Name']
df.loc[ df.duplicated('ID', keep='last'), 'last'] = df['Name']
print df
ID Name Rate first last
0 1 A 65.5 NaN NaN
1 2 B 67.3 NaN B
2 2 C 78.8 C NaN
3 3 D 65.0 NaN NaN
4 4 E 45.3 NaN NaN
5 5 F 52.0 NaN F
6 5 G 66.0 G NaN
7 6 H 34.0 NaN NaN
8 7 I 2.0 NaN NaN
df['SecondName'] = df['first'].shift(-1).combine_first(df['last'].shift(1))
df = df.drop(['first', 'l1'], axis=1)
print df
ID Name Rate SecondName
0 1 A 65.5 NaN
1 2 B 67.3 C
2 2 C 78.8 B
3 3 D 65.0 NaN
4 4 E 45.3 NaN
5 5 F 52.0 G
6 5 G 66.0 F
7 6 H 34.0 NaN
8 7 I 2.0 NaN
TESTING: (in time of testing solution of Roman Kh has wrong output)
len(df) = 9:
In [154]: %timeit jez(df1)
100 loops, best of 3: 15 ms per loop
In [155]: %timeit jez2(df2)
100 loops, best of 3: 3.45 ms per loop
In [156]: %timeit rom(df)
100 loops, best of 3: 3.55 ms per loop
len(df) = 90k:
In [158]: %timeit jez(df1)
10 loops, best of 3: 57.1 ms per loop
In [159]: %timeit jez2(df2)
10 loops, best of 3: 36.4 ms per loop
In [160]: %timeit rom(df)
10 loops, best of 3: 40.4 ms per loop
import pandas as pd
mydict = {'ID':[1,2,2,3,4,5,5,6,7],
'Name':['A','B','C','D','E','F','G','H','I'],
'Rate':[65.5,67.3,78.8,65,45.3,52,66,34,2]}
df=pd.DataFrame(mydict)
print df
df = pd.concat([df]*10000).reset_index(drop=True)
df1 = df.copy()
df2 = df.copy()
def jez(df):
def f(x):
#print x
x['Secondname'] = x['Name'].shift(1).combine_first(x['Name'].shift(-1))
return x
return df.groupby('ID').apply(f)
def jez2(df):
#print df.duplicated('ID', keep='first')
#print df.duplicated('ID', keep='last')
df.loc[ df.duplicated('ID', keep='first'), 'first'] = df['Name']
df.loc[ df.duplicated('ID', keep='last'), 'last'] = df['Name']
#print df
df['SecondName'] = df['first'].shift(-1).combine_first(df['last'].shift(1))
df = df.drop(['first', 'last'], axis=1)
return df
def rom(df):
# cpIDs = True if the next row has the same ID
df['cpIDs'] = df['ID'][:-1] == df['ID'][1:]
# fill in the last row (get rid of NaN)
df.iloc[-1,df.columns.get_loc('cpIDs')] = False
# ShiftName == Name of the next row
df['ShiftName'] = df['Name'].shift(-1)
# fill in SecondName
df.loc[df['cpIDs'], 'SecondName'] = df.loc[df['cpIDs'], 'ShiftName']
# remove columns
del df['cpIDs']
del df['ShiftName']
return df
print jez(df1)
print jez2(df2)
print rom(df)
print jez(df1)
ID Name Rate Secondname
0 1 A 65.5 NaN
1 2 B 67.3 C
2 2 C 78.8 B
3 3 D 65.0 NaN
4 4 E 45.3 NaN
5 5 F 52.0 G
6 5 G 66.0 F
7 6 H 34.0 NaN
8 7 I 2.0 NaN
print jez2(df2)
ID Name Rate SecondName
0 1 A 65.5 NaN
1 2 B 67.3 C
2 2 C 78.8 B
3 3 D 65.0 NaN
4 4 E 45.3 NaN
5 5 F 52.0 G
6 5 G 66.0 F
7 6 H 34.0 NaN
8 7 I 2.0 NaN
print rom(df)
ID Name Rate SecondName
0 1 A 65.5 NaN
1 2 B 67.3 C
2 2 C 78.8 NaN
3 3 D 65.0 NaN
4 4 E 45.3 NaN
5 5 F 52.0 G
6 5 G 66.0 NaN
7 6 H 34.0 NaN
8 7 I 2.0 NaN
EDIT:
If there is more duplicated pairs with same names, use shift for creating first and last columns:
df.loc[ df['ID'] == df['ID'].shift(), 'first'] = df['Name']
df.loc[ df['ID'] == df['ID'].shift(-1), 'last'] = df['Name']