import pandas as pd
data = {'A': [1,2],
'B':[[1,1,1,2,2,4,4,4,4],[5, 4, 8, 1, 1, 1, 3, 2, 4, 2, 2, 2, 1, 1, 1]]}
df = pd.DataFrame(data)
| A | B |
|---|---|
| 1 | [1, 1, 1, 2, 2, 4, 4, 4, 4] |
| 2 | [5, 4, 8, 1, 1, 1, 3, 2, 4, 2, 2, 2, 1, 1, 1] |
def top_frequent(a):
import numpy
k = {}
for j in a:
if j in k:
k[j] +=1
else:
k[j] =1
occ = []
for key, val in k.items():
occ.append(val)
Z = numpy.percentile(occ, 75, interpolation='higher')
print(Z)
bucket = [[] for l in range(len(a)+1)]
for key, val in k.items():
if val >= Z :
if val != 1 :
bucket[val].append(key)
res = []
for i in reversed(range(len(bucket))):
if bucket[i]:
res.extend(bucket[i])
return res
df['C'] = df.apply(top_frequent(df['B']))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_13728/2052560572.py in <module>
28 return res
29
---> 30 df['C'] = df.apply(top_frequent(df['B']))
~\AppData\Local\Temp/ipykernel_13728/2052560572.py in top_frequent(ids)
4 k = {}
5 for j in ids:
----> 6 if j in k:
7 k[j] +=1
8 else:
TypeError: unhashable type: 'list'
When I apply the function on just one row it works fine But when I apply it for all lines I get this error : TypeError: unhashable type: 'list'
df.B.apply(top_frequent)does this do what you want?