I have to loop through a list of over 4,000 items and check their similarity with a recommendation algorithm in Python.
The script takes a long time to run (10-11 hours) and I wanted to incorporate multiprocessing to improve the speed, but I don't know how to do it exactly.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from multiprocessing import Pool

data = pd.read_csv('data.csv', index_col=0, encoding="ISO-8859-1")

# Get list of unique items
itemList = list(set(data["product_ref"].tolist()))

# Get count of customers
userCount = len(set(data["customer_id"].tolist()))

# Create an empty data frame to store item affinity scores for items.
itemAffinity = pd.DataFrame(columns=('item1', 'item2', 'score'))

# Return the list of customers who bought the item at position ind in itemList
def itemUsers(ind):
    return data[data.product_ref == itemList[ind]]["customer_id"].tolist()

rowCount = 0
for ind1 in range(len(itemList)):
    item1Users = itemUsers(ind1)
    # This is where I tried to parallelise the inner loop, but loop2 and
    # data_inputs are not defined -- this is the part I can't work out:
    # pool = Pool()
    # pool.map(loop2, data_inputs)
    for ind2 in range(ind1 + 1, len(itemList)):
        print(ind1, ":", ind2)
        item2Users = itemUsers(ind2)
        # Affinity score = fraction of all customers who bought both items
        commonUsers = len(set(item1Users).intersection(set(item2Users)))
        score = commonUsers / userCount
        itemAffinity.loc[rowCount] = [itemList[ind1], itemList[ind2], score]
        rowCount += 1
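One way the pairwise loop could be parallelised is a minimal sketch along these lines: precompute the customer set of every item once, compute each pair's score in worker processes, and build the DataFrame afterwards (appending to itemAffinity.loc from inside workers won't work, since each process has its own copy). The names itemUserSets and pairScore are illustrative, not part of the original script:

from itertools import combinations
from multiprocessing import Pool

import pandas as pd

data = pd.read_csv('data.csv', index_col=0, encoding="ISO-8859-1")

itemList = list(set(data["product_ref"].tolist()))
userCount = len(set(data["customer_id"].tolist()))

# Precompute the customer set of every item once, instead of filtering the
# DataFrame again for every pair (this alone removes most of the repeated work).
itemUserSets = {
    item: set(data[data.product_ref == item]["customer_id"]) for item in itemList
}

def pairScore(pair):
    # Worker: compute the affinity score of one (item1, item2) pair.
    item1, item2 = pair
    common = len(itemUserSets[item1] & itemUserSets[item2])
    return (item1, item2, common / userCount)

if __name__ == "__main__":
    pairs = list(combinations(itemList, 2))   # every ind1 < ind2 pair
    with Pool() as pool:                      # one worker per CPU core by default
        rows = pool.map(pairScore, pairs, chunksize=1000)
    itemAffinity = pd.DataFrame(rows, columns=("item1", "item2", "score"))

Whether this helps depends on where the time actually goes; if most of it is spent re-filtering the DataFrame in itemUsers, the precomputed sets may already give a large speed-up on their own.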
What is itemUsers doing? Can you give a small example input and expected output for this?
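For illustration, here is a small hypothetical example (the data below is made up, not taken from data.csv) showing what itemUsers returns and what one row of itemAffinity would contain:

import pandas as pd

# Hypothetical input in the same shape as data.csv: one row per purchase
data = pd.DataFrame({
    "customer_id": ["c1", "c1", "c2", "c2", "c3"],
    "product_ref": ["p1", "p2", "p1", "p2", "p2"],
})

itemList = list(set(data["product_ref"].tolist()))   # ['p1', 'p2'] (order may vary)
userCount = len(set(data["customer_id"].tolist()))   # 3 customers

def itemUsers(ind):
    # Customers who bought the item at position ind in itemList
    return data[data.product_ref == itemList[ind]]["customer_id"].tolist()

# itemUsers for 'p1' -> ['c1', 'c2'];  for 'p2' -> ['c1', 'c2', 'c3']
# The pair ('p1', 'p2') is shared by 2 of the 3 customers, so its
# itemAffinity row would be ['p1', 'p2', 0.667].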