I am trying to use multiprocessing library to speed up CSV reading from files. I've done so using Pool and now I'm trying to do it with Process(). However when running the code, it's giving me the following error:
AttributeError: 'tuple' object has no attribute 'join'
Can someone tell me what's wrong? I don't understand the error.
import glob
import pandas as pd
from multiprocessing import Process
import matplotlib.pyplot as plt
import os
location = "/home/data/csv/"
uber_data = []
def read_csv(filename):
return uber_data.append(pd.read_csv(filename))
def data_wrangling(uber_data):
uber_data['Date/Time'] = pd.to_datetime(uber_data['Date/Time'], format="%m/%d/%Y %H:%M:%S")
uber_data['Dia Setmana'] = uber_data['Date/Time'].dt.weekday_name
uber_data['Num dia'] = uber_data['Date/Time'].dt.dayofweek
return uber_data
def plotting(uber_data):
weekdays = uber_data.pivot_table(index=['Num dia','Dia Setmana'], values='Base', aggfunc='count')
weekdays.plot(kind='bar', figsize=(8,6))
plt.ylabel('Total Journeys')
plt.title('Journey on Week Day')
def main():
processes = []
files = list(glob.glob(os.path.join(location,'*.csv*')))
for i in files:
p = Process(target=read_csv, args=[i])
processes.append(p)
p.start()
for process in enumerate(processes):
process.join()
#combined_df = pd.concat(df_list, ignore_index=True)
#dades_mod = data_wrangling(combined_df)
#plotting(dades_mod)
main()
Thank you.