Here is my code:
from download1 import download
import threading,lxml.html
def getInfo(initial, ending, sink=None):
    """Scrape the country pages whose IDs fall in ``range(initial, ending)``.

    For every ID the page
    ``http://example.webscraping.com/places/default/view/<ID>`` is fetched
    with ``download`` and one row of cell texts is extracted, in the order
    of the field names listed below.

    Args:
        initial: First page ID to fetch (inclusive).
        ending: Page ID to stop at (exclusive, as with ``range``).
        sink: Optional object with an ``append(row)`` method. Each row is
            handed to it as soon as it has been scraped, which lets a caller
            that runs this function in a thread (where the return value is
            not available) collect the rows. When the same sink is shared
            between threads, choosing a thread-safe container is the
            caller's responsibility.

    Returns:
        A list with one row per page; each row is a list of strings.

    Raises:
        IndexError: If a page has no table cell for one of the fields.
    """
    # The field names never change between pages, so build them once
    # instead of on every loop iteration.
    fields = ('country', 'area', 'population', 'iso', 'capital', 'continent',
              'tld', 'currency_code', 'currency_name', 'phone',
              'postal_code_format', 'postal_code_regex', 'languages',
              'neighbours')
    rows = []
    for number in range(initial, ending):
        url = 'http://example.webscraping.com/places/default/view/%d' % number
        # NOTE(review): presumably download() returns the page's HTML text;
        # confirm against download1.
        html = download(url)
        tree = lxml.html.fromstring(html)
        row = []
        for field in fields:
            cells = tree.cssselect(
                'table > tr#places_%s__row >td.w2p_fw' % field)
            row.append(cells[0].text_content())
        # Keep the row instead of discarding it, so the caller can write it
        # out (for example to a CSV file) once scraping has finished.
        rows.append(row)
        if sink is not None:
            sink.append(row)
    return rows
# Split the page IDs 1..252 across four worker threads, 63 IDs per thread.
IDS_PER_THREAD = 63

downloadthreads = []
for i in range(1, 252, IDS_PER_THREAD):
    # getInfo treats its second argument as an exclusive stop, so the stop
    # has to be i + IDS_PER_THREAD; a stop of i + 62 would leave IDs 63,
    # 126, 189 and 252 unfetched.
    downloadThread = threading.Thread(
        target=getInfo, args=(i, i + IDS_PER_THREAD))
    downloadthreads.append(downloadThread)
    downloadThread.start()

# Block until every worker has finished before reporting completion.
for threadobj in downloadthreads:
    threadobj.join()

# Parenthesised form prints the same text on Python 2 and also parses on
# Python 3.
print("Done")
So `results` will hold the values for each entry in `Fields`. I need to write the data to a CSV file, with `Fields` as the top row (written only once), followed by the values in `results`.
I am not sure I can open the file inside the function, because the threads would open the file multiple times simultaneously.
Note: I know threading isn't desirable when crawling, but I am just testing.