in this part of scraping code , I fetch alot of URLs from stored URLs in (url.xml) file and it is take so long to finish, how to implement (multiprocessing pool)
any simple code to fix this problem ? Thanks
from bs4 import BeautifulSoup as soup
import requests
from multiprocessing import Pool
p = Pool(10) # “10” means that 10 URLs will be processed at the same time
p.map
page_url = "url.xml"
out_filename = "prices.csv"
headers = "availableOffers,otherpricess,currentprice \n"
with open(out_filename, "w") as fw:
fw.write(headers)
with open("url.xml", "r") as fr:
for url in map(lambda x: x.strip(), fr.readlines()):
print(url)
response = requests.get(url)
page_soup = soup(response.text, "html.parser")
availableOffers = page_soup.find("input", {"id": "availableOffers"})
otherpricess = page_soup.find("span", {"class": "price"})
currentprice = page_soup.find("div", {"class": "is"})
fw.write(availableOffers + ", " + otherpricess + ", " + currentprice + "\n")
p.terminate()
p.join()
p.terminate() p.join()but still not work