I am trying to scrape some data from a website and save it on to csv file. When i get the scaraped data i have a huge space between each line. I want to be able to remove this unnecessary space. Below is my code
from bs4 import BeautifulSoup
import requests
import csv
#URL to be scraped
url_to_scrape = 'https://www.sainsburys.co.uk/shop/gb/groceries/meat-fish/CategoryDisplay?langId=44&storeId=10151&catalogId=10241&categoryId=310864&orderBy=FAVOURITES_ONLY%7CSEQUENCING%7CTOP_SELLERS&beginIndex=0&promotionId=&listId=&searchTerm=&hasPreviousOrder=&previousOrderId=&categoryFacetId1=&categoryFacetId2=&ImportedProductsCount=&ImportedStoreName=&ImportedSupermarket=&bundleId=&parent_category_rn=13343&top_category=13343&pageSize=120#langId=44&storeId=10151&catalogId=10241&categoryId=310864&parent_category_rn=13343&top_category=13343&pageSize=120&orderBy=FAVOURITES_ONLY%7CSEQUENCING%7CTOP_SELLERS&searchTerm=&beginIndex=0&hideFilters=true'
#Load html's plain data into a variable
plain_html_text = requests.get(url_to_scrape)
#parse the data
soup = BeautifulSoup(plain_html_text.text, "lxml")
#
# #Get the name of the class
csv_file = open('sainsburys.csv', 'w')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(['Description','Price'])
for name_of in soup.find_all('li',class_='gridItem'):
name = name_of.h3.a.text
print(name)
try:
price = name_of.find('div', class_='product')
pricen = price.find('div', class_='addToTrolleytabBox').p.text
print(pricen)
csv_writer.writerow([name, pricen])
except:
print('Sold Out')
print()
csv_writer.writerow([name, pricen])
csv_file.close()
The results that i get is this:
J. James Chicken Goujons 270g
£1.25/unit
Sainsbury's Chicken Whole Bird (approx. 0.9-1.35kg)
£1.90/kg
Sainsbury's British Fresh Chicken Fajita Mini Fillets 320g
£2.55/unit
Sainsbury's Slow Cook Fire Cracker Chicken 573g
£4.75/unit
Thank you