I need to write data to a csv file, I am currently engaged in parsing an online store, there is a different number of characteristics on each product, for example: weight, length, etc. I am trying to write data using pandas, but I can't write all the data correctly into the dictionary, tell me how to do it correctly
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import requests
import time
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
URLS = ['https://www.ikea.com/ru/ru/p/pahl-pol-pismennyy-stol-belyy-s29278422/','https://www.ikea.com/ru/ru/p/micke-mikke-pismennyy-stol-belyy-20373923/']
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}
content = []
colum = []
driver = webdriver.Chrome(ChromeDriverManager().install())
def get_html(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
return r
def get_specifications_parameter():
num = 0
WebDriverWait(driver, 5).until(expected_conditions.visibility_of_element_located((By.XPATH, '//*[@id="content"]/div/div[1]/div/div[2]/div[2]/div[2]/div[2]/button')))
driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div/div[2]/div[2]/div[2]/div[2]/button').click()
time.sleep(3)
table = driver.find_element_by_xpath('//*[@id="range-modal-mount-node"]/div/div[4]/div/div[2]/div/div/div').get_attribute('innerHTML')
soup = BeautifulSoup(table,'html.parser')
param = soup.find_all('dd',class_='range-revamp-product-dimensions__list-item-measure')
titles = soup.find_all('dt',class_='range-revamp-product-dimensions__list-item-name')
for item in titles:
if item.text in colum:
pass
else:
colum.append(item.text)
for item in param:
content.append({titles[num].text:item.text}) #Writing characteristics to content
num+=1
print(content)
def get_content(url):
driver.get(url)
get_specifications_parameter()
# content.append[{
# 'name':name,
# 'price':price,
# 'photo':photo,
# 'description':description
# }]
# Additional data to be recorded
print(content)
def start():
for URL in URLS:
html = get_html(URL)
if html.status_code == 200:
get_content(URL)
else:
print('Network error')
def write():
df = pd.DataFrame(colum)
for p in content:
df = pd.concat([df, pd.DataFrame(p,index=[0])],ignore_index=True)
df.to_csv("output.csv", index=False)
start()
write()
At the output to the content, I get this
[
{"Ширина:\xa0": "128 см"},
{"Глубина:\xa0": "58 см"},
{"Мин высота:\xa0": "59 см"},
{"Макс высота:\xa0": "72 см"},
{"Макс нагрузка:\xa0": "50 кг"},
]
[
{"Ширина:\xa0": "128 см"},
{"Глубина:\xa0": "58 см"},
{"Мин высота:\xa0": "59 см"},
{"Макс высота:\xa0": "72 см"},
{"Макс нагрузка:\xa0": "50 кг"},
]
[
{"Ширина:\xa0": "128 см"},
{"Глубина:\xa0": "58 см"},
{"Мин высота:\xa0": "59 см"},
{"Макс высота:\xa0": "72 см"},
{"Макс нагрузка:\xa0": "50 кг"},
{"Ширина:\xa0": "73 см"},
{"Глубина:\xa0": "50 см"},
{"Высота:\xa0": "75 см"},
{"Макс нагрузка:\xa0": "50 кг"},
]
[
{"Ширина:\xa0": "128 см"},
{"Глубина:\xa0": "58 см"},
{"Мин высота:\xa0": "59 см"},
{"Макс высота:\xa0": "72 см"},
{"Макс нагрузка:\xa0": "50 кг"},
{"Ширина:\xa0": "73 см"},
{"Глубина:\xa0": "50 см"},
{"Высота:\xa0": "75 см"},
{"Макс нагрузка:\xa0": "50 кг"},
]
And I want to separate this
products = [
{
"артикул": 12345,
"высота": 50,
"материал": "дерево",
},
{
"артикул": 12346,
"ширина": 30,
"вес": 1.5,
},
{
"артикул": 12347,
"длина": 14,
"высота": 6.2,
"материал": "пластик",
},
]
To get such a file as a link https://drive.google.com/file/d/1uGoW1kpsDGDA-Zh7SiiCDcg9cf2lHQUd/view?usp=sharing
contentshould just be a dict, not a list you append 1-length dicts into.