Very new to Python and Selenium, looking to scrape a few data points. I'm struggling in three areas:
- I don't understand how to loop through multiple URLs properly
- I can't figure out why the script is iterating twice over each URL
- I can't figure out why it's only outputting the data for the second URL
Much thanks for taking a look!
Here's my current script:
urls = [
'https://developers.google.com/speed/pagespeed/insights/?url=https://www.crutchfield.com/%2F&tab=mobile',
'https://developers.google.com/speed/pagespeed/insights/?url=https://www.lastpass.com%2F&tab=mobile'
]
driver = webdriver.Chrome(executable_path='/Library/Frameworks/Python.framework/Versions/3.9/bin/chromedriver')
for url in urls:
for page in range(0, 1):
driver.get(url)
wait = WebDriverWait(driver, 120).until(EC.presence_of_element_located((By.CLASS_NAME, 'origin-field-data')))
df = pd.DataFrame(columns = ['Title', 'Core Web Vitals', 'FCP', 'FID', 'CLS', 'TTI', 'TBT', 'Total Score'])
company = driver.find_elements_by_class_name("audited-url__link")
data = []
for i in company:
data.append(i.get_attribute('href'))
for x in data:
#Get URL name
title = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[1]/div/div[2]/h1/a')
co_name = title.text
#Get Core Web Vitals text pass/fail
cwv = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[2]/div/div[1]/div[1]/div[1]/span[2]')
core_web = cwv.text
#Get FCP
fcp = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[2]/div/div[1]/div[1]/div[2]/div[1]/div[1]/div')
first_content = fcp.text
#Get FID
fid = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[2]/div/div[1]/div[1]/div[2]/div[3]/div[1]/div')
first_input = fid.text
#Get CLS
cls = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[2]/div/div[1]/div[1]/div[2]/div[4]/div[1]/div')
layout_shift = cls.text
#Get TTI
tti = driver.find_element_by_xpath('//*[@id="interactive"]/div/div[1]')
time_interactive = tti.text
#Get TBT
tbt = driver.find_element_by_xpath('//*[@id="total-blocking-time"]/div/div[1]')
total_block = tbt.text
#Get Total Score
total_score = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[1]/div/div[1]/a/div[2]')
score = total_score.text
#Adding all columns to dataframe
df.loc[len(df)] = [co_name,core_web,first_content,first_input,layout_shift,time_interactive,total_block,score]
driver.close()
#df.to_csv('Double Page Speed Test 9-10.csv')
print(df)