I am using Soup and Selenium to access this page https://www.chewy.com/blue-buffalo-basics-limited/dp/37047 and trying to get a list of all packaging types' prices and ratings.
Below is my code:
import requests
import time
from bs4 import BeautifulSoup
from selenium import webdriver
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0'}
# use Selenium to get buttons through all pages
test_url = 'https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')
btn_count = []
for btn_cnt in test.select('.js-sku-selector > div'):
btn_cnt = btn_cnt['data-attributes'].count('isSelected')
btn_count.append(btn_cnt)
buttons = list(range(1,btn_cnt+1))
xpath = []
for b in buttons:
btn_path = '//*[@id="variation-Size"]/div[2]/div[' + str(b) + ']/div/label'
print(btn_path)
xpath.append(btn_path)
print('{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'.format('brand', 'product', 'id','auto_ship', 'regular','rating'))
for btn in xpath:
test_url = 'https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')
driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
driver.get(test_url)
time.sleep(5)
driver.find_element_by_xpath(btn).click()
time.sleep(5)
for brand, product, id, auto_ship, price, rating in zip(test.findAll('span', attrs={'itemprop': 'brand'}),
test.findAll('div', attrs={'id': 'product-title'}),
test.findAll('div', attrs={'class': 'value js-part-number'}),
test.findAll('p', attrs={'class': 'autoship-pricing p'}),
test.findAll('span', attrs={'class': 'ga-eec__price'}),
test.select('div.ugc')):
#date = date.today()
brand = brand.text
product = ' '.join(product.h1.text.split())
id = ' '.join(id.span.text.split())
p1 = auto_ship.text.index('(')
auto_ship = ' '.join(auto_ship.text[:p1].split())
regular_price = ' '.join(price.text.split())
rating = rating.picture.img['src'][-7:-4].replace('_', '.')
print('{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'.format(brand, product, id, auto_ship, regular_price, rating))
driver.quit()
I would expect the data to be different for the three different buttons, but it seems it is only returning the value from the default page.
Is there anything else I should do to dynamically insert values for each button?
The HTML looks like
I copied the xpath of labels. It does bring me to the target view for different packages and the underlying HTML values do change. However, my print statment is still getting it from the main page. Any recommendation?


