I am trying to scrape the data from https://www.similarweb.com/website/zalando.de/#overview using Python and Selenium. The difficult part is that the data only appears when a point on the graph is hovered over.
Here's my code.
websites = ['https://www.similarweb.com/website/zalando.de/#overview']
options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
browser = webdriver.Chrome(ChromeDriverManager().install(), options=options)
delays = [7, 4, 6, 2, 10, 19]
delay = np.random.choice(delays)
for crawler in websites:
browser.get(crawler)
time.sleep(2)
time.sleep(delay)
tooltip = browser.find_element(By.XPATH, "//*[local-name() = 'svg']/*[local-name()='g'][8]/*[local-name()='text']")
ActionChains(browser).move_to_element(tooltip).perform()
month_value = browser.find_element(By.XPATH, "//*[local-name() = 'svg']/*[local-name()='g' and @class='highcharts-tooltip']/*[local-name()='text']")
print('Are they here?', month_value.text)
months = browser.find_elements(By.XPATH, "//*[local-name() = 'svg']/*[local-name()='g'][6]/*/*")
for date in months:
print(date.text)
I can print the months data as:
Nov '20
Dec '20
Jan '21
Feb '21
Mar '21
Apr '21
But not able to print the values of each month- it gives an empty print -Are they here?
How do I ensure that it is hovered first and then scraped? Please help
EDIT : Here's the updated code
def website_monitoring():
websites = ['https://www.similarweb.com/website/zalando.de/#overview']
options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
browser = webdriver.Chrome(ChromeDriverManager().install(), options=options)
for crawler in websites:
browser.get(crawler)
wait = WebDriverWait(browser, 10)
months = []
monthly_values = []
charts = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="highcharts-0"]')))
highchart = browser.find_elements_by_xpath('//*[@id="highcharts-0"]/svg/g[4]/g[1]')
for elements in highchart:
hover = ActionChains(browser).move_to_element(elements)
hover.perform()
month = browser.find_elements_by_css_selector('#highcharts-0 > svg > g.highcharts-tooltip > text > tspan:nth-child(1)')
month_values = browser.find_elements_by_css_selector('#highcharts-0 > svg > g.highcharts-tooltip > text > tspan:nth-child(3)')
months.append(month[0].text)
monthly_values.append(month_values[0].text)
print('Months', months)
print('Monthly Values', monthly_values)
if __name__ == "__main__":
website_monitoring()
The output that I get is:
Months []
Monthly Values []