Have been trying many things for the last few hours on this. However, strangely when selenium opens the new windows, it switches to it but instead of scraping data from the new page, if keeps closing the previous windows and scrapes data from them. Also, it keeps opening all the links instead of the next one. Sort of hit a wall with this one. Any help is appreciated. Thanks.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
driver = webdriver.Chrome(executable_path='C:/chromedriver.exe')
actions = ActionChains(driver)
search_term = input("Enter your search term :")
url = f'https://www.sciencedirect.com/search?qs={search_term}&years=2021%2C2020%2C2019&lastSelectedFacet=years'
driver.get(url)
driver.maximize_window()
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH,'/html/body/div[3]/div/div/div/button/span'))).click()
divs = driver.find_elements_by_class_name('result-item-content')
links = []
for div in divs:
link = div.find_element_by_tag_name('a')
links.append(link)
def get_data():
actions.key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform()
par_guid = driver.current_window_handle
allguid = driver.window_handles
for guid in allguid:
if guid != par_guid:
driver.switch_to.window(guid)
break
author_group = driver.find_element_by_id('author-group')
for author in author_group.find_elements_by_css_selector("a.author"):
try:
given_name = author.find_element_by_css_selector(".given-name").text
surname = author.find_element_by_css_selector(".surname").text
except NoSuchElementException:
print("Could not extract first or last name")
continue
try:
mail_icon = author.find_element_by_css_selector(".icon-envelope")
mail_icon.click()
mail_icon_present = True
mail = driver.find_element_by_class_name('e-address')
print(mail.text)
except NoSuchElementException:
mail_icon_present = False
print(f"Author {given_name} {surname}. Mail icon present: {mail_icon_present}")
driver.close()
driver.switch_to.window(par_guid)
for link in links:
get_data()
requestsorreqeusts-HTMLlibraries for it and parser it with another parser? requests-HTML requestsdriver.get(link)so no new tab opens?