0

Have been trying many things for the last few hours on this. However, strangely when selenium opens the new windows, it switches to it but instead of scraping data from the new page, if keeps closing the previous windows and scrapes data from them. Also, it keeps opening all the links instead of the next one. Sort of hit a wall with this one. Any help is appreciated. Thanks.

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException


driver = webdriver.Chrome(executable_path='C:/chromedriver.exe')
actions = ActionChains(driver)
search_term = input("Enter your search term :")
url = f'https://www.sciencedirect.com/search?qs={search_term}&years=2021%2C2020%2C2019&lastSelectedFacet=years'
driver.get(url)
driver.maximize_window()

WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH,'/html/body/div[3]/div/div/div/button/span'))).click()
divs = driver.find_elements_by_class_name('result-item-content')
links = []
for div in divs:
    link = div.find_element_by_tag_name('a')
    links.append(link)


def get_data():
    actions.key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform()
    par_guid = driver.current_window_handle
    allguid = driver.window_handles
    for guid in allguid:
        if guid != par_guid:
            driver.switch_to.window(guid)
            break
    author_group = driver.find_element_by_id('author-group')
    for author in author_group.find_elements_by_css_selector("a.author"):
        try:
            given_name = author.find_element_by_css_selector(".given-name").text
            surname = author.find_element_by_css_selector(".surname").text

        except NoSuchElementException:
            print("Could not extract first or last name")
            continue

        try:
            mail_icon = author.find_element_by_css_selector(".icon-envelope")
            mail_icon.click()
            mail_icon_present = True
            mail = driver.find_element_by_class_name('e-address')
            print(mail.text)

        except NoSuchElementException:
            mail_icon_present = False
            print(f"Author {given_name} {surname}. Mail icon present: {mail_icon_present}")
    driver.close()
    driver.switch_to.window(par_guid)

for link in links:
    get_data()
6
  • Hello! hmmm those links that are being opened to a new tab... you do some scraping there right? Commented Dec 19, 2020 at 7:00
  • Yes..but instead of scraping the new ones and closing them..it is opening all the links again. Scraping the old pages as well. Commented Dec 19, 2020 at 7:01
  • Hmm... how about you may consider instead of opening the links to a new tab is you can use python requests or reqeusts-HTML libraries for it and parser it with another parser? requests-HTML requests Commented Dec 19, 2020 at 7:06
  • No. I need to click on the new page. So, no requests. Commented Dec 19, 2020 at 7:11
  • Could you instead of clicking on the link try to driver.get(link) so no new tab opens? Commented Dec 19, 2020 at 8:46

1 Answer 1

1

The problem part was this line actions.key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform() I think this is not the right way to open a new tab if you are dealing with multiple links. I changed it to driver.execute_script('window.open(arguments[0]);', link) . I also let go of saving the links to a list. This works because there are only two windows open at a time. The home window and the link we clicked on. So, In entirety the code becomes

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException


driver = webdriver.Chrome(executable_path='C:/chromedriver.exe')
actions = ActionChains(driver)
search_term = input("Enter your search term :")
url = f'https://www.sciencedirect.com/search?qs={search_term}&years=2021%2C2020%2C2019&lastSelectedFacet=years'
driver.get(url)
driver.maximize_window()

WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH,'/html/body/div[3]/div/div/div/button/span'))).click()
divs = driver.find_elements_by_class_name('result-item-content')
for div in divs:
    parent_window = driver.current_window_handle
    link = div.find_element_by_tag_name('a')
    driver.execute_script('window.open(arguments[0]);', link)
    all_windows = driver.window_handles
    child_window = [window for window in all_windows if window != parent_window][0]
    driver.switch_to.window(child_window)
    title = driver.find_element_by_tag_name('h1')
    print("Article Title:- ",title.text)
    author_group = driver.find_element_by_id('author-group')
    for author in author_group.find_elements_by_css_selector("a.author"):
        try:
            given_name = author.find_element_by_css_selector(".given-name").text
            surname = author.find_element_by_css_selector(".surname").text

        except NoSuchElementException:
            print("Could not extract first or last name")
            continue

        try:
            mail_icon = author.find_element_by_css_selector(".icon-envelope")
            mail_icon.click()

            mail = driver.find_element_by_class_name('e-address')
            print(mail.text)

        except NoSuchElementException:
            print(f"Author {given_name} {surname}")
    driver.close()
    driver.switch_to.window(parent_window)
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.