I am repeating this code so i can scrape some news from a website. On weekends and holidays, the website is not refreshed, there are only news on workdays.
This is the solution i created for the code to get the previous workday news, but i know its not the right way to be programming.
import smtplib, ssl
import datetime, time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException,StaleElementReferenceException, ElementClickInterceptedException
options = webdriver.ChromeOptions()
#options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 10)
driver.get('https://fiis.com.br/atualizacoes/')
driver.implicitly_wait(10)
time.sleep(2)
#here I try to close a new html popup that has been there since last week
try:
driver.find_element_by_xpath('/html/body/div[1]/div/button').click()
except NoSuchElementException:
print("NoSuchElementException")
try:
driver.find_element_by_xpath('/html/body/div[2]/div/button').click()
except NoSuchElementException:
print("NoSuchElementException")
except ElementClickInterceptedException:
try:
driver.find_element_by_xpath('/html/body/div[2]/div/button').click()
except NoSuchElementException:
print("NoSuchElementException")
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '[data-type="date"]'))).click()
#here I try diferent dates, first a today minus 1, then today minus 2 ... until today minus 4
try:
today = datetime.date.today()
five_day = datetime.timedelta(days=-1)
d_N1 = today + five_day
d_N1_2 = d_N1.strftime('%Y-%m-%d')
d_N1_3 = d_N1.strftime('%d.%m.%Y')
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, f'li[data-item="{d_N1_2}"]')))
driver.find_element_by_css_selector(f'li[data-item="{d_N1_2}"]').click()
except TimeoutException or NoSuchElementException:
try:
today = datetime.date.today()
five_day = datetime.timedelta(days=-2)
d_N1 = today + five_day
d_N1_2 = d_N1.strftime('%Y-%m-%d')
d_N1_3 = d_N1.strftime('%d.%m.%Y')
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, f'li[data-item="{d_N1_2}"]')))
driver.find_element_by_css_selector(f'li[data-item="{d_N1_2}"]').click()
except TimeoutException or NoSuchElementException:
try:
today = datetime.date.today()
five_day = datetime.timedelta(days=-3)
d_N1 = today + five_day
d_N1_2 = d_N1.strftime('%Y-%m-%d')
d_N1_3 = d_N1.strftime('%d.%m.%Y')
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, f'li[data-item="{d_N1_2}"]')))
driver.find_element_by_css_selector(f'li[data-item="{d_N1_2}"]').click()
except TimeoutException or NoSuchElementException:
try:
today = datetime.date.today()
five_day = datetime.timedelta(days=-3)
d_N1 = today + five_day
d_N1_2 = d_N1.strftime('%Y-%m-%d')
d_N1_3 = d_N1.strftime('%d.%m.%Y')
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, f'li[data-item="{d_N1_2}"]')))
driver.find_element_by_css_selector(f'li[data-item="{d_N1_2}"]').click()
except TimeoutException or NoSuchElementException:
driver.close()
driver.quit()
If yesterdays news is avaiable, i dont need to get the previous day.
4 days wont be enough in the future, so a better solution can be thought of.