I tried debugging my program with print statements to see what was going on during each iteration.
This part works fine: The program goes through a total of 50 combinations of the drop-down menus (25 for each year).
This part isn't working: However, for some reason the totals dictionary is only storing the inputs from the second iteration of the initial "year" for-loop. It is returning a dictionary with a length of 25 (only half of what I actually want).
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
# General Stuff about the website
path = '/Users/admin/desktop/projects/scraper/chromedriver'
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path=path)
website = 'http://siops.datasus.gov.br/filtro_rel_ges_covid_municipal.php'
driver.get(website)
# Initial Test: printing the title
print(driver.title)
print()
# Dictionary to Store stuff in
totals = {}
### Drop Down Menus ###
state_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbUF"]'))
state_options = state_select.options
year_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbAno"]'))
year_options = year_select.options
# county_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbMunicipio"]'))
# county_select.select_by_value('120025')
# report_select = Select(driver.find_element(By.XPATH, '//*[@id="gesRelatorio"]'))
# report_select.select_by_value('rel_ges_covid_rep_uniao_municipal.php')
# period_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbPeriodo"]'))
# period_select.select_by_value('14')
### Loop through all combinations ###
for year in range(1, 3):
year_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbAno"]'))
year_select.select_by_index(year)
for index in range(0, len(state_options) - 1):
state_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbUF"]'))
state_select.select_by_index(index)
# Click the Submit Button
submit_button = driver.find_element(By.XPATH, '//*[@id="container"]/div[2]/form/div[2]/div/input[2]')
submit_button.click()
# Pulling data from the webpage
nameof = driver.find_element(By.XPATH, '//*[@id="arearelatorio"]/div[1]/div/table[1]/tbody/tr[2]').text
total_balance = driver.find_element(By.XPATH, '//*[@id="arearelatorio"]/div[1]/div/table[3]/tbody/tr[9]/td[2]').text
paid_expenses = driver.find_element(By.XPATH, '//*[@id="arearelatorio"]/div[1]/div/table[4]/tbody/tr[11]/td[4]').text
# Update Dictionary with the new info
totals.update({nameof: [total_balance, paid_expenses, year]})
print([nameof, year])
driver.back()
# Print the final Dictionary and quit
print(len(totals))
print(totals)
driver.quit()