I'm trying to scrape this website, but there are some forms to fill in first.
The main objective is to fill in these 5 forms (each one appears after selecting a value in another) and download the data through the "Consultar" button.
These forms are coded in JavaScript and I can't find them in the page's HTML code. When I inspect the frames through Google Chrome, I can find the forms' IDs, but my code doesn't find them.
I only have a prototype of my code so far. I can't make progress without knowing how to find these forms.
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import os
# Variables
url = 'http://www.anbima.com.br/pt_br/informar/sistema-reune.htm'
path_phantom = 'C:\\Users\\TBMEPYG\\AppData\\Local\\Continuum\\Anaconda3\\Lib\\site-packages\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe'
# Processing
# Start a PhantomJS-backed WebDriver session using the executable above.
driver = webdriver.PhantomJS(executable_path=path_phantom)
try:
    driver.get(url)
    # Look up the element with id 'data_ref' and type the date string into it.
    data = driver.find_element_by_id('data_ref')
    data.send_keys("21/08/2017")
finally:
    # Always end the session, even when the lookup above raises, so that no
    # PhantomJS process is left running after a failed run.
    driver.quit()
Edit:
I updated the code to this:
from selenium import webdriver
# Path to the PhantomJS executable that Selenium will launch.
path_phantom = 'C:\\Users\\TBMEPYG\\AppData\\Local\\Continuum\\Anaconda3\\Lib\\site-packages\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe'
driver = webdriver.PhantomJS(executable_path= path_phantom)
driver.get('http://www.anbima.com.br/reune/reune.asp')
# Move the driver's context into the iframe whose class is "full", so that
# the element lookups below search inside that frame.
driver.switch_to.frame(driver.find_element_by_xpath('//iframe[@class="full"]'))
data = driver.find_element_by_name('Dt_Ref')
# Empty the field before typing the new value into it.
data.clear()
data.send_keys('21/08/2017')
And I got this error:
CD: C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3
Current directory: C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3
python "C:\Users\TBMEPYG\Desktop\vamo.py"
Process started >>>
Traceback (most recent call last):
File "C:\Users\TBMEPYG\Desktop\vamo.py", line 8, in <module>
data = driver.find_element_by_name('Dt_Ref')
File "C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 426, in find_element_by_name
return self.find_element(by=By.NAME, value=name)
File "C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 832, in find_element
'value': value})['value']
File "C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 297, in execute
self.error_handler.check_response(response)
File "C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: {"errorMessage":"Unable to find element with name 'Dt_Ref'","request":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Connection":"close","Content-Length":"89","Content-Type":"application/json;charset=UTF-8","Host":"127.0.0.1:62040","User-Agent":"Python http auth"},"httpVersion":"1.1","method":"POST","post":"{\"using\": \"name\", \"value\": \"Dt_Ref\", \"sessionId\": \"bdd3fc70-8dd0-11e7-aeb1-85b8cfbe0d1c\"}","url":"/element","urlParsed":{"anchor":"","query":"","file":"element","directory":"/","path":"/element","relative":"/element","port":"","host":"","password":"","user":"","userInfo":"","authority":"","protocol":"","source":"/element","queryKey":{},"chunks":["element"]},"urlOriginal":"/session/bdd3fc70-8dd0-11e7-aeb1-85b8cfbe0d1c/element"}}
Screenshot: available via screen
Edit2:
Another possibility is to use the link inside the main page: http://www.anbima.com.br/reune/reune.asp
When I changed the code to this, I got another error:
from selenium import webdriver
# Path to the PhantomJS executable that Selenium will launch.
path_phantom = 'C:\\Users\\TBMEPYG\\AppData\\Local\\Continuum\\Anaconda3\\Lib\\site-packages\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe'
# Start a PhantomJS-backed WebDriver session.
driver = webdriver.PhantomJS(executable_path= path_phantom)
# Load the page directly; no switch_to.frame call is made in this version,
# so the lookup below runs against the top-level document.
driver.get('http://www.anbima.com.br/reune/reune.asp')
# Find the element named 'Dt_Ref', empty it, then type the date string.
data = driver.find_element_by_name('Dt_Ref')
data.clear()
data.send_keys('21/08/2017')
Error:
Traceback (most recent call last):
File "C:\Users\TBMEPYG\Desktop\vamo.py", line 9, in <module>
data = driver.find_element_by_name('Dt_Ref')
File "C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 426, in find_element_by_name
return self.find_element(by=By.NAME, value=name)
File "C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 832, in find_element
'value': value})['value']
File "C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 297, in execute
self.error_handler.check_response(response)
File "C:\Users\TBMEPYG\AppData\Local\Continuum\Anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: {"request":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Connection":"close","Content-Length":"89","Content-Type":"application/json;charset=UTF-8","Host":"127.0.0.1:61820","User-Agent":"Python http auth"},"httpVersion":"1.1","method":"POST","post":"{\"using\": \"name\", \"value\": \"Dt_Ref\", \"sessionId\": \"e61dd170-8dcf-11e7-a019-41573671066b\"}","url":"/element","urlParsed":{"anchor":"","query":"","file":"element","directory":"/","path":"/element","relative":"/element","port":"","host":"","password":"","user":"","userInfo":"","authority":"","protocol":"","source":"/element","queryKey":{},"chunks":["element"]},"urlOriginal":"/session/e61dd170-8dcf-11e7-a019-41573671066b/element"}}
Screenshot: available via screen
data = driver.find_element_by_name('Dt_Ref')