The idea is to load the page with PhantomJS, wait for the contents of the workbook to load, and collect all the parameters needed for the request to the download file handler endpoint; that request can then be made with the `requests` package.
Full working solution:
import json
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Value sent as the 'workbookType' query parameter of the download request.
WORKBOOK_TYPE = "PublishedItemsSnapshot"

# Render the Excel viewer page in a headless browser so that the hidden
# form fields holding the download parameters get populated.
driver = webdriver.PhantomJS()
try:
    driver.maximize_window()
    driver.get('http://www.cbe.org.eg/en/EconomicResearch/Publications/_layouts/xlviewer.aspx?id=/MonthlyStatisticaclBulletinDL/External%20Sector%20Data%20235.xlsx&DefaultItemOpen=1#')

    wait = WebDriverWait(driver, 10)
    # The row-headers element is used as the signal that the workbook
    # contents have finished loading.
    wait.until(EC.presence_of_element_located((By.ID, "ctl00_PlaceHolderMain_m_excelWebRenderer_ewaCtl_rowHeadersDiv")))

    # get workbook uri (stored as JSON in a hidden input's value attribute)
    hidden_input = wait.until(EC.presence_of_element_located((By.ID, "ctl00_PlaceHolderMain_m_excelWebRenderer_ewaCtl_m_workbookContextJson")))
    workbook_uri = json.loads(hidden_input.get_attribute('value'))['EncryptedWorkbookUri']

    # get session id
    session_id = driver.find_element(By.ID, "ctl00_PlaceHolderMain_m_excelWebRenderer_ewaCtl_m_workbookId").get_attribute("value")

    # get workbook filename
    workbook_filename = driver.find_element(By.XPATH, "//h2[contains(@class, 's4-mini-header')]/span[contains(., '.xlsx')]").text
finally:
    # quit() ends the whole WebDriver session and its browser process;
    # close() would only close the current window. Running it in `finally`
    # guarantees cleanup even when a wait times out or an element is missing.
    driver.quit()
print("Downloading workbook '%s'..." % workbook_filename)

# Request the workbook from the file handler endpoint using the parameters
# scraped from the viewer page. stream=True defers fetching the body so that
# iter_content() below really downloads it chunk by chunk instead of
# buffering the whole file in memory first.
response = requests.get(
    "http://www.cbe.org.eg/en/EconomicResearch/Publications/_layouts/XlFileHandler.aspx",
    params={
        'id': workbook_uri,
        'sessionId': session_id,
        'workbookFileName': workbook_filename,
        'workbookType': WORKBOOK_TYPE,
    },
    stream=True,
)
# Fail loudly on an HTTP error rather than saving an error page under the
# workbook's file name.
response.raise_for_status()

with open(workbook_filename, 'wb') as f:
    for chunk in response.iter_content(chunk_size=1024):
        if chunk:  # filter out keep-alive new chunks
            f.write(chunk)
Could you print `driver.page_source` once you open this page on the screenshot and add the relevant part of the HTML (the menu with the "Download a Snapshot" link) to the question? Thanks.