
On this page there is a button that reads Download CSV: http://www.nasdaqomxnordic.com/aktier/microsite?Instrument=CSE77855&name=Pandora&ISIN=DK0060252690

How can I download the file using Python?

The relevant HTML of the page reads:

<a class="floatRight exportTrades" id="exportIntradayTradesCSV">Download CSV</a>

<script>
// #*

var tradesForShare = {
    load: function () {
        var q = {
            "SubSystem": "Prices",
            "Action": "GetInstrument",
            "inst.an": "nm",
            "inst.e": "3",
            "Exchange": "NMF",
            "Instrument": webCore.getInstrument(),
            "cache": "skip",
            "app": location["pathname"],
            "datasource": "prod",
            "translationFile": "translation",
            "DefaultDecimals": false
        };

        $("#tradesForShareOutput").loading("/static/nordic/css/img/loading.gif");
        var nordicRTI = NordicRTI.getInstance();
        var url = window.webCore.getWebAPIURL("prod", "MarketData/GetMarketData", true);
        var tradesRTI = new RTIObject(url, q, function (data) {
            tradesForShare.parseData(data);
                       console.log(tradesRTI);
        });
        nordicRTI.addRTIObject(tradesRTI);

        if($("tradesForShareTable").has("tr.odd")) {
                     $('.exportTrades').removeClass('disabled');
                     $('.exportTrades.disabled').css("pointer-events","auto");
                } else {
                     $('.exportTrades').addClass('disabled');
                     $('.exportTrades').css("pointer-events","none");
                }
        /*webCore.getMarketData(q, function (data) {
            tradesForShare.parseData(data);
        }, true);*/

        //var url = window.webCore.getWebAPIURL("prod", "MarketData/GetMarketData", true);
        /*$.getJSON(url, q, function (data) {
            tradesForShare.parseData(data);
        });*/
        /*$.ajax({
            type: "get",
            url: url,
            data: q,
            dataType: "jsonp",
            cache: true,
            success: function (data) {
                tradesForShare.parseData(data);
            },
            jsonp: "callback"
        });*/

        //setTimeout ( tradesForShare.load, 1000*30 );  // update every minute
    },
    parseData: function (data) {
        if(data.instruments != null) {
            $("#tradesForShareOutput").empty();
            var table = $("<table></table>").attr("id", "tradesForShareTable").addClass("tablesorter");
            var thead = $("<thead></thead>");
            var row = $("<tr></tr>");
            var kurs = $("<th></th>").text(webCore.getTranslationFor("trades", "p", data));// data.attributeTranslations.trades.p.trans[window.currentLanguage]);
            var vol = $("<th></th>").text(webCore.getTranslationFor("trades", "v", data));// data.attributeTranslations.trades.v.trans[window.currentLanguage]);
            var name = $("<th></th>").text(webCore.getTranslationFor("trades", "nm", data));// data.attributeTranslations.trades.nm.trans[window.currentLanguage]);
            var buyer = $("<th></th>").text(webCore.getTranslationFor("trades", "b", data));// data.attributeTranslations.trades.b.trans[window.currentLanguage]);
            var seller = $("<th></th>").text(webCore.getTranslationFor("trades", "s", data));// data.attributeTranslations.trades.s.trans[window.currentLanguage]);
            var time = $("<th></th>").text(webCore.getTranslationFor("trades", "t", data));// data.attributeTranslations.trades.t.trans[window.currentLanguage]);
            row.append(kurs).append(vol).append(name).append(buyer).append(seller).append(time);
            thead.append(row);
            var tbody = $("<tbody></tbody>");
            $.each(data.instruments[webCore.getInstrument().toLowerCase()].trades, function (k, v) {
                row = $("<tr></tr>");
                kurs = $("<td></td>").text(webCore.formatNumeric(v.values.p, 3));
                vol = $("<td></td>").text(window.webCore.formatNumeric(v.values.v, 0));
                name = $("<td></td>").text(v.values.nm);
                buyer = $("<td></td>").text(v.values.b);
                seller = $("<td></td>").text(v.values.s);
                time = $("<td></td>").text(webCore.getTimeFromDateString(v.values.t));
                row.append(kurs).append(vol).append(name).append(buyer).append(seller).append(time);
                tbody.append(row);
            });
            table.append(thead).append(tbody);
            $("#tradesForShareOutput").append(table);
            $("#tradesForShareTable").tablesorter({widgets: ['zebra']});
        }
    },
    excel: function () {
        var instrument = null;
        instrument = window.webCore.getInstrument();
        var utc = new Date().toJSON().slice(0,10).replace(/-/g,'-');
        $("#xlsForm").attr( "action", webCore.getProxyURL("prod"));
        var xmlquery = webCore.createQuery( Utils.Constants.marketAction.getTrades, {}, {
            t__a: "1,2,5,10,7,8,18",
                         FromDate : utc,
                         Instrument : instrument,
            ext_contenttype : "application/vnd.ms-excel",
            ext_contenttypefilename : "share_export.xls",
            ext_xslt:"t_table_simple.xsl",
            ext_xslt_lang: currentLanguage,
                        showall: "1"
        });
        console.log(xmlquery);
        $("#xmlquery").val( xmlquery );
        $("#xlsForm").submit();
    }
};

$(function () {
    tradesForShare.load();
$("#exportIntradayTradesCSV").on({
        click: function (e) {
                tradesForShare.excel();
            //window.webCore.exportTableToCSVClickEvent($("#exportIntradayTradesCSV"), $("#tradesForShareOutput"), '_' + window.webCore.getInstrument() + '.csv');
        }
    });
});



</script>

I tried using Inspect in Google Chrome and clicking on Event Listeners.

I got the following output when clicking the button:

<post>
<param name="SubSystem" value="Prices"/>
<param name="Action" value="GetTrades"/>
<param name="Exchange" value="NMF"/>
<param name="t__a" value="1,2,5,10,7,8,18"/>
<param name="FromDate" value="2018-08-29"/>
<param name="Instrument" value="CSE77855"/>
<param name="ext_contenttype" value="application/vnd.ms-excel"/>
<param name="ext_contenttypefilename" value="share_export.xls"/>
<param name="ext_xslt" value="/nordicV3/t_table_simple.xsl"/>
<param name="ext_xslt_lang" value="en"/>
<param name="showall" value="1"/>
<param name="app" value="/aktier/microsite"/>
</post>

So I thought I could do something similar to the following, but it doesn't work; see output further below.

import requests

url = 'http://www.nasdaqomxnordic.com/WebAPI/api/MarketData/GetMarketData'
params = {
"SubSystem": "Prices",
"Action": "GetTrades",
"Exchange": "NMF",
"t__a": "1,2,5,10,7,8,18",
"FromDate": "2018-08-29",
"Instrument": "CSE77855",
"ext_contenttype": "application/vnd.ms-excel",
"ext_contenttypefilename": "share_export.xls",
"ext_xslt": "/nordicV3/t_table_simple.xsl",
"ext_xslt_lang": "en",
"showall": "1",
"app": "/aktier/microsite",
}

r = requests.get(url, params=params)

print(r.json())

I get the following output:

{'linkCall': 'SubSystem=Prices&Action=GetTrades&Exchange=NMF&t.a=1&t.a=2&t.a=5&t.a=10&t.a=7&t.a=8&t.a=18&FromDate=2018-08-29&Instrument=CSE77855&ext_contenttype=application%2fvnd.ms-excel&ext_contenttypefilename=share_export.xls&ext_xslt=%2fnordicV3%2ft_table_simple.xsl&ext_xslt_lang=en&showall=1&app=%2faktier%2fmicrosite', 'instruments': None, 'derivatives': None, 'warrants': None, 'attributeTranslations': {}, 'message': None, 'success': False}
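Interestingly, the `linkCall` echoed in the response shows that the server expanded `t__a` into repeated `t.a` parameters, so the query was at least parsed; the `success: False` just means this GET endpoint isn't the one the export button uses. The expansion is easy to confirm with the standard library (the `link_call` string below is a shortened copy of the one in the response):

```python
from urllib.parse import parse_qs

# Shortened copy of the linkCall echoed by the API above
link_call = ('SubSystem=Prices&Action=GetTrades&Exchange=NMF'
             '&t.a=1&t.a=2&t.a=5&t.a=10&t.a=7&t.a=8&t.a=18'
             '&FromDate=2018-08-29&Instrument=CSE77855')

# parse_qs collects repeated keys into lists
parsed = parse_qs(link_call)
print(parsed['t.a'])  # ['1', '2', '5', '10', '7', '8', '18']
```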

If possible I would like to avoid Selenium.

2 Answers


Inspecting the HTML, I noticed that the form's action is /webproxy/DataFeedProxy.aspx and its method is post. That means the form is submitted via a POST request to: http://www.nasdaqomxnordic.com/webproxy/DataFeedProxy.aspx. The form has a single field named xmlquery whose value is the <post> payload from your question. The code below should download the file.

import requests

url = 'http://www.nasdaqomxnordic.com/webproxy/DataFeedProxy.aspx'
xmlquery = '''<post>
<param name="SubSystem" value="Prices"/>
<param name="Action" value="GetTrades"/>
<param name="Exchange" value="NMF"/>
<param name="t__a" value="1,2,5,10,7,8,18"/>
<param name="FromDate" value="2018-08-29"/>
<param name="Instrument" value="CSE77855"/>
<param name="ext_contenttype" value="application/vnd.ms-excel"/>
<param name="ext_contenttypefilename" value="share_export.xls"/>
<param name="ext_xslt" value="/nordicV3/t_table_simple.xsl"/>
<param name="ext_xslt_lang" value="en"/>
<param name="showall" value="1"/>
<param name="app" value="/aktier/microsite"/>
</post>'''

r = requests.post(url, data={'xmlquery': xmlquery})
html = r.text

The file is not CSV (neither is the file I get from my browser): it has an .xls extension but actually contains a large HTML table. However, you can create a CSV file from it with the help of BeautifulSoup and the csv module.

from bs4 import BeautifulSoup
import csv

soup = BeautifulSoup(html, 'html.parser')
names = [i.text for i in soup.select('th')] + ['Name']
values = [
    [td.text for td in tr.select('td')] + [tr.td['title'].rstrip(' - ')]
    for tr in soup.select('tr')[1:]
]

with open('file.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(names)
    writer.writerows(values)

Note that it may take some time for BeautifulSoup to parse the file, as it is quite large. If you're using Python 2.x, open doesn't accept the newline parameter; in that case open the file in binary mode, or the output may contain empty lines.
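As a quick offline sanity check of the extraction logic, here is the same code run on a tiny made-up table. The markup shape, including the title attribute on the first cell of each row, is an assumption that mirrors the real export, not a copy of the actual file:

```python
from bs4 import BeautifulSoup

# Made-up sample mimicking the shape of the real export table
# (assumed structure: one header row, then data rows whose first
# <td> carries a title attribute like "Pandora - ").
sample = """
<table>
<tr><th>Price</th><th>Volume</th></tr>
<tr><td title="Pandora - ">123.4</td><td>500</td></tr>
<tr><td title="Pandora - ">123.5</td><td>300</td></tr>
</table>
"""

soup = BeautifulSoup(sample, 'html.parser')
names = [th.text for th in soup.select('th')] + ['Name']
values = [
    # rstrip(' - ') trims any trailing spaces and dashes from the title
    [td.text for td in tr.select('td')] + [tr.td['title'].rstrip(' - ')]
    for tr in soup.select('tr')[1:]
]
print(names)   # ['Price', 'Volume', 'Name']
print(values)  # [['123.4', '500', 'Pandora'], ['123.5', '300', 'Pandora']]
```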


As mentioned by tommy.carstensen, pandas is better suited to this task: it has the right tools (read_html and to_csv) and is faster than BeautifulSoup.

import pandas as pd

pd.read_html(html, index_col='Time', parse_dates=True)[0].to_csv('file.csv')

The Name column is not included in the file because it isn't one of the table columns; it's the value of a title attribute. But we can get it by other means, for example from the original URL. Since it's the same for all rows, we can simply create a new Name column from the name value of the query string.

import pandas as pd
from urllib.parse import urlparse, parse_qs

url = 'http://www.nasdaqomxnordic.com/aktier/microsite?Instrument=CSE77855&name=Pandora&ISIN=DK0060252690'
df = pd.read_html(html, index_col='Time', parse_dates=True)[0]
df['Name'] = parse_qs(urlparse(url).query)['name'][0]
df.to_csv('file.csv')
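The query-string lookup for the Name column can be verified on its own with just the standard library:

```python
from urllib.parse import urlparse, parse_qs

url = ('http://www.nasdaqomxnordic.com/aktier/microsite'
       '?Instrument=CSE77855&name=Pandora&ISIN=DK0060252690')
# parse_qs returns a list per key, hence the [0]
name = parse_qs(urlparse(url).query)['name'][0]
print(name)  # Pandora
```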

1 Comment

Hi @t.m.adam, I suppose you might be interested in solving the issue detailed in this post. Thanks.

In addition to the excellent solution proposed by t.m.adam, I also had to specify a User-Agent header to get a response from the POST call:

headers = {'User-Agent': 'Safari/526.5'}
r = requests.post(url, data={'xmlquery': xmlquery}, headers=headers)

