I want to parse information on a website and I have been doing it successfully with just pure HTML. For instance for the following code:
<div>
<ul>
<h3>This is a heading</h3>
I would use "answ = pagehtml.xpath('//div/ul/h3')" and "answ" would be "This is a heading".
But now I have a web page with an embedded JavaScript block that looks like this:
<script>
var XYZ = XYZ || {};
XYZ.contentModel = {
layout: "no-rail",
analytics: {
"pageTop": {},
"chartbeat": {
"sections": ""
},
"branding_content_page": "default",
"branding_content_card": [""]
},
edition: "Hometown",
title: "This is the title",
siblings: {
"articleList": [{
"uri": "Got-to-this-webpage.html",
"description": "",
"layout": ""
}]
So I would like to know how to parse the "uri" value in this script. Here is what I have tried, but it failed: answ = pagehtml.xpath('//script/XYZ/siblings/articleList/uri')
What would be the correct xpath to use, if any?
Thanks a lot