I'm trying to write an XPath statement to fetch the contents of each row in a table, but only when the 2nd column of that row is not set to "TBA". The page I am working from is this page. I am new to using XPath.
I've come up with the following statement, which I've managed to test successfully (or appears successful anyway) with an online XPath tester, but have been unable to figure out how to apply it in node.js:
//*[@id="body_column_left"]/div[4]/table/tbody/tr/[not(contains(./td[2], 'TBA'))]
This is my attempt below. I've tried variations, but I can't even get it to be accepted as a valid XPath statement, and as a result I've been lost in not very helpful stack traces:
var fs = require('fs');
var xpath = require('xpath');
var parse5 = require('parse5');
var xmlser = require('xmlserializer');
var dom = require('xmldom').DOMParser;
var request = require('request');
/**
 * Download a page with `request` and hand its body to `callback`.
 *
 * Failures (a transport error, or any HTTP status other than 200) are
 * reported on stderr rather than being dropped silently; the callback is
 * not invoked in those cases.
 *
 * @param {string} url - Address passed straight to `request`.
 * @param {function(*): *} callback - Called with the response body on success.
 */
var getHTML = function (url, callback) {
    request(url, function (error, response, body) {
        if (error) {
            console.error("getHTML: request for " + url + " failed:", error);
            return;
        }
        // Guard against a missing response object before reading its status.
        if (!response || response.statusCode !== 200) {
            console.error(
                "getHTML: unexpected HTTP status for " + url + ":",
                response ? response.statusCode : undefined
            );
            return;
        }
        return callback(body); // return the HTML
    });
};
// Fetch the schedule page and log the table rows whose 2nd cell does not contain "TBA".
getHTML("http://au.cybergamer.com/pc/csgo/ladder/scheduled/", function (html) {
    // HTML text -> parse5 tree -> XHTML string -> xmldom document, which the xpath module can query.
    var parser = new parse5.Parser();
    var document = parser.parse(html.toString());
    var xhtml = xmlser.serializeToString(document);
    var doc = new dom().parseFromString(xhtml);

    // Bind the "x" prefix to the XHTML namespace. An unprefixed XPath 1.0 name test only matches
    // elements in no namespace, so every element step below carries the prefix.
    // NOTE(review): assumes the serialized document puts its elements in the XHTML namespace,
    // as the original "x:*" step already presumed — confirm against the actual xhtml output.
    var select = xpath.useNamespaces({"x": "http://www.w3.org/1999/xhtml"});

    // The predicate is attached directly to the tr step; "tr/[...]" (a predicate with no node
    // test after the slash) is not valid XPath and is rejected by the parser.
    var rowsQuery = "//x:*[@id=\"body_column_left\"]/x:div[4]/x:table/x:tbody/x:tr" +
        "[not(contains(./x:td[2], 'TBA'))]";
    var nodes = select(rowsQuery, doc);
    console.log(nodes);
});
Any help would be appreciated!
Using cheerio would be a lot easier (see cheerio).