I am trying out node-htmlparser2 and am stuck right at the very start. I have thousands of XML files like so:
<document … loads of attribs …>
<foo … loads of attribs …>
<loads…> … </loads>
<of…> … </of>
<other…> … </other>
<tags…> … </tags>
</foo>
</document>
I want everything inside <foo></foo> as a single string. My code below works, but it doesn't seem to me to be the right way of doing this:
// Shared capture state for the streaming parser below.
// `isFoo` is true while the parser is between <foo> and its closing tag;
// `txt` accumulates every text node seen during that time.
let isFoo = false;
let txt = '';

// Event-driven parser that collects the text content of <foo> into `txt`.
// The callbacks communicate only through `isFoo`/`txt`: the parser does not
// use their return values, so the captured text has to be read from `txt`
// once parsing has finished.
// Note: a boolean flag stops capturing at the first </foo>, so a <foo>
// nested inside another <foo> would cut the capture short.
const p = new htmlparser.Parser(
  {
    onopentag(name) {
      if (name === 'foo') {
        isFoo = true;
      }
    },
    ontext(text) {
      if (isFoo) {
        txt += text;
      }
    },
    onclosetag(tagname) {
      if (tagname === 'foo') {
        isFoo = false;
      }
    },
  },
  { decodeEntities: true, xmlMode: true },
);
// Build one { filename, filetext } record per input file, where `filetext`
// is the text the parser callbacks collected into `txt` for that file.
let data = [];
// NOTE(review): assumes `files` is an iterable (e.g. an array) of file paths
// whose contents must be read from disk — confirm against where `files` is
// built. `for...of` is used because `for...in` yields keys/indices.
for (const file of files) {
  // Start every document from a clean slate; otherwise the text of earlier
  // files would be carried over into this record.
  txt = '';
  isFoo = false;

  // parseComplete() resets the parser and then parses the given string as a
  // whole document. A reset is required because the parser treats write()
  // after end() as an error.
  const xml = require('fs').readFileSync(file, 'utf8');
  p.parseComplete(xml);

  // The parser methods return nothing; the captured text lives in `txt`.
  data.push({
    filename: file,
    filetext: txt,
  });
}
Is there a better way to work with htmlparser2 without that silly isFoo flag?