Iam parsing specific tags (eg. titles) from XML file using libxml2.
Parsing this XML:
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs1</title>
</entry>
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs2</title>
</entry>
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs3</title>
</entry>
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs4</title>
</entry>
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs5</title>
</entry>
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs6</title>
</entry>
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs7</title>
</entry>
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs8</title>
</entry>
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs9</title>
</entry>
<entry>
<title type="html">Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs10</title>
</entry>
</feed>
Using this C++ code
void CXMLManager::processNode(xmlTextReaderPtr reader)
{
static bool root = true;
std::string name;
name = std::string((const char *) xmlTextReaderConstName (reader));
if (name == "entry")
{
if (root)
{
m_name = m_title;
root = false;
return;
}
static bool closeEntry = true;
if (closeEntry)
{
m_feedBuffer.push_back( CFeed { m_name, m_title, m_updated, m_author, m_link } );
m_title = "";
}
closeEntry = !closeEntry;
}
else if (name == "title" && xmlTextReaderNodeType(reader) != XML_READER_TYPE_END_ELEMENT)
{
m_title = getElementContent(reader);
std::cout << "Title: " << m_title << std::endl;
}
}
std::string CXMLManager::getElementContent(xmlTextReaderPtr reader)
{
xmlNodePtr node = xmlTextReaderCurrentNode(reader);
xmlChar* text = xmlNodeGetContent(node);
return std::string((const char *) text);
}
void CXMLManager::streamFile(const char *data, size_t size)
{
xmlTextReaderPtr reader;
int ret;
/*
* Pass some special parsing options to activate DTD attribute defaulting,
* entities substitution and DTD validation
*/
reader = xmlReaderForMemory(data, size, NULL, NULL,
XML_PARSE_DTDATTR | /* default DTD attributes */
XML_PARSE_NOENT); /* substitute entities */
if (reader != NULL)
{
ret = xmlTextReaderRead(reader);
while (ret == 1)
{
processNode(reader);
ret = xmlTextReaderRead(reader);
}
}
else
{
throw CFeedreaderException("FEEDREADER: Failed to parse XML.", E_WRONG_XML);
}
}
and in the most cases, iam getting correct result, but once a time -- iam getting empty string (even thought its correct in XML):
Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs1
Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs2
Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs3
Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs4
Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs6
Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs7
Swedish ISP spanked for sexist 'distracted boyfriend' advert for developer jobs8
I have checked XML many times before parsing and its correct, so i dont know what could be the problem here. The 5th string is missed periodicaly with this input.
return std::move(std::string((const char *) text));, you have a memory leak as you must callxmlFree()on the pointer returned byxmlNodeGetContent(). You should be usingxmlTextReaderValue()(which also requiresxmlFree()) orxmlTextReaderConstValue()(which doesn't) instead. Also, thestd::move()is redundant.xmlNodeGetContent(). You are also leaking thereaderitself