1

Is there any kind of XML parser wrapper library that would allow switching the actual XML parser engine at configuration or run time instead of forcing me to choose between libxml2, expat or Xalan-C++?

1 Answer 1

1

I wrote something similar a while back:

// Tag types: each names one XML library and is used only as a template
// argument to select the matching platform_manager<T> / parse_file<T>
// specialization. They are never defined or instantiated.
struct xerces;
struct msxml;
struct rapid;
struct tiny;
struct pugixml;

// Per-library setup/teardown object; demo<T> keeps one alive while it runs
// its parse iterations. Only explicit specializations are defined.
template <typename T> struct platform_manager;
// Parses file `f` once with the library selected by T, timing the work with
// `qpc`, and returns the measured duration (the specializations request it in
// QueryPerfCounter::seconds).
template <typename T> double parse_file(std::string const& f, QueryPerfCounter& qpc);

template<class T>
void demo(std::string const& f, size_t N = 10) {
    platform_manager<T> pm;
    QueryPerfCounter qpc;
    std::vector<double> timing_data;
    timing_data.reserve(N);
    std::generate_n(std::back_inserter(timing_data), N, std::tr1::bind(&parse_file<typename T>, f, qpc));
    adobe::Statistics<double> s(timing_data.begin(), timing_data.end());
    std::cout << "Iteration count: " << s.count() << " Mean time: " << s.mean() << "s. Variance: " << s.variance() << "s.\n";
}
/***************************************************************/
/// Setup/teardown for the MSXML benchmark: initialises COM in the constructor
/// and uninitialises it in the destructor.
template <>
struct platform_manager<msxml> {
    /// @throws std::runtime_error if CoInitialize reports failure.
    platform_manager() {
        // The failing call here is CoInitialize, so the message names it
        // (it previously blamed CoCreateInstance).
        if (FAILED(CoInitialize(NULL)))
            throw std::runtime_error("CoInitialize failed");
    }

    ~platform_manager() {
        CoUninitialize();
    }

    // Non-copyable: a copy would run CoUninitialize a second time for a
    // single successful CoInitialize.
    platform_manager(platform_manager const&) = delete;
    platform_manager& operator=(platform_manager const&) = delete;
};

/// Loads `f` into a fresh MSXML 6 DOM document and times the load.
///
/// @param f    Path of the XML file to load.
/// @param qpc  Counter used to time the load() call.
/// @return     Duration of the load in seconds, or 0 if the DOM document
///             object could not be created.
template<>
double parse_file<msxml>(std::string const& f, QueryPerfCounter& qpc) {
    CComPtr<IXMLDOMDocument> pXMLDom;
    HRESULT hr = CoCreateInstance(__uuidof(MSXML2::DOMDocument60), NULL, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pXMLDom));
    // Without this check a failed creation leaves pXMLDom null and the
    // load() call below dereferences it.
    if (FAILED(hr) || !pXMLDom) {
        std::cout << "Failed to create MSXML DOM document" << std::endl;
        return 0;
    }

    VARIANT_BOOL varStatus = VARIANT_FALSE;
    qpc.Start();
    hr = pXMLDom->load(CComVariant(f.c_str()), &varStatus);
    qpc.Stop();
    // load() signals an unparsable/unreadable document with S_FALSE and
    // VARIANT_FALSE, which FAILED() alone does not catch. Reporting happens
    // after Stop() so console output is not part of the measurement.
    if (FAILED(hr) || varStatus != VARIANT_TRUE)
        std::cout << "Parsing failed" << std::endl;
    return qpc.Duration(QueryPerfCounter::seconds);
}

/***************************************************************/
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>

#ifdef XERCES_CPP_NAMESPACE_USE
XERCES_CPP_NAMESPACE_USE
#endif 

/// Setup/teardown for the Xerces-C++ benchmark: XMLPlatformUtils::Initialize()
/// in the constructor, XMLPlatformUtils::Terminate() in the destructor.
template <> 
struct platform_manager<xerces> {
    // Function-try-block: after the handler logs the failure, the exception
    // is rethrown automatically (a constructor's function-try-block cannot
    // swallow it), so the destructor never runs for a failed Initialize().
    platform_manager() try {
        XMLPlatformUtils::Initialize();
    } catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        // Print the transcoded text itself; transcoding it a second time
        // allocated a buffer that was never released and streamed a
        // non-char pointer instead of the message.
        std::cout << "Failed to init: " << message << std::endl;
        XMLString::release(&message);
    }

    ~platform_manager() {
        XMLPlatformUtils::Terminate();
    }
};

/// Parses `f` with a validating, namespace-aware XercesDOMParser and times
/// the parse() call.
///
/// @param f    Path of the XML file to parse.
/// @param qpc  Counter used to time the parse.
/// @return     Duration of the parse in seconds, or 0 if the parse threw.
///             Exceptions are reported on std::cout and not propagated.
template<>
double parse_file<xerces>(std::string const& f, QueryPerfCounter& qpc) {
    double duration = 0;

    // Plain locals instead of heap-allocated shared_ptrs: neither object
    // leaves this function. The handler is declared first so it is still
    // alive when the parser that points at it is destroyed.
    HandlerBase errHandler;
    XercesDOMParser parser;
    parser.setValidationScheme(XercesDOMParser::Val_Always);
    parser.setDoNamespaces(true);    // optional
    parser.setErrorHandler(&errHandler);

    try {
        qpc.Start();
        parser.parse(f.c_str());
        qpc.Stop();
        duration = qpc.Duration(QueryPerfCounter::seconds);
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Exception message is: \n"
            << message << "\n";
        XMLString::release(&message);
    }
    catch (const DOMException& toCatch) {
        char* message = XMLString::transcode(toCatch.msg);
        std::cout << "Exception message is: \n"
            << message << "\n";
        XMLString::release(&message);
    }
    // HandlerBase rethrows fatal parse errors as SAXParseException (a
    // SAXException); catching it here keeps the parser's diagnostic instead
    // of collapsing it into "Unexpected Exception".
    catch (const SAXException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Exception message is: \n"
            << message << "\n";
        XMLString::release(&message);
    }
    catch (...) {
        std::cout << "Unexpected Exception \n" ;
    }
    return duration;
}

/***************************************************************/
#include "rapidxml.hpp"
#include <vector>
#include <fstream>
#include <iterator>

// The rapid (rapidxml) tag gets an empty manager: this specialization
// performs no setup or teardown.
template <>
struct platform_manager<rapid> {
};

/// Divisors for expressing a byte count in bytes, kibibytes or mebibytes.
enum size_hint { B = 1, KB = 1024, MB = 1024 * 1024 };

/// Returns the size of the file behind `f`, scaled by `factor`.
///
/// The stream is repositioned to the beginning afterwards.
///
/// @param f       Input file stream to measure.
/// @param factor  Unit divisor (default MB).
/// @return        Size in the requested unit, or 0.0 when the size cannot be
///                determined (e.g. the stream failed to open).
double file_size(std::ifstream& f, size_hint factor = MB) {
    f.seekg(0, std::ios::end);
    const std::ifstream::pos_type end = f.tellg();
    f.seekg(0, std::ios::beg);
    // tellg() yields pos_type(-1) on failure; converting that to an unsigned
    // size would report an absurdly large file.
    if (end == std::ifstream::pos_type(-1))
        return 0.0;
    return static_cast<double>(static_cast<std::streamoff>(end)) / factor;
}

/// Reads `f` into memory and parses it with rapidxml, timing both steps.
///
/// @param f    Path of the XML file to parse.
/// @param qpc  Counter used to time the buffer load and the parse.
/// @return     Buffer-load time plus parse time in seconds; 0 if the file
///             cannot be opened or measured. Parse errors and other standard
///             exceptions are printed to std::cout and not propagated.
template<>
double parse_file<rapid>(std::string const& f, QueryPerfCounter& qpc) {
    double duration = 0;
    rapidxml::xml_document<> doc;
    try {
        qpc.Start();
        // Binary mode so the byte count from tellg() matches what read()
        // delivers (text mode may translate line endings).
        std::ifstream myfile(f.c_str(), std::ios::in | std::ios::binary);
        if (!myfile) {
            qpc.Stop();
            std::cout << "Cannot open file: " << f << std::endl;
            return duration;
        }
        myfile.seekg(0, std::ios::end);
        const std::streamoff length = myfile.tellg();
        myfile.seekg(0, std::ios::beg);
        if (length < 0) {
            qpc.Stop();
            std::cout << "Cannot determine size of file: " << f << std::endl;
            return duration;
        }
        // One zero-filled allocation that already includes the terminator
        // rapidxml requires. The buffer is never empty, so &buffer[0] is
        // valid even for a zero-length file.
        std::vector<char> buffer(static_cast<size_t>(length) + 1, '\0');
        if (length > 0)
            myfile.read(&buffer[0], length);
        qpc.Stop();
        duration += qpc.Duration(QueryPerfCounter::seconds);

        qpc.Start();
        // Non-destructive mode: the parser does not modify the source text.
        doc.parse<rapidxml::parse_non_destructive>(&buffer[0]);
        qpc.Stop();
        duration += qpc.Duration(QueryPerfCounter::seconds);
    } catch (rapidxml::parse_error const& e) {
        std::cout << e.what() << std::endl;
    } catch (std::exception const& e) {
        std::cout << e.what() << std::endl;
    }
    return duration;
}
/***************************************************************/
// The tiny (tinyxml2) tag gets an empty manager: this specialization
// performs no setup or teardown.
template <>
struct platform_manager<tiny> {
};

/// Loads and parses `f` with tinyxml2 and times the LoadFile() call.
///
/// @param f    Path of the XML file to parse.
/// @param qpc  Counter used to time the load.
/// @return     Duration of LoadFile() in seconds (also returned when the
///             load failed; the error is printed by tinyxml2).
template<>
double parse_file<tiny>(std::string const& f, QueryPerfCounter& qpc) {
    tinyxml2::XMLDocument doc;
    qpc.Start();
    doc.LoadFile(f.c_str());
    qpc.Stop();
    // Report only real errors, and only after the timer has stopped:
    // PrintError() writes to stdout, which would otherwise be measured, and
    // some tinyxml2 versions can print a line even when nothing went wrong.
    if (doc.Error())
        doc.PrintError();
    return qpc.Duration(QueryPerfCounter::seconds);
}
/***************************************************************/
/// Scope marker for one benchmark section: writes a BEGIN banner to std::cout
/// on construction and the matching END banner on destruction.
struct banner_printer {
    /// @param libname  Library label shown in both banners.
    /// @param input    Input file name shown in both banners.
    banner_printer(std::string const& libname, std::string const& input)
        : lib(libname), in(input)
    {
        emit("/*+------------------- BEGIN test for ");
    }

    ~banner_printer()
    {
        emit("/*+------------------- END test for ");
    }

private:
    // Writes one banner line (flushed) that starts with `opening`.
    void emit(char const* opening) const
    {
        std::cout << opening << lib << " with file: " << in << " -------------------+*/" << std::endl;
    }

    std::string lib;
    std::string in;
};
/***************************************************************/
#include "pugixml.hpp"

// The pugixml tag gets an empty manager: this specialization performs no
// setup or teardown.
template <>
struct platform_manager<pugixml> {
};

/// Loads and parses `f` with pugixml and times the load_file() call.
///
/// @param f    Path of the XML file to parse.
/// @param qpc  Counter used to time the load.
/// @return     Duration of load_file() in seconds; on a failed parse the
///             error description and offset are printed to std::cout first.
template<>
double parse_file<pugixml>(std::string const& f, QueryPerfCounter& qpc) {
    pugi::xml_document doc;

    qpc.Start();
    pugi::xml_parse_result const outcome = doc.load_file(f.c_str());
    qpc.Stop();

    // Success path: nothing to report.
    if (outcome)
        return qpc.Duration(QueryPerfCounter::seconds);

    std::cout << "XML [" << f << "] parsed with errors, attr value: ["
              << doc.child("node").attribute("attr").value() << "]\n";
    std::cout << "Error description: " << outcome.description() << "\n";
    std::cout << "Error offset: " << outcome.offset
              << " (error at offset [..." << (outcome.offset) << "]\n\n";
    return qpc.Duration(QueryPerfCounter::seconds);
}
/***************************************************************/

/// Benchmark driver: for every file listed in the catalog, prints its size
/// and then runs demo<T> for each library between BEGIN/END banners.
int main() {
    std::vector<std::string> const inputs =
        parse_catalog("D:/Work/xml_parsers/perfcompare/benchmark/catalog.txt");

    for (std::string const& s : inputs) {
        {
            std::ifstream sizeProbe(s);
            std::cout << "Input file name: " << s << " size: " << file_size(sizeProbe) << "MB\n\n";
        }
        // Each library runs in its own scope so the END banner is printed as
        // soon as that library's demo finishes.
        {
            banner_printer banner("xerces", s);
            demo<xerces>(s);
        }
        {
            banner_printer banner("rapid", s);
            demo<rapid>(s);
        }
        {
            banner_printer banner("tiny", s);
            demo<tiny>(s);
        }
        {
            banner_printer banner("pugi", s);
            demo<pugixml>(s);
        }
        {
            banner_printer banner("MSXML6", s);
            demo<msxml>(s);
        }
    }
    // The expat demo is not wired in: expat_demo(argc, argv);
    return 0;
}

It may or may not help you get started. I've skipped header includes and some other trivia. I tried to keep the interface simple and identical. This meant that some libraries required additional helper functions.

Sign up to request clarification or add additional context in comments.

1 Comment

Very helpful code. Thanks. People should be upvoting this by the dozens.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.