2

I'm loading an XML file in JavaScript, and once I've done that, I'm iterating through all of the Document's childNodes, which are all of the XML elements in the document. The problem I'm having is that I need to ignore nodes that are not real elements but merely whitespace (newlines, tabs, etc.). Right now I'm doing the following:

// Walk every child node and dispatch on its DOM node type.
for (var i = 0; i < childList.length; i++)
{
    switch (childList[i].nodeType)
    {
        case 1: // Node.ELEMENT_NODE
            /* snip */
            break;
        case 3: // Node.TEXT_NODE
        case 8: // Node.COMMENT_NODE

            // Only handle text/comment nodes with real content; skip the ones
            // that nodeEmpty reports as whitespace-only (newlines, tabs, etc).
            if (!nodeEmpty(childList[i].nodeValue))
            {
                // do stuff
            }

            break;
    }
}

/**
 * Reports whether a node value holds nothing but XML whitespace.
 *
 * @param {?string} nodeValue the nodeValue of a text or comment node
 * @returns {boolean} true when the value is null/undefined, empty, or made up
 *     solely of spaces, tabs, carriage returns and line feeds
 */
function nodeEmpty(nodeValue)
{
    // A missing value has no content to inspect, so treat it as empty
    // instead of throwing on the property access.
    if (nodeValue === null || nodeValue === undefined)
    {
        return true;
    }

    // Empty means "no character outside the XML whitespace set exists".
    return !/[^ \t\r\n]/.test(nodeValue);
}

But this seems like a very inelegant way of achieving this. Is there a better way of doing it?

0

2 Answers 2

3
/**
 * xmlsimplify simplifies an XML object by copying it into a
 * javascript object with only a subset of the attributes of the original.
 * This makes it suitable for output as JSON, or for further processing by
 * other functions.
 *
 * Elements become objects holding nodeName, one property per attribute and,
 * when any children survive, a childNodes array. Text and CDATA nodes become
 * their string value.
 *
 * @param {Object|String} xml a DOM node, or an XML string (parsed with getXmlDocument)
 * @param {Boolean} strip if true, strip empty (whitespace) nodes
 * @returns {*} the simplified copy, or undefined when there is nothing to convert
 */
var xmlsimplify = function(xml, strip) {
    if (typeof xml === "string") {
        xml = getXmlDocument(xml);
    }
    var traverse = function(node) {
        var i, l, n, a, j, o;
        // A missing node (e.g. a doctype with no following sibling) or a value
        // that is not a DOM node yields undefined instead of throwing.
        if (!node || !node.nodeType) {
            return undefined;
        }
        // Node types without a dedicated case below come back as an empty object.
        o = {};
        switch (node.nodeType) {
        case 1:
            //element node;
            o = {
                nodeName: node.nodeName
            }; //record nodename
            for (i = 0, l = node.attributes.length, n = node.attributes; i < l; i++) { //append attributes
                a = traverse(n.item(i));
                for (j in a) {
                    if (a.hasOwnProperty(j)) {
                        o[j] = a[j];
                    }
                }
            }
            if (node.childNodes.length) {
                o.childNodes = [];
                for (i = 0, l = node.childNodes.length, n = node.childNodes; i < l; i++) {
                    a = traverse(n.item(i));
                    // null marks a whitespace-only text node that was stripped
                    if (a !== null) {
                        o.childNodes.push(a);
                    }
                }
                // every child was stripped: drop the empty list altogether
                if (o.childNodes.length === 0) {
                    delete o.childNodes;
                }
            }
            break;
        case 2:
            //attribute node
            o[node.nodeName] = node.nodeValue; //return an attribute object
            break;
        case 3:
            //text node
            //strip empty nodes: only when stripping was requested and the
            //text contains no non-whitespace character
            if (strip === true && !/[^\s]/.test(node.nodeValue)) {
                o = null;
            } else {
                o = node.nodeValue;
            }
            break;
        case 4:
            //cdata section node
            o = node.nodeValue;
            break;
        case 9:
            //document node; prefer the root element so that a leading comment,
            //processing instruction or doctype is not mistaken for the content
            o = traverse(node.documentElement || node.firstChild);
            break;
        case 10:
            //document type node; skip ahead to whatever follows it
            o = traverse(node.nextSibling);
            break;
        }
        return o;
    };
    return traverse(xml);
};
Sign up to request clarification or add additional context in comments.

2 Comments

Why does the regex use "^\s" instead of "\S"?
I suppose the two are equivalent, but I find the former to be clearer. I'm a bit hazy on my character classes. Code clarity is generally important. I might know that \S is any non-whitespace character today, but I may forget it again in 6 months' time.
0
/**
 * Click handler: issues an asynchronous GET to AjaxManager.aspx?cmd=ajaxXML
 * and, once the response has arrived, renders the text of each TITLE element
 * next to the matching YEAR element as table rows inside the element with
 * id "demo".
 */
function btn2Click() {
    try {
        // presumably an XMLHttpRequest-like object; CreateXMLObj is defined elsewhere
        var ConObj = CreateXMLObj();
        ConObj.onreadystatechange = function () {
            // readyState 4 = request finished, status 200 = HTTP OK
            if (this.readyState === 4 && this.status === 200) {
                var xmlObj = this.responseXML;
                var oTitle = xmlObj.getElementsByTagName('TITLE');
                var oYear = xmlObj.getElementsByTagName('YEAR');
                var txt = "<center><table border=2>";
                for (var i = 0; i < oTitle.length; i++) {
                    // There may be fewer YEAR than TITLE elements; an empty
                    // list makes setNA fall back to its placeholder.
                    var yearNodes = oYear[i] ? oYear[i].childNodes : [];
                    txt += "<TR><TD>" + setNA(oTitle[i].childNodes) + "</TD><TD>" + setNA(yearNodes) + "</TD> </TR>";
                }
                txt += "</table></center>";
                // NOTE(review): the values are inserted as raw HTML; escape them
                // if the XML can ever contain untrusted text.
                document.getElementById("demo").innerHTML = txt;
                alert('stage1');
            }
        };
        ConObj.open("GET", "AjaxManager.aspx?cmd=ajaxXML", true);
        ConObj.send();
    } catch (e) {
        // call toString; passing the method itself would display its source
        alert(e.toString());
    }
}

    

/**
 * Returns the nodeValue of the first node in a node list, or the placeholder
 * "NA" when the list is missing or empty.
 *
 * @param {?Object} value an array-like list of DOM nodes (e.g. element.childNodes)
 * @returns {*} the first node's nodeValue, or "NA"
 */
function setNA(value) {
    // Declared locally: an undeclared `name` would overwrite the global
    // window.name in browsers and throws in strict mode.
    var name = "NA";
    if (value && value.length > 0)
        name = value[0].nodeValue;
    return name;
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.