3

I am trying to parse an XML document that declares two xmlns namespaces, and all my XPath queries are returning null.

I want to parse the values of category nodes and create an array but because the document has two namespaces, no matter what xpath expression I use, it always returns null.

If I remove one namespace then it works fine. I have looked up other answers but couldn't find something that works so posting this as a new question.

Here's what I have tried so far. I am using this article as a reference.

Thanks for your help in advance.

import java.io.FileInputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.xml.sax.InputSource;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;


/**
 * Minimal reproduction from the question: parses an inline NewsML-G2 document with a
 * namespace-aware DOM parser, evaluates an XPath expression against it and prints the
 * value of every node in the result set.
 */
class Main
{
    public static void main(String[] args) throws Exception
    {

        //Parse the XML (supplied inline as a string) with a namespace-aware parser
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        // The root element declares a default namespace (xmlns=...) and the xsi prefix;
        // no element in the document carries a prefix.
        Document doc = builder.parse(new InputSource(new StringReader("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                "<newsItem guid=\"urn:newsml:news.com.au:20210401\" version=\"1\"\n" +
                "  standard=\"NewsML-G2\" standardversion=\"2.9\"\n" +
                "  xmlns=\"http://iptc.org/std/nar/2006-10-01/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n" +
                "  <catalogRef href=\"http://www.iptc.org/std/catalog/catalog.IPTC-G2-Standards_16.xml\"/>\n" +
                "  <itemMeta>\n" +
                "    <itemClass qcode=\"ninat:video\"/>\n" +
                "    <provider>\n" +
                "      <name>FoxSports</name>\n" +
                "    </provider>\n" +
                "    <versionCreated>2021-04-01T16:10:15.736+11:00</versionCreated>\n" +
                "    <event>create</event>\n" +
                "  </itemMeta>\n" +
                "  <contentMeta>\n" +
                "    <FWID>0</FWID>\n" +
                "    <originalId>799186</originalId>\n" +
                "    <contentCreated>2021-04-01T16:10:15.736+11:00</contentCreated>\n" +
                "    <expiration>2021-05-01T15:00:43.057+10:00</expiration>\n" +
                "    <slugline>Test - Video Name</slugline>\n" +
                "    <headline>Test - video headline</headline>\n" +
                "    <description>Test AFL: David King breaks down his new theory surrounding Dimma and the Tigers. </description>\n" +
                "    <category>\n" +
                "      <id>208</id>\n" +
                "      <name>AFL</name>\n" +
                "      <category>\n" +
                "        <id>320</id>\n" +
                "        <name>AFL 360</name>\n" +
                "      </category>\n" +
                "    </category>\n" +
                "    <collections>\n" +
                "      <collection>\n" +
                "        <id>138</id>\n" +
                "        <name>alexa</name>\n" +
                "      </collection>\n" +
                "    </collections>\n" +
                "    <isPremiumPay>false</isPremiumPay>\n" +
                "    <geoblock>false</geoblock>\n" +
                "  </contentMeta>\n" +
                "  <contentSet>\n" +
                "    <remoteContent id=\"web\"\n" +
                "      href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09.jpg\" version=\"1\"\n" +
                "      rendition=\"rnd:web\" size=\"44848\" contenttype=\"image/jpeg\"\n" +
                "      width=\"640\" height=\"360\" colourspace=\"colsp:sRGB\" orientation=\"1\" resolution=\"96\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_564.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"512\" height=\"288\" duration=\"121\"\n" +
                "      audiobitrate=\"64000\" videoavgbitrate=\"500000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_248.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"512\" height=\"288\" duration=\"121\"\n" +
                "      audiobitrate=\"48000\" videoavgbitrate=\"200000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_1596.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"800\" height=\"450\" duration=\"121\"\n" +
                "      audiobitrate=\"96000\" videoavgbitrate=\"1500000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_2628.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"1280\" height=\"720\" duration=\"121\"\n" +
                "      audiobitrate=\"128000\" videoavgbitrate=\"2500000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_1096.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"640\" height=\"360\" duration=\"121\"\n" +
                "      audiobitrate=\"96000\" videoavgbitrate=\"1000000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_896.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"640\" height=\"360\" duration=\"121\"\n" +
                "      audiobitrate=\"96000\" videoavgbitrate=\"800000\" videoaspectratio=\"16:9\"/>\n" +
                "  </contentSet>\n" +
                "</newsItem>\n")));

        //Get XPath expression
        XPathFactory xpathfactory = XPathFactory.newInstance();
        XPath xpath = xpathfactory.newXPath();
        xpath.setNamespaceContext(new NamespaceResolver(doc));
        // NOTE(review): the steps below carry no namespace prefix. In XPath 1.0 an unprefixed
        // name test selects elements in NO namespace, while every element of the document is in
        // the default namespace declared on the root, so this expression is expected to match
        // nothing regardless of the NamespaceContext installed above.
        XPathExpression expr = xpath.compile("/newsItem/itemMeta");

        //Search XPath expression
        Object result = expr.evaluate(doc, XPathConstants.NODESET);

        //Iterate over the matched nodes and print each node's value
        NodeList nodes = (NodeList) result;
        for (int i = 0; i < nodes.getLength(); i++) {
            // NOTE(review): per the DOM Node documentation getNodeValue() is null for element
            // nodes; getTextContent() is probably what is wanted here - confirm.
            System.out.println(nodes.item(i).getNodeValue());
        }
        
    }
}
/**
 * {@link NamespaceContext} that answers every lookup by delegating to a parsed DOM
 * {@link Document}, so the prefixes usable in XPath expressions are exactly the ones
 * declared in (and visible from) the document itself.
 *
 * <p>Note that XPath 1.0 never consults the context for unprefixed names: elements in
 * a default namespace still have to be addressed through some prefix in the expression.
 */
class NamespaceResolver implements NamespaceContext
{
    //Store the source document to search the namespaces
    private final Document sourceDocument;

    /**
     * @param document the parsed, namespace-aware document whose declarations are used
     */
    public NamespaceResolver(Document document) {
        sourceDocument = document;
    }

    /**
     * Resolves a prefix to its namespace URI using the stored document.
     *
     * @param prefix the prefix to resolve; the empty string asks for the default namespace
     * @return the bound namespace URI, or {@code null} when the document does not bind the prefix
     * @throws IllegalArgumentException if {@code prefix} is {@code null}, as the
     *         {@link NamespaceContext} contract requires
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        if (prefix.equals(XMLConstants.DEFAULT_NS_PREFIX)) {
            // The DOM API expresses "default namespace" as a null prefix.
            return sourceDocument.lookupNamespaceURI(null);
        }
        return sourceDocument.lookupNamespaceURI(prefix);
    }

    /**
     * Returns a prefix the document binds to the given namespace URI.
     *
     * @param namespaceURI the namespace URI to look up
     * @return a matching prefix, or {@code null} when the DOM lookup finds none
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public String getPrefix(String namespaceURI) {
        if (namespaceURI == null) {
            throw new IllegalArgumentException("namespaceURI must not be null");
        }
        return sourceDocument.lookupPrefix(namespaceURI);
    }

    /**
     * Returns the prefixes bound to the given namespace URI. The DOM lookup reports at
     * most one prefix, so the iterator yields zero or one element; it is never {@code null}.
     *
     * @param namespaceURI the namespace URI to look up
     * @return a read-only iterator over the matching prefixes (possibly empty)
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        List<String> prefixes = new ArrayList<>();
        String prefix = getPrefix(namespaceURI);
        if (prefix != null) {
            prefixes.add(prefix);
        }
        // The contract asks for an iterator that does not support remove().
        return java.util.Collections.unmodifiableList(prefixes).iterator();
    }
}

2 Answers 2

2

Your XML elements are bound to the namespace http://iptc.org/std/nar/2006-10-01/, but your XPath is not using any namespace-prefixes, so /newsItem/itemMeta is asking for elements that are bound to no namespace.

You could address them by just the local-name():

/*[local-name()='newsItem']/*[local-name()='itemMeta']

Otherwise, you need to register the namespace with a namespace prefix, or use a custom NamespaceContext to resolve the namespace from your chosen namespace-prefix:

// Bind a prefix of our own choosing ("i") to the document's default namespace so that
// XPath expressions can address its elements as i:name.
xpath.setNamespaceContext(new NamespaceContext() {
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        switch (prefix) {
            case "i": return "http://iptc.org/std/nar/2006-10-01/";
            // ... add further prefix bindings here
            default: return XMLConstants.NULL_NS_URI; // unbound prefix
        }
    }

    // Reverse lookups are not needed for evaluating XPath expressions.
    @Override
    public String getPrefix(String namespaceURI) {
        throw new UnsupportedOperationException();
    }

    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        throw new UnsupportedOperationException();
    }
});

and then use that namespace-prefix in your XPath:

/i:newsItem/i:itemMeta
Sign up to request clarification or add additional context in comments.

Comments

1

This may be the simplest way to query XML that uses namespaces.

First, create a namespace context

/**
 * {@link NamespaceContext} backed by a DOM node. A prefix is resolved against the node
 * itself first and, failing that, against its descendant elements in document order, so
 * prefixes declared anywhere below the node can be used in XPath expressions.
 */
public static class NamespaceResolver implements NamespaceContext {
    //Store the source node to search the namespaces
    private final Node sourceNode;

    /**
     * @param node the node (typically the parsed document) whose namespace declarations are searched
     */
    public NamespaceResolver(Node node) {
        sourceNode = node;
    }

    /**
     * Resolves a prefix to its namespace URI.
     *
     * @param prefix the prefix to resolve; the empty string asks for the default namespace
     * @return the bound namespace URI, or {@code null} when no declaration is found
     * @throws IllegalArgumentException if {@code prefix} is {@code null}, as the
     *         {@link NamespaceContext} contract requires
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        if (XMLConstants.DEFAULT_NS_PREFIX.equals(prefix)) {
            // The DOM API expresses "default namespace" as a null prefix.
            return sourceNode.lookupNamespaceURI(null);
        }
        return lookupNamespaceURI(sourceNode, prefix);
    }

    /**
     * Depth-first search for a declaration of {@code prefix}, starting at {@code node}.
     * Every child element is tried in turn; the search only gives up once all of them
     * have been exhausted.
     */
    private String lookupNamespaceURI(Node node, String prefix) {
        String uri = node.lookupNamespaceURI(prefix);
        if (uri != null) {
            return uri;
        }
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                String found = lookupNamespaceURI(child, prefix);
                if (found != null) {
                    return found;
                }
                // Not declared in this subtree: keep looking in the following siblings.
            }
        }
        return null;
    }

    /** Reverse lookup is not supported by this resolver. */
    @Override
    public String getPrefix(String namespaceURI) {
        throw new UnsupportedOperationException();
    }

    /** Reverse lookup is not supported by this resolver. */
    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        throw new UnsupportedOperationException();
    }
}

Then you can query an XML node with XPath like this:

/**
 * Demonstrates querying a namespaced document: parses a small inline XML string with a
 * namespace-aware parser, installs a {@code NamespaceResolver} built from the parsed
 * document, and prints the text of the element selected by a prefixed XPath expression.
 *
 * @throws Exception if the XML cannot be parsed or the XPath expression cannot be compiled
 */
public static void main(String[] args) throws Exception {
    String text = "<root xmlns=\"https://abc.xyz\" xmlns:name1=\"https://abc.xyz\" xmlns:name2=\"https:abc.xyz\">\n" +
            "    <name1:a>\n" +
            "        <name2:b>this is a test</name2:b>\n" +
            "    </name1:a>\n" +
            "</root>";
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);
    DocumentBuilder builder = factory.newDocumentBuilder();
    // Plain String.trim() is enough here and avoids a third-party StringUtils dependency.
    text = text.trim();
    InputStream inputStream = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
    Document document = builder.parse(inputStream);


    NamespaceResolver namespaceResolver = new NamespaceResolver(document);
    XPath xPath = XPathFactory.newInstance().newXPath();
    xPath.setNamespaceContext(namespaceResolver);


    // XPath 1.0 has no syntax for "default namespace": every namespaced step needs a real
    // prefix. The root element is in https://abc.xyz, which the document also binds to the
    // prefix name1, so name1:root addresses it in a standards-compliant way.
    Node node = (Node) xPath.compile("/name1:root/name1:a/name2:b").evaluate(document, XPathConstants.NODE);
    if (node == null) {
        throw new IllegalStateException("XPath expression matched no node");
    }
    System.out.println(node.getTextContent());
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.