Parsing XML with multiple namespaces with xPath in Java

Question

I am trying to parse an XML document that declares two xmlns namespaces, and all my XPath queries are returning null.

I want to parse the values of category nodes and create an array but because the document has two namespaces, no matter what xpath expression I use, it always returns null.

If I remove one namespace then it works fine. I have looked at other answers but couldn't find anything that works, so I am posting this as a new question.

Here's what I have tried so far. I am using this article as a reference.

Thanks for your help in advance.

import java.io.FileInputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.xml.sax.InputSource;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;


/**
 * Reproduction program for the question: parses an inline NewsML-G2 sample
 * document with a namespace-aware parser, evaluates an XPath expression against
 * it and prints the value of every node in the result.
 */
class Main
{
    public static void main(String[] args) throws Exception
    {

        //Parse the XML (supplied inline as a string rather than read from a file)
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        //Namespace awareness is switched on, so parsed elements carry their namespace URI
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        //The root element declares a default namespace (xmlns=...) and the xsi prefix
        Document doc = builder.parse(new InputSource(new StringReader("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                "<newsItem guid=\"urn:newsml:news.com.au:20210401\" version=\"1\"\n" +
                "  standard=\"NewsML-G2\" standardversion=\"2.9\"\n" +
                "  xmlns=\"http://iptc.org/std/nar/2006-10-01/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n" +
                "  <catalogRef href=\"http://www.iptc.org/std/catalog/catalog.IPTC-G2-Standards_16.xml\"/>\n" +
                "  <itemMeta>\n" +
                "    <itemClass qcode=\"ninat:video\"/>\n" +
                "    <provider>\n" +
                "      <name>FoxSports</name>\n" +
                "    </provider>\n" +
                "    <versionCreated>2021-04-01T16:10:15.736+11:00</versionCreated>\n" +
                "    <event>create</event>\n" +
                "  </itemMeta>\n" +
                "  <contentMeta>\n" +
                "    <FWID>0</FWID>\n" +
                "    <originalId>799186</originalId>\n" +
                "    <contentCreated>2021-04-01T16:10:15.736+11:00</contentCreated>\n" +
                "    <expiration>2021-05-01T15:00:43.057+10:00</expiration>\n" +
                "    <slugline>Test - Video Name</slugline>\n" +
                "    <headline>Test - video headline</headline>\n" +
                "    <description>Test AFL: David King breaks down his new theory surrounding Dimma and the Tigers. </description>\n" +
                "    <category>\n" +
                "      <id>208</id>\n" +
                "      <name>AFL</name>\n" +
                "      <category>\n" +
                "        <id>320</id>\n" +
                "        <name>AFL 360</name>\n" +
                "      </category>\n" +
                "    </category>\n" +
                "    <collections>\n" +
                "      <collection>\n" +
                "        <id>138</id>\n" +
                "        <name>alexa</name>\n" +
                "      </collection>\n" +
                "    </collections>\n" +
                "    <isPremiumPay>false</isPremiumPay>\n" +
                "    <geoblock>false</geoblock>\n" +
                "  </contentMeta>\n" +
                "  <contentSet>\n" +
                "    <remoteContent id=\"web\"\n" +
                "      href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09.jpg\" version=\"1\"\n" +
                "      rendition=\"rnd:web\" size=\"44848\" contenttype=\"image/jpeg\"\n" +
                "      width=\"640\" height=\"360\" colourspace=\"colsp:sRGB\" orientation=\"1\" resolution=\"96\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_564.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"512\" height=\"288\" duration=\"121\"\n" +
                "      audiobitrate=\"64000\" videoavgbitrate=\"500000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_248.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"512\" height=\"288\" duration=\"121\"\n" +
                "      audiobitrate=\"48000\" videoavgbitrate=\"200000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_1596.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"800\" height=\"450\" duration=\"121\"\n" +
                "      audiobitrate=\"96000\" videoavgbitrate=\"1500000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_2628.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"1280\" height=\"720\" duration=\"121\"\n" +
                "      audiobitrate=\"128000\" videoavgbitrate=\"2500000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_1096.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"640\" height=\"360\" duration=\"121\"\n" +
                "      audiobitrate=\"96000\" videoavgbitrate=\"1000000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_896.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"640\" height=\"360\" duration=\"121\"\n" +
                "      audiobitrate=\"96000\" videoavgbitrate=\"800000\" videoaspectratio=\"16:9\"/>\n" +
                "  </contentSet>\n" +
                "</newsItem>\n")));

        //Get XPath expression
        XPathFactory xpathfactory = XPathFactory.newInstance();
        XPath xpath = xpathfactory.newXPath();
        //Prefix lookups made by the XPath engine are answered from the parsed document
        xpath.setNamespaceContext(new NamespaceResolver(doc));
        //NOTE(review): the steps below carry no namespace prefix, while every element of the
        //document is in the default namespace declared on <newsItem>; unprefixed XPath steps
        //select elements in no namespace, which is the reported "nothing matches" symptom.
        XPathExpression expr = xpath.compile("/newsItem/itemMeta");

        //Search XPath expression
        Object result = expr.evaluate(doc, XPathConstants.NODESET);

        //Iterate over the matched nodes and print each node's value
        //NOTE(review): getNodeValue() is presumably null for element nodes; getTextContent()
        //may be what is intended here - confirm.
        NodeList nodes = (NodeList) result;
        for (int i = 0; i < nodes.getLength(); i++) {
            System.out.println(nodes.item(i).getNodeValue());
        }
        
    }
}
/**
 * {@link NamespaceContext} that answers prefix and namespace-URI lookups from the
 * namespace declarations of a parsed {@link Document}.
 *
 * <p>The lookups follow the {@code NamespaceContext} contract: {@code null}
 * arguments are rejected with {@link IllegalArgumentException}, the reserved
 * {@code xml} and {@code xmlns} bindings are fixed, an unbound prefix resolves to
 * {@link XMLConstants#NULL_NS_URI}, and {@link #getPrefixes(String)} never
 * returns {@code null}.
 */
class NamespaceResolver implements NamespaceContext
{
    //Store the source document to search the namespaces
    private final Document sourceDocument;

    public NamespaceResolver(Document document) {
        sourceDocument = document;
    }

    /**
     * Resolves a prefix to its namespace URI; the lookup is delegated to the stored document.
     *
     * @param prefix the prefix to resolve; the empty string means the default namespace
     * @return the bound namespace URI, or {@link XMLConstants#NULL_NS_URI} when the prefix is unbound
     * @throws IllegalArgumentException if {@code prefix} is {@code null}
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        // The two reserved prefixes have fixed bindings that need not be declared in the document.
        if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
            return XMLConstants.XML_NS_URI;
        }
        if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
            return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
        }
        // DOM expects null (not "") when asking for the default namespace.
        String domPrefix = prefix.equals(XMLConstants.DEFAULT_NS_PREFIX) ? null : prefix;
        String uri = sourceDocument.lookupNamespaceURI(domPrefix);
        return uri != null ? uri : XMLConstants.NULL_NS_URI;
    }

    /**
     * Finds a prefix bound to the given namespace URI.
     *
     * @param namespaceURI the namespace URI to look up
     * @return a bound prefix, the empty string when the URI is only bound as the
     *         default namespace, or {@code null} when the URI is unbound
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public String getPrefix(String namespaceURI) {
        if (namespaceURI == null) {
            throw new IllegalArgumentException("namespaceURI must not be null");
        }
        if (XMLConstants.XML_NS_URI.equals(namespaceURI)) {
            return XMLConstants.XML_NS_PREFIX;
        }
        if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
            return XMLConstants.XMLNS_ATTRIBUTE;
        }
        String prefix = sourceDocument.lookupPrefix(namespaceURI);
        // lookupPrefix ignores default-namespace declarations, so check for those separately.
        if (prefix == null && sourceDocument.isDefaultNamespace(namespaceURI)) {
            return XMLConstants.DEFAULT_NS_PREFIX;
        }
        return prefix;
    }

    /**
     * Returns the prefixes bound to the given namespace URI.
     *
     * @param namespaceURI the namespace URI to look up
     * @return a read-only iterator over at most one prefix (the one reported by
     *         {@link #getPrefix(String)}); empty when the URI is unbound, never {@code null}
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        List<String> prefixes = new ArrayList<String>();
        String prefix = getPrefix(namespaceURI);
        if (prefix != null) {
            prefixes.add(prefix);
        }
        return java.util.Collections.unmodifiableList(prefixes).iterator();
    }
}

Mads Hansen · Accepted Answer · 2021-07-23 14:11:41Z

2

Your XML elements are bound to the namespace http://iptc.org/std/nar/2006-10-01/, but your XPath is not using any namespace-prefixes, so /newsItem/itemMeta is asking for elements that are bound to no namespace.

You could address them by just the local-name():

/*[local-name()='newsItem']/*[local-name()='itemMeta']

Otherwise, you need to register the namespace with a namespace prefix, or use a custom NamespaceContext to resolve the namespace from your chosen namespace-prefix:

// Bind the prefix "i" (any name you choose) to the document's namespace URI.
xpath.setNamespaceContext(new NamespaceContext() {
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        switch (prefix) {
            case "i": return "http://iptc.org/std/nar/2006-10-01/";
            // ... add further prefix-to-URI mappings here
            default: return XMLConstants.NULL_NS_URI; // unbound prefix
        }
    }

    // Reverse lookups are not provided by this minimal context.
    @Override
    public String getPrefix(String namespaceURI) {
        throw new UnsupportedOperationException();
    }

    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        throw new UnsupportedOperationException();
    }
});

and then use that namespace-prefix in your XPath:

/i:newsItem/i:itemMeta

answered Jul 23, 2021 at 14:11

Mads Hansen

67.6k12 gold badges119 silver badges154 bronze badges

Sign up to request clarification or add additional context in comments.

Comments

luckyqiao · Accepted Answer · 2023-09-26 02:40:49Z

This may be the simplest way to query XML that uses namespaces.

First, create a namespace context

/**
 * {@link NamespaceContext} that resolves a prefix from the namespace declarations
 * reachable from a stored DOM node. Non-default prefixes that are not in scope on
 * the node itself are searched for, depth-first, in its descendant elements.
 *
 * <p>Only prefix-to-URI resolution is supported; the reverse lookups throw
 * {@link UnsupportedOperationException}.
 */
public static class NamespaceResolver implements NamespaceContext {
    //Store the source node to search the namespaces
    private final Node sourceNode;

    public NamespaceResolver(Node node) {
        sourceNode = node;
    }

    /**
     * Resolves a prefix to its namespace URI; the lookup is delegated to the stored node.
     *
     * @param prefix the prefix to resolve; the empty string means the default namespace
     * @return the bound namespace URI, or {@link XMLConstants#NULL_NS_URI} when none is found
     * @throws IllegalArgumentException if {@code prefix} is {@code null}
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        String uri;
        if (prefix.equals(XMLConstants.DEFAULT_NS_PREFIX)) {
            // DOM expects null (not "") when asking for the default namespace.
            uri = sourceNode.lookupNamespaceURI(null);
        } else {
            uri = lookupNamespaceURI(sourceNode, prefix);
        }
        return uri != null ? uri : XMLConstants.NULL_NS_URI;
    }

    /**
     * Looks the prefix up on {@code node} and, failing that, in each element child in turn.
     *
     * @return the first namespace URI found in document order, or {@code null} if the
     *         prefix is declared nowhere in the subtree
     */
    private String lookupNamespaceURI(Node node, String prefix) {
        String uri = node.lookupNamespaceURI(prefix);
        if (uri != null) {
            return uri;
        }
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                String found = lookupNamespaceURI(child, prefix);
                // Keep scanning the remaining siblings when this subtree has no binding.
                if (found != null) {
                    return found;
                }
            }
        }
        return null;
    }

    @Override
    public String getPrefix(String namespaceURI) {
        throw new UnsupportedOperationException();
    }

    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        throw new UnsupportedOperationException();
    }
}

Then you can query an XML node using XPath like this:

/**
 * Demo: parses a small document that declares a default namespace plus two
 * prefixed namespaces, selects an element with a prefixed XPath expression and
 * prints its text content.
 *
 * @throws IllegalStateException if the XPath expression selects no node
 */
public static void main(String[] args) throws Exception {
    String text = "<root xmlns=\"https://abc.xyz\" xmlns:name1=\"https://abc.xyz\" xmlns:name2=\"https:abc.xyz\">\n" +
            "    <name1:a>\n" +
            "        <name2:b>this is a test</name2:b>\n" +
            "    </name1:a>\n" +
            "</root>";
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);
    DocumentBuilder builder = factory.newDocumentBuilder();
    // Plain String.trim() is enough here (text is never null), so no third-party StringUtils is needed.
    text = text.trim();
    Document document;
    try (InputStream inputStream = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8))) {
        document = builder.parse(inputStream);
    }

    NamespaceResolver namespaceResolver = new NamespaceResolver(document);
    XPath xPath = XPathFactory.newInstance().newXPath();
    xPath.setNamespaceContext(namespaceResolver);

    // NOTE(review): the empty prefix in ":root" is not standard XPath 1.0 syntax; it presumably
    // relies on the XPath implementation asking the NamespaceContext for "" - confirm on your runtime.
    Node node = (Node) xPath.compile("/:root/name1:a/name2:b").evaluate(document, XPathConstants.NODE);
    if (node == null) {
        throw new IllegalStateException("XPath expression matched no node");
    }
    System.out.println(node.getTextContent());
}

Collectives™ on Stack Overflow

Parsing XML with multiple namespaces with xPath in Java

2 Answers 2

Comments

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

2 Answers 2

Comments

Comments

Your Answer

Sign up or log in

Post as a guest

Related