0

I have the following XML string

  <Result>
    <Component>PEV   CR</Component>
    <ComponentText xml:lang="pt-BR" />
    <ComponentText xml:lang="en-US">PEV   CR</ComponentText>
    <Item>CR_PEVPARA_BON BAD!</Item>
    <ItemText xml:lang="pt-BR">CR_PEVPARA_BON RUIM!</ItemText>
    <ItemText xml:lang="en-US">CR_PEVPARA_BON BAD!</ItemText>
    <ResultType>State</ResultType>
    <ResultText xml:lang="pt-BR" />
    <ResultText xml:lang="en-US" />
    <ResultState>NotOk</ResultState>
    <Type_State>
      <ActualString>0</ActualString>
      <ReferenceString>1</ReferenceString>
    </Type_State>
  </Result>
  <Result>
    <Component>NAV</Component>
    <ComponentText xml:lang="pt-BR" />
    <ComponentText xml:lang="en-US">NAV</ComponentText>
    <Item>ECU NO RESPONSE</Item>
    <ItemText xml:lang="pt-BR">SEM RESPOSTA UCE</ItemText>
    <ItemText xml:lang="en-US">ECU NO RESPONSE</ItemText>
    <ResultType>Execution</ResultType>
    <ResultText xml:lang="pt-BR" />
    <ResultText xml:lang="en-US" />
    <ResultState>NotOk</ResultState>
    <Type_Execution />
    <DetailTags>
      <Nack00.NackTextToTicket>Sem comunicacao UCE (00)</Nack00.NackTextToTicket>
      <Nack00.NackTextToStatistic>Nack: 0x7F 0x?? 0x00</Nack00.NackTextToStatistic>
    </DetailTags>
  </Result>

I want to access, for example, the text inside this tag:

<ItemText xml:lang="pt-BR">SEM RESPOSTA UCE</ItemText>

Is there a Java library that provides this functionality in an XPath-like fashion, similar to Selenium?

4

1 Answer 1

0

You can do this using jsoup; it is pretty simple, too. Please look at the comments in the code.

Maven:

    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.13.1</version>
    </dependency>

Test Code

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Small demo that parses an XML fragment with jsoup and reads the
 * {@code ItemText} elements out of it using a CSS-style selector.
 */
public class Test {

    /**
     * Parses the sample document and prints, in order: every {@code ItemText}
     * element, the first {@code ItemText} element, and the text of that first
     * element.
     *
     * @param args command-line arguments (not used)
     * @throws IOException declared by the original signature
     */
    public static void main(String[] args) throws IOException {

        // The sample document, one source line per entry, joined with CRLF.
        String s = String.join("\r\n",
                "<Result>",
                "    <Component>PEV   CR</Component>",
                "    <ComponentText xml:lang=\"pt-BR\" />",
                "    <ComponentText xml:lang=\"en-US\">PEV   CR</ComponentText>",
                "    <Item>CR_PEVPARA_BON BAD!</Item>",
                "    <ItemText xml:lang=\"pt-BR\">CR_PEVPARA_BON RUIM!</ItemText>",
                "    <ItemText xml:lang=\"en-US\">CR_PEVPARA_BON BAD!</ItemText>",
                "    <ResultType>State</ResultType>",
                "    <ResultText xml:lang=\"pt-BR\" />",
                "    <ResultText xml:lang=\"en-US\" />",
                "    <ResultState>NotOk</ResultState>",
                "    <Type_State>",
                "      <ActualString>0</ActualString>",
                "      <ReferenceString>1</ReferenceString>",
                "    </Type_State>",
                "</Result>",
                "<Result>",
                "<Component>NAV</Component>",
                "<ComponentText xml:lang=\"pt-BR\" />",
                "<ComponentText xml:lang=\"en-US\">NAV</ComponentText>",
                "<Item>ECU NO RESPONSE</Item>",
                "<ItemText xml:lang=\"pt-BR\">SEM RESPOSTA UCE</ItemText>",
                "<ItemText xml:lang=\"en-US\">ECU NO RESPONSE</ItemText>",
                "<ResultType>Execution</ResultType>",
                "<ResultText xml:lang=\"pt-BR\" />",
                "<ResultText xml:lang=\"en-US\" />",
                "<ResultState>NotOk</ResultState>",
                "<Type_Execution />",
                "<DetailTags>",
                "  <Nack00.NackTextToTicket>Sem comunicacao UCE (00)</Nack00.NackTextToTicket>",
                "  <Nack00.NackTextToStatistic>Nack: 0x7F 0x?? 0x00</Nack00.NackTextToStatistic>",
                "</DetailTags>",
                "</Result>");

        Document doc = Jsoup.parse(s);

        // Selector shared by the three lookups below.
        final String itemTextSelector = "ItemText";

        // 1) Every matching element. The selection holds several
        //    org.jsoup.nodes.Element objects and prints them all.
        String allItemTexts = doc.select(itemTextSelector).toString();
        System.out.println(allItemTexts);
        /* Output reported for this statement:

                <itemtext xml:lang="pt-BR">
                 CR_PEVPARA_BON RUIM!
                </itemtext>
                <itemtext xml:lang="en-US">
                 CR_PEVPARA_BON BAD!
                </itemtext>
                <itemtext xml:lang="pt-BR">
                 SEM RESPOSTA UCE
                </itemtext>
                <itemtext xml:lang="en-US">
                 ECU NO RESPONSE
                </itemtext>
         */

        // 2) Only the first match. Printing the element emits its full
        //    markup, not just the text it contains.
        String firstItemMarkup = doc.select(itemTextSelector).get(0).toString();
        System.out.println(firstItemMarkup);
        /* Output reported for this statement:

                <itemtext xml:lang="pt-BR">
                 CR_PEVPARA_BON RUIM!
                </itemtext>
         */

        // 3) The text of the first match: text() returns only the text
        //    the element holds.
        String firstItemText = doc.select(itemTextSelector).get(0).text();
        System.out.println(firstItemText);
        /* Output reported for this statement:

                CR_PEVPARA_BON RUIM!
         */
    }
}
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.