SAXParser
Read UTF-8 XML File in Java using SAX parser example
In the previous SAX parser tutorial we saw how to parse and read a simple XML file. If your file is encoded in UTF-8, there is a chance that the parser will throw a MalformedByteSequenceException while reading it. To solve this, read the file through a Reader that decodes UTF-8 and set the InputSource encoding to UTF-8.
You can do this with the following code:
// Open the XML file as a raw byte stream.
InputStream inputStream = new FileInputStream(xmlFile);
// Decode the bytes explicitly as UTF-8 instead of relying on the platform default charset.
InputStreamReader inputReader = new InputStreamReader(inputStream, "UTF-8");
// Hand the decoded character stream to the SAX parser via an InputSource.
InputSource inputSource = new InputSource(inputReader);
// setEncoding is an instance method, so it must be called on the inputSource object.
inputSource.setEncoding("UTF-8");
Here is the XML file we are going to use for our demo. It contains the non-ASCII character ©, which takes two bytes when encoded in UTF-8.
testFile.xml:
<?xml version="1.0" encoding="UTF-8" standalone="no"?><company>
<employee id="10">
<firstname>Jeremy</firstname>
<lastname>Harley</lastname>
<email>james@example.org</email>
<department>Human Resources</department>
<salary>2000000</salary>
<address>34 Stanley St.©</address>
</employee>
<employee id="2">
<firstname>John</firstname>
<lastname>May</lastname>
<email>john@example.org</email>
<department>Logistics</department>
<salary>400</salary>
<address>123 Stanley St.</address>
</employee>
</company>
MyHandler.java:
package com.javacodegeeks.java.core;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that echoes the parsed document to standard output.
 *
 * <p>Every start tag (with its attributes) and every end tag is printed on its
 * own line. For the elements listed in {@link #TEXT_ELEMENTS} the text content
 * is printed on a line between the start and end tag.
 */
public class MyHandler extends DefaultHandler {

    /** Names (compared case-insensitively) of the elements whose text is printed. */
    private static final String[] TEXT_ELEMENTS = {
        "firstname", "lastname", "email", "department", "salary", "address"
    };

    /** Text collected for the element currently being captured. */
    private final StringBuilder text = new StringBuilder();

    /** True while the parser is inside an element whose text should be printed. */
    private boolean capturing = false;

    /**
     * Prints the start tag of an element, including its attributes.
     *
     * @param uri        namespace URI of the element (unused)
     * @param localName  local name of the element (unused)
     * @param qName      qualified name of the element, used as the printed tag name
     * @param attributes attributes attached to the element
     * @throws SAXException declared by the SAX contract; not thrown here
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        // Emit any text seen so far before a nested tag is printed.
        flushText();

        StringBuilder tag = new StringBuilder("<").append(qName);
        for (int i = 0; i < attributes.getLength(); i++) {
            // Use the qualified name: the local name is empty when the parser
            // is not namespace-aware, which is the SAXParserFactory default.
            tag.append(' ')
               .append(attributes.getQName(i))
               .append('=')
               .append(attributes.getValue(i));
        }
        tag.append('>');
        System.out.println(tag.toString());

        capturing = isTextElement(qName);
    }

    /**
     * Collects character data for the element currently being captured.
     *
     * <p>A parser may deliver the text of a single element in several chunks,
     * so the data is buffered here and printed once the element ends.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character to read
     * @param length number of characters to read
     * @throws SAXException declared by the SAX contract; not thrown here
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (capturing) {
            text.append(ch, start, length);
        }
    }

    /**
     * Prints the buffered text (if any) followed by the end tag of the element.
     *
     * @param uri       namespace URI of the element (unused)
     * @param localName local name of the element (unused)
     * @param qName     qualified name of the element, used as the printed tag name
     * @throws SAXException declared by the SAX contract; not thrown here
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        flushText();
        // Stop capturing so whitespace between elements is never printed.
        capturing = false;
        System.out.println("</" + qName + ">");
    }

    /** Returns true when the text of the named element should be printed. */
    private static boolean isTextElement(String qName) {
        for (String name : TEXT_ELEMENTS) {
            if (name.equalsIgnoreCase(qName)) {
                return true;
            }
        }
        return false;
    }

    /** Prints the buffered text on its own line and clears the buffer. */
    private void flushText() {
        if (text.length() > 0) {
            System.out.println(text.toString());
            text.setLength(0);
        }
    }
}
// ParseUTF8XMLFileWithSAX.java:
package com.javacodegeeks.java.core;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.InputSource;
public class ParseUTF8XMLFileWithSAX {
private static final String xmlFilePath = "C:\\Users\\nikos7\\Desktop\\filesForExamples\\testFile.xml";
public static void main(String argv[]) {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser saxParser = factory.newSAXParser();
File xmlFile = new File(xmlFilePath);
InputStream inputStream= new FileInputStream(xmlFile);
InputStreamReader inputReader = new InputStreamReader(inputStream,"UTF-8");
InputSource inputSource = new InputSource(inputReader);
inputSource.setEncoding("UTF-8");
saxParser.parse(inputSource, new MyHandler());
} catch (Exception e) {
e.printStackTrace();
}
}
}Output:
<company>
<employee id=10>
<firstname>
Jeremy
</firstname>
<lastname>
Harley
</lastname>
<email>
james@example.org
</email>
<department>
Human Resources
</department>
<salary>
2000000
</salary>
<address>
34 Stanley St.©
</address>
</employee>
<employee id=2>
<firstname>
John
</firstname>
<lastname>
May
</lastname>
<email>
john@example.org
</email>
<department>
Logistics
</department>
<salary>
400
</salary>
<address>
123 Stanley St.
</address>
</employee>
</company>
This was an example of how to read a UTF-8 XML file in Java using the SAX parser.
