1

Description

I have a string representing an XML document, that I want to parse and create a collection of objects of the following types:

public class Invoice
{
    public const string XmlName = "Invoice";

    public int id { get; set; }
    public string title { get; set; }
    public DateTime timestamp { get; set; }
    public bool paid { get; set; }
    public IList<InvoiceItem> items { get; set; }

    public Invoice() { this.items = List<InvoiceItem>(); }

    public double getTotal()
    {
        if (items == null)
            return 0;
        
        double total = 0;

        foreach (InvoiceItem item in this.items)
            total += item.amount;

        return total;
    }
}

public class InvoiceItem
{
    public const string XmlName = "InvoiceItem";

    public string description { get; set; }
    public double amount { get; set;}
}

The XML used for testing purposes is the following. I have not yet created a schema, but essentially, it is a list of Invoices, each containing zero or more InvoiceItems:

<?xml version="1.0" encoding="UTF-8"?>
<invoices>
    <invoice id="1", title="Aug 2020", timestamp="16/08/2020 09:01:29 AM", paid="true">
        <item desc="item 1 in invoice 1", amount="50"/>
        <item desc="item 2 in invoice 1", amount="50"/>
    </invoice>
    <invoice id="2", title="Sep 2020", timestamp="16/09/2020 09:01:29 AM", paid="false">
        <item desc="item in invoice 2", amount="100"/>
    </invoice>
</invoices>

Code

The following is the method used to read the XML and generate the desired output:

public IEnumerable<Invoice> readXML()
{
    XmlReaderSettings xmlReaderSettings = new XmlReaderSettings();
    xmlReaderSettings.IgnoreWhitespace = true;
    xmlReaderSettings.IgnoreComments = true;
    xmlReaderSettings.IgnoreProcessingInstructions = true;

    using (XmlReader reader = XmlReader.Create(new System.IO.StringReader(this._fileHandler.getXML()), xmlReaderSettings))
    {
        reader.MoveToContent(); // skip over the XML declaration, should move to the invoices start tag

        reader.ReadStartElement("invoices"); // move to the next element, should be start tag for an invoice
        while (reader.NodeType == XmlNodeType.Element)
        {
            if (reader.Name == Invoice.XmlName)
            {
                // hit an invoice start tag

                // read it
                Invoice invoice = new Invoice();

                if (reader.MoveToAttribute("id"))
                    invoice.id = reader.ReadContentAsInt();

                if (reader.MoveToAttribute("title"))
                    invoice.title = reader.ReadContentAsString();

                if (reader.MoveToAttribute("timestamp"))
                    invoice.timestamp = DateTime.Parse(reader.ReadContentAsString());

                if (reader.MoveToAttribute("paid"))
                    invoice.paid = reader.ReadContentAsBoolean();

                reader.ReadStartElement("items"); // move to next element, should be items start tag
                    while (reader.NodeType == XmlNodeType.Element)
                    {
                        reader.Read();

                        InvoiceItem invoiceItem = new InvoiceItem();

                        if (reader.Name == InvoiceItem.XmlName)
                        {

                            if (reader.MoveToAttribute("desc"))
                                invoiceItem.description = reader.ReadContentAsString();

                            if (reader.MoveToAttribute("amount"))
                                invoiceItem.amount = reader.ReadContentAsDouble();

                            invoice.items.Add(invoiceItem);
                        }
                        else
                        {
                            throw new XmlException("Unexpected XML node: " + reader.Name);
                        }
                    }

                    yield return invoice;
                    }

                else
                {
                    throw new XmlException("Unexpected XML node: " + reader.Name);
                }
            }
       }
}

In a test, it yields an exception:

System.Xml.XmlException : Name cannot begin with the ',' character, hexadecimal value 0x2C. Line 3, position 17.

What is causing this? Some guidance about how to properly parse this XML into the desired list of objects would be helpful.

2 Answers 2

0

This is not a valid xml string. The ',' character is invalid. Try with the following.

In xml, the attributes are separated by whitespace and that's what the validator is warning you about. The line and character are of the first comma occurance.

<?xml version="1.0" encoding="UTF-8"?>
<invoices>
    <invoice id="1" title="Aug 2020" timestamp="16/08/2020 09:01:29 AM" paid="true">
        <item desc="item 1 in invoice 1" amount="50"/>
        <item desc="item 2 in invoice 1" amount="50"/>
    </invoice>
    <invoice id="2" title="Sep 2020" timestamp="16/09/2020 09:01:29 AM" paid="false">
</invoices>

You need to also change the following line with lowercase:

public const string XmlName = "invoice";
Sign up to request clarification or add additional context in comments.

1 Comment

Yes, I added the commas to the tags by mistake. I fixed it, but now it yields: System.Xml.XmlException : Unexpected XML node: invoice
0

Your xml contains the invoice and item nodes. Therefore, you need to change the value of the XmlName constants. Note: why are they made at all?

I changed the naming case of properties and methods to match the generally accepted style.

Also I replaced double with decimal - don't count money in real numbers.

public class Invoice
{
    public const string XmlName = "invoice";

    public int Id { get; set; }
    public string Title { get; set; }
    public DateTime Timestamp { get; set; }
    public bool Paid { get; set; }
    public IList<InvoiceItem> Items { get; }

    public Invoice() { Items = new List<InvoiceItem>(); }

    public decimal GetTotal()
    {
        if (Items == null)
            return 0;

        decimal total = 0;

        foreach (InvoiceItem item in Items)
            total += item.Amount;

        return total;
    }
}

public class InvoiceItem
{
    public const string XmlName = "item";

    public string Description { get; set; }
    public decimal Amount { get; set; }
}

The XmlReader class contains many convenient methods. In this case, we need ReadToFollowing and ReadToNextSibling.

Here is the parsing code

public IEnumerable<Invoice> readXML()
{
    XmlReaderSettings xmlReaderSettings = new XmlReaderSettings();
    xmlReaderSettings.IgnoreWhitespace = true;
    xmlReaderSettings.IgnoreComments = true;
    xmlReaderSettings.IgnoreProcessingInstructions = true;

    using (var reader = XmlReader.Create(new StringReader(...), xmlReaderSettings))
    {
        while (reader.ReadToFollowing(Invoice.XmlName))
        {
            Invoice invoice = new Invoice();

            if (reader.MoveToAttribute("id"))
                invoice.Id = reader.ReadContentAsInt();

            if (reader.MoveToAttribute("title"))
                invoice.Title = reader.ReadContentAsString();

            if (reader.MoveToAttribute("timestamp"))
                invoice.Timestamp = DateTime.Parse(reader.ReadContentAsString());

            if (reader.MoveToAttribute("paid"))
                invoice.Paid = reader.ReadContentAsBoolean();

            if (reader.ReadToFollowing(InvoiceItem.XmlName))
            {
                do
                {
                    InvoiceItem invoiceItem = new InvoiceItem();

                    if (reader.MoveToAttribute("desc"))
                        invoiceItem.Description = reader.ReadContentAsString();

                    if (reader.MoveToAttribute("amount"))
                        invoiceItem.Amount = reader.ReadContentAsDecimal();

                    invoice.Items.Add(invoiceItem);

                } while (reader.ReadToNextSibling(InvoiceItem.XmlName));
            }

            yield return invoice;
        }
    }
}

Important!

Be sure to specify FormatProvider in the DateTime.Parse method. Because it can work differently on computers with different settings.

Or use the DateTime.ParseExact method with the exact format specified.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.