37

I want to implement openXml sdk 2.5 into my project. I do everything in this link

using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System.IO.Packaging;


static void Main(string[] args)
        {

            String fileName = @"C:\OPENXML\BigData.xlsx";
            // Comment one of the following lines to test the method separately.
            ReadExcelFileDOM(fileName);    // DOM
            //ReadExcelFileSAX(fileName);    // SAX
        }

        // The DOM approach.
        // Note that the code below works only for cells that contain numeric values.
        // 
        static void ReadExcelFileDOM(string fileName)
        {
            using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
            {
                WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
                WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
                SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
                string text;

                int rowCount= sheetData.Elements<Row>().Count();

                foreach (Row r in sheetData.Elements<Row>())
                {
                    foreach (Cell c in r.Elements<Cell>())
                    {
                        text = c.CellValue.Text;
                        Console.Write(text + " ");
                    }
                }
                Console.WriteLine();
                Console.ReadKey();
            }
        }

But i am not getting any row. It hasn't entered loop. Note: I also set up openXml sdk 2.5 my computer

And I find below code this is work for numeric value.For string value it writes 0 1 2 ...

 private static void Main(string[] args)
            {
                var filePath = @"C:/OPENXML/BigData.xlsx";
                using (var document = SpreadsheetDocument.Open(filePath, false))
                {
                    var workbookPart = document.WorkbookPart;
                    var workbook = workbookPart.Workbook;

                    var sheets = workbook.Descendants<Sheet>();
                    foreach (var sheet in sheets)
                    {
                        var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id);
                        var sharedStringPart = workbookPart.SharedStringTablePart;
                        //var values = sharedStringPart.SharedStringTable.Elements<SharedStringItem>().ToArray();

                        string text;
                        var rows = worksheetPart.Worksheet.Descendants<Row>();
                        foreach (var row in rows)
                        {
                            Console.WriteLine();
                            int count = row.Elements<Cell>().Count();

                            foreach (Cell c in row.Elements<Cell>())
                            {

                                text = c.CellValue.InnerText;

                                Console.Write(text + " ");

                            }
                        }
                    }
                }

                Console.ReadLine();
            }
1
  • 4
    The reason it writes 0, 1, 2... is because of the shared string table that Excel uses. See my answer below for a basic approach to accessing the actual text. Commented Apr 16, 2014 at 10:32

5 Answers 5

83

Your approach seemed to work ok for me - in that it did "enter the loop". Nevertheless you could also try something like the following:

void Main()
{
    string fileName = @"c:\path\to\my\file.xlsx";

    using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    {
        using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fs, false))
        {
            WorkbookPart workbookPart = doc.WorkbookPart;
            SharedStringTablePart sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().First();
            SharedStringTable sst = sstpart.SharedStringTable;
        
            WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
            Worksheet sheet = worksheetPart.Worksheet;
            
            var cells = sheet.Descendants<Cell>();
            var rows = sheet.Descendants<Row>();
        
            Console.WriteLine("Row count = {0}", rows.LongCount());
            Console.WriteLine("Cell count = {0}", cells.LongCount());
        
            // One way: go through each cell in the sheet
            foreach (Cell cell in cells)
            {
                if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
                {
                    int ssid = int.Parse(cell.CellValue.Text);
                    string str = sst.ChildElements[ssid].InnerText;
                    Console.WriteLine("Shared string {0}: {1}", ssid, str);
                }
                else if (cell.CellValue != null)
                {
                    Console.WriteLine("Cell contents: {0}", cell.CellValue.Text);
                }
             }
        
             // Or... via each row
             foreach (Row row in rows)
             {
                 foreach (Cell c in row.Elements<Cell>())
                 {
                     if ((c.DataType != null) && (c.DataType == CellValues.SharedString))
                     {
                         int ssid = int.Parse(c.CellValue.Text);
                         string str = sst.ChildElements[ssid].InnerText;
                         Console.WriteLine("Shared string {0}: {1}", ssid, str);
                     }
                     else if (c.CellValue != null)
                     {
                         Console.WriteLine("Cell contents: {0}", c.CellValue.Text);
                     }
                 }
             }
         }
     }
 }

I used the filestream approach to open the workbook because this allows you to open it with shared access - so that you can have the workbook open in Excel at the same time. The Spreadsheet.Open(... method won't work if the workbook is open elsewhere.

Perhaps that is why your code didn't work.

Note, also, the use of the SharedStringTable to get the cell text where appropriate.

EDIT 2018-07-11:

Since this post is still getting votes I should also point out that in many cases it may be a lot easier to use ClosedXML to manipulate/read/edit your workbooks. The documentation examples are pretty user friendly and the coding is, in my limited experience, much more straight forward. Just be aware that it does not (yet) implement all the Excel functions (for example INDEX and MATCH) which may or may not be an issue. [Not that I would want to be trying to deal with INDEX and MATCH in OpenXML anyway.]

Sign up to request clarification or add additional context in comments.

12 Comments

I think your code absolutely work but When i copy above code to my project it doesn't work and i don't find why it doesn't work.
Then you need to debug it or give a little more information than "it doesn't work". eg at what point does it fail, does it produce an exception, what are the row and cell counts. Also, for example, some Assert statements - like Assert(workbookPart != null); Assert(worksheetPart != null); Assert(sheet != null); etc etc.
it creates object about workbookPart worksheetPart. But when i want to check ResultViews i see it Empty = "Enumeration yielded no results"
I change openxml version v2.0.50727 and Windowsbase v4.0.30319. could it wrong thing about version?
I mentioned it in the answer - most of the docs online suggest using the SpreadsheetDocument.Open(... approach and this works fine - but will only work if the workbook is not open elsewhere. For what I was doing I needed to be able to have the workbook open in Excel and still read it with OpenXML. The filestream approach allowed me to specify the file open and sharing modes. Obviously this would not be a good idea if trying to write to the workbook but I wanted read-only access via OpenXML. I doubt it's any faster as I presume OpenXML uses a filestream beneath the covers anyway.
|
20

I had the same issue as the OP, and the answer above did not work for me.

I think this is the issue: when you create a document in Excel (not programmatically), you have 3 sheets by default and the WorksheetParts that has the row data for Sheet1 is the last WorksheetParts element, not the first.

I figured this out by putting a watch for document.WorkbookPart.WorksheetParts in Visual Studio, expanding Results, then looking at all of the sub elements until I found a SheetData object where HasChildren = true.

Try this:

// open the document read-only
SpreadsheetDocument document = SpreadsheetDocument.Open(filePath, false);
SharedStringTable sharedStringTable = document.WorkbookPart.SharedStringTablePart.SharedStringTable;
string cellValue = null;

foreach (WorksheetPart worksheetPart in document.WorkbookPart.WorksheetParts)
{
    foreach (SheetData sheetData in worksheetPart.Worksheet.Elements<SheetData>())
    {
        if (sheetData.HasChildren)
        {
            foreach (Row row in sheetData.Elements<Row>())
            {
                foreach (Cell cell in row.Elements<Cell>())
                {
                    cellValue = cell.InnerText;

                    if (cell.DataType != null && cell.DataType == CellValues.SharedString)
                    {
                        Console.WriteLine("cell val: " + sharedStringTable.ElementAt(Int32.Parse(cellValue)).InnerText);
                    }
                    else
                    {
                        Console.WriteLine("cell val: " + cellValue);
                    }
                }
                Console.WriteLine("---");
            }
        }
    }
}
document.Close();

Comments

11

Read Large Excel : openxml has two approaches of DOM and SAX to read an excel. the DOM one consume more RAM resource since it loads the whole xml content(Excel file) in Memory but its strong typed approach. SAX in other hand is event base parse. more here

so if you are facing large excel file its better to use SAX.

the below code sample uses SAX approach and also handle two important scenario in excel file reading.

  1. open xml skips the empty cells so your dataset faces displacement and wrong index.
  2. you need to skip the empty rows also.

this function returns the exact actual index of the cell at the time and handle the first scenario. from here

private static int CellReferenceToIndex(Cell cell)
{
    int index = 0;
    string reference = cell.CellReference.ToString().ToUpper();
    foreach (char ch in reference)
    {
         if (Char.IsLetter(ch))
         {
              int value = (int)ch - (int)'A';
              index = (index == 0) ? value : ((index + 1) * 26) + value;
         }
         else
            return index;
    }
    return index;
}

code to read excel sax approach.

//i want to import excel to data table
dt = new DataTable();

using (SpreadsheetDocument document = SpreadsheetDocument.Open(path, false))
{
    WorkbookPart workbookPart = document.WorkbookPart;
    WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();

    OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);

    //row counter
    int rcnt = 0;

    while (reader.Read())
    {
        //find xml row element type 
        //to understand the element type you can change your excel file eg : test.xlsx to test.zip
        //and inside that you may observe the elements in xl/worksheets/sheet.xml
        //that helps to understand openxml better
        if (reader.ElementType == typeof(Row))
        {
            //create data table row type to be populated by cells of this row
            DataRow tempRow = dt.NewRow();

            //***** HANDLE THE SECOND SENARIO*****
            //if row has attribute means it is not a empty row
            if (reader.HasAttributes)
            { 
                //read the child of row element which is cells

                //here first element
                reader.ReadFirstChild();

                do
                {
                    //find xml cell element type 
                    if (reader.ElementType == typeof(Cell))
                    {
                        Cell c = (Cell)reader.LoadCurrentElement();
                        string cellValue;
                        int actualCellIndex = CellReferenceToIndex(c);

                        if (c.DataType != null && c.DataType == CellValues.SharedString)
                        {
                            SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));

                            cellValue = ssi.Text.Text;
                        }
                        else
                        {
                            cellValue = c.CellValue.InnerText;
                        }

                        //if row index is 0 its header so columns headers are added & also can do some headers check incase
                        if (rcnt == 0)
                        {
                            dt.Columns.Add(cellValue);
                        }
                        else
                        {
                            // instead of tempRow[c.CellReference] = cellValue;
                            tempRow[actualCellIndex] = cellValue;
                        }
                    }
                }
                while (reader.ReadNextSibling());

                //if its not the header row so append rowdata to the datatable
                if (rcnt != 0)
                {
                    dt.Rows.Add(tempRow);
                }

                rcnt++;
            }
        }
    }
}

Comments

2

One can really do this much easier using some Open XML wrapper like SlapKit or ClosedXML. Technically these two even share the same API since SlapKit is based on ClosedXML. So the code for both of these would look like this:

using var wb = new XLWorkbook("some-file.xlsx");
var ws = wb.Worksheet("sheet1");

foreach (var row in ws.Rows())
{
    foreach (var cell in row.Cells())
    {
        Console.Write("\t|\t" + cell.Value);
    }
    Console.WriteLine();
}

SlapKit documentation maybe explains this somewhat better.

Comments

0

Everything is explained in the accepted answer.
Here is just an extension method to solve the problem

public static string GetCellText(this Cell cell, in SharedStringTable sst)
{
    if (cell.CellValue is null)
        return string.Empty;

    if ((cell.DataType is not null) &&
        (cell.DataType == CellValues.SharedString))
    {
        int ssid = int.Parse(cell.CellValue.Text);
        return sst.ChildElements[ssid].InnerText;
    }

    return cell.CellValue.Text;
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.