Question

I have to consume some xlsx documents. I've read Reading a date from xlsx using open xml sdk and http://www.dotnetperls.com/fromoadate. Most of my columns are texts (shared strings), but there are some numbers (integer numbers), and I have also some dates and date-times. I'm using OpenXML SDK 2.5.

My problem is that I don't know how to distinguish the actual numbers from the dates. Both of them has DataType of null, and the textual number representation is in the Text property of the cell.

Some code:

  using (var xlsxStream = assembly.GetManifestResourceStream("Checklist.xlsx"))
  using (var spreadsheetDocument = SpreadsheetDocument.Open(xlsxStream, false))
  {
    var workbookPart = spreadsheetDocument.WorkbookPart;
    var sharedStringTable = workbookPart.SharedStringTablePart.SharedStringTable;
    var worksheetPart = workbookPart.WorksheetParts.First();
    var sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
    string text;
    foreach (Row r in sheetData.Elements<Row>())
    {
      foreach (Cell c in r.Elements<Cell>())
      {
        if (c.CellValue != null)
        {
          text = c.CellValue.Text;
          if (c.DataType != null)
          {
            if (c.DataType.Value == CellValues.SharedString)
            {
              int tableIndex = int.Parse(text);
              text = sharedStringTable.ChildElements[tableIndex].InnerText;
            }
            // note: the date cells do not have c.DataType.Value == CellValues.Date
            // Their c.DataType is null, if they are OLE Automation date numbers
          }
          // So here I am, and I'd need to know if the number supposed to be an
          // OLE Automation date or a number, so I can transform it if needed.
          //if (it's a date) // <- ?????
          //{
          //    double dateDouble = double.Parse(text);
          //    DateTime dateTime = DateTime.FromOADate(dateDouble);
          //    text = dateTime.ToShortDateString();
          //}
          Console.Write(text + " ");
        }
        else
        {
          Console.Write("NULL" + " ");
        }
      }
      Console.WriteLine();
    }
    Console.WriteLine();
    Console.ReadKey();
Was it helpful?

Solution

I just came across similar issue and it is not easy to check whether a cell contains date/time value, see Using cell format to determine a cell contains date/time value, but the issue doesn't end with built-in number formats, I needed to handle custom formats too. There are no utilities in OpenXML SDK 2.5 to help, so I had to write my own (doesn't support Thai date/time formats).

public class ExcelHelper
{
    static uint[] builtInDateTimeNumberFormatIDs = new uint[] { 14, 15, 16, 17, 18, 19, 20, 21, 22, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58 };
    static Dictionary<uint, NumberingFormat> builtInDateTimeNumberFormats = builtInDateTimeNumberFormatIDs.ToDictionary(id => id, id => new NumberingFormat { NumberFormatId = id });
    static Regex dateTimeFormatRegex = new Regex(@"((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*)", RegexOptions.Compiled);

    public static Dictionary<uint, NumberingFormat> GetDateTimeCellFormats(WorkbookPart workbookPart)
    {
        var dateNumberFormats = workbookPart.WorkbookStylesPart.Stylesheet.NumberingFormats
            .Descendants<NumberingFormat>()
            .Where(nf => dateTimeFormatRegex.Match(nf.FormatCode.Value).Success)
            .ToDictionary(nf => nf.NumberFormatId.Value);

        var cellFormats = workbookPart.WorkbookStylesPart.Stylesheet.CellFormats
            .Descendants<CellFormat>();

        var dateCellFormats = new Dictionary<uint, NumberingFormat>();
        uint styleIndex = 0;
        foreach (var cellFormat in cellFormats)
        {
            if (cellFormat.ApplyNumberFormat != null && cellFormat.ApplyNumberFormat.Value)
            {
                if (dateNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                {
                    dateCellFormats.Add(styleIndex, dateNumberFormats[cellFormat.NumberFormatId.Value]);
                }
                else if (builtInDateTimeNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                {
                    dateCellFormats.Add(styleIndex, builtInDateTimeNumberFormats[cellFormat.NumberFormatId.Value]);
                }
            }

            styleIndex++;
        }

        return dateCellFormats;
    }

    // Usage Example
    public static bool IsDateTimeCell(WorkbookPart workbookPart, Cell cell)
    {
        if (cell.StyleIndex == null)
            return false;

        var dateTimeCellFormats = ExcelHelper.GetDateTimeCellFormats(workbookPart);

        return dateTimeCellFormats.ContainsKey(cell.StyleIndex);
    }
}

OTHER TIPS

This answer is a version of the above accepted answer to make it work for a failing scenario. It fails in a scenario where your cell is of Currency type with the built-in format applied - &quot;$&quot;#,##0_);[Red]\(&quot;$&quot;#,##0\)

The regex ((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*) (in the accepted answer) parses the above mentioned built-in format as well, which in turn makes it a datetime cell & the value returned is date time value, instead of a currency value.

I have modified the GetDateTimeCellFormats method little bit, so that the regex doesn't interfere with any of built-in formats not belonging to date/time/datetime.

        var dateCellFormats = new Dictionary<uint, NumberingFormat>();
        uint styleIndex = 0;
        foreach (var cellFormat in cellFormatList)
        {
            if (cellFormat.ApplyNumberFormat == null || !cellFormat.ApplyNumberFormat.Value)
            {
                styleIndex++;
                continue;
            }

            var numFmtId = cellFormat.NumberFormatId.Value;
            if (numFmtId < 164)
            {
                if (builtInDateTimeNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                    dateCellFormats.Add(styleIndex, builtInDateTimeNumberFormats[cellFormat.NumberFormatId.Value]);
            }
            else
            {
                if (dateNumberFormatsDict.ContainsKey(cellFormat.NumberFormatId.Value))
                    dateCellFormats.Add(styleIndex, dateNumberFormatsDict[cellFormat.NumberFormatId.Value]);
            }
            styleIndex++;
        }

Rest all code remains same.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top