Question

I'm currently in the process of converting a number of Access databases to XML files. I have done this before and I still have the code from a previous project, but that code won't let me structure the XML as I please, which is what I need to do this time around. I'm using XDocument with for loops to achieve this, but it gets incredibly slow after a couple of thousand rows of data.

From what I've read about how XDocument works, XElement.Add actually copies the entire XML and pastes everything back with the new element added. If this is true, then that's probably where the problem lies.

This is the part that reads the data from Access and writes it to XML. Take a look and see if anything can be salvaged. Converting a database with 27 columns and 12,256 rows takes almost 30 minutes, while a smaller one with a mere 500 rows takes roughly 5 seconds.

private void ReadWrite(string file)
{
    using (_Connection = new OleDbConnection(string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Mode=12;Data Source={0}", pathAccess)))
    {
        _Connection.Open();
        //Gives me values from the AccessDB: tableName, columnName, colCount, rowCount and listOfTimeStamps.
        GetValues(pathAccess);

        XDocument doc = new XDocument(new XDeclaration("1.0", "utf-8", "true"), new XElement(tableName));
        for (int rowInt = 0; rowInt < rowCount; rowInt++)
        {
            XElement item = new XElement("Item", new XAttribute("Time", listOfTimestamps[rowInt].ToString().Replace(" ", "_")));
            doc.Root.Add(item);

            //colCount"-1" prevents the timestamp from beeing written again.
            for (int colInt = 0; colInt < colCount - 1; colInt++)
            {
                using (OleDbCommand cmnd = new OleDbCommand(string.Format("SELECT {0} FROM {1} WHERE TimeStamp = #{2}#", columnName[colInt], tableName, listOfTimestamps[rowInt]), _Connection))
                {
                    XElement value = new XElement(columnName[colInt], cmnd.ExecuteScalar().ToString());
                    item.Add(value);
                }
            }
            //Updates progressbar
            backgroundWorker1.ReportProgress(rowInt);
        }
        backgroundWorker1.ReportProgress(0);
        doc.Save(file);
    }
}

This is the code from my old converter. It is pretty much unaffected by the size of the database; the 12,256-row database only takes a second to convert. Could there possibly be a way to merge these two?

public void ReadWrite2(string file)
{
    DataSet dataSet = new DataSet();
    using (_Connection = new OleDbConnection(string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Mode=12;Data Source={0}", file)))
    {
        _Connection.Open();

        // List all user tables in the database.
        DataTable schemaTable = _Connection.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, new object[] { null, null, null, "TABLE" });

        foreach (DataRow dataTableRow in schemaTable.Rows)
        {
            string tableName = dataTableRow["Table_Name"].ToString();

            DataTable dataTable = dataSet.Tables.Add(tableName);
            // Read the entire table in one query and let the adapter fill the DataTable.
            using (OleDbCommand readRows = new OleDbCommand("SELECT * FROM " + tableName, _Connection))
            using (OleDbDataAdapter adapter = new OleDbDataAdapter(readRows))
            {
                adapter.Fill(dataTable);
            }
        }
    }
    dataSet.WriteXml(file.Replace(".mdb", ".xml"));
}

EDIT: Just to clarify, the application slows down as it executes: the first 500 rows take 5 seconds no matter how big the database is.

UPDATE: Okay, so I've come back after the weekend and made a small adjustment to the code, separating the reading and the writing by filling a jagged array with the values in one loop and writing them out in another. This has proven my theory wrong; it's in fact the reading that takes so much time. Any ideas on how to fill an array with the values without hitting the database inside the loop?
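
For reference, a minimal sketch of the "read everything in one pass" idea, assuming tableName has already been filled in by GetValues, _Connection is open (names taken from the code above), and a using System.Collections.Generic; directive is present. OleDbDataReader.GetValues copies a whole row at once, so the database is hit once in total rather than once per cell:

private List<object[]> ReadAllRows()
{
    // Hypothetical helper: drain a single SELECT into a list of row arrays.
    var rows = new List<object[]>();
    using (var command = new OleDbCommand(string.Format("SELECT * FROM {0}", tableName), _Connection))
    using (var reader = command.ExecuteReader())
    {
        while (reader.Read())
        {
            var values = new object[reader.FieldCount];
            reader.GetValues(values); // copies every column of the current row in one call
            rows.Add(values);
        }
    }
    return rows; // rows[rowInt][colInt] can now feed the XML loop with no further queries
}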

UPDATE 2: This is the end result after switching to a DataReader.Read() loop and collecting all the data right away.

public void ReadWrite3(string Save, string Load)
{
    using (_Connection = new OleDbConnection(string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Mode=12;Data Source={0}", Load)))
    {
        _Connection.Open();
        GetValues(_Connection);

        _Command = new OleDbCommand(string.Format("SELECT {0} FROM {1}", strColumns, tables), _Connection);
        XDocument doc = new XDocument(new XDeclaration("1.0", "utf-8", "true"), new XElement("plmslog", new XAttribute("machineid", root)));
        using (_DataReader = _Command.ExecuteReader())
        {
            for (int rowInt = 0; _DataReader.Read(); rowInt++)
            {
                for (int logInt = 0; logInt < colCount; logInt++)
                {
                    XElement log = new XElement("log");
                    doc.Root.Add(log);

                    elementValues = updateElementValues(rowInt, logInt);

                    for (int valInt = 0; valInt < elements.Length; valInt++)
                    {
                        XElement value = new XElement(elements[valInt], elementValues[valInt]);
                        log.Add(value);
                    }
                }
            }
        }
        doc.Save(Save);
    }
}

Solution

Forgive me, but I think you're making your life more complicated than it needs to be. If you use an OleDbDataReader object you can just open it and read through the Access table row by row, without having to cache the row data in an array (since you already have it in the DataReader).

For example, I have some sample data

dbID    dbName  dbCreated
----    ------  ---------
bar     barDB   2013-04-08 14:19:27
foo     fooDB   2013-04-05 11:23:02

and the following code runs through the table...

static void Main(string[] args)
{
    OleDbConnection conn = new OleDbConnection(@"Provider=Microsoft.ACE.OLEDB.12.0;Data Source=C:\Documents and Settings\Administrator\Desktop\Database1.accdb;");
    conn.Open();

    // One query pulls back the whole table; the reader then streams it row by row.
    OleDbCommand cmd = new OleDbCommand("SELECT * FROM myTable", conn);
    OleDbDataReader rdr = cmd.ExecuteReader();

    int rowNumber = 0;
    while (rdr.Read())
    {
        rowNumber++;
        Console.WriteLine("Row " + rowNumber.ToString() + ":");
        // Every column of the current row is already in the reader; no further queries needed.
        for (int colIdx = 0; colIdx < rdr.FieldCount; colIdx++)
        {
            string colName = rdr.GetName(colIdx);
            Console.WriteLine("    rdr[\"" + colName + "\"]: " + rdr[colName].ToString());
        }
    }
    rdr.Close();
    conn.Close();

    Console.WriteLine("Done.");
}

...and produces the result...

Row 1:
    rdr["dbID"]: foo
    rdr["dbName"]: fooDB
    rdr["dbCreated"]: 2013-04-05 11:23:02
Row 2:
    rdr["dbID"]: bar
    rdr["dbName"]: barDB
    rdr["dbCreated"]: 2013-04-08 14:19:27
Done.
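
Tying this back to the original goal: the same loop can build the custom-structured XML directly instead of printing to the console. Here is a minimal sketch under the same setup as above; the "Items"/"Item" element names are placeholders, and it assumes the column names are valid XML element names (System.Xml.Linq required):

XDocument doc = new XDocument(new XElement("Items"));
using (OleDbDataReader rdr2 = cmd.ExecuteReader())
{
    while (rdr2.Read())
    {
        XElement item = new XElement("Item");
        for (int colIdx = 0; colIdx < rdr2.FieldCount; colIdx++)
        {
            // one child element per column, named after the column itself
            item.Add(new XElement(rdr2.GetName(colIdx), rdr2[colIdx].ToString()));
        }
        doc.Root.Add(item);
    }
}
doc.Save(@"C:\output.xml"); // output path is just an example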

OTHER TIPS

You are hitting the database from inside a nested loop, once for every row/column combination:

 using (OleDbCommand cmnd = new OleDbCommand(string.Format("SELECT {0} FROM {1} 

You would be better off reading all of the data into an array or collection first, and only then building the XML from it.

Simple math shows why. Amount of data:

27 * 12256 = 330912 cells

27 * 500 = 13500 cells

330912 / 13500 ≈ 24.5

so the big database holds about 24.5 times more data.

Time-wise:

30 * 60 = 1800 seconds

1800 / 5 = 360

so the big conversion takes 360 times longer, far more than the roughly 24.5× growth in data. That disproportion is the signature of the nested-loop queries: each per-cell SELECT scans the table again, so the total work grows roughly with the square of the row count rather than linearly.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow