Question

In a simple test, protobuf-net v2 is much slower than BinaryFormatter when the principal data is a Dictionary<int, string>. The protobuf-net time is 931ms with a stream length of 7,950,000; the BinaryFormatter time is 52ms with a stream length of 193,798. So it's a big difference.

Here's the full test code:

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using ProtoBuf;

namespace SerDeser
{
    class Program
    {
        static void Main(string[] args)
        {
            const int HOW_MANY = 10000;
            var data = new TestClass[HOW_MANY];
            var d1 = new Dictionary<int, string>();
            for (var i = 1; i <= 100; i++)
            {
                d1.Add(i, i.ToString());
            }
            for (var i = 0; i < HOW_MANY; i++)
            {
                data[i] = new TestClass() {InnerData = d1};
            }
            SerDeser(data, "BinaryFormatter", false);
            SerDeser(data, "ProtoBuf", true);
            Console.ReadLine();
        }

        private static void SerDeser(TestClass[] data, string label, bool useProtoBuf)
        {
            var timer = new Stopwatch();
            IFormatter formatter = null;
            using (var ms = new MemoryStream())
            {
                timer.Start();
                if (useProtoBuf)
                {
                    Serializer.Serialize<TestClass[]>(ms, data);
                }
                else
                {
                    formatter = new BinaryFormatter();
                    formatter.Serialize(ms, data);
                }
                TestClass[] clone = null;
                var serTime = timer.ElapsedMilliseconds;
                timer.Reset();
                timer.Start();
                ms.Position = 0;
                if (useProtoBuf)
                {
                    clone = Serializer.Deserialize<TestClass[]>(ms);
                }
                else
                {
                    clone = formatter.Deserialize(ms) as TestClass[];
                }
                timer.Stop();
                Console.WriteLine(string.Format("{0}: ser {1}ms; deser {2}ms; total {3}ms; length {4}", label, serTime, timer.ElapsedMilliseconds, serTime + timer.ElapsedMilliseconds, ms.Length));
                if (clone.Length != data.Length) Console.WriteLine("length error.");
                if (clone[1].InnerData.Count != data[1].InnerData.Count) Console.WriteLine("Content error 1");
                if (clone[1].InnerData[1] != data[1].InnerData[1]) Console.WriteLine("Content error 2");
            }
        }

    }

    [ProtoContract]
    [Serializable]
    public class TestClass
    {
        public TestClass()
        {
        }
        [ProtoMemberAttribute(1)] public Dictionary<int, string> InnerData;

    }

}

I create an array of 10,000 of these, and the InnerData property of each of them is a dictionary with 100 entries. My test serializes and deserializes the array.

For the protobuf-net test, I simply use Serializer.Serialize(ms, data), where ms is a MemoryStream (and Serializer.Deserialize to deserialize); the full test driver code is shown above.

So my question is: under what conditions should I expect protobuf-net to be slower or faster than BinaryFormatter?


Solution

This is because you are repeating the exact same InnerData instance every time. BinaryFormatter always tracks references. protobuf-net can track references, but it does not do so by default, because the Google protobuf specification has no notion of reference tracking.
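
A minimal sketch of that difference (not part of the original test, and reusing the usings from the program above): serialize an array that holds the same string instance many times and compare the stream lengths.

// Rough illustration: one shared string instance, referenced 1,000 times.
// BinaryFormatter writes the string once and then emits back-references, so
// the stream stays small; protobuf-net, with default settings, writes the
// full string payload for every element.
var shared = new string('x', 1000);
var repeated = new string[1000];
for (var i = 0; i < repeated.Length; i++) repeated[i] = shared;

using (var ms = new MemoryStream())
{
    new BinaryFormatter().Serialize(ms, repeated);
    Console.WriteLine("BinaryFormatter length: {0:N0}", ms.Length);
}
using (var ms = new MemoryStream())
{
    Serializer.Serialize(ms, repeated);
    Console.WriteLine("protobuf-net length: {0:N0}", ms.Length);
}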

However, I doubt that serializing the same instance lots of times is a useful test - it rarely represents real usage for either serializer.

To give a realistic test, generate a different InnerData instance for each of the HOW_MANY elements, and also ensure that the strings are different string instances each time (again, BinaryFormatter will track references on strings too).

Alternatively, enable reference-tracking in protobuf-net (a rough sketch follows). Unfortunately, you can't easily do this for strings that are the immediate children of Dictionary<int,string>.
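
As an illustration of member-level reference tracking in protobuf-net v2 (this is not part of the original test; SharedData is a hypothetical wrapper type, introduced only so that the shared data sits behind a class-typed member, since AsReference can't be applied directly to the dictionary's string values):

[ProtoContract]
public class SharedData   // hypothetical wrapper: the shared data becomes a single object reference
{
    [ProtoMember(1)]
    public Dictionary<int, string> Values;
}

[ProtoContract]
public class TestClassWithRef
{
    // AsReference = true asks protobuf-net to serialize the shared instance once
    // and write back-references for later occurrences, similar to BinaryFormatter.
    [ProtoMember(1, AsReference = true)]
    public SharedData InnerData;
}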

Here's my change:

static void Main(string[] args)
{
    const int HOW_MANY = 10000;
    var data = new TestClass[HOW_MANY];
    for (var i = 0; i < HOW_MANY; i++)
    {
        Dictionary<int, string> d1 = InventData();
        data[i] = new TestClass() { InnerData = d1 };
    }
    SerDeser(data, "BinaryFormatter", false);
    SerDeser(data, "ProtoBuf", true);
    Console.ReadLine();
}

private static Dictionary<int, string> InventData()
{
    var d1 = new Dictionary<int, string>();
    for (var i = 1; i <= 100; i++)
    {
        d1.Add(i, i.ToString());
    }

    return d1;
}

with results (I added thousand separators for easy comparison):

BinaryFormatter: ser 3,413ms; deser 30,833ms; total 34,246ms; length 23,821,447
ProtoBuf: ser 450ms; deser 591ms; total 1,041ms; length 7,950,000

I know which I'd prefer!

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow