Question

Here is the main part of my code to Serialize using Protobuf-net. I have a very large number of records that I loop through and write to file.

I now want to make all records a FIXED SIZE so that later in Deserialization I can skip several records at once.

How do I modify this code to write FIXED LENGTH records?

       // Writes every record, ordered by timestamp, into one length-prefixed file per calendar day.
        List<SP> SortedData = Data.OrderBy(o => o.DT).ToList();

        string LastdatFileName = "";
        FileStream outBin = null;

        try
        {
            foreach (var d in SortedData)
            {
                string binFileName = "n" + symbol + d.DT.ToString("yyyyMMdd") + ".dat";

                // The data is sorted by DT, so a change of file name means a new day has started:
                // close the previous day's file and start the next one.
                if (binFileName != LastdatFileName)
                {
                    if (outBin != null)
                    {
                        outBin.Close();
                    }

                    outBin = File.Create(Path.Combine(dbDirectory, binFileName), 2048, FileOptions.None);
                    LastdatFileName = binFileName;
                }

                Serializer.SerializeWithLengthPrefix(outBin, d.ToTickRecord(), PrefixStyle.Base128);
            }
        }
        finally
        {
            // outBin is still null when SortedData is empty; the finally block also
            // guarantees the last file is closed if serialization throws part-way through.
            if (outBin != null)
            {
                outBin.Close();
            }
        }

The record

  /// <summary>
  /// A single bid/ask tick, serialized with protobuf-net.
  /// </summary>
  [ProtoContract]
    public class TickRecord
    {
        /// <summary>Timestamp of the tick.</summary>
        [ProtoMember(1)]
        public DateTime DT;

        /// <summary>Bid price.</summary>
        [ProtoMember(2)]
        public double BidPrice;

        /// <summary>Ask price.</summary>
        [ProtoMember(3)]
        public double AskPrice;

        /// <summary>Bid size.</summary>
        [ProtoMember(4)]
        public int BidSize;

        /// <summary>Ask size.</summary>
        [ProtoMember(5)]
        public int AskSize;

        /// <summary>
        /// Parameterless constructor. protobuf-net creates the instance through it
        /// when deserializing; without it deserialization of this type fails.
        /// </summary>
        public TickRecord()
        {
        }

        /// <summary>
        /// Creates a tick with every field populated.
        /// </summary>
        /// <param name="DT">Timestamp of the tick.</param>
        /// <param name="BidPrice">Bid price.</param>
        /// <param name="AskPrice">Ask price.</param>
        /// <param name="BidSize">Bid size.</param>
        /// <param name="AskSize">Ask size.</param>
        public TickRecord(DateTime DT, double BidPrice, double AskPrice, int BidSize, int AskSize)
        {
            this.DT = DT;
            this.BidPrice = BidPrice;
            this.AskPrice = AskPrice;
            this.BidSize = BidSize;
            this.AskSize = AskSize;
        }
    }

Deserialize

             // Read one record, then jump ahead over the next skipRate records, until the file is exhausted.
             long skipRate = 10;

             // NOTE(review): 38 is presumably the on-disk size in bytes of one length-prefixed
             // record; the jump only lands on a record boundary if every record has that size.
             long bytesToSkip = 38 * skipRate;

             while (true)
             {
                 tr = Serializer.DeserializeWithLengthPrefix<TickRecord>(fs, PrefixStyle.Base128);
                 if (tr == null)
                 {
                     // Nothing left to read.
                     break;
                 }

                 count++;

                 fs.Position += bytesToSkip;
                 if (fs.Position > fs.Length)
                 {
                     // The jump went past the end of the file.
                     break;
                 }

                 //Console.WriteLine("> " + tr.ToString());
             }

SSCCE for Marc Gravell

You will need to create 2 buttons Serialize and Deserialize.

Serialize creates a dummy data file.

Deserialize reads through it.

Comment out the fs.Position line to see a raw read-through of the entire file; that takes 12 secs on my machine. Then uncomment it and the reader will skip 10 records each time. I was hoping for a factor-of-10 improvement in speed, BUT it still takes 8 secs on my machine. So I assume changing fs.Position is expensive.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using ProtoBuf;
using System.IO;
using System.Diagnostics;

namespace BinTest3
{


    /// <summary>
    /// Test form with two button handlers: one writes a dummy file of length-prefixed
    /// <see cref="TickRecord"/> values, the other reads it back while skipping records
    /// and reports the elapsed time.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Writes 20,000,000 length-prefixed records to C:\binfile.dft,
        /// incrementing BidPrice by one before each write.
        /// </summary>
        private void Serialize_Click(object sender, EventArgs e)
        {
            string binFileName = @"C:\binfile.dft";

            DateTime d = DateTime.Now;
            TickRecord tr = new TickRecord(d, 1.02, 1.03, 200, 300);

            // using guarantees the file is flushed and closed even if serialization throws.
            using (FileStream outBin = File.Create(binFileName, 2048, FileOptions.None))
            {
                for (int i = 0; i < 20000000; i++)
                {
                    tr.BidPrice += 1;
                    Serializer.SerializeWithLengthPrefix(outBin, tr, PrefixStyle.Base128);
                }
            }

            label1.Text = "Done ";
        }

        /// <summary>
        /// Reads C:\binfile.dft, jumping ahead after every record read, and shows the
        /// elapsed time and the number of records read. A failure while reading is
        /// appended to the label text instead of being silently discarded.
        /// </summary>
        private void Deserialize_Click(object sender, EventArgs e)
        {
            Stopwatch sw = new Stopwatch();
            sw.Start();

            string binFileName = @"C:\binfile.dft";
            long skipRate = 10;
            int count = 0;
            string error = null;

            // NOTE(review): 38 is presumably the serialized size in bytes of one
            // length-prefixed TickRecord - confirm against the actual file layout.
            long skip = 38 * skipRate;

            using (FileStream fs = new FileStream(binFileName, FileMode.Open, FileAccess.Read, FileShare.Read, 4 * 4096))
            {
                try
                {
                    while (Serializer.DeserializeWithLengthPrefix<TickRecord>(fs, PrefixStyle.Base128) != null)
                    {
                        count++;

                        fs.Position += skip;  //Comment out this line to see raw speed
                    }
                }
                catch (Exception ex)
                {
                    // Still best-effort (the count read so far is reported), but the
                    // failure is now visible to the user rather than swallowed.
                    error = ex.Message;
                }
            }

            sw.Stop();

            string summary = "Time taken: " + sw.Elapsed + " Count: " + count.ToString("n0");
            if (error != null)
            {
                summary += " Error: " + error;
            }

            label1.Text = summary;
        }
    }


    /// <summary>
    /// A single bid/ask tick, serialized with protobuf-net. The DateTime and int
    /// members request the FixedSize data format.
    /// </summary>
    [ProtoContract]
    public class TickRecord
    {
        /// <summary>Timestamp of the tick.</summary>
        [ProtoMember(1, DataFormat = DataFormat.FixedSize)] public DateTime DT;

        /// <summary>Bid price.</summary>
        [ProtoMember(2)] public double BidPrice;

        /// <summary>Ask price.</summary>
        [ProtoMember(3)] public double AskPrice;

        /// <summary>Bid size.</summary>
        [ProtoMember(4, DataFormat = DataFormat.FixedSize)] public int BidSize;

        /// <summary>Ask size.</summary>
        [ProtoMember(5, DataFormat = DataFormat.FixedSize)] public int AskSize;

        /// <summary>Creates a tick with all fields left at their default values.</summary>
        public TickRecord() { }

        /// <summary>Creates a tick with every field populated from the arguments.</summary>
        /// <param name="DT">Timestamp of the tick.</param>
        /// <param name="BidPrice">Bid price.</param>
        /// <param name="AskPrice">Ask price.</param>
        /// <param name="BidSize">Bid size.</param>
        /// <param name="AskSize">Ask size.</param>
        public TickRecord(DateTime DT, double BidPrice, double AskPrice, int BidSize, int AskSize)
        {
            this.DT = DT;

            // Prices
            this.BidPrice = BidPrice;
            this.AskPrice = AskPrice;

            // Sizes
            this.BidSize = BidSize;
            this.AskSize = AskSize;
        }
    }
}
Was it helpful?

Solution

After a quick look at the documentation, I think you want something like:

// Members of TickRecord, with the fixed-size data format requested where it applies.
[ProtoMember(1, DataFormat = DataFormat.FixedSize)]
public DateTime DT;
// No DataFormat on the doubles: they are always written as fixed-size values.
[ProtoMember(2)]
public double BidPrice;
[ProtoMember(3)]
public double AskPrice;
[ProtoMember(4, DataFormat = DataFormat.FixedSize)]
public int BidSize;
[ProtoMember(5, DataFormat = DataFormat.FixedSize)]
public int AskSize;

That should be fine for the numeric values - I'm not sure whether the DataFormat attribute will work for the DateTime field. An alternative would be to have a long Ticks field which is serialized with a FixedSize data format, and then a property which converts to/from DateTime. Looking at the code though, I think it will be okay as written above. There's no need to specify the data format for double as that's always written as a fixed-size value anyway.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top