Your biggest bottleneck is going to be IO, which has to be performed with exclusive access to the file. The actual byte-crunching for this will be fast - you are going to do just as well writing it directly to the file (noting that the FileStream
itself has a buffer, or you can add an extra layer with BufferedStream
) as you would by serializing different parts in-memory and then copying each in-memory part to the stream separately.
My advice: just write the data in a single thread. Frankly I'm not sure I'd even bother with async
(hint: async code adds overhead), especially if the buffer is keeping up. I also wouldn't use BinaryWriter
/ BinaryReader
- I'd just write it raw. One tricky you could do is to use some unsafe
code to copy the data in blocks, to avoid having to even look at individual objects, but that is at the harder end of things... I'll try to do an example.
Here's an example of read/write, noting performance first:
Write: 2012ms
Read: 1089ms
File: 838,860,804 bytes
Code:
// P/Invoke to the C runtime's memcpy for fast raw copies between unmanaged
// pointers. NOTE(review): binds to msvcrt.dll, so this is Windows-only;
// SetLastError = false because memcpy never sets a Win32 error.
[DllImport("msvcrt.dll", EntryPoint = "memcpy", CallingConvention = CallingConvention.Cdecl, SetLastError = false)]
public static extern IntPtr memcpy(IntPtr dest, IntPtr src, UIntPtr count);
/// <summary>
/// Deserializes an array of <c>st</c> structs from <paramref name="path"/>:
/// a 4-byte element count (native endianness) followed by the raw struct bytes,
/// copied out in blocks of up to <c>BLOCK_SIZE</c> structs per memcpy call.
/// Assumes <c>st</c> is a blittable struct whose in-memory layout matches the
/// file written by the matching Write method — TODO confirm against <c>st</c>'s definition.
/// </summary>
/// <param name="path">File previously produced by Write.</param>
/// <returns>The reconstructed array (empty when the stored count is zero).</returns>
/// <exception cref="InvalidDataException">The length prefix is negative (corrupt file).</exception>
/// <exception cref="EndOfStreamException">The file is shorter than the prefix claims.</exception>
unsafe static st[] Read(string path)
{
    using (var file = File.OpenRead(path))
    {
        int size = sizeof(st);
        const int BLOCK_SIZE = 512; // structs copied per memcpy call
        byte[] buffer = new byte[BLOCK_SIZE * size];
        UIntPtr bufferLen = new UIntPtr((uint)buffer.Length);
        fixed (byte* bufferPtr = buffer)
        {
            // Read the 4-byte element-count prefix into the scratch buffer.
            Fill(file, buffer, 0, 4);
            int len = ((int*)bufferPtr)[0];
            // Guard against a corrupt/truncated file: a negative count would
            // otherwise surface as a confusing OverflowException from new st[len].
            if (len < 0) throw new InvalidDataException("Invalid element count in header: " + len);
            st[] result = new st[len];
            fixed (st* dataPtr = result)
            {
                st* rawPtr = dataPtr;
                IntPtr source = new IntPtr(bufferPtr);
                // Copy complete blocks of BLOCK_SIZE structs.
                while (len >= BLOCK_SIZE)
                {
                    Fill(file, buffer, 0, buffer.Length);
                    memcpy(new IntPtr(rawPtr), source, bufferLen);
                    len -= BLOCK_SIZE;
                    rawPtr += BLOCK_SIZE;
                }
                // Copy the final partial block, if any.
                if (len > 0)
                {
                    Fill(file, buffer, 0, len * size);
                    memcpy(new IntPtr(rawPtr), source, new UIntPtr((uint)(len * size)));
                }
            }
            return result;
        }
    }
}
/// <summary>
/// Reads exactly <paramref name="count"/> bytes from <paramref name="source"/>
/// into <paramref name="buffer"/> starting at <paramref name="offset"/>,
/// looping over partial reads (Stream.Read may return fewer bytes than asked).
/// </summary>
/// <exception cref="EndOfStreamException">The stream ended before <paramref name="count"/> bytes were read.</exception>
static void Fill(Stream source, byte[] buffer, int offset, int count)
{
    int remaining = count;
    int position = offset;
    while (remaining > 0)
    {
        int bytesRead = source.Read(buffer, position, remaining);
        if (bytesRead <= 0) throw new EndOfStreamException();
        position += bytesRead;
        remaining -= bytesRead;
    }
}
// Serializes data to path as: a 4-byte element count (native endianness)
// followed by the raw bytes of the st structs, staged through a scratch
// buffer in blocks of BLOCK_SIZE structs per memcpy call.
// NOTE(review): assumes st is blittable (no managed references) so its
// in-memory bytes are the on-disk format — confirm against st's definition.
unsafe static void Write(st[] data, string path)
{
    using (var file = File.Create(path))
    {
        int size = sizeof(st);
        const int BLOCK_SIZE = 512; // process at a time
        byte[] buffer = new byte[BLOCK_SIZE * size];
        int len = data.Length;
        UIntPtr bufferLen = new UIntPtr((uint)buffer.Length);
        fixed (st* dataPtr = data)
        fixed (byte* bufferPtr = buffer)
        {
            // write the number of elements
            // (reuses the scratch buffer's first 4 bytes before any block copy)
            ((int*)bufferPtr)[0] = data.Length;
            file.Write(buffer, 0, 4);
            st* rawPtr = dataPtr;
            IntPtr destination = new IntPtr(bufferPtr);
            // write complete blocks of BLOCK_SIZE
            while (len >= BLOCK_SIZE)
            {
                memcpy(destination, new IntPtr(rawPtr), bufferLen);
                len -= BLOCK_SIZE;
                rawPtr += BLOCK_SIZE;
                file.Write(buffer, 0, buffer.Length);
            }
            if (len > 0)
            { // write an incomplete block, if necessary
                memcpy(destination, new IntPtr(rawPtr), new UIntPtr((uint)(len * size)));
                file.Write(buffer, 0, len * size);
            }
        }
    }
}