For the record, here is the BitStream implementation I ended up with. It is mostly macro-based, and uses a 32-bit accumulator. Bits are stored in the accumulator from the most significant bit to the least significant bit. For instance, to test if the next bit is set, one would do (accumulator & 0x80000000). This makes it very practical to do multiple tests without having to manipulate the BitStream much. For writing, I am accumulating bits in the accumulator, and automatically flush them to the output buffer when it is full. Flushing can also be manually triggered when all bits have been written. Your mileage may vary, but I am quite satisfied with it. I have used it to implement Microsoft Point to Point Compression (MPPC), which uses huffman coding, and performance is great.
struct _wBitStream
{
BYTE* buffer;
BYTE* pointer;
DWORD position;
DWORD length;
DWORD capacity;
UINT32 mask;
UINT32 offset;
UINT32 prefetch;
UINT32 accumulator;
};
typedef struct _wBitStream wBitStream;
#define BitStream_Prefetch(_bs) do { \
(_bs->prefetch) = 0; \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 4)) \
(_bs->prefetch) |= (*(_bs->pointer + 4) << 24); \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 5)) \
(_bs->prefetch) |= (*(_bs->pointer + 5) << 16); \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 6)) \
(_bs->prefetch) |= (*(_bs->pointer + 6) << 8); \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 7)) \
(_bs->prefetch) |= (*(_bs->pointer + 7) << 0); \
} while(0)
#define BitStream_Fetch(_bs) do { \
(_bs->accumulator) = 0; \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 0)) \
(_bs->accumulator) |= (*(_bs->pointer + 0) << 24); \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 1)) \
(_bs->accumulator) |= (*(_bs->pointer + 1) << 16); \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 2)) \
(_bs->accumulator) |= (*(_bs->pointer + 2) << 8); \
if (((UINT32) (_bs->pointer - _bs->buffer)) <(_bs->capacity + 3)) \
(_bs->accumulator) |= (*(_bs->pointer + 3) << 0); \
BitStream_Prefetch(_bs); \
} while(0)
#define BitStream_Flush(_bs) do { \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 0)) \
*(_bs->pointer + 0) = (_bs->accumulator >> 24); \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 1)) \
*(_bs->pointer + 1) = (_bs->accumulator >> 16); \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 2)) \
*(_bs->pointer + 2) = (_bs->accumulator >> 8); \
if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 3)) \
*(_bs->pointer + 3) = (_bs->accumulator >> 0); \
} while(0)
#define BitStream_Shift(_bs, _nbits) do { \
_bs->accumulator <<= _nbits; \
_bs->position += _nbits; \
_bs->offset += _nbits; \
if (_bs->offset < 32) { \
_bs->mask = ((1 << _nbits) - 1); \
_bs->accumulator |= ((_bs->prefetch >> (32 - _nbits)) & _bs->mask); \
_bs->prefetch <<= _nbits; \
} else { \
_bs->mask = ((1 << _nbits) - 1); \
_bs->accumulator |= ((_bs->prefetch >> (32 - _nbits)) & _bs->mask); \
_bs->prefetch <<= _nbits; \
_bs->offset -= 32; \
_bs->pointer += 4; \
BitStream_Prefetch(_bs); \
if (_bs->offset) { \
_bs->mask = ((1 << _bs->offset) - 1); \
_bs->accumulator |= ((_bs->prefetch >> (32 - _bs->offset)) & _bs->mask); \
_bs->prefetch <<= _bs->offset; \
} \
} \
} while(0)
#define BitStream_Write_Bits(_bs, _bits, _nbits) do { \
_bs->position += _nbits; \
_bs->offset += _nbits; \
if (_bs->offset < 32) { \
_bs->accumulator |= (_bits << (32 - _bs->offset)); \
} else { \
_bs->offset -= 32; \
_bs->mask = ((1 << (_nbits - _bs->offset)) - 1); \
_bs->accumulator |= ((_bits >> _bs->offset) & _bs->mask); \
BitStream_Flush(bs); \
_bs->accumulator = 0; \
_bs->pointer += 4; \
if (_bs->offset) { \
_bs->mask = ((1 << _bs->offset) - 1); \
_bs->accumulator |= ((_bits & _bs->mask) << (32 - _bs->offset)); \
} \
} \
} while(0)
void BitStream_Attach(wBitStream* bs, BYTE* buffer, UINT32 capacity)
{
bs->position = 0;
bs->buffer = buffer;
bs->offset = 0;
bs->accumulator = 0;
bs->pointer = bs->buffer;
bs->capacity = capacity;
bs->length = bs->capacity * 8;
}
wBitStream* BitStream_New()
{
wBitStream* bs = NULL;
bs = (wBitStream*) calloc(1, sizeof(wBitStream));
return bs;
}
void BitStream_Free(wBitStream* bs)
{
free(bs);
}