You could try something like this. I don't believe there is a quicker way. You will likely want to pull some of the constant values and the table out of the performance-critical part of the code.
__m128i v; // todo: set v to something here
// to check
int n; // todo: set n to the zero-indexed bit to check
// Place bit (n mod 16) in every 16-bit lane; within the byte that holds bit n
// this is exactly the bit being probed.
__m128i chkmask = _mm_slli_epi16(_mm_set1_epi16(1), n & 0xF);
// Keep only the probed bit of each lane, then flag every byte that ended up zero.
__m128i clearBytes = _mm_cmpeq_epi8(_mm_and_si128(chkmask, v), _mm_setzero_si128());
// Bit (n / 8) of the 16-bit byte mask belongs to the byte containing bit n.
int movemask = (1 << (n >> 3));
// That byte is flagged as zero exactly when bit n is clear, so invert the
// selected flag: isSet equals movemask (non-zero) when bit n is set, else 0.
int isSet = (_mm_movemask_epi8(clearBytes) & movemask) ^ movemask;
// to set
int m; // todo: set m to the zero-indexed bit to set
// Shuffle control: byte j holds (j - m / 8) mod 16, so byte (m / 8) selects
// source byte 0 and every other byte selects one of the zero source bytes.
__m128i shuf = _mm_and_si128(
    _mm_add_epi8(_mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
                 _mm_set1_epi8(16 - (m >> 3))),
    _mm_set1_epi8(0x0F));
// Source vector carries bit (m mod 8) in byte 0 only; the shuffle moves that
// byte into position m / 8, giving a mask with just bit m set.
__m128i setmask = _mm_shuffle_epi8(_mm_cvtsi32_si128(1 << (m & 0x7)), shuf);
v = _mm_or_si128(v, setmask);
// or to try the look-up table approach to check and set
// lut[k], for k = 0..127, is a 128-bit mask with only bit k set.
// _mm_set_epi32 takes its arguments from the highest 32-bit element down to
// the lowest, so the right-most argument holds bits 0-31 and the left-most
// argument holds bits 96-127.
// __declspec(align(16)) is Microsoft-specific syntax requesting 16-byte
// alignment; the table is read below with the aligned load _mm_load_si128.
__declspec(align(16)) __m128i lut[] = {
// bits 0-31 (lowest 32-bit element)
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000001),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000002),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000004),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000008),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000010),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000020),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000040),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000080),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000100),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000200),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000400),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000800),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00001000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00002000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00004000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00008000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00010000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00020000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00040000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00080000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00100000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00200000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00400000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00800000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x01000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x02000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x04000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x08000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x10000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x20000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x40000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x80000000),
// bits 32-63 (second 32-bit element)
_mm_set_epi32(0x00000000, 0x00000000, 0x00000001, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000002, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000004, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000008, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000010, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000020, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000040, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000080, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000100, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000200, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000400, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00000800, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00001000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00002000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00004000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00008000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00010000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00020000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00040000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00080000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00100000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00200000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00400000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x00800000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x01000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x02000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x04000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x08000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x10000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x20000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x40000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000000, 0x80000000, 0x00000000),
// bits 64-95 (third 32-bit element)
_mm_set_epi32(0x00000000, 0x00000001, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000002, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000004, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000008, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000010, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000020, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000040, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000080, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000100, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000200, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000400, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00000800, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00001000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00002000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00004000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00008000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00010000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00020000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00040000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00080000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00100000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00200000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00400000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x00800000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x01000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x02000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x04000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x08000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x10000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x20000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x40000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000000, 0x80000000, 0x00000000, 0x00000000),
// bits 96-127 (highest 32-bit element)
_mm_set_epi32(0x00000001, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000002, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000004, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000008, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000010, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000020, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000040, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000080, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000100, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000200, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000400, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00000800, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00001000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00002000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00004000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00008000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00010000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00020000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00040000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00080000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00100000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00200000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00400000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x00800000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x01000000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x02000000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x04000000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x08000000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x10000000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x20000000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x40000000, 0x00000000, 0x00000000, 0x00000000),
_mm_set_epi32(0x80000000, 0x00000000, 0x00000000, 0x00000000)
};
// to check with look-up table
// lut[n] has only bit n set, so after the AND the byte at index n / 8 is
// non-zero exactly when bit n of v is set. The table must be indexed with n
// (the bit being checked), matching the byte selected by movemask; indexing
// with m would test an unrelated bit whenever m != n.
// isSet equals movemask (non-zero) when bit n is set, and 0 otherwise.
movemask = (1 << (n >> 3));
isSet = (_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_and_si128(v, _mm_load_si128(lut + n)), _mm_setzero_si128())) & movemask) ^ movemask;
// to set with look-up table
// OR in the single-bit table entry for bit m.
v = _mm_or_si128(v, _mm_load_si128(lut + m));