To answer your question about how to load a 64-bit value into the lower 64 bits of an XMM register while zeroing the upper 64 bits: _mm_loadl_epi64(&x)
will do exactly what you want.
Regarding _mm_set_epi64:
I once said that looking at the source code of Agner Fog's Vector Class Library can answer 95% of the questions about SSE/AVX on SO. Agner implemented this (in the file vectori128.h) for multiple compilers and for both 64-bit and 32-bit modes. Note that for the MSVC 32-bit solution Agner says "this is inefficient, but other solutions are worse". I guess that's what Mysticial means by "There isn't a good way to do it".
// Constructor: fills the member xmm with two 64-bit integers.
// i0 ends up in the low 64 bits and i1 in the high 64 bits on every path
// (_mm_set_epi64x takes the high element first; _mm_setr_epi32 and
// _mm_unpacklo_epi64 take the low element first).
Vec2q(int64_t i0, int64_t i1) {
#if defined (_MSC_VER) && ! defined(__INTEL_COMPILER)
    // MS compiler has no _mm_set_epi64x in 32 bit mode.
    // MSVC itself predefines _M_X64 for 64-bit targets, not __x86_64__, so
    // test both; otherwise a 64-bit MSVC build would silently fall through
    // to the slow 32-bit path unless some other header defines __x86_64__.
#if defined(__x86_64__) || defined(_M_X64)  // 64 bit mode
#if _MSC_VER < 1700
    // Older MS compilers: move each value into its own register, then
    // interleave the two low halves.
    __m128i x0 = _mm_cvtsi64_si128(i0);     // 64 bit load
    __m128i x1 = _mm_cvtsi64_si128(i1);     // 64 bit load
    xmm = _mm_unpacklo_epi64(x0, x1);       // combine
#else
    xmm = _mm_set_epi64x(i1, i0);
#endif
#else   // MS compiler in 32-bit mode
    // Split the two 64-bit values into four 32-bit pieces through a union.
    // NOTE(review): this reads a union member other than the one last
    // written and assumes little-endian layout; it is only compiled for
    // the MS compiler.
    union {
        int64_t q[2];
        int32_t r[4];
    } u;
    u.q[0] = i0;
    u.q[1] = i1;
    // this is inefficient, but other solutions are worse
    xmm = _mm_setr_epi32(u.r[0], u.r[1], u.r[2], u.r[3]);
#endif  // 64 bit mode
#else   // Other compilers
    xmm = _mm_set_epi64x(i1, i0);
#endif
}