cudaMemcpyToSymbol use details

Question

Where is your kernel? d_simparam is in the same "translation unit" as your host code from compiler point of view - you don't need any "extern" declarations here. You may need one in the source file where your kernel is.

This is what works for me:

device.h - file with device symbols:

#include <stdio.h>

struct SimpleStruct {
    int a;
    float b;
};

__constant__ SimpleStruct variable = { 10, 0.3f };

__global__ void kernel() {
    printf("%d %f\n", variable.a, variable.b);
}

host.cu - host code:

#include <stdio.h>
#include <stdlib.h>

#define CUDA_CHECK_RETURN(value) {                                      \
    cudaError_t _m_cudaStat = value;                                    \
    if (_m_cudaStat != cudaSuccess) {                                   \
        fprintf(stderr, "Error %s at line %d in file %s\n",             \
                cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__);   \
    exit(1);                                                            \
} }

#include "device.h"

int main(void) {
    const SimpleStruct n = { 7, 0.5f };

    CUDA_CHECK_RETURN(cudaMemcpyToSymbol(variable, &n, sizeof(SimpleStruct)));

    kernel<<<1, 1>>>();

    CUDA_CHECK_RETURN(cudaThreadSynchronize()); // Wait for the GPU launched work to complete
    CUDA_CHECK_RETURN(cudaGetLastError());
    CUDA_CHECK_RETURN(cudaDeviceReset());

    return 0;
}

Update: Separate compilation is only supported on SM 2.0 devices and newer. It is not possible to use separate compilation for SM 1.3 code.