I am trying to convert a c++ program I have which uses random library which is a C++11 feature. After having read through a couple of similar posts here, I tried by separating out the code into three files. At the outset I would like to say that I am not very conversant at C/C++ and mostly use R at work.
The main file looks as follows.
#ifndef _KERNEL_SUPPORT_
#define _KERNEL_SUPPORT_
#include <complex>
#include <random>
#include <iostream>
#include "my_code_header.h"
using namespace std;
std::default_random_engine generator;
std::normal_distribution<double> distribution(0.0,1.0);
const int rand_mat_length = 24561;
double rand_mat[rand_mat_length];// = {0};
void create_std_norm(){
for(int i = 0 ; i < rand_mat_length ; i++)
::rand_mat[i] = distribution(generator);
}
.
.
.
int main(void)
{
...
...
call_global();
return 0;
}
#endif
The header file looks as follows.
#ifndef mykernel_h
#define mykernel_h
void call_global();
void two_d_example(double *a, double *b, double *my_result, size_t length, size_t width);
#endif
And the .cu file looks like the following.
#ifndef _MY_KERNEL_
#define _MY_KERNEL_
#include <iostream>
#include "my_code_header.h"
#define TILE_WIDTH 8
using namespace std;
__global__ void two_d_example(double *a, double *b, double *my_result, size_t length, size_t width)
{
unsigned int row = blockIdx.y*blockDim.y + threadIdx.y;
unsigned int col = blockIdx.x*blockDim.x + threadIdx.x;
if ((row>length) || (col>width)) {
return;
}
...
}
void call_global()
{
const size_t imageLength = 528;
const size_t imageWidth = 528;
const dim3 threadsPerBlock(TILE_WIDTH,TILE_WIDTH);
const dim3 numBlocks(((imageLength) / threadsPerBlock.x), ((imageWidth) / threadsPerBlock.y));
double *d_a, *d_b, *mys ;
...
cudaMalloc((void**)&d_a, sizeof(double) * imageLength);
cudaMalloc((void**)&d_b, sizeof(double) * imageWidth);
cudaMalloc((void**)&mys, sizeof(double) * imageLength * imageWidth);
two_d_example<<<numBlocks,threadsPerBlock>>>(d_a, d_b, mys, imageLength, imageWidth);
...
cudaFree(d_a);
cudaFree(d_b);
}
#endif
Please note that the __global__
has been removed from .h since I was getting the following error owing to it being compiled by g++.
In file included from my_code_main.cpp:12:0:
my_code_header.h:5:1: error: ‘__global__’ does not name a type
When I compile the .cu file with nvcc it is all fine and generates a my_code_kernel.o. But since I am using C++11 in my .cpp I am trying to compile it with g++ and I am getting the following error.
/tmp/ccR2rXzf.o: In function `main':
my_code_main.cpp:(.text+0x1c4): undefined reference to `call_global()'
collect2: ld returned 1 exit status
I understand that this might not have to do anything with CUDA as such and may just be the wrong use of including the header at both places. Also what is the right way to compile and most importantly link the my_code_kernel.o and my_code_main.o(hopefully)? Sorry if this question is too trivial!