Below is a working version of your code. The issues in your code were:
Your kernel was depending on being launched with 512x512 threads, but you were only launching with 64x1 threads.
Your kernel was writing to unaligned addresses with surf2Dwrite().
You were setting up double buffering in OpenGL but you were not swapping the buffers. (
glutSwapBuffers()
).You were initializing an
uchar4
with floats.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#ifdef _WIN32
# define WINDOWS_LEAN_AND_MEAN
# define NOMINMAX
# include <windows.h>
#endif
// OpenGL Graphics includes
#include <GL/glew.h>
#if defined (__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/freeglut.h>
#endif
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#include <vector_types.h>
const unsigned int window_width = 512;
const unsigned int window_height = 512;
GLuint viewGLTexture;
cudaGraphicsResource_t viewCudaResource;
#define check(ans) { _check((ans), __FILE__, __LINE__); }
inline void _check(cudaError_t code, char *file, int line)
{
if (code != cudaSuccess) {
fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), file, line);
exit(code);
}
}
void initGLandCUDA() {
int argc = 0;
char** argv = NULL;
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGBA);
glutInitWindowSize(window_width, window_height);
glutCreateWindow("CUDA GL Interop");
glewInit();
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &viewGLTexture);
glBindTexture(GL_TEXTURE_2D, viewGLTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, window_width, window_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_2D, 0);
check(cudaGLSetGLDevice(0));
check(cudaGraphicsGLRegisterImage(&viewCudaResource, viewGLTexture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsWriteDiscard));
}
__global__ void renderingKernel(cudaSurfaceObject_t image) {
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
uchar4 color = make_uchar4(x / 2, y / 2, 0, 127);
surf2Dwrite(color, image, x * sizeof(color), y, cudaBoundaryModeClamp);
}
void callCUDAKernel(cudaSurfaceObject_t image) {
dim3 block(256, 1, 1);
dim3 grid(2, 512, 1);
renderingKernel<<<grid, block>>>(image);
check(cudaPeekAtLastError());
check(cudaDeviceSynchronize());
}
void renderFrame() {
check(cudaGraphicsMapResources(1, &viewCudaResource));
cudaArray_t viewCudaArray;
check(cudaGraphicsSubResourceGetMappedArray(&viewCudaArray, viewCudaResource, 0, 0));
cudaResourceDesc viewCudaArrayResourceDesc;
memset(&viewCudaArrayResourceDesc, 0, sizeof(viewCudaArrayResourceDesc));
viewCudaArrayResourceDesc.resType = cudaResourceTypeArray;
viewCudaArrayResourceDesc.res.array.array = viewCudaArray;
cudaSurfaceObject_t viewCudaSurfaceObject;
check(cudaCreateSurfaceObject(&viewCudaSurfaceObject, &viewCudaArrayResourceDesc));
callCUDAKernel(viewCudaSurfaceObject);
check(cudaDestroySurfaceObject(viewCudaSurfaceObject));
check(cudaGraphicsUnmapResources(1, &viewCudaResource));
check(cudaStreamSynchronize(0));
glBindTexture(GL_TEXTURE_2D, viewGLTexture);
{
glBegin(GL_QUADS);
{
glTexCoord2f(0.0f, 0.0f); glVertex2f(-1.0f, -1.0f);
glTexCoord2f(1.0f, 0.0f); glVertex2f(+1.0f, -1.0f);
glTexCoord2f(1.0f, 1.0f); glVertex2f(+1.0f, +1.0f);
glTexCoord2f(0.0f, 1.0f); glVertex2f(-1.0f, +1.0f);
}
glEnd();
}
glBindTexture(GL_TEXTURE_2D, 0);
glFinish();
}
int main(int argc, char **argv)
{
initGLandCUDA();
glutDisplayFunc(renderFrame);
//glutKeyboardFunc(keyboard);
//glutMouseFunc(mouse);
glutMainLoop();
}
Output: