I'm not sure whether this is a bug or a simple mistake on my part, but reading an OpenCL buffer into a 2D vector (a std::vector of std::vectors) causes some weird behaviour.
Specifically, after the queue.enqueueReadBuffer call the program either segfaults or aborts with a message such as "corrupted double-linked list" or "free(): invalid size". I can provide the backtrace / memory map if that would help.
Reading into a 1d vector works as expected.
I am running Linux 3.8.0-35 (x86_64) with AMD Catalyst 13.25.5 and I compiled this using: g++ -I/opt/AMDAPP/include main.cpp OpenCl.cpp -lOpenCL
Minimal working example:
main.cpp
// System headers
#include <iostream>
#include <vector>
// Third-party headers
#include <CL/cl.hpp>
// Project headers
#include "OpenCl.h"
// Test driver: builds flat and nested host data sets of identical total size
// and sends each through the corresponding OpenCl round-trip test.
int main(int argc, char* argv[])
{
    typedef std::vector<cl_float> Row;
    typedef std::vector<Row> Matrix;

    OpenCl opencl;

    // Problem dimensions; the byte count is what gets passed to both tests.
    const unsigned int rows = 241;
    const unsigned int cols = 886;
    const unsigned int total = rows * cols;
    const size_t bytes = total * sizeof(cl_float);

    // Flat data: one contiguous run of `total` floats.
    const Row flat_src(total, 1.2345f);
    Row flat_dst(total, 0);

    // Nested data: `rows` separate row vectors of `cols` floats each.
    const Row src_row(cols, 1.2345f);
    const Row dst_row(cols, 0);
    const Matrix nested_src(rows, src_row);
    Matrix nested_dst(rows, dst_row);

    // Flat (1D) round trip.
    std::cout << "START 1D TEST\n";
    opencl.test1D(flat_src, flat_dst, bytes);
    std::cout << "1D TEST COMPLETE\n";

    // Nested (2D) round trip -- the case this report is about.
    std::cout << "START 2D TEST\n";
    opencl.test2D(nested_src, nested_dst, bytes);
    std::cout << "2D TEST COMPLETE\n";

    return 0;
}
OpenCl.h
#pragma once
#define __CL_ENABLE_EXCEPTIONS
// Third-party headers
#include <CL/cl.hpp>
// Small wrapper that owns one OpenCL context and one command queue and
// exposes two host <-> device round-trip tests.
class OpenCl {
public:
// Creates the context and the command queue (see OpenCl.cpp).
OpenCl();
// Round trip for a flat vector: uploads array_sz bytes from A into a device
// buffer and reads them back into B. array_sz is a size in bytes.
void test1D(const std::vector<cl_float> &A,
std::vector<cl_float> &B,
const size_t array_sz);
// Same round trip for data stored as a vector of row vectors.
// array_sz is the size in bytes of the device buffer.
void test2D(const std::vector<std::vector<cl_float> > &A,
std::vector<std::vector<cl_float> > &B,
const size_t array_sz);
private:
// Context the device buffers are created in.
cl::Context context;
// Queue on which the buffer writes and reads are enqueued.
cl::CommandQueue queue;
};
OpenCl.cpp
// Class header
#include "OpenCl.h"
// System headers
#include <iostream>
#include <vector>
// Third-party headers
#include <CL/cl.hpp>
// Creates a GPU context on the first available OpenCL platform and a command
// queue on the first device of that context.
//
// Throws cl::Error if the OpenCL calls fail, if no platform is installed, or
// if the context reports no devices.
OpenCl::OpenCl()
{
    // Get available platforms
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if (platforms.empty()) {
        // Indexing platforms[0] on an empty list would be undefined behaviour.
        throw cl::Error(CL_INVALID_PLATFORM, "OpenCl: no OpenCL platform available");
    }

    // Select the first platform and create a context using the GPU.
    // The property list is a zero-terminated sequence of (key, value) pairs.
    cl_context_properties cps[] = {
        CL_CONTEXT_PLATFORM,
        reinterpret_cast<cl_context_properties>(platforms[0]()),
        0
    };
    context = cl::Context(CL_DEVICE_TYPE_GPU, cps);

    // Get the list of devices that belong to this context
    std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
    if (devices.empty()) {
        // Same guard as above for devices[0].
        throw cl::Error(CL_DEVICE_NOT_FOUND, "OpenCl: context contains no devices");
    }

    // Create a command queue on the first device
    queue = cl::CommandQueue(context, devices[0]);
}
void OpenCl::test1D(const std::vector<cl_float> &A,
std::vector<cl_float> &B,
const size_t array_sz)
{
try {
// Initialize device buffer
cl::Buffer A_d = cl::Buffer(context, CL_MEM_READ_ONLY, array_sz);
// Transfer data to device
queue.enqueueWriteBuffer(A_d, CL_TRUE, 0, array_sz, &A[0]);
// Transfer data from device
std::cout << "B[0]: " << B[0] << "\n";
queue.enqueueReadBuffer(A_d, CL_TRUE, 0, array_sz, &B[0]);
std::cout << "B[0]: " << B[0] << "\n";
} catch(cl::Error &error) {
std::cout << error.what() << "(" << error.err() << ")" << std::endl;
std::cout << "Program failed!\n";
}
}
void OpenCl::test2D(const std::vector<std::vector<cl_float> > &A,
std::vector<std::vector<cl_float> > &B,
const size_t array_sz)
{
try {
// Initialize device buffer
cl::Buffer A_d = cl::Buffer(context, CL_MEM_READ_ONLY, array_sz);
// Transfer data to device
queue.enqueueWriteBuffer(A_d, CL_TRUE, 0, array_sz, &A[0][0]);
// Transfer data from device
std::cout << "B[0][0]: " << B[0][0] << "\n";
queue.enqueueReadBuffer(A_d, CL_TRUE, 0, array_sz, &B[0][0]);
std::cout << "B[0][0]: " << B[0][0] << "\n";
} catch(cl::Error &error) {
std::cout << error.what() << "(" << error.err() << ")" << std::endl;
std::cout << "Program failed!\n";
}
}