سؤال

I'm not sure if this is a bug, or if I've just made a simple mistake, but it appears that reading an OpenCL buffer into a 2d vector causes some weird behaviour.

By that I mean that it either segfaults or crashes with a message such as "corrupted double-linked list" or "free(): invalid size" after the queue.enqueueReadBuffer call. I can provide the backtrace / memory map if it would help.

Reading into a 1d vector works as expected.

I am running Linux 3.8.0-35 (x86_64) with AMD Catalyst 13.25.5 and I compiled this using: g++ -I/opt/AMDAPP/include main.cpp OpenCl.cpp -lOpenCL

Minimal working example:

main.cpp

// System headers
#include <iostream>
#include <vector>

// Third-party headers
#include <CL/cl.hpp>

// Project headers
#include "OpenCl.h"

int main(int argc, char* argv[])
{
    OpenCl opencl;

    const unsigned int num_rows = 241;
    const unsigned int num_cols = 886;
    const unsigned int num_elements = num_rows * num_cols;
    const size_t array_sz = num_elements * sizeof(cl_float);
    const std::vector<cl_float> A_1d(num_elements, 1.2345f);
    std::vector<cl_float> B_1d(num_elements, 0);
    const std::vector<std::vector<cl_float> > A_2d(num_rows, std::vector<cl_float>(num_cols, 1.2345f));
    std::vector<std::vector<cl_float> > B_2d(num_rows, std::vector<cl_float>(num_cols, 0));

    // Works as expected
    std::cout << "START 1D TEST\n";
    opencl.test1D(A_1d, B_1d, array_sz);
    std::cout << "1D TEST COMPLETE\n";

    // Crashes
    std::cout << "START 2D TEST\n";
    opencl.test2D(A_2d, B_2d, array_sz);
    std::cout << "2D TEST COMPLETE\n";

    return 0;
}

OpenCl.h

#pragma once
#define __CL_ENABLE_EXCEPTIONS

// Third-party headers
#include <CL/cl.hpp>

/**
 * Small helper that owns one OpenCL context and one command queue and
 * exposes two host <-> device transfer tests.
 */
class OpenCl {
  public:
    /// Creates the GPU context and the command queue used by the tests.
    OpenCl();

    /// Writes array_sz bytes starting at &A[0] into a device buffer and
    /// reads the same number of bytes back into &B[0].
    void test1D(const std::vector<cl_float> &A, std::vector<cl_float> &B, const size_t array_sz);

    /// Nested-vector variant of test1D: uses &A[0][0] and &B[0][0] as the
    /// host pointers for the array_sz-byte transfers.
    void test2D(const std::vector<std::vector<cl_float> > &A, std::vector<std::vector<cl_float> > &B, const size_t array_sz);

  private:
    cl::Context context;     ///< Context created in the constructor.
    cl::CommandQueue queue;  ///< Queue on the first device of that context.
};

OpenCl.cpp

// Class header
#include "OpenCl.h"

// System headers
#include <iostream>
#include <vector>

// Third-party headers
#include <CL/cl.hpp>

/**
 * Builds the OpenCL state shared by the tests: a GPU context on the first
 * reported platform and a command queue on that context's first device.
 *
 * @throws cl::Error if no platform is reported or the context exposes no
 *         device. With __CL_ENABLE_EXCEPTIONS defined, failures of the
 *         wrapped OpenCL calls are also expected to surface as cl::Error.
 */
OpenCl::OpenCl()
{
    // Get available platforms
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);

    // Indexing an empty vector is undefined behaviour; fail explicitly instead.
    if (platforms.empty()) {
        throw cl::Error(CL_INVALID_PLATFORM, "No OpenCL platform available");
    }

    // Select the default platform and create a context using the GPU.
    // The property list is a zero-terminated sequence of key/value pairs.
    cl_context_properties cps[] = {
        CL_CONTEXT_PLATFORM,
        reinterpret_cast<cl_context_properties>(platforms[0]()),
        0
    };
    context = cl::Context(CL_DEVICE_TYPE_GPU, cps);

    // Get a list of devices on this platform
    std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
    if (devices.empty()) {
        throw cl::Error(CL_DEVICE_NOT_FOUND, "No GPU device in the OpenCL context");
    }

    // Create a command queue and use the first device
    queue = cl::CommandQueue(context, devices[0]);
}

void OpenCl::test1D(const std::vector<cl_float> &A,
                    std::vector<cl_float> &B,
                    const size_t array_sz)
{
    try {
        // Initialize device buffer
        cl::Buffer A_d = cl::Buffer(context, CL_MEM_READ_ONLY, array_sz);

        // Transfer data to device
        queue.enqueueWriteBuffer(A_d, CL_TRUE, 0, array_sz, &A[0]);

        // Transfer data from device
        std::cout << "B[0]: " << B[0] << "\n";
        queue.enqueueReadBuffer(A_d, CL_TRUE, 0, array_sz, &B[0]);
        std::cout << "B[0]: " << B[0] << "\n";
    } catch(cl::Error &error) {
        std::cout << error.what() << "(" << error.err() << ")" << std::endl;
        std::cout << "Program failed!\n";
    }
}

// Attempts the same host -> device -> host round trip as test1D, but with
// nested vectors as the host storage.
//
// NOTE(review): this function is the source of the reported crash. A
// std::vector<std::vector<cl_float> > does not keep its rows in one
// contiguous block; each inner vector has its own allocation. &A[0][0] and
// &B[0][0] therefore address the first row only, so whenever array_sz is
// larger than one row the write below reads past the end of A[0] and the
// read below writes past the end of B[0]. The transfers have to be issued
// one row at a time, or the data kept in a single flat vector.
void OpenCl::test2D(const std::vector<std::vector<cl_float> > &A,
                    std::vector<std::vector<cl_float> > &B,
                    const size_t array_sz)
{
    try {
        // Initialize device buffer
        cl::Buffer A_d = cl::Buffer(context, CL_MEM_READ_ONLY, array_sz);

        // Transfer data to device
        // (reads array_sz bytes starting at the first element of row 0)
        queue.enqueueWriteBuffer(A_d, CL_TRUE, 0, array_sz, &A[0][0]);

        // Transfer data from device
        // (writes array_sz bytes starting at the first element of row 0)
        std::cout << "B[0][0]: " << B[0][0] << "\n";
        queue.enqueueReadBuffer(A_d, CL_TRUE, 0, array_sz, &B[0][0]);
        std::cout << "B[0][0]: " << B[0][0] << "\n";
    } catch(cl::Error &error) {
        // Only OpenCL errors are handled here; the reported crash is not
        // caught by this handler.
        std::cout << error.what() << "(" << error.err() << ")" << std::endl;
        std::cout << "Program failed!\n";
    }
}
هل كانت مفيدة؟

المحلول

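The memory held by a std::vector< std::vector<float> > is not contiguous: each inner vector owns its own separately allocated block, so you cannot copy the whole thing in one operation. You would have to copy it one row at a time: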

// Each row is a separate allocation, so transfer the data one row at a time.
// Assumes A is non-empty and every row of A and B has the same length.
const size_t row_size = A[0].size() * sizeof(A[0][0]);

// Host -> device: row `row` lands at byte offset row * row_size in the buffer.
for(size_t row = 0; row < A.size(); ++row)
    queue.enqueueWriteBuffer(A_d, CL_TRUE, /*offset=*/row * row_size, /*size=*/row_size, &A[row][0]);

// Device -> host: the read-back has to be split per row in the same way.
// A single read of the whole buffer into &B[0][0] writes past the end of
// B's first row, which is what corrupts the heap.
for(size_t row = 0; row < B.size(); ++row)
    queue.enqueueReadBuffer(A_d, CL_TRUE, /*offset=*/row * row_size, /*size=*/row_size, &B[row][0]);

But you would be better off in terms of performance if your data were laid out in a single contiguous array anyway (as in your test1D).

مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top