Frage

My android app passes in an OpenGL texture2D to my OpenCL kernel, however the pixels values being read by my kernel are out of bounds (>255).

I create my OpenGL texture like this:

    GLES20.glGenTextures ( 2, targetTex, 0 );
    GLES20.glBindTexture(GLES20.GL_TEXTURE_2D, targetTex[0]);
    GLES20.glTexParameteri(GLES20.GL_TEXTURE_2D, GLES20.GL_TEXTURE_MIN_FILTER, GLES20.GL_LINEAR);
    GLES20.glTexParameteri(GLES20.GL_TEXTURE_2D, GLES20.GL_TEXTURE_MAG_FILTER, GLES20.GL_LINEAR);
    GLES20.glTexImage2D(GLES20.GL_TEXTURE_2D, 0, GLES20.GL_RGBA, image_width, image_height, 0, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, null);
    GLES20.glBindTexture(GLES20.GL_TEXTURE_2D, 0);

The texture is then rendered to by binding it with a FBO:

    targetFramebuffer = IntBuffer.allocate(1);
    GLES20.glGenFramebuffers(1, targetFramebuffer);
    GLES20.glBindFramebuffer(GLES20.GL_FRAMEBUFFER, targetFramebuffer.get(0));
    GLES20.glFramebufferTexture2D(GLES20.GL_FRAMEBUFFER, GLES20.GL_COLOR_ATTACHMENT0, GLES20.GL_TEXTURE_2D, targetTex[0], 0);
    GLES20.glBindFramebuffer(GLES20.GL_FRAMEBUFFER, 0);

I create the cl memory object like so:

    mem_images[0] = clCreateFromGLTexture2D(m_clContext, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, in_tex, &err);

and this is my OpenCL kernel:

const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;

kernel void transfer_data(__read_only image2d_t input_image, __global float* debug) {
    int2 pos;
    uint4 pixel;
    for (pos.y = get_global_id(1); pos.y < HEIGHT; pos.y += get_global_size(1)) {
        for (pos.x = get_global_id(0); pos.x < WIDTH; pos.x += get_global_size(0)) {
            pixel = read_imageui(input_image, sampler, pos);
            debug[(pos.x + pos.y*WIDTH)*NUM_CHANNELS + 0] = pixel.x;
            debug[(pos.x + pos.y*WIDTH)*NUM_CHANNELS + 1] = pixel.y;
            debug[(pos.x + pos.y*WIDTH)*NUM_CHANNELS + 2] = pixel.z;        
        }
    }
}

This is how I am enqueing the kernel:

    local2Dsize[0] = 4;
    local2Dsize[1] = 4;
    global2Dsize[0] = clamp(image_width, 0, max_work_items[0]);
    global2Dsize[1] = clamp(image_height, 0, max_work_items[1]);

    global2Dsize[0] = ceil((float)global2Dsize[0]/(float)local2Dsize[0])*local2Dsize[0];
    global2Dsize[1] = ceil((float)global2Dsize[1]/(float)local2Dsize[1])*local2Dsize[1];

    twoDlocal_sizes["transfer_data"] = local2Dsize;
    twoDglobal_sizes["transfer_data"] = global2Dsize;

    kernels["transfer_data"] = clCreateKernel(m_program, "transfer_data", &err);

    err  = clSetKernelArg(kernels["transfer_data"], 0, sizeof(cl_mem), &mem_images[0]);
    err |= clSetKernelArg(kernels["transfer_data"], 1, sizeof(cl_mem), &mems["image"]);

err = clEnqueueAcquireGLObjects(m_queue, 1, &mem_images[0], 0, 0, 0);
CHECK_ERROR_OCL(err, "acquiring GL objects", return false);

    err = clEnqueueNDRangeKernel(m_queue, kernels["transfer_data"], 2, NULL, twoDglobal_sizes["transfer_data"], twoDlocal_sizes["transfer_data"], 0, NULL, NULL);

    err = clFinish(m_queue);

err = clEnqueueReleaseGLObjects(m_queue, 1, &mem_images[0], 0, 0, 0);
CHECK_ERROR_OCL(err, "releasing GL objects", return false);

Now back on host when I print out these pixel values (from the array debug), they are way out of bounds and I don't understand why that is the case.

If you need more insight:

The overall aim of my project is to obtain camera frames in form of an OpenGL texture, process them using OpenCL and render the output back to the screen. However the texture obtained from android camera can only be bound to GL_TEXTURE_EXTERNAL_OES (http://developer.android.com/reference/android/graphics/SurfaceTexture.html), and this is not a valid texture to create an OpenCL memory object from. Therefore I am rendering the camera output to a GL_TEXTURE_2D and passing that to OpenCL.

I am sure that the pixels are being rendered to the texture correctly, because when I display the texture on the screen (without any OpenCL involved) it displays the image properly.

I did some debugging by creating a texture (as opposed to getting data from the camera) and passing that to opencl. So these are the mappings I get:

    0 -> 0
    1 -> 7172
    2 -> 8196
    3 -> 8710
    4 -> 9220
    5 -> 9477
    6 -> 9734
    7 -> 9991
    8 -> 10244
    9 -> 10372
    10 -> 10501
    11 -> 10629
    12 -> 10758
    13 -> 10886
    14 -> 11015
    15 -> 11143
    16 -> 11268
    17 -> 11332
    18 -> 11396
    19 -> 11460
    20 -> 11525
    21 -> 11589
    22 -> 11653
    23 -> 11717
    24 -> 11782
    25 -> 11846
    26 -> 11910
    27 -> 11974
    28 -> 12039
    29 -> 12103
    30 -> 12167
    31 -> 12231
    32 -> 12292
    33 -> 12324
    34 -> 12356
    35 -> 12388
    36 -> 12420
    37 -> 12452
    38 -> 12484
    39 -> 12516
    40 -> 12549
    41 -> 12581
    42 -> 12613
    43 -> 12645
    44 -> 12677
    45 -> 12709
    46 -> 12741
    47 -> 12773
    48 -> 12806
    49 -> 12838
    50 -> 12870
    51 -> 12902
    52 -> 12934
    53 -> 12966
    54 -> 12998
    55 -> 13030
    56 -> 13063
    57 -> 13095
    58 -> 13127
    59 -> 13159
    60 -> 13191
    61 -> 13223
    62 -> 13255
    63 -> 13287
    64 -> 13316
    65 -> 13332
    66 -> 13348
    67 -> 13364
    68 -> 13380
    69 -> 13396
    70 -> 13412
    71 -> 13428
    72 -> 13444
    73 -> 13460
    74 -> 13476
    75 -> 13492
    76 -> 13508
    77 -> 13524
    78 -> 13540
    79 -> 13556
    80 -> 13573
    81 -> 13589
    82 -> 13605
    83 -> 13621
    84 -> 13637
    85 -> 13653
    86 -> 13669
    87 -> 13685
    88 -> 13701
    89 -> 13717
    90 -> 13733
    91 -> 13749
    92 -> 13765
    93 -> 13781
    94 -> 13797
    95 -> 13813
    96 -> 13830
    97 -> 13846
    98 -> 13862
    99 -> 13878
    100 -> 13894
    101 -> 13910
    102 -> 13926
    103 -> 13942
    104 -> 13958
    105 -> 13974
    106 -> 13990
    107 -> 14006
    108 -> 14022
    109 -> 14038
    110 -> 14054
    111 -> 14070
    112 -> 14087
    113 -> 14103
    114 -> 14119
    115 -> 14135
    116 -> 14151
    117 -> 14167
    118 -> 14183
    119 -> 14199
    120 -> 14215
    121 -> 14231
    122 -> 14247
    123 -> 14263
    124 -> 14279
    125 -> 14295
    126 -> 14311
    127 -> 14327
    128 -> 14340
    129 -> 14348
    130 -> 14356
    131 -> 14364
    132 -> 14372
    133 -> 14380
    134 -> 14388
    135 -> 14396
    136 -> 14404
    137 -> 14412
    138 -> 14420
    139 -> 14428
    140 -> 14436
    141 -> 14444
    142 -> 14452
    143 -> 14460
    144 -> 14468
    145 -> 14476
    146 -> 14484
    147 -> 14492
    148 -> 14500
    149 -> 14508
    150 -> 14516
    151 -> 14524
    152 -> 14532
    153 -> 14540
    154 -> 14548
    155 -> 14556
    156 -> 14564
    157 -> 14572
    158 -> 14580
    159 -> 14588
    160 -> 14597
    161 -> 14605
    162 -> 14613
    163 -> 14621
    164 -> 14629
    165 -> 14637
    166 -> 14645
    167 -> 14653
    168 -> 14661
    169 -> 14669
    170 -> 14677
    171 -> 14685
    172 -> 14693
    173 -> 14701
    174 -> 14709
    175 -> 14717
    176 -> 14725
    177 -> 14733
    178 -> 14741
    179 -> 14749
    180 -> 14757
    181 -> 14765
    182 -> 14773
    183 -> 14781
    184 -> 14789
    185 -> 14797
    186 -> 14805
    187 -> 14813
    188 -> 14821
    189 -> 14829
    190 -> 14837
    191 -> 14845
    192 -> 14854
    193 -> 14862
    194 -> 14870
    195 -> 14878
    196 -> 14886
    197 -> 14894
    198 -> 14902
    199 -> 14910
    200 -> 14918
    201 -> 14926
    202 -> 14934
    203 -> 14942
    204 -> 14950
    205 -> 14958
    206 -> 14966
    207 -> 14974
    208 -> 14982
    209 -> 14990
    210 -> 14998
    211 -> 15006
    212 -> 15014
    213 -> 15022
    214 -> 15030
    215 -> 15038
    216 -> 15046
    217 -> 15054
    218 -> 15062
    219 -> 15070
    220 -> 15078
    221 -> 15086
    222 -> 15094
    223 -> 15102
    224 -> 15111
    225 -> 15119
    226 -> 15127
    227 -> 15135
    228 -> 15143
    229 -> 15151
    230 -> 15159
    231 -> 15167
    232 -> 15175
    233 -> 15183
    234 -> 15191
    235 -> 15199
    236 -> 15207
    237 -> 15215
    238 -> 15223
    239 -> 15231
    240 -> 15239
    241 -> 15247
    242 -> 15255
    243 -> 15263
    244 -> 15271
    245 -> 15279
    246 -> 15287
    247 -> 15295
    248 -> 15303
    249 -> 15311
    250 -> 15319
    251 -> 15327
    252 -> 15335
    253 -> 15343
    254 -> 15351
    255 -> 15359

On the left is the colour value that I input in the OpenGL texture and on the left is the corresponding value I get when I read the values in OpenCL.

War es hilfreich?

Lösung 3

Okay so I think it's a bug in the OpenCL implementation.

With a bit of help from Wolfram Alpha, I created a function to reverse the above mappings and obtain values in the range of 0 and 255.

    float GL_to_CL(uint val) {
        if (val >= 14340) return round(0.1245790*val - 1658.44);    //>=128
        if (val >= 13316) return round(0.0622869*val - 765.408);    //>=64
        if (val >= 12292) return round(0.0311424*val - 350.800);    //>=32
        if (val >= 11268) return round(0.0155702*val - 159.443);    //>=16

        float v = (float) val;
        return round(0.0000000000000125922*pow(v,4.f) - 0.00000000026729*pow(v,3.f) + 0.00000198135*pow(v,2.f) - 0.00496681*v - 0.0000808829);
    }

So the GL_to_CL() is a combination of 4 linear functions and a Quartic function. I tried creating just a single function using polynomial interpolation, however the degree of the polynomial was too large and therefore more computationally expensive to solve than the combination of the 5 functions proposed above.

Another alternate is to use a 15k sized array to achieve constant time, however that would require me uploading roughly 15k Bytes to GPU's global memory. Considering I am using the kernels to do image processing, I would be slightly pushing it. Also accessing from global memory in OpenCL is often more expensive than performing some simple calculations.

Andere Tipps

The GL_UNSIGNED_BYTE texture is being mapped to OpenCL as CL_UNORM_INT8. You need to use read_imagef to read from these images rather than read_imageui. The values you are seeing when you use read_imageui are the raw bits of the internal floating point format.

You are not acquiring and releasing the GL objects before accessing them. This causes the kernel not to read to data in the GL internal buffer but a local CL copy of it.

Proper code: (BTW you should check those "err" values for errors)

local2Dsize[0] = 4;
local2Dsize[1] = 4;
global2Dsize[0] = clamp(image_width, 0, max_work_items[0]);
global2Dsize[1] = clamp(image_height, 0, max_work_items[1]);

global2Dsize[0] = ceil((float)global2Dsize[0]/(float)local2Dsize[0])*local2Dsize[0];
global2Dsize[1] = ceil((float)global2Dsize[1]/(float)local2Dsize[1])*local2Dsize[1];

twoDlocal_sizes["transfer_data"] = local2Dsize;
twoDglobal_sizes["transfer_data"] = global2Dsize;

kernels["transfer_data"] = clCreateKernel(m_program, "transfer_data", &err);

err  = clSetKernelArg(kernels["transfer_data"], 0, sizeof(cl_mem), &mem_images[0]);
err |= clSetKernelArg(kernels["transfer_data"], 1, sizeof(cl_mem), &mems["image"]);

err = clEnqueueAcquireGLObjects(m_queue, 1, mem_images, NULL, NULL, NULL);
err = clEnqueueNDRangeKernel(m_queue, kernels["transfer_data"], 2, NULL, twoDglobal_sizes["transfer_data"], twoDlocal_sizes["transfer_data"], 0, NULL, NULL);
err = clEnqueueReleaseGLObjects (m_queue, 1, mem_images, NULL, NULL, NULL);

err = clFinish(m_queue);
Lizenziert unter: CC-BY-SA mit Zuschreibung
Nicht verbunden mit StackOverflow
scroll top