JOCL Char not returning all chars

163 Views Asked by At
package parallelencode;

import org.jocl.*;
import static org.jocl.CL.*;

public class ParallelEncode {
    /**
     * The source code of the OpenCL program to execute
     */
    private static String programSource =
        "__kernel void "+
        "sampleKernel(__global const float *a,"+
        "             __global const float *b,"+
        "             __global uchar16 *c,"+
        "             __global char *d)"+
        "{"+
        "    int gid = get_global_id(0);"+
        "    c[gid] = 'q';"+
        "    "+
        "    d[gid] = 'm';"+
        "}";


    /**
     * The entry point of this sample
     * 
     * @param args Not used
     */
    public static void main(String args[])
    {
        // Create input- and output data 
        int n = 17;
        float srcArrayA[] = new float[n];
        float srcArrayB[] = new float[n];
        char dstArray[] = new char[n];
        char charArray[] = new char[n];
        for (int i=0; i<n; i++)
        {
            srcArrayA[i] = i;
            srcArrayB[i] = i;
        }
        Pointer srcA = Pointer.to(srcArrayA);
        Pointer srcB = Pointer.to(srcArrayB); 
        Pointer dst = Pointer.to(dstArray);
        Pointer cArr = Pointer.to(charArray);

        // The platform, device type and device number
        // that will be used
        final int platformIndex = 0;
        final long deviceType = CL_DEVICE_TYPE_ALL;
        final int deviceIndex = 0;

        // Enable exceptions and subsequently omit error checks in this sample
        CL.setExceptionsEnabled(true);

        // Obtain the number of platforms
        int numPlatformsArray[] = new int[1];
        clGetPlatformIDs(0, null, numPlatformsArray);
        int numPlatforms = numPlatformsArray[0];

        // Obtain a platform ID
        cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
        clGetPlatformIDs(platforms.length, platforms, null);
        cl_platform_id platform = platforms[platformIndex];

        // Initialize the context properties
        cl_context_properties contextProperties = new cl_context_properties();
        contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);

        // Obtain the number of devices for the platform
        int numDevicesArray[] = new int[1];
        clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
        int numDevices = numDevicesArray[0];

        // Obtain a device ID 
        cl_device_id devices[] = new cl_device_id[numDevices];
        clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
        cl_device_id device = devices[deviceIndex];

        // Create a context for the selected device
        cl_context context = clCreateContext(
            contextProperties, 1, new cl_device_id[]{device}, 
            null, null, null);

        // Create a command-queue for the selected device
        cl_command_queue commandQueue = 
            clCreateCommandQueue(context, device, 0, null);

        // Allocate the memory objects for the input- and output data
        cl_mem memObjects[] = new cl_mem[4];
        memObjects[0] = clCreateBuffer(context, 
            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
            Sizeof.cl_float * n, srcA, null);
        memObjects[1] = clCreateBuffer(context, 
            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
            Sizeof.cl_float * n, srcB, null);
        memObjects[2] = clCreateBuffer(context, 
            CL_MEM_READ_WRITE, 
            Sizeof.cl_char * n, null, null);
        memObjects[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, Sizeof.cl_char * n, null, null);

        //char *h_rp = (char*)malloc(length);
        //cl_mem d_rp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length, h_rp, &err);
        //err = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), &d_rp)

        // Create the program from the source code
        cl_program program = clCreateProgramWithSource(context,
            1, new String[]{ programSource }, null, null);

        // Build the program
        clBuildProgram(program, 0, null, null, null, null);

        // Create the kernel
        cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);

        // Set the arguments for the kernel
        clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(memObjects[0]));
        clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(memObjects[1]));
        clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[2]));
        clSetKernelArg(kernel, 3, Sizeof.cl_mem, Pointer.to(memObjects[3]));

        // Set the work-item dimensions
        long global_work_size[] = new long[]{n};
        long local_work_size[] = new long[]{1};

        // Execute the kernel
        clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
            global_work_size, local_work_size, 0, null, null);

        // Read the output data
        clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
            n * Sizeof.cl_char, dst, 0, null, null);

        clEnqueueReadBuffer(commandQueue, memObjects[3], CL_TRUE, 0,
                n * Sizeof.cl_char, cArr, 0, null, null);

        // Release kernel, program, and memory objects
        clReleaseMemObject(memObjects[0]);
        clReleaseMemObject(memObjects[1]);
        clReleaseMemObject(memObjects[2]);
        clReleaseMemObject(memObjects[3]);
        clReleaseKernel(kernel);
        clReleaseProgram(program);
        clReleaseCommandQueue(commandQueue);
        clReleaseContext(context);

        System.out.println(java.util.Arrays.toString(dstArray));
        System.out.println(java.util.Arrays.toString(charArray));
    }
}

Result:

[?, ?, ?, ?, ?, ?, ?, ?, q,  ,  ,  ,  ,  ,  ,  ,  ]
[?, ?, ?, ?, ?, ?, ?, ?, m,  ,  ,  ,  ,  ,  ,  ,  ]

Why does it not produce a q for every one in the array, and what are the question marks? I tried changing some things, like the int gid = get_global_id(0); to int gid = get_global_id(1); and the end result was something like [q, , , ...] and [m, , , ...]. Can someone explain this, and how to pass multiple chars as input to an OpenCL kernel?

2

There are 2 best solutions below

6
huseyin tugrul buyukisik On
 int n = 17;

this is okay except for buffer copies.

clEnqueueReadBuffer(commandQueue, memObjects[3], CL_TRUE, 0,
            n * Sizeof.cl_char, cArr, 0, null, null);

this reads 8 and a half char values or 17 bytes. A mismatch between java char (being 2 bytes) and device-side char (1 byte).

Thats why you see correct q at 17th byte or 9th element.

Similar error is made with java bool arrays too.

Also uchar16 means 16 bytes.

    clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
        n * Sizeof.cl_char, dst, 0, null, null);

this needs multiplication by 16 unless each element works on all 16 elements. If you have meant 17 elements each 16-bytes, then n*16 should be there and host-side(java) should give array of bytes.

2
Marco13 On

The most important point was already mentioned in the answer by huseyin tugrul buyukisik:

A java char consists of two bytes (16 bits). In C and OpenCL, a char is one byte (8 bits).

It's not entirely clear what your program is supposed to do in the end, but as huseyin also said: I'm pretty sure that you intended to use a char2 instead of char16 (assuming that this will be some UTF16-related program - and otherwise, the host code would not make sense). Also, you seem to have mixed the input- and output arrays somehow.


Note:

If you now change this to use uchar2, you may also encounter a limitation of OpenCL: According to the restrictions that are mentioned on the Khronos Website :

Built-in types that are less than 32-bits in size i.e. char, uchar, char2, uchar2, short, ushort, and half have the following restriction:

Writes to a pointer (or arrays) of type char, uchar, char2, uchar2, short, ushort, and half ... are not supported.


If you describe more clearly what the program is supposed to do (and drop me a note here, as a comment), I'll try to add a small example that demonstrates how this could be approached.