package parallelencode;
import org.jocl.*;
import static org.jocl.CL.*;
public class ParallelEncode {
/**
* The source code of the OpenCL program to execute
*/
private static String programSource =
"__kernel void "+
"sampleKernel(__global const float *a,"+
" __global const float *b,"+
" __global uchar16 *c,"+
" __global char *d)"+
"{"+
" int gid = get_global_id(0);"+
" c[gid] = 'q';"+
" "+
" d[gid] = 'm';"+
"}";
/**
* The entry point of this sample
*
* @param args Not used
*/
public static void main(String args[])
{
// Create input- and output data
int n = 17;
float srcArrayA[] = new float[n];
float srcArrayB[] = new float[n];
char dstArray[] = new char[n];
char charArray[] = new char[n];
for (int i=0; i<n; i++)
{
srcArrayA[i] = i;
srcArrayB[i] = i;
}
Pointer srcA = Pointer.to(srcArrayA);
Pointer srcB = Pointer.to(srcArrayB);
Pointer dst = Pointer.to(dstArray);
Pointer cArr = Pointer.to(charArray);
// The platform, device type and device number
// that will be used
final int platformIndex = 0;
final long deviceType = CL_DEVICE_TYPE_ALL;
final int deviceIndex = 0;
// Enable exceptions and subsequently omit error checks in this sample
CL.setExceptionsEnabled(true);
// Obtain the number of platforms
int numPlatformsArray[] = new int[1];
clGetPlatformIDs(0, null, numPlatformsArray);
int numPlatforms = numPlatformsArray[0];
// Obtain a platform ID
cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
clGetPlatformIDs(platforms.length, platforms, null);
cl_platform_id platform = platforms[platformIndex];
// Initialize the context properties
cl_context_properties contextProperties = new cl_context_properties();
contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
// Obtain the number of devices for the platform
int numDevicesArray[] = new int[1];
clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
int numDevices = numDevicesArray[0];
// Obtain a device ID
cl_device_id devices[] = new cl_device_id[numDevices];
clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
cl_device_id device = devices[deviceIndex];
// Create a context for the selected device
cl_context context = clCreateContext(
contextProperties, 1, new cl_device_id[]{device},
null, null, null);
// Create a command-queue for the selected device
cl_command_queue commandQueue =
clCreateCommandQueue(context, device, 0, null);
// Allocate the memory objects for the input- and output data
cl_mem memObjects[] = new cl_mem[4];
memObjects[0] = clCreateBuffer(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
Sizeof.cl_float * n, srcA, null);
memObjects[1] = clCreateBuffer(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
Sizeof.cl_float * n, srcB, null);
memObjects[2] = clCreateBuffer(context,
CL_MEM_READ_WRITE,
Sizeof.cl_char * n, null, null);
memObjects[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, Sizeof.cl_char * n, null, null);
//char *h_rp = (char*)malloc(length);
//cl_mem d_rp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length, h_rp, &err);
//err = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), &d_rp)
// Create the program from the source code
cl_program program = clCreateProgramWithSource(context,
1, new String[]{ programSource }, null, null);
// Build the program
clBuildProgram(program, 0, null, null, null, null);
// Create the kernel
cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);
// Set the arguments for the kernel
clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(memObjects[0]));
clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(memObjects[1]));
clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[2]));
clSetKernelArg(kernel, 3, Sizeof.cl_mem, Pointer.to(memObjects[3]));
// Set the work-item dimensions
long global_work_size[] = new long[]{n};
long local_work_size[] = new long[]{1};
// Execute the kernel
clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
global_work_size, local_work_size, 0, null, null);
// Read the output data
clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
n * Sizeof.cl_char, dst, 0, null, null);
clEnqueueReadBuffer(commandQueue, memObjects[3], CL_TRUE, 0,
n * Sizeof.cl_char, cArr, 0, null, null);
// Release kernel, program, and memory objects
clReleaseMemObject(memObjects[0]);
clReleaseMemObject(memObjects[1]);
clReleaseMemObject(memObjects[2]);
clReleaseMemObject(memObjects[3]);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(commandQueue);
clReleaseContext(context);
System.out.println(java.util.Arrays.toString(dstArray));
System.out.println(java.util.Arrays.toString(charArray));
}
}
Result:
[?, ?, ?, ?, ?, ?, ?, ?, q, , , , , , , , ]
[?, ?, ?, ?, ?, ?, ?, ?, m, , , , , , , , ]
Why does it not produce a q for every one in the array, and what are the question marks? I tried changing some things, like the int gid = get_global_id(0); to int gid = get_global_id(1); and the end result was something like [q, , , ...] and [m, , , ...]. Can someone explain this, and how to pass multiple chars as input to an OpenCL kernel?
this is okay except for buffer copies.
this reads 8 and a half
charvalues or 17 bytes. A mismatch between java char (being 2 bytes) and device-side char (1 byte).Thats why you see correct
qat 17th byte or 9th element.Similar error is made with java bool arrays too.
Also uchar16 means 16 bytes.
this needs multiplication by 16 unless each element works on all 16 elements. If you have meant 17 elements each 16-bytes, then n*16 should be there and host-side(java) should give array of bytes.