My kernel function has to store an intermediate private array with 7500 values. When I run the code, my screen goes blank for a second, and when it resumes, no results are displayed. Assuming there was not enough private memory, I changed the code slightly: it now compares each newly computed value with the biggest value found so far, so instead of creating an array of 7000 values I store only the biggest ones. But I still get the same issue. What might be the reason for my screen going blank? This is my kernel:
/*
 * sampleKernel - score every row of TRG and report the two best rows.
 *
 * TRG is indexed as a row-major trgr x trgc matrix. For each row `pred`,
 * the kernel counts the ordered column pairs (tars, preds) that satisfy the
 * tolerance test below, multiplies that count by ENT[pred], and tracks the
 * rows with the largest and second-largest product.
 *
 * Parameters:
 *   trgr  number of rows scanned in TRG (and entries read from ENT)
 *   trgc  number of columns per row of TRG
 *   TRG   input matrix, read only
 *   ENT   per-row multiplier, read only
 *   RES1  RES1[get_global_id(0)] receives the index of the best row
 *   RES2  RES2[get_global_id(0)] receives the index of the runner-up row
 *
 * Row indices are written as floats. With trgr < 2 the runner-up index is
 * 0; with trgr == 0 both outputs are 0.
 *
 * NOTE(review): nothing in the scan depends on the work-item id, so every
 * work-item repeats the full trgr * trgc * trgc scan and stores the same
 * two values. With a large global size this makes the kernel very long
 * running; a display-driver watchdog reset is a plausible cause of the
 * screen blanking -- confirm, and consider launching one work-item or
 * distributing rows across work-items.
 */
__kernel void sampleKernel(
    const uint trgr,
    const uint trgc,
    __global const float *TRG,
    __global const float *ENT,
    __global float *RES1,
    __global float *RES2)
{
    const float tol = 0.5f;
    const size_t i = get_global_id(0);

    /* Best and runner-up scores with their row indices. */
    float big1 = 0.0f;
    float big2 = 0.0f;
    float g1 = 0.0f;
    float g2 = 0.0f;

    for (uint pred = 0; pred < trgr; pred++) {
        const uint base = pred * trgc;
        float val = 0.0f;

        for (uint tars = 0; tars < trgc; tars++) {
            /* Invariant across the inner loop, so load it once. */
            const float ref = TRG[base + tars];

            for (uint preds = 0; preds < trgc; preds++) {
                const float cand = TRG[base + preds];

                /*
                 * NOTE(review): both comparisons are lower bounds, so for
                 * a positive tol the first is implied by the second. If
                 * the intent was "within tol of ref", the second test
                 * should be `<` -- confirm before changing.
                 */
                if (cand > (ref - tol) && cand > (ref + tol)) {
                    val = val + 1;
                }
            }
        }
        val = ENT[pred] * val;

        if (pred == 0) {
            /* First row seeds the best slot. */
            big1 = val;
            g1 = pred;
        } else if (val > big1) {
            /* New best: the previous best becomes the runner-up. */
            big2 = big1;
            g2 = g1;
            big1 = val;
            g1 = pred;
        } else if (pred == 1 || val > big2) {
            /*
             * At pred == 1 the runner-up slot is still empty, so fill it
             * unconditionally; otherwise big2 would be read before it was
             * ever assigned on later rows.
             */
            big2 = val;
            g2 = pred;
        }
    }

    RES1[i] = g1;
    RES2[i] = g2;
}
The kernel source is stored in `private static String programSource;`. And here is the host code:
//writing to GPU
clSetKernelArg(kernel, 0, Sizeof.cl_uint, Pointer.to(new int[]{7000}));
clSetKernelArg(kernel, 1, Sizeof.cl_uint, Pointer.to(new int[]{36}));
clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[0]));
clSetKernelArg(kernel, 3, Sizeof.cl_mem, Pointer.to(memObjects[1]));
clSetKernelArg(kernel, 4, Sizeof.cl_mem, Pointer.to(memObjects[2]));
clSetKernelArg(kernel, 5, Sizeof.cl_mem, Pointer.to(memObjects[3]));
//reading from GPU
clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, m * n * Sizeof.cl_float, pres1, 0, null, null);
clEnqueueReadBuffer(commandQueue, memObjects[3], CL_TRUE, 0, m * n * Sizeof.cl_float, pres2, 0, null, null);