Am I overloading my OpenCL kernel? My screen goes blank while the code is running

182 Views Asked by At

My kernel function has to store an intermediate private array with 7500 values. While the code is running, my screen goes blank for a second, and when it resumes the results are not displayed. Assuming that there was not enough private memory, I changed the code a little: it now compares each newly computed value with the biggest value found so far, so that I need not create an array of 7000 values and instead store only the biggest value. But I still get the same issue. So what might be the reason for my screen going blank? This is my kernel:

/*
 * sampleKernel - score every row of TRG and report the two best row indices.
 *
 * For each row of the trgr x trgc row-major matrix TRG the kernel counts the
 * ordered element pairs (tars, preds) of that row that pass the tolerance
 * comparison below, multiplies that count by ENT[row], and tracks the rows
 * with the largest and the second-largest score.
 *
 * Parameters:
 *   trgr - number of rows in TRG (and number of entries read from ENT)
 *   trgc - number of columns in TRG
 *   TRG  - input matrix, read as TRG[row * trgc + col]
 *   ENT  - per-row weight applied to the pair count
 *   RES1 - output; element get_global_id(0) receives the best row index
 *   RES2 - output; element get_global_id(0) receives the runner-up row index
 *
 * Row indices are written as floats because the result buffers are float.
 * With fewer than two rows the missing index is reported as 0.
 *
 * NOTE(review): nothing in the search depends on the work-item id, so every
 * work-item repeats the same trgr * trgc * trgc inner iterations and writes
 * the same pair of indices. If the kernel is launched over many work-items
 * this is a very long-running kernel; confirm whether each work-item was
 * meant to process its own slice of the data.
 */
__kernel void sampleKernel(
    const uint trgr,
    const uint trgc,
    __global const float *TRG,
    __global const float *ENT,
    __global float *RES1,
    __global float *RES2)
{
    const float tol = 0.5f;
    const size_t gid = get_global_id(0);

    float best = 0.0f, second = 0.0f;       /* two highest scores so far  */
    float bestRow = 0.0f, secondRow = 0.0f; /* rows that produced them    */

    /* Unsigned counters match the uint bounds (no signed/unsigned compare). */
    for (uint row = 0; row < trgr; row++) {
        const uint base = row * trgc;
        float hits = 0.0f;

        for (uint tars = 0; tars < trgc; tars++) {
            /* The reference element is invariant in the inner loop. */
            const float ref = TRG[base + tars];
            const float lo = ref - tol;
            const float hi = ref + tol;

            for (uint preds = 0; preds < trgc; preds++) {
                const float cur = TRG[base + preds];
                /*
                 * NOTE(review): both comparisons use '>', so the first one is
                 * implied by the second. A "within tolerance" test would be
                 * (cur > lo && cur < hi) - confirm the intended condition.
                 * Kept exactly as in the original.
                 */
                if (cur > lo && cur > hi) {
                    hits += 1.0f;
                }
            }
        }

        const float score = ENT[row] * hits;

        if (row == 0) {
            /* The first row seeds the maximum. */
            best = score;
            bestRow = 0.0f;
        } else if (score > best) {
            /* New maximum: the previous maximum becomes the runner-up. */
            second = best;
            secondRow = bestRow;
            best = score;
            bestRow = (float)row;
        } else if (row == 1 || score > second) {
            /*
             * Row 1 always seeds the runner-up when it does not beat row 0.
             * Without this, `second` would be read before it was ever
             * assigned on every later iteration.
             */
            second = score;
            secondRow = (float)row;
        }
    }

    RES1[gid] = bestRow;
    RES2[gid] = secondRow;
}

The kernel source is stored in `private static String programSource;`. And here is the host code:

 //writing to GPU
 clSetKernelArg(kernel, 0, Sizeof.cl_uint, Pointer.to(new int[]{7000}));
 clSetKernelArg(kernel, 1, Sizeof.cl_uint, Pointer.to(new int[]{36}));
 clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[0]));
 clSetKernelArg(kernel, 3, Sizeof.cl_mem, Pointer.to(memObjects[1]));  
 clSetKernelArg(kernel, 4, Sizeof.cl_mem, Pointer.to(memObjects[2]));
 clSetKernelArg(kernel, 5, Sizeof.cl_mem, Pointer.to(memObjects[3]));

 //reading from GPU
 clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, m * n * Sizeof.cl_float, pres1, 0, null, null);
 clEnqueueReadBuffer(commandQueue, memObjects[3], CL_TRUE, 0, m * n * Sizeof.cl_float, pres2, 0, null, null);
0

There are 0 best solutions below