I have a piece of code:
#include <stdio.h>
#include <stdlib.h>

#include <CL/cl.h>
void axpy(float a, float* x, float* y, int n) {
cl_context context;
cl_command_queue queue;
cl_mem x_buffer;
cl_mem y_buffer;
cl_program program;
cl_kernel kernel;
// Get the list of available OpenCL devices.
cl_device_id* devices;
cl_uint num_devices;
clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
devices = (cl_device_id*)malloc(num_devices * sizeof(cl_device_id));
clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
// Create a context and command queue.
cl_int err;
context = clCreateContext(NULL, num_devices, devices, NULL, NULL, &err);
queue = clCreateCommandQueue(context, devices[0], 0, &err);
// Create buffers for the input and output vectors.
x_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, n * sizeof(float), NULL, &err);
y_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, n * sizeof(float), NULL, &err);
// Write the input vectors to the buffers.
clEnqueueWriteBuffer(queue, x_buffer, CL_TRUE, 0, n * sizeof(float), x, 0, NULL, NULL);
// Create the kernel object.
const char* kernel_source =
"__kernel void axpy(__global float* x, __global float* y, float a, int n) {\
int i = get_global_id(0);\
if (i < n) {\
y[i] = a * x[i] + y[i];\
}\
}";
program = clCreateProgramWithSource(context, 1, &kernel_source, NULL, &err);
err = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
// Create the kernel object.
kernel = clCreateKernel(program, "axpy", &err);
// Set the kernel arguments.
clSetKernelArg(kernel, 0, sizeof(cl_mem), &x_buffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &y_buffer);
clSetKernelArg(kernel, 2, sizeof(float), &a);
clSetKernelArg(kernel, 3, sizeof(int), &n);
// Execute the kernel.
size_t global_work_size = n;
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
// Wait for the kernel to finish executing.
clFinish(queue);
// Read the output vector from the buffer.
clEnqueueReadBuffer(queue, y_buffer, CL_TRUE, 0, n * sizeof(float), y, 0, NULL, NULL);
// Release the resources.
clReleaseMemObject(x_buffer);
clReleaseMemObject(y_buffer);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
// Free the list of devices.
free(devices);
}
/* Program entry point: currently a placeholder that performs no work. */
int main(int argc, char** argv)
{
    /* The command-line arguments are not used yet. */
    (void)argc;
    (void)argv;

    // Your code here

    return 0;
}
which I am trying to compile with:
cl /EHsc axpy.cpp /I"%CUDA_PATH%\include" /link "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2/lib/x64"
However, I get the following error message:
LINK : fatal error LNK1181: cannot open input file 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\lib\x64.obj'
I would appreciate any help in resolving this problem.
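I think I have figured my problem out. I should have named the libraries that I need to link against explicitly, instead of just pointing to the directory: anything after /link that is not an option is treated as an input file, and a name without an extension is assumed to be an .obj file, which is why the linker looked for 'x64.obj'. Here is a correct way to specify the link options for cl.exe:
cl /EHsc axpy.cpp /I"%CUDA_PATH%\include" /link /LIBPATH:"%CUDA_PATH%\lib\x64" OpenCL.lib
In case we need to link with multiple libraries, we can specify them one after another after the /LIBPATH option.
To get rid of the compiler warning, I simply added one line at the top of the code. (The warning text and the added line are missing from this post; for the OpenCL headers this is presumably a #define such as CL_TARGET_OPENCL_VERSION placed before #include <CL/cl.h>.)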