白嫖来的C端代码:
matrix.c:
#include#include #include #include #pragma warning( disable : 4996 ) int main() { cl_int error; cl_platform_id platforms; cl_device_id devices; cl_context context; FILE *program_handle; size_t program_size; char *program_buffer; cl_program program; size_t log_size; char *program_log; char kernel_name[] = "createBuffer"; cl_kernel kernel; cl_command_queue queue; //获取平台 error = clGetPlatformIDs(1, &platforms, NULL); if (error != 0) { printf("Get platform failed!"); return -1; } //获取设备 error = clGetDeviceIDs(platforms, CL_DEVICE_TYPE_GPU, 1, &devices, NULL); if (error != 0) { printf("Get device failed!"); return -1; } //创建上下文 context = clCreateContext(NULL,1,&devices,NULL,NULL,&error); if (error != 0) { printf("Creat context failed!"); return -1; } //创建程序;注意要用"rb" program_handle = fopen("kernel.cl","rb"); if (program_handle == NULL) { printf("The kernle can not be opened!"); return -1; } fseek(program_handle,0,SEEK_END); program_size = ftell(program_handle); rewind(program_handle); program_buffer = (char *)malloc(program_size+1); program_buffer[program_size] = ' '; error=fread(program_buffer,sizeof(char),program_size,program_handle); if (error == 0) { printf("Read kernel failed!"); return -1; } fclose(program_handle); program = clCreateProgramWithSource(context,1,(const char **)&program_buffer, &program_size,&error); if (error < 0) { printf("Couldn't create the program!"); return -1; } //编译程序 error = clBuildProgram(program,1,&devices,NULL,NULL,NULL); if (error < 0) { //确定日志文件的大小 clGetProgramBuildInfo(program,devices,CL_PROGRAM_BUILD_LOG,0,NULL,&log_size); program_log = (char *)malloc(log_size+1); program_log[log_size] = ' '; //读取日志 clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, log_size+1, program_log, NULL); printf("%sn",program_log); free(program_log); return -1; } free(program_buffer); //创建命令队列 queue = clCreateCommandQueue(context, devices, CL_QUEUE_PROFILING_ENABLE, &error); if (error < 0) { printf("Coudn't create the command queue"); return -1; } 
//创建内核 kernel = clCreateKernel(program,kernel_name,&error); if (kernel==NULL) { printf("Couldn't create kernel!n"); return -1; } //初始化参数 float result[100]; float a_in[100]; float b_in[100]; for (int i = 0; i < 100; i++) { a_in[i] = i; b_in[i] = i*2.0; } //创建缓存对象 cl_mem memObject1 = clCreateBuffer(context,CL_MEM_READ_onLY|CL_MEM_COPY_HOST_PTR,sizeof(float)*100,a_in,&error); if (error < 0) { printf("Creat memObject1 failed!n"); return -1; } cl_mem memObject2 = clCreateBuffer(context, CL_MEM_READ_onLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * 100, b_in, &error); if (error < 0) { printf("Creat memObject2 failed!n"); return -1; } cl_mem memObject3 = clCreateBuffer(context, CL_MEM_WRITE_onLY , sizeof(float) * 100, NULL, &error); if (error < 0) { printf("Creat memObject3 failed!n"); return -1; } //设置内核参数 error = clSetKernelArg(kernel,0,sizeof(cl_mem),&memObject1); error|= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObject2); error |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObject3); if (error != CL_SUCCESS) { printf("Error setting kernel arguments!n"); return -1; } //执行内核 size_t globalWorkSize[1] = {100}; size_t localWorkSize[1] = {1}; error = clEnqueueNDRangeKernel(queue,kernel,1,NULL,globalWorkSize, localWorkSize,0,NULL,NULL); if (error != CL_SUCCESS) { printf("Error queuing kernel for execution!n"); return -1; } //读取执行结果 error = clEnqueueReadBuffer(queue,memObject3,CL_TRUE,0,100*sizeof(float), result,0,NULL,NULL); if (error != CL_SUCCESS) { printf("Error reading result buffer!n"); return -1; } //显示结果 for (int i = 0; i < 100; i++) { printf("%f ",result[i]); } printf("n"); //释放资源 clReleaseDevice(devices); clReleaseContext(context); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseMemObject(memObject1); clReleaseMemObject(memObject2); clReleaseMemObject(memObject3); return 0; }
OpenCL代码:
kernel.cl:
/*
 * Element-wise vector addition: each work-item handles the single element
 * selected by its global id in dimension 0 and writes
 * result[i] = a_in[i] + b_in[i].
 *
 * The kernel performs no bounds check, so the host must not launch more
 * work-items than the buffers have elements.
 */
__kernel void createBuffer(__global const float *a_in,
                           __global const float *b_in,
                           __global float *result)
{
    const int idx = get_global_id(0);
    const float sum = a_in[idx] + b_in[idx];

    result[idx] = sum;
}
执行逻辑是,用OpenCL开发的kernel.cl程序,在HOST端的C程序中被动态加载,运行时编译,投递到GPU中运行,跑出结果后再从GPU MEM中读回来打印。
验证:
结合a_in,b_in初始化值和kernel.cl的逻辑,可以知道正确的结果应该是首项为0,公差为3的等差数列,我们编译运行,看一下结果是否符合我们预期:
编译命令:
gcc -I/usr/local/cuda-11.5/targets/x86_64-linux/include matrix.c -o main -L/usr/local/cuda-11.5/targets/x86_64-linux/lib/ -lOpenCL
符合预期,说明程序是对的。
结束!


