Firefly开源社区

标题: Firefly rk3288 OpenCL [打印本页]

作者: bunchen    时间: 2015-10-9 09:46
标题: Firefly rk3288 OpenCL
本帖最后由 bunchen 于 2015-10-9 10:20 编辑

Firefly rk3288采用Mali-T764的GPU,该GPU支持OpenCL 1.1。下面一步一步介绍android下OpenCL开发。

1.首先我们需要OpenCL的头文件和库。头文件可以在khronos的网站上下载:https://www.khronos.org/registry/cl/ 。由于Mali-T764支持OpenCL 1.1,所以我们下载1.1版的头文件并放到对应位置。新建include目录,把opencl.h放在include目录下,把cl_d3d10.h 、 cl_ext.h 、 cl_gl_ext.h 、 cl_gl.h 、 cl.h 、 cl.hpp 、 cl_platform.h放到include/CL目录下。OpenCL的库文件位于firefly rk3288源代码的device/rockchip/common/gpu/libMali-T760/libGLES_mali.so。

2.然后就可以在此基础上开发了。下面给出一个打印OpenCL信息的Demo。由于libGLES_mali.so并没有放入android的/system/lib路径下,所以我们要在Demo的Android.mk文件中把libGLES_mali.so声明为预编译库,Android.mk文件如下:
  # Build rules for the opencl-info demo.
  LOCAL_PATH := $(call my-dir)

  # Step 1: declare the vendor Mali library as a prebuilt module so that the
  # executable below can list it in LOCAL_SHARED_LIBRARIES.
  include $(CLEAR_VARS)
  LOCAL_PREBUILT_LIBS := ../opencl/lib/libGLES_mali.so
  include $(BUILD_MULTI_PREBUILT)

  # Step 2: build the opencl-info executable from opencl-info.c, using the
  # OpenCL headers under ../opencl/include.
  include $(CLEAR_VARS)
  LOCAL_MODULE := opencl-info
  LOCAL_SRC_FILES := opencl-info.c
  LOCAL_C_INCLUDES := $(LOCAL_PATH)/../opencl/include
  LOCAL_SHARED_LIBRARIES := libGLES_mali
  include $(BUILD_EXECUTABLE)
复制代码

在opencl-info.c中我们打印一些OpenCL的信息:
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <opencl.h>

  4. void printPlatformInfo(cl_int ret ,cl_uint i, const char* part , char* buf)
  5. {
  6.     if(ret==CL_SUCCESS){
  7.             printf("\t platform index=%d %s : %s\n",i,part,buf);
  8.     }else if(ret==CL_INVALID_PLATFORM){
  9.             printf("\t platform index=%d %s : invalid platform.\n",i,part);
  10.         }else if(ret==CL_INVALID_VALUE){
  11.             printf("\t platform index=%d %s : invalid value.\n",i,part);
  12.         }else if(ret==CL_OUT_OF_HOST_MEMORY){
  13.             printf("\t platform index=%d %s : out of host memory.\n",i,part);
  14.         }else{
  15.             printf("\t platform index=%d %s : i don't know why.\n",i,part);
  16.         }

  17. }

  18. void printDevice(cl_device_id device)
  19. {
  20.     char buf[128];
  21.     size_t size = 0 ;
  22.     cl_device_type type;
  23.     cl_int ret = CL_SUCCESS;
  24.     printf("\t   ######################\n");
  25.     ret = clGetDeviceInfo(device,CL_DEVICE_NAME,128,buf,&size);
  26.     if(ret==CL_SUCCESS){
  27.         printf("\t   device name=%s\n",buf);
  28.     }else{
  29.         printf("\t   get device name fail !\n");
  30.     }
  31.    
  32.     ret = clGetDeviceInfo(device,CL_DEVICE_TYPE,sizeof(cl_device_type),&type,&size);
  33.     if(ret==CL_SUCCESS){
  34.         printf("\t   device type=%lu\n",type);
  35.     }else{
  36.         printf("\t   get device type fail !\n");
  37.     }
  38.    
  39.    
  40.     ret = clGetDeviceInfo(device,CL_DEVICE_VENDOR,128,buf,&size);
  41.     if(ret==CL_SUCCESS){
  42.         printf("\t   device vendor=%s\n",buf);
  43.     }else{
  44.         printf("\t   get device vendor fail !\n");
  45.     }
  46.     ret = clGetDeviceInfo(device,CL_DRIVER_VERSION,128,buf,&size);
  47.     if(ret==CL_SUCCESS){
  48.         printf("\t   device version=%s\n",buf);
  49.     }else{
  50.         printf("\t   get device version fail !\n");
  51.     }
  52.     ret = clGetDeviceInfo(device,CL_DEVICE_PROFILE,128,buf,&size);
  53.     if(ret==CL_SUCCESS){
  54.         printf("\t   device profile=%s\n",buf);
  55.     }else{
  56.         printf("\t   get device profile fail !\n");
  57.     }
  58.     printf("\t   ######################\n");
  59. }

  60. void printDevices(cl_platform_id platform , cl_device_type device_type)
  61. {
  62.     cl_device_id devices[8];
  63.     cl_uint i ;
  64.     cl_uint num = 0 ;
  65.     cl_int ret = clGetDeviceIDs(platform,device_type,8,devices,&num);
  66.     if(ret==CL_SUCCESS){
  67.         printf("\t  getDeviceIDs success ! num=%d\n",num);
  68.         
  69.         for(i=0;i<num;i++){
  70.             printDevice(devices[i]);
  71.         }
  72.     }else{
  73.         printf("\t  getDeviceIDs fail !\n");
  74.     }
  75.    
  76. }

  77. void printOpenCL()
  78. {
  79.         cl_platform_id platforms[8];
  80.         cl_uint num = 0 ;
  81.         cl_uint i=0;
  82.         cl_int err = clGetPlatformIDs(8,platforms,&num);
  83.         if(err==CL_SUCCESS){
  84.                 printf("printOpenCL got %d platforms : \n",num);
  85.                 for(i=0;i<num;i++){
  86.                     char buf[128];
  87.                     size_t size = 0 ;
  88.                     cl_int ret = 0 ;
  89.                     
  90.                     ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_PROFILE,128,buf,&size);
  91.                     printPlatformInfo(ret,i,"profile",buf);
  92.                
  93.                 ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_VERSION,128,buf,&size);
  94.                     printPlatformInfo(ret,i,"version",buf);
  95.                     
  96.                 ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_NAME,128,buf,&size);
  97.                 printPlatformInfo(ret,i,"name",buf);
  98.                     
  99.                     ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_VENDOR,128,buf,&size);
  100.                 printPlatformInfo(ret,i,"vendor",buf);
  101.                
  102.                 ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_EXTENSIONS,128,buf,&size);
  103.                 printPlatformInfo(ret,i,"extensions",buf);
  104.                
  105.                 printf("Device cpu:\n");
  106.                 printDevices(platforms[i],CL_DEVICE_TYPE_CPU);
  107.                
  108.                 printf("Device gpu:\n");
  109.                 printDevices(platforms[i],CL_DEVICE_TYPE_GPU);
  110.                
  111.                 printf("Device accelerator:\n");
  112.                 printDevices(platforms[i],CL_DEVICE_TYPE_ACCELERATOR);
  113.                
  114.                 printf("Device default:\n");
  115.                 printDevices(platforms[i],CL_DEVICE_TYPE_DEFAULT);
  116.                
  117.                 printf("Device all :\n");
  118.                 printDevices(platforms[i],CL_DEVICE_TYPE_ALL);
  119.                
  120.                 }
  121.         }else if(err==CL_INVALID_VALUE){
  122.                 printf("printOpenCL invalid value.\n");
  123.         }else if(err==CL_OUT_OF_HOST_MEMORY){
  124.                 printf("printOpenCL out of host memory.\n");
  125.         }else{
  126.                 printf("printOpenCL i don't know why.\n");
  127.         }

  128. }


  /* Entry point: print the OpenCL platform/device report, then exit with 0. */
  int main(void)
  {
      printOpenCL();
      return 0;
  }
复制代码
这里打印的信息请参考opencl文档 : https://www.khronos.org/registry/cl/specs/opencl-1.1.pdf
把编译好的opencl-info push到/system/bin目录下,把libGLES_mali.so push到/system/lib目录下,就可以在adb shell中运行opencl-info了。


从打印的信息可以看到,rk3288支持OpenCL 1.1,支持的设备是gpu的Mali-T764。

Android-OpenCL-v1.1.zip

42.56 KB, 下载次数: 127, 下载积分: 灯泡 -1 , 经验 -1

把libGLES_mali.so放到opencl/lib目录下


作者: bunchen    时间: 2015-10-9 10:08
本帖最后由 bunchen 于 2015-10-28 16:23 编辑

这里贴出一个简单的向量加法的例子,和容易出错的地方。先上代码:
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <opencl.h>

  4. void printDeviceWorkInfo(cl_device_id device)
  5. {
  6.     cl_uint nMaxComputeUnits = 0;
  7.     cl_uint nMaxWorkItemDims = 0;
  8.     cl_uint i = 0;
  9.     size_t* nMaxWorkItemSizes = NULL;
  10.     size_t nMaxWorkGroupSize = 0;
  11.     size_t size = 0 ;
  12.     cl_int err ;
  13.     err = clGetDeviceInfo(device,CL_DEVICE_MAX_COMPUTE_UNITS,sizeof(cl_uint),&nMaxComputeUnits,&size);
  14.     if(err==CL_SUCCESS){
  15.         printf("nMaxComputeUnits=%d\n",nMaxComputeUnits);
  16.     }
  17.    
  18.     err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(cl_uint),&nMaxWorkItemDims,&size);
  19.     if(err==CL_SUCCESS){
  20.         printf("nMaxWorkItemDims=%d\n",nMaxWorkItemDims);
  21.         nMaxWorkItemSizes = (size_t*)malloc(sizeof(size_t)*nMaxWorkItemDims);
  22.         err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(size_t)*nMaxWorkItemDims,nMaxWorkItemSizes,&size);
  23.         if(err==CL_SUCCESS){
  24.             for(i=0;i<nMaxWorkItemDims;i++){
  25.                 printf("nMaxWorkItemSizes[%d]=%d\n",i,nMaxWorkItemSizes[i]);
  26.             }
  27.         }
  28.         free(nMaxWorkItemSizes);
  29.     }
  30.    
  31.     err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(size_t),&nMaxWorkGroupSize,&size);
  32.     if(err==CL_SUCCESS){
  33.         printf("nMaxWorkGroupSize=%d\n",nMaxWorkGroupSize);
  34.     }
  35. }

  /*
   * OpenCL C source of the vector_add_gpu kernel: each work-item adds one
   * element of src_a and src_b into res, and work-items whose global id is
   * not below num do nothing.
   */
  const char* program_src =
      /* kernel signature */
      "__kernel void vector_add_gpu (__global const float* src_a,\n"
      "   __global const float* src_b,\n"
      "   __global float* res,\n"
      "   const int num)\n"
      /* kernel body */
      "{\n"
      "   int idx = get_global_id(0);\n"
      "   if(idx<num){       res[idx]=src_a[idx]+src_b[idx];\n"
      "   }\n"
      "}\n";

  48. static const cl_int vect_len = 10000000;

  49. static float* vect_a = NULL ;
  50. static float* vect_b = NULL ;
  51. static float* vect_c = NULL ;

  52. void initVects()
  53. {
  54.     cl_int i;
  55.     vect_a = (float*)malloc(sizeof(float)*vect_len);
  56.     vect_b = (float*)malloc(sizeof(float)*vect_len);
  57.     vect_c = (float*)malloc(sizeof(float)*vect_len);
  58.     for(i=0;i<vect_len;i++){
  59.         vect_a[i]=(float)rand()/RAND_MAX;
  60.         vect_b[i]=(float)rand()/RAND_MAX;
  61.         vect_c[i]=0.0f;
  62.     }
  63. }

  64. void printVects()
  65. {
  66.     cl_int i;
  67.     if(vect_a && vect_b && vect_c){
  68.         printf("######################\n");
  69.         for(i=0;i<4;i++){
  70.             printf("%08d : %f,%f,%f\n",i,vect_a[i],vect_b[i],vect_c[i]);
  71.         }
  72.         printf("    ...    \n");
  73.         for(i=vect_len-4;i<vect_len;i++){
  74.             printf("%08d : %f,%f,%f\n",i,vect_a[i],vect_b[i],vect_c[i]);
  75.         }
  76.         printf("######################\n");
  77.     }
  78. }

  79. void releaseVects()
  80. {
  81.     if(vect_a){
  82.         free(vect_a);
  83.         vect_a=NULL;
  84.     }
  85.     if(vect_b){
  86.         free(vect_b);
  87.         vect_b=NULL;
  88.     }
  89.     if(vect_c){
  90.         free(vect_c);
  91.         vect_c=NULL;
  92.     }
  93. }

  /*
   * Round s up to the nearest multiple of f.
   *
   * f - the multiple (e.g. the local work size)
   * s - the value to round (e.g. the number of elements)
   *
   * Returns the smallest multiple of f that is >= s. When f is 0 there is no
   * such multiple, so s is returned unchanged rather than dividing by zero.
   */
  size_t shrRoundUp(size_t f , size_t s)
  {
      size_t rem;

      if (f == 0) {
          return s;
      }

      /* Using the remainder avoids the s+f-1 intermediate, which can wrap. */
      rem = s % f;
      return rem == 0 ? s : s + (f - rem);
  }



  98. void test()
  99. {
  100.     cl_int error = 0 ;
  101.     cl_platform_id platform;
  102.     cl_context context;
  103.     cl_command_queue queue;
  104.     cl_device_id device;
  105.     cl_mem inbuf_a ;
  106.     cl_mem inbuf_b ;
  107.     cl_mem outbuf_r ;
  108.     const cl_int size = vect_len;
  109.     cl_int i ;
  110.     const size_t mem_size = sizeof(float)*size;
  111.     size_t program_len = strlen(program_src);
  112.     char build_log[1024];
  113.     size_t log_size;
  114.     size_t local_ws;
  115.     size_t global_ws;
  116.     cl_kernel vector_add_kernel;
  117.    
  118.     error = clGetPlatformIDs(1,&platform,NULL);
  119.     if(error != CL_SUCCESS){
  120.         printf("get platform id fail !\n");
  121.         exit(1);
  122.     }
  123.    
  124.     error = clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
  125.     if(error != CL_SUCCESS){
  126.         printf("get gpu device fail !\n");
  127.         exit(1);
  128.     }
  129.    
  130.     printDeviceWorkInfo(device);
  131.    
  132.     cl_context_properties properties[]={
  133.         CL_CONTEXT_PLATFORM,
  134.         (cl_context_properties)platform,
  135.         0
  136.     };
  137.    
  138.     // 这里要配置properties
  139.     context = clCreateContext(properties,1,&device,NULL,NULL,&error);
  140.     if(error != CL_SUCCESS){
  141.         printf("create context fail !\n");
  142.         exit(1);
  143.     }
  144.    
  145.     queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
  146.     if(error != CL_SUCCESS){
  147.         printf("create command queue fail !\n");
  148.         exit(1);
  149.     }
  150.    
  151.     initVects();
  152.     printVects();
  153.    
  154.     inbuf_a = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_a,&error);
  155.     if(error!=CL_SUCCESS){
  156.         printf("create buffer inbuf_a fail !\n");
  157.         exit(1);
  158.     }
  159.     inbuf_b = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_b,&error);
  160.     if(error!=CL_SUCCESS){
  161.         printf("create buffer inbuf_b fail !\n");
  162.         exit(1);
  163.     }
  164.     outbuf_r = clCreateBuffer(context,CL_MEM_WRITE_ONLY,mem_size,NULL,&error);
  165.     if(error!=CL_SUCCESS){
  166.         printf("create buffer outbuf_r fail !\n");
  167.         exit(1);
  168.     }
  169.    
  170.     cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
  171.     if(error!=CL_SUCCESS){
  172.         printf("create program fail !\n");
  173.         exit(1);
  174.     }
  175.     error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
  176.     if(error!=CL_SUCCESS){
  177.         printf("build program fail !\n");
  178.         clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
  179.         printf("build_log : %s\n",build_log);
  180.         exit(1);
  181.     }
  182.    
  183.     vector_add_kernel = clCreateKernel(program,"vector_add_gpu",&error);
  184.     if(error!=CL_SUCCESS){
  185.         printf("create kernel fail !\n");
  186.         exit(1);
  187.     }
  188.    
  189.     error = clSetKernelArg(vector_add_kernel,0,sizeof(cl_mem),&inbuf_a);
  190.     error |= clSetKernelArg(vector_add_kernel,1,sizeof(cl_mem),&inbuf_b);
  191.     error |= clSetKernelArg(vector_add_kernel,2,sizeof(cl_mem),&outbuf_r);
  192.     error |= clSetKernelArg(vector_add_kernel,3,sizeof(cl_int),&size);
  193.     if(error!=CL_SUCCESS){
  194.         printf("set kernel arg fail !\n");
  195.         exit(1);
  196.     }
  197.    
  198.     local_ws = 256;  //我们使用一维的clEnqueueNDRangeKernel,这里local_ws选择nMaxWorkItemSizes[0]=256
  199.     global_ws = shrRoundUp(local_ws,size); //这里是线程总数,应该是local_ws的倍数。
  200.     printf("local_ws=%d,global_ws=%d\n",local_ws,global_ws);
  201.    
  202.     error = clEnqueueNDRangeKernel(queue,vector_add_kernel,1,NULL,&global_ws,&local_ws,0,NULL,NULL);
  203.     if(error!=CL_SUCCESS){
  204.         printf("enqueue kernel fail !\n");
  205.         exit(1);
  206.     }
  207.    
  208.     clEnqueueReadBuffer(queue,outbuf_r,CL_TRUE,0,mem_size,vect_c,0,NULL,NULL);
  209.     printVects();
  210.    
  211.     clReleaseKernel(vector_add_kernel);
  212.     clReleaseProgram(program);
  213.     clReleaseCommandQueue(queue);
  214.     clReleaseContext(context);
  215.     clReleaseMemObject(inbuf_a);
  216.     clReleaseMemObject(inbuf_b);
  217.     clReleaseMemObject(outbuf_r);
  218.     releaseVects();
  219. }

  /* Entry point: run the vector-add demo, then exit with status 0. */
  int main(void)
  {
      test();
      return 0;
  }
复制代码

这里说一下容易出错的地方:
(1)clCreateContext,网上有些例子把第一个参数置成0,经试验这样不行,要设置properties。
(2)clEnqueueNDRangeKernel的global_work_size和local_work_size。我们使用一维的clEnqueueNDRangeKernel,这里local_work_size选择nMaxWorkItemSizes[0]=256,global_work_size是线程总数,应该是local_ws的倍数。
一维情况下:


二维情况下:



(3)kernel代码中
size_t get_global_id (uint dimindx)
返回这个线程的global_id,参数是维度索引,我们一维的情况下参数是0。


最后运行结果:


再加一个nv12转rgb的例子:
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <opencl.h>
  4. #include <sys/time.h>

  5. void printDeviceWorkInfo(cl_device_id device)
  6. {
  7.     cl_uint nMaxComputeUnits = 0;
  8.     cl_uint nMaxWorkItemDims = 0;
  9.     cl_uint i = 0;
  10.     size_t* nMaxWorkItemSizes = NULL;
  11.     size_t nMaxWorkGroupSize = 0;
  12.     size_t size = 0 ;
  13.     cl_int err ;
  14.     err = clGetDeviceInfo(device,CL_DEVICE_MAX_COMPUTE_UNITS,sizeof(cl_uint),&nMaxComputeUnits,&size);
  15.     if(err==CL_SUCCESS){
  16.         printf("nMaxComputeUnits=%d\n",nMaxComputeUnits);
  17.     }
  18.    
  19.     err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(cl_uint),&nMaxWorkItemDims,&size);
  20.     if(err==CL_SUCCESS){
  21.         printf("nMaxWorkItemDims=%d\n",nMaxWorkItemDims);
  22.         nMaxWorkItemSizes = (size_t*)malloc(sizeof(size_t)*nMaxWorkItemDims);
  23.         err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(size_t)*nMaxWorkItemDims,nMaxWorkItemSizes,&size);
  24.         if(err==CL_SUCCESS){
  25.             for(i=0;i<nMaxWorkItemDims;i++){
  26.                 printf("nMaxWorkItemSizes[%d]=%d\n",i,nMaxWorkItemSizes[i]);
  27.             }
  28.         }
  29.         free(nMaxWorkItemSizes);
  30.     }
  31.    
  32.     err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(size_t),&nMaxWorkGroupSize,&size);
  33.     if(err==CL_SUCCESS){
  34.         printf("nMaxWorkGroupSize=%d\n",nMaxWorkGroupSize);
  35.     }
  36. }

  /*
   * OpenCL C source of the nv12_to_rgb kernel. Work-item (idi, idj) reads the
   * luma byte at that pixel plus the interleaved U/V pair that follows the
   * width*height luma plane, and writes three clamped bytes to
   * rgb[(idj*width+idi)*3 ..].
   *
   * The conversion constants carry an 'f' suffix so the arithmetic is done
   * in single precision; unsuffixed literals are double-precision, which
   * depends on fp64 support in the device.
   */
  const char* program_src = ""
  "__kernel void nv12_to_rgb (__global const unsigned char* nv12,\n"
  "   __global unsigned char* rgb,\n"
  "   const int width,\n"
  "   const int height)\n"
  "{\n"
  "   int idi = get_global_id(0);\n"
  "   int idj = get_global_id(1);\n"
  "   int k = 0 ;\n"
  "   int y = 0 ;\n"
  "   int u = 0 ;\n"
  "   int v = 0 ;\n"
  "   int t = 0 ;\n"
  "   if(idi<width && idj<height){\n"
  "       y=nv12[idj*width+idi];\n"
  "       k=width*height+((idj/2)*(width/2)+idi/2)*2;\n"
  "       u=nv12[k];\n"
  "       v=nv12[k+1];\n"
  "       k=(idj*width+idi)*3;\n"
  "       t=(int)(y+1.370705f*v-175.4502f);\n"
  "       rgb[k]=t>255?255:t<0?0:t;\n"
  "       t=(int)(y-0.698001f*v-0.337633f*u+132.56124f);\n"
  "       rgb[k+1]=t>255?255:t<0?0:t;\n"
  "       t=(int)(y+1.732446f*u-221.7531f);\n"
  "       rgb[k+2]=t>255?255:t<0?0:t;\n"
  "   }\n"
  "}\n"
  ;

  /*
   * Round s up to the nearest multiple of f.
   *
   * f - the multiple (e.g. the local work size in one dimension)
   * s - the value to round (e.g. the image width or height)
   *
   * Returns the smallest multiple of f that is >= s. When f is 0 there is no
   * such multiple, so s is returned unchanged rather than dividing by zero.
   */
  size_t shrRoundUp(size_t f , size_t s)
  {
      size_t rem;

      if (f == 0) {
          return s;
      }

      /* Using the remainder avoids the s+f-1 intermediate, which can wrap. */
      rem = s % f;
      return rem == 0 ? s : s + (f - rem);
  }

  69. void nv12_to_rgb(uint8_t* rgb , uint8_t* nv12 , int width , int height)
  70. {
  71.     cl_int error = 0 ;
  72.     cl_platform_id platform;
  73.     cl_context context;
  74.     cl_command_queue queue;
  75.     cl_device_id device;
  76.     cl_mem inbuf_nv12 ;
  77.     cl_mem outbuf_rgb ;
  78.     cl_int i ;
  79.     size_t program_len = strlen(program_src);
  80.     char build_log[1024];
  81.     size_t log_size;
  82.     size_t local_ws;
  83.     size_t global_ws;
  84.     cl_kernel nv12_to_rgb;
  85.    
  86.     cl_int nv12_size = width*height*3/2;
  87.     cl_int rgb_size = width*height*3;
  88.    
  89.     error = clGetPlatformIDs(1,&platform,NULL);
  90.     if(error != CL_SUCCESS){
  91.         printf("get platform id fail !\n");
  92.         exit(1);
  93.     }
  94.    
  95.     error = clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
  96.     if(error != CL_SUCCESS){
  97.         printf("get gpu device fail !\n");
  98.         exit(1);
  99.     }
  100.    
  101.     printDeviceWorkInfo(device);
  102.    
  103.    
  104.     cl_context_properties properties[]={
  105.         CL_CONTEXT_PLATFORM,
  106.         (cl_context_properties)platform,
  107.         0
  108.     };
  109.    
  110.     // 这里要配置properties
  111.     context = clCreateContext(properties,1,&device,NULL,NULL,&error);
  112.     if(error != CL_SUCCESS){
  113.         printf("create context fail !\n");
  114.         exit(1);
  115.     }
  116.    
  117.     queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
  118.     if(error != CL_SUCCESS){
  119.         printf("create command queue fail !\n");
  120.         exit(1);
  121.     }
  122.    
  123.     cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
  124.     if(error!=CL_SUCCESS){
  125.         printf("create program fail !\n");
  126.         exit(1);
  127.     }
  128.     error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
  129.     if(error!=CL_SUCCESS){
  130.         printf("build program fail !\n");
  131.         clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
  132.         printf("build_log : %s\n",build_log);
  133.         exit(1);
  134.     }
  135.    
  136.     nv12_to_rgb = clCreateKernel(program,"nv12_to_rgb",&error);
  137.     if(error!=CL_SUCCESS){
  138.         printf("create kernel fail !\n");
  139.         exit(1);
  140.     }

  141.     struct timeval val;
  142.     gettimeofday(&val,NULL);
  143.     long t1 = val.tv_sec*1000000 + val.tv_usec;

  144.     inbuf_nv12 = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,nv12_size,nv12,&error);
  145.     if(error!=CL_SUCCESS){
  146.         printf("create buffer inbuf_a fail !\n");
  147.         exit(1);
  148.     }
  149.     outbuf_rgb = clCreateBuffer(context,CL_MEM_WRITE_ONLY,rgb_size,NULL,&error);
  150.     if(error!=CL_SUCCESS){
  151.         printf("create buffer outbuf_r fail !\n");
  152.         exit(1);
  153.     }
  154.    
  155.     error = clSetKernelArg(nv12_to_rgb,0,sizeof(cl_mem),&inbuf_nv12);
  156.     error |= clSetKernelArg(nv12_to_rgb,1,sizeof(cl_mem),&outbuf_rgb);
  157.     error |= clSetKernelArg(nv12_to_rgb,2,sizeof(cl_int),&width);
  158.     error |= clSetKernelArg(nv12_to_rgb,3,sizeof(cl_int),&height);
  159.    
  160.     if(error!=CL_SUCCESS){
  161.         printf("set kernel arg fail !\n");
  162.         exit(1);
  163.     }
  164.    
  165.     size_t lws[2] = {16,16};
  166.     size_t gws[2];
  167.     gws[0] = shrRoundUp(lws[0],width);
  168.     gws[1] = shrRoundUp(lws[1],height);
  169.     printf("lws={%d,%d},gws={%d,%d}\n",lws[0],lws[1],gws[0],gws[1]);
  170.    
  171.     error = clEnqueueNDRangeKernel(queue,nv12_to_rgb,2,NULL,gws,lws,0,NULL,NULL);
  172.     if(error!=CL_SUCCESS){
  173.         printf("enqueue kernel fail !\n");
  174.         exit(1);
  175.     }
  176.    
  177.     clEnqueueReadBuffer(queue,outbuf_rgb,CL_TRUE,0,rgb_size,rgb,0,NULL,NULL);
  178.    

  179.     clReleaseMemObject(inbuf_nv12);
  180.     clReleaseMemObject(outbuf_rgb);   

  181.    
  182.     gettimeofday(&val,NULL);
  183.     long t2 = val.tv_sec*1000000 + val.tv_usec;

  184.     printf("nv12_to_rgb spend %ld (us)\n",t2-t1);

  185.     clReleaseKernel(nv12_to_rgb);
  186.     clReleaseProgram(program);
  187.     clReleaseCommandQueue(queue);
  188.     clReleaseContext(context);
  189. }

  /*
   * Print an 8x8 block of RGB pixels as hex triples, one image row per line.
   *
   * rgb    - packed 3-byte-per-pixel image
   * width  - image width in pixels (row stride is width*3 bytes)
   * x0, y0 - top-left corner of the block
   */
  static void printRgbBlock(const uint8_t *rgb, int width, int x0, int y0)
  {
      int i,j,k;

      for(j=y0;j<y0+8;j++){
          for(i=x0;i<x0+8;i++){
              k=(j*width+i)*3;
              printf("(%02x,%02x,%02x) ",rgb[k],rgb[k+1],rgb[k+2]);
          }
          printf("\n");
      }
  }

  /*
   * Entry point: convert an all-zero 1920x1080 NV12 frame to RGB, print the
   * total time taken, then dump the top-left and bottom-right 8x8 pixels.
   * Returns 1 if the image buffers cannot be allocated, 0 otherwise.
   */
  int main(){
      int width=1920;
      int height=1080;
      struct timeval start;
      struct timeval end;
      long elapsed_us;

      // calloc zero-fills, replacing malloc + memset (which needed <string.h>).
      uint8_t* nv12=calloc(1,(size_t)width*height*3/2);
      uint8_t* rgb=calloc(1,(size_t)width*height*3);
      if(nv12==NULL || rgb==NULL){
          printf("alloc image buffers fail !\n");
          free(nv12);
          free(rgb);
          return 1;
      }

      gettimeofday(&start,NULL);
      nv12_to_rgb(rgb,nv12,width,height);
      gettimeofday(&end,NULL);
      // Subtract before scaling: tv_sec*1000000 overflows a 32-bit long.
      elapsed_us = (long)(end.tv_sec-start.tv_sec)*1000000L + (long)(end.tv_usec-start.tv_usec);
      printf("nv12_to_rgb spend %ld (us)\n",elapsed_us);

      printRgbBlock(rgb,width,0,0);
      printf("......\n");
      printRgbBlock(rgb,width,width-8,height-8);

      free(nv12);
      free(rgb);
      return 0;
  }
复制代码

这是一个二维的例子,local_work_item[0]*local_work_item[1]不能超过最大的workitem数,即 local_work_item[0]*local_work_item[1]<=256,所以取
local_work_item[0]=local_work_item[1]=16
作者: duoduomu    时间: 2015-10-9 10:15
bunchen 发表于 2015-10-9 10:08
二楼待用

不错,不过还可以参考http://malideveloper.arm.com/dow ... .0.0a36a7_Linux.tgz 这个SDK,里面都有,还有demo!
作者: bunchen    时间: 2015-10-9 10:25
duoduomu 发表于 2015-10-9 10:15
不错,不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a3 ...

谢谢,之前一直想下载mali的sdk来看,奈何下载页面总是进不去,谢谢分享。
作者: duoduomu    时间: 2015-10-9 11:01
bunchen 发表于 2015-10-9 10:25
谢谢,之前一直想下载mali的sdk来看,奈何下载页面总是进不去,谢谢分享。

哈哈 的确你按常理下载是不行的 !
作者: fxlsunny    时间: 2015-10-15 14:50
不错正打算做opencl的事,不过想在Ubuntu下做,不知3288是否可行?
作者: ff_20150814    时间: 2015-12-7 15:30
楼主大神你好,请问你的那个NV12转RGB耗时多少呢,GPU的频率是跑的多少M?
作者: tk1user    时间: 2016-3-23 09:36
太厉害了,顶起!
作者: 总有刁民想害朕    时间: 2016-4-26 09:43
printOpenCL got 1 platforms :
         platform index=0 profile : FULL_PROFILE
         platform index=0 version : OpenCL 1.1 v1.r6p0-02rel0.0f4218be5cc66c20a4
f31b6cc856ee46
         platform index=0 name : ARM Platform
         platform index=0 vendor : ARM
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
         platform index=0 extensions : out of host memory.
Device cpu:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !
Device gpu:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !
Device accelerator:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !
Device default:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !
Device all :
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !
作者: Eric.y    时间: 2016-4-30 16:59
duoduomu 发表于 2015-10-9 10:15
不错,不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a3 ...

Android-OpenCL-v1.1.zip和ARM官网的SDK一样吗,我没灯泡下载不了
作者: Eric.y    时间: 2016-4-30 17:00
duoduomu 发表于 2015-10-9 10:15
不错,不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a3 ...

Android-OpenCL-v1.1.zip和ARM官网的SDK一样吗,我没灯泡下载不了
作者: jerry    时间: 2016-5-7 20:49
这个功能大有用途:)
作者: xindongzhang    时间: 2017-6-20 11:49
请教一下各位,编译的时候出现 error: undefined reference to 'clGetPlatformInfo',是什么问题呢?
作者: dianziit    时间: 2017-6-20 17:19
不错正打算做opencl的事,不过想在Ubuntu下做,不知3288是否可行?
作者: llwgg    时间: 2018-7-25 16:00
谢谢分享
作者: lugq    时间: 2019-4-18 19:00
我可以下载吗
作者: 爱吃小浣熊的土    时间: 2019-6-4 08:53
为什么我下载不了?
作者: 咖喱给给    时间: 2019-12-5 15:09
这个在哪能下载呢?

作者: qinqin04    时间: 2019-12-10 15:48
下载要灯泡?
作者: 大橘子    时间: 2020-1-8 16:40
这个怎么用啊
作者: diaodiao    时间: 2020-4-22 10:37
很好的分享
作者: 高丰考博    时间: 2020-9-18 04:16
好东西呀
作者: dongwuzhe    时间: 2022-6-19 10:51
强无敌




欢迎光临 Firefly开源社区 (https://dev.t-firefly.com/) Powered by Discuz! X3.1