美文网首页
OpenCL_LAB2

OpenCL_LAB2

作者: Bing2464 | 来源:发表于2017-04-15 20:57 被阅读0次

    运行hello.cpp & 运行vadd.cpp

    矩阵乘法

    #include <cmath>
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <ctime>
    #include <fstream>
    #include <iostream>
    #include <string>
    #if defined(__APPLE__) || defined(__MACOSX)
    #include <OpenCL/cl.h>
    #else
    #include <CL/cl.h>
    #endif
    
    // Reads the entire contents of a file into a string.
    //
    // filename: path of the file to read (opened in binary mode, so bytes are
    //           copied verbatim, including any embedded NUL characters).
    // s:        receives the file contents on success; left untouched on failure.
    //
    // Returns 0 on success and 1 on failure. A diagnostic is printed to stdout
    // when the file cannot be opened or cannot be read completely.
    int convertToString(const char *filename, std::string &s) {
        std::ifstream f(filename, std::ios::in | std::ios::binary);
        if (!f.is_open()) {
            printf("Error: Failed to open file %s\n", filename);
            return 1;
        }

        // Determine the file size by seeking to the end; tellg() reports -1 on failure.
        f.seekg(0, std::ios::end);
        const std::streamoff length = f.tellg();
        f.seekg(0, std::ios::beg);
        if (!f || length < 0) {
            printf("Error: Failed to read file %s\n", filename);
            return 1;
        }

        // Read straight into the string's storage so no manual buffer has to be
        // freed and embedded NUL bytes do not truncate the result.
        std::string contents((size_t) length, '\0');
        if (length > 0) {
            f.read(&contents[0], (std::streamsize) length);
            if (f.gcount() != (std::streamsize) length) {
                printf("Error: Failed to read file %s\n", filename);
                return 1;
            }
        }

        s.swap(contents);
        return 0;
    }
    
    
    int main(int argc, char *argv[]) {
        double cputime, gputime;
        clock_t timestamp;
    
        const int W = 100;
        const int mat_size = W * W;
    
        // 在 host 内存中创建三个缓冲区
        float *const buf1 = (float *) malloc(mat_size * sizeof(float));
        float *const buf2 = (float *) malloc(mat_size * sizeof(float));
        float *const buf = (float *) malloc(mat_size * sizeof(float));
        float *const op_data = (float *) malloc(mat_size * sizeof(float));
    
        // 初始化矩阵
        srand((unsigned int) time(NULL));
        for (int i = 0; i < mat_size; i++)
            buf1[i] = float(rand() % 1000) * M_PI;
    
        srand((unsigned int) time(NULL) + 1000);
        for (int i = 0; i < mat_size; i++)
            buf2[i] = float(rand() % 1000) * M_PI;
    
        // 时间戳
        timestamp = clock();
    
        for (int i = 0; i < mat_size; i++) {
            float tmp = 0.0;
            for (int k = 0; k < W; k++)
                tmp += buf1[i * W + k] * buf2[k * W + i];
            buf[i * W + i] = tmp;
        }
    
        cputime = (double) (clock() - timestamp) / CLOCKS_PER_SEC * 1000;
        printf("串行执行时间:%8.3f ms\n", cputime);
    
        cl_platform_id platform;
        cl_event prof_event;
    
        // 创建平台对象
        clGetPlatformIDs(1, &platform, NULL);
        cl_device_id device;
    
        // 创建 GPU 设备
        clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
        // 创建 context
        cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
        // 创建命令队列
        cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL);
    
        // 创建三个 OpenCL 内存对象
        cl_mem objects[3];
        objects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * mat_size, buf1,
                                       NULL);
        objects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * mat_size, buf2,
                                       NULL);
        objects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * mat_size, buf,
                                       NULL);
    
        const char *filename = "mul.cl";
        std::string sourceStr;
        convertToString(filename, sourceStr);
        const char *source = sourceStr.c_str();
        size_t sourceSize[] = {strlen(source)};
    
        cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
        // 编译程序对象
        clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    
        // 创建 Kernel 对象
        cl_kernel kernel = clCreateKernel(program, "matrix_mult", NULL);
    
        // 设置 Kernel 参数
        clSetKernelArg(kernel, 0, sizeof(int), &W);
        clSetKernelArg(kernel, 1, sizeof(cl_mem), &objects[0]);
        clSetKernelArg(kernel, 2, sizeof(cl_mem), &objects[1]);
        clSetKernelArg(kernel, 3, sizeof(cl_mem), &objects[2]);
    
    
        //执行 kernel
        cl_ulong ev_start_time = (cl_ulong) 0;
        cl_ulong ev_end_time = (cl_ulong) 0;
        size_t global[1];
        global[0] = (size_t) W;
        clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL, 0, NULL, &prof_event);
    
        clFinish(queue);
    
        //读取时间
        clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &ev_start_time, NULL);
        clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ev_end_time, NULL);
        gputime = (double) (ev_end_time - ev_start_time) * 1e-6;
        printf("OpenCL 执行时间:%8.3f ms\n", gputime);
    
        //数据拷回 host 内存
        clEnqueueReadBuffer(queue, objects[2], CL_TRUE, 0, sizeof(float) * mat_size, op_data, 0, NULL, NULL);
    
        // 验证 GPU 计算结果
        for (int i = 0; i < mat_size; i++) {
            if (fabs(buf[i] - op_data[i]) > 0.0001) {
                printf("check failed\n");
                break;
            }
        }
    
        if (buf1) 
            free(buf1);
        if (buf2) 
            free(buf2);
        if (buf) 
            free(buf);
        if (op_data) 
            free(op_data);
    
        // 删除 OpenCL 资源对象
        clReleaseMemObject(objects[2]);
        clReleaseMemObject(objects[1]);
        clReleaseMemObject(objects[0]);
        clReleaseProgram(program);
        clReleaseCommandQueue(queue);
        clReleaseContext(context);
    
        return 0;
    }
    
    

    相关文章

      网友评论

          本文标题:OpenCL_LAB2

          本文链接:https://www.haomeiwen.com/subject/dxjcattx.html