// Source page: oneDNN/v1.0/gpu_opencl_interop_8cpp-example.html

/*******************************************************************************
* Copyright 2019 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// [Prologue]
#include <CL/cl.h>
#include <mkldnn.hpp>
// Optional header to access debug functions like `mkldnn_status2str()`
#include "mkldnn_debug.h"
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <numeric>
#include <sstream>
#include <string>
using namespace mkldnn;
using namespace std;
// [Prologue]
// Evaluates the OpenCL call `x` exactly once. If the resulting status is not
// CL_SUCCESS, prints the numeric status together with the file and line of the
// call site to stderr and terminates the process with exit code 1.
//
// The status local uses a macro-specific name: with a short name such as `s`,
// a call like OCL_CHECK(s) would expand to `cl_int s = (s);` and read an
// uninitialized variable instead of the caller's one.
#define OCL_CHECK(x)                                                      \
    do {                                                                  \
        const cl_int ocl_check_status_ = (x);                             \
        if (ocl_check_status_ != CL_SUCCESS) {                            \
            std::fprintf(stderr, "OpenCL error: %d at %s:%d\n",           \
                    ocl_check_status_, __FILE__, __LINE__);               \
            std::exit(1);                                                 \
        }                                                                 \
    } while (0)
// Compiles the OpenCL C source `ocl_code` inside the context `ocl_ctx` and
// returns the kernel called `kernel_name` from the resulting program.
//
// Every OpenCL status is passed through OCL_CHECK. The intermediate program
// object is released before returning; the returned kernel must be released
// by the caller with clReleaseKernel().
cl_kernel create_init_opencl_kernel(
        cl_context ocl_ctx, const char *kernel_name, const char *ocl_code) {
    cl_int status = CL_SUCCESS;

    // One source string; a null `lengths` argument is passed, so the string
    // is expected to be null-terminated.
    cl_program program = clCreateProgramWithSource(
            ocl_ctx, 1, &ocl_code, nullptr, &status);
    OCL_CHECK(status);

    // No explicit device list, build options or completion callback.
    status = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
    OCL_CHECK(status);

    cl_kernel kernel = clCreateKernel(program, kernel_name, &status);
    OCL_CHECK(status);

    // Only the kernel handle is handed back, so drop our program reference.
    status = clReleaseProgram(program);
    OCL_CHECK(status);

    return kernel;
}
// Runs the OpenCL interoperability tutorial end to end:
//   1. creates a GPU engine (index 0) and a stream on it;
//   2. allocates a 2x3x4x5 f32 memory object in nchw format;
//   3. fills it with a user-written OpenCL kernel enqueued on the stream's
//      command queue (odd indices get -id, even indices get id);
//   4. executes an in-place ReLU primitive on the same memory;
//   5. maps the memory and checks every element against the expected value.
//
// Throws std::string if any element differs from the expected value. The
// mapped pointer is unmapped and the OpenCL kernel is released before that
// exception is thrown. OpenCL failures are handled by OCL_CHECK.
void gpu_opencl_interop_tutorial() {
    // [Initialize engine]
    engine eng(engine::kind::gpu, 0);
    // [Initialize engine]
    // [Initialize stream]
    mkldnn::stream strm(eng);
    // [Initialize stream]
    //  [memory alloc]
    memory::dims tz_dims = { 2, 3, 4, 5 };
    // Total number of elements: product of all dimensions.
    const size_t N = std::accumulate(tz_dims.begin(), tz_dims.end(), (size_t)1,
            std::multiplies<size_t>());
    memory::desc mem_d(tz_dims, memory::data_type::f32,
            memory::format_tag::nchw);
    memory mem(mem_d, eng);
    //  [memory alloc]
    //  [ocl kernel]
    const char *ocl_code
            = "__kernel void init(__global float *data) {"
              "    int id = get_global_id(0);"
              "    data[id] = (id % 2) ? -id : id;"
              "}";
    //  [ocl kernel]
    // [oclkernel create]
    const char *kernel_name = "init";
    cl_kernel ocl_init_kernel = create_init_opencl_kernel(
            eng.get_ocl_context(), kernel_name, ocl_code);
    //  [oclkernel create]
    // [oclexecution]
    // Bind the buffer behind `mem` as the kernel argument and launch one
    // work-item per element on the stream's command queue.
    cl_mem ocl_buf = mem.get_ocl_mem_object();
    OCL_CHECK(clSetKernelArg(ocl_init_kernel, 0, sizeof(ocl_buf), &ocl_buf));
    cl_command_queue ocl_queue = strm.get_ocl_command_queue();
    OCL_CHECK(clEnqueueNDRangeKernel(ocl_queue, ocl_init_kernel, 1, nullptr, &N,
            nullptr, 0, nullptr, nullptr));
    // [oclexecution]
    //  [relu creation]
    auto relu_d = eltwise_forward::desc(prop_kind::forward,
            algorithm::eltwise_relu, mem_d, 0.0f);
    auto relu_pd = eltwise_forward::primitive_desc(relu_d, eng);
    auto relu = eltwise_forward(relu_pd);
    //  [relu creation]
    // [relu exec]
    // Source and destination are the same memory object: in-place execution.
    relu.execute(strm, { { MKLDNN_ARG_SRC, mem }, { MKLDNN_ARG_DST, mem } });
    // Wait on the stream before reading the results on the host.
    strm.wait();
    // [relu exec]
    // [Check the results]
    float *mapped_data = mem.map_data<float>();
    // Record a mismatch instead of throwing inside the loop, so that the
    // mapping and the kernel below are always released.
    bool results_ok = true;
    for (size_t i = 0; i < N; i++) {
        // The kernel wrote -i at odd indices (expected 0 after the ReLU)
        // and i at even indices (expected unchanged).
        const float expected = (i % 2) ? 0.0f : static_cast<float>(i);
        if (mapped_data[i] != expected) {
            results_ok = false;
            break;
        }
    }
    mem.unmap_data(mapped_data);
    // [Check the results]
    OCL_CHECK(clReleaseKernel(ocl_init_kernel));
    if (!results_ok) {
        throw std::string("Unexpected output, find a negative value after the ReLU execution");
    }
}
// [Main]
// Entry point: runs the tutorial and converts its outcome into an exit code.
//   0 - the tutorial completed and the results were as expected;
//   1 - an mkldnn::error was thrown (message and status string are printed);
//   2 - the example itself reported a failure via std::string.
int main(int argc, char **argv) {
    // Command-line arguments are not used by this example.
    (void)argc;
    (void)argv;

    int exit_code = 0;
    try {
        gpu_opencl_interop_tutorial();
    } catch (mkldnn::error &e) {
        std::cerr << "Intel MKL-DNN error: " << e.what() << std::endl
                  << "Error status: " << mkldnn_status2str(e.status)
                  << std::endl;
        exit_code = 1;
    } catch (std::string &e) {
        std::cerr << "Error in the example: " << e << std::endl;
        exit_code = 2;
    }

    if (exit_code == 0) {
        std::cout << "Example passes" << std::endl;
    }
    return exit_code;
}
// [Main]