// gpu_opencl_interop.cpp
//
// This C++ API example demonstrates how to program Intel(R) Processor Graphics
// using the OpenCL* extensions API in oneDNN.
// Annotated version: "Getting started on GPU with OpenCL extensions API".

/*******************************************************************************
* Copyright 2019-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/



// [Prologue]
#include <iostream>
#include <numeric>
#include <stdexcept>

#include <CL/cl.h>

#include "oneapi/dnnl/dnnl.hpp"
#include "oneapi/dnnl/dnnl_ocl.hpp"

#include "example_utils.hpp"

using namespace dnnl;
using namespace std;
// [Prologue]

// Evaluates the OpenCL expression `x` exactly once. If the result is not
// CL_SUCCESS, prints the source location, the text of the failing expression
// and the numeric status code to std::cout, then terminates the process with
// exit code 1.
//
// The status local uses a macro-private name: with a short name such as `s`,
// an argument expression that mentions a caller variable `s` would be
// shadowed inside the macro and read an uninitialized value.
#define OCL_CHECK(x) \
    do { \
        const cl_int ocl_check_status_ = (x); \
        if (ocl_check_status_ != CL_SUCCESS) { \
            std::cout << "[" << __FILE__ << ":" << __LINE__ << "] '" << #x \
                      << "' failed (status code: " << ocl_check_status_ \
                      << ")." << std::endl; \
            exit(1); \
        } \
    } while (0)

// Creates an OpenCL program from the source text `ocl_code` in the context
// `ocl_ctx`, builds it, and returns the kernel named `kernel_name` from it.
//
// Every OpenCL status is routed through OCL_CHECK, so a failure in program
// creation, build, kernel creation or program release prints a diagnostic and
// terminates the process. The intermediate program handle is released before
// returning; the returned kernel handle is left for the caller to release
// with clReleaseKernel().
cl_kernel create_init_opencl_kernel(
        cl_context ocl_ctx, const char *kernel_name, const char *ocl_code) {
    cl_int err = CL_SUCCESS;

    // One source string, passed with a null lengths array.
    cl_program ocl_program = clCreateProgramWithSource(
            ocl_ctx, 1, &ocl_code, nullptr, &err);
    OCL_CHECK(err);

    // Null device list, no build options, no completion callback.
    OCL_CHECK(
            clBuildProgram(ocl_program, 0, nullptr, nullptr, nullptr, nullptr));

    cl_kernel kernel = clCreateKernel(ocl_program, kernel_name, &err);
    OCL_CHECK(err);

    // The program handle is not needed once the kernel has been created.
    OCL_CHECK(clReleaseProgram(ocl_program));
    return kernel;
}

void gpu_opencl_interop_tutorial() {
    // [Initialize engine]
    engine eng(engine::kind::gpu, 0);
    // [Initialize engine]

    // [Initialize stream]
    dnnl::stream strm(eng);
    // [Initialize stream]

    //  [memory alloc]
    memory::dims tz_dims = {2, 3, 4, 5};
    const size_t N = std::accumulate(tz_dims.begin(), tz_dims.end(), (size_t)1,
            std::multiplies<size_t>());

    memory::desc mem_d(
            tz_dims, memory::data_type::f32, memory::format_tag::nchw);

    memory mem(mem_d, eng);
    //  [memory alloc]

    //  [ocl kernel]
    const char *ocl_code
            = "__kernel void init(__global float *data) {"
              "    int id = get_global_id(0);"
              "    data[id] = (id % 2) ? -id : id;"
              "}";
    //  [ocl kernel]

    // [oclkernel create]
    const char *kernel_name = "init";
    cl_kernel ocl_init_kernel = create_init_opencl_kernel(
            ocl_interop::get_context(eng), kernel_name, ocl_code);
    //  [oclkernel create]

    // [oclexecution]
    cl_mem ocl_buf = ocl_interop::get_mem_object(mem);
    OCL_CHECK(clSetKernelArg(ocl_init_kernel, 0, sizeof(ocl_buf), &ocl_buf));

    cl_command_queue ocl_queue = ocl_interop::get_command_queue(strm);
    OCL_CHECK(clEnqueueNDRangeKernel(ocl_queue, ocl_init_kernel, 1, nullptr, &N,
            nullptr, 0, nullptr, nullptr));
    // [oclexecution]

    //  [relu creation]
    auto relu_d = eltwise_forward::desc(
            prop_kind::forward, algorithm::eltwise_relu, mem_d, 0.0f);
    auto relu_pd = eltwise_forward::primitive_desc(relu_d, eng);
    auto relu = eltwise_forward(relu_pd);
    //  [relu creation]

    // [relu exec]
    relu.execute(strm, {{DNNL_ARG_SRC, mem}, {DNNL_ARG_DST, mem}});
    strm.wait();
    // [relu exec]

    // [Check the results]
    std::vector<float> mem_data(N);
    read_from_dnnl_memory(mem_data.data(), mem);
    for (size_t i = 0; i < N; i++) {
        float expected = (i % 2) ? 0.0f : (float)i;
        if (mem_data[i] != expected) {
            std::cout << "Expect " << expected << " but got " << mem_data[i]
                      << "." << std::endl;
            throw std::logic_error("Accuracy check failed.");
        }
    }
    // [Check the results]

    OCL_CHECK(clReleaseKernel(ocl_init_kernel));
}

// Program entry point. Hands the tutorial function, together with the list of
// engine kinds it uses (GPU only), to handle_example_errors() and returns the
// value that helper produces. Command-line arguments are not used.
int main(int argc, char **argv) {
    const int exit_code = handle_example_errors(
            {engine::kind::gpu}, gpu_opencl_interop_tutorial);
    return exit_code;
}