gpu_opencl_interop.cpp
======================

This C++ API example demonstrates how to program Intel(R) Processor Graphics using the OpenCL\* extensions API in oneDNN.

Annotated version: :ref:`Getting started on GPU with OpenCL extensions API `

.. ref-code-block:: cpp

    /*******************************************************************************
    * Copyright 2019-2020 Intel Corporation
    *
    * Licensed under the Apache License, Version 2.0 (the "License");
    * you may not use this file except in compliance with the License.
    * You may obtain a copy of the License at
    *
    *     http://www.apache.org/licenses/LICENSE-2.0
    *
    * Unless required by applicable law or agreed to in writing, software
    * distributed under the License is distributed on an "AS IS" BASIS,
    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    * See the License for the specific language governing permissions and
    * limitations under the License.
    *******************************************************************************/

    // [Prologue]
    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <string>
    #include <vector>

    #include "oneapi/dnnl/dnnl.hpp"
    #include "oneapi/dnnl/dnnl_ocl.hpp"

    #include "example_utils.hpp"

    using namespace :ref:`dnnl `;
    using namespace :ref:`std `;
    // [Prologue]

    // Evaluates the OpenCL call `x` exactly once. If the result is not
    // CL_SUCCESS, prints the source location, the text of the failing
    // expression and the status code to stdout, then terminates the process
    // with exit code 1.
    //
    // The temporary has a deliberately unusual name: with a short name such as
    // `s`, an argument expression that mentions a caller variable `s` would be
    // captured by the macro's own local and read before it is initialized.
    #define OCL_CHECK(x) \
        do { \
            const cl_int ocl_check_status_ = (x); \
            if (ocl_check_status_ != CL_SUCCESS) { \
                std::cout << "[" << __FILE__ << ":" << __LINE__ << "] '" << #x \
                          << "' failed (status code: " << ocl_check_status_ \
                          << ")." << std::endl; \
                exit(1); \
            } \
        } while (0)

    // Builds the OpenCL C source in `ocl_code` for the context `ocl_ctx` and
    // returns the kernel called `kernel_name` from the resulting program.
    //
    // Every OpenCL status is routed through OCL_CHECK, so a failure prints a
    // diagnostic and exits the process instead of returning. The intermediate
    // program object is released before returning; the returned kernel handle
    // is released by the caller in this file with clReleaseKernel().
    cl_kernel create_init_opencl_kernel(
            cl_context ocl_ctx, const char *kernel_name, const char *ocl_code) {
        cl_int err;
        // One source string; no explicit lengths array is passed.
        const char *sources[] = {ocl_code};
        cl_program ocl_program
                = clCreateProgramWithSource(ocl_ctx, 1, sources, nullptr, &err);
        OCL_CHECK(err);
        // Build with no explicit device list, no build options and no
        // completion callback.
        OCL_CHECK(
                clBuildProgram(ocl_program, 0, nullptr, nullptr, nullptr, nullptr));

        cl_kernel ocl_kernel = clCreateKernel(ocl_program, kernel_name, &err);
        OCL_CHECK(err);

        // The program handle is not needed by the caller; only the kernel is
        // handed back.
        OCL_CHECK(clReleaseProgram(ocl_program));
        return ocl_kernel;
    }

    void gpu_opencl_interop_tutorial() {
        // [Initialize engine]
        :ref:`engine ` eng(:ref:`engine::kind::gpu `, 0);
        // [Initialize engine]

        // [Initialize stream]
        :ref:`dnnl::stream ` strm(eng);
        // [Initialize stream]

        // [memory alloc]
        :ref:`memory::dims ` tz_dims = {2, 3, 4, 5};
        const size_t N = std::accumulate(tz_dims.begin(), tz_dims.end(), (size_t)1,
                std::multiplies<size_t>());

        :ref:`memory::desc ` mem_d(
                tz_dims, :ref:`memory::data_type::f32 `, :ref:`memory::format_tag::nchw `);

        :ref:`memory ` mem(mem_d, eng);
        // [memory alloc]

        // [ocl kernel]
        const char *ocl_code
                = "__kernel void init(__global float *data) {"
                  "    int id = get_global_id(0);"
                  "    data[id] = (id % 2) ? -id : id;"
                  "}";
        // [ocl kernel]

        // [oclkernel create]
        const char *kernel_name = "init";
        cl_kernel ocl_init_kernel = create_init_opencl_kernel(
                :ref:`ocl_interop::get_context `(eng), kernel_name, ocl_code);
        // [oclkernel create]

        // [oclexecution]
        cl_mem ocl_buf = :ref:`ocl_interop::get_mem_object `(mem);
        OCL_CHECK(clSetKernelArg(ocl_init_kernel, 0, sizeof(ocl_buf), &ocl_buf));

        cl_command_queue ocl_queue = :ref:`ocl_interop::get_command_queue `(strm);
        OCL_CHECK(clEnqueueNDRangeKernel(ocl_queue, ocl_init_kernel, 1, nullptr, &N,
                nullptr, 0, nullptr, nullptr));
        // [oclexecution]

        // [relu creation]
        auto relu_d = :ref:`eltwise_forward::desc `(
                :ref:`prop_kind::forward `, :ref:`algorithm::eltwise_relu `, mem_d, 0.0f);
        auto relu_pd = :ref:`eltwise_forward::primitive_desc `(relu_d, eng);
        auto relu = :ref:`eltwise_forward `(relu_pd);
        // [relu creation]

        // [relu exec]
        relu.execute(strm, {{:ref:`DNNL_ARG_SRC `, mem}, {:ref:`DNNL_ARG_DST `, mem}});
        strm.wait();
        // [relu exec]

        // [Check the results]
        std::vector<float> mem_data(N);
        read_from_dnnl_memory(mem_data.data(), mem);
        for (size_t i = 0; i < N; i++) {
            float expected = (i % 2) ? 0.0f : (float)i;
            if (mem_data[i] != expected) {
                std::cout << "Expect " << expected << " but got " << mem_data[i]
                          << "." << std::endl;
                throw std::logic_error("Accuracy check failed.");
            }
        }
        // [Check the results]

        OCL_CHECK(clReleaseKernel(ocl_init_kernel));
    }

    int main(int argc, char **argv) {
        return handle_example_errors(
                {:ref:`engine::kind::gpu `}, gpu_opencl_interop_tutorial);
    }