Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN)  1.0.4
Performance library for Deep Learning
gpu_getting_started.cpp

This C++ API example demonstrates programming for Intel(R) Processor Graphics with Intel(R) MKL-DNN.

Annotated version: Getting started on GPU

/*******************************************************************************
* Copyright 2019 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <iostream>
#include <sstream>
// [Prologue]
#include "mkldnn.hpp"
// Optional header to access debug functions like `mkldnn_status2str()`
#include "mkldnn_debug.h"
using namespace mkldnn;
using namespace std;
// [Prologue]
// Returns the total number of elements described by `adims`, i.e. the
// product of all of its entries. An empty `adims` yields 1.
size_t product(const memory::dims adims) {
    size_t total = 1;
    for (const auto extent : adims)
        total *= static_cast<size_t>(extent);
    return total;
}
// Initializes the contents of `mem` with a test pattern: -1.0f at every
// element index that is a multiple of 7 (including index 0) and 1.0f
// everywhere else.
//
// `adims` gives the logical dimensions of the tensor stored in `mem`; all
// product(adims) elements are written (not just the first adims.size() ones,
// which is only the number of dimensions).
//
// NOTE(review): assumes `mem` holds f32 data in a dense layout with at least
// product(adims) elements -- confirm against the caller's memory descriptor.
void fill(const memory &mem, const memory::dims adims) {
    const size_t n_elems = product(adims);

    // The buffer is accessed through map_data()/unmap_data() rather than a
    // raw handle, so the mapping must be released once writing is done.
    float *array = mem.map_data<float>();
    for (size_t e = 0; e < n_elems; ++e) {
        array[e] = e % 7 ? 1.0f : -1.0f;
    }
    mem.unmap_data(array);
}
// Counts how many elements of `mem` are strictly negative and returns that
// count.
//
// `adims` gives the logical dimensions of the tensor stored in `mem`; all
// product(adims) elements are inspected (not just the first adims.size()
// ones, which is only the number of dimensions).
//
// NOTE(review): assumes `mem` holds f32 data in a dense layout with at least
// product(adims) elements -- confirm against the caller's memory descriptor.
int find_negative(const memory &mem, const memory::dims adims) {
    const size_t n_elems = product(adims);
    int negs = 0;

    // Map the buffer for reading and release the mapping before returning.
    float *array = mem.map_data<float>();
    for (size_t e = 0; e < n_elems; ++e) {
        negs += array[e] < 0.0f;
    }
    mem.unmap_data(array);
    return negs;
}
void gpu_getting_started_tutorial() {
// [Initialize engine]
auto cpu_engine = engine(engine::kind::cpu, 0);
auto gpu_engine = engine(engine::kind::gpu, 0);
// [Initialize engine]
// [Initialize stream]
auto stream_gpu = stream(gpu_engine);
// [Initialize stream]
// [reorder cpu2gpu]
const auto tz = memory::dims{ 2, 16, 1, 1 };
auto m_cpu = memory(
cpu_engine);
auto m_gpu = memory(
gpu_engine);
fill(m_cpu, tz);
auto r1 = reorder(m_cpu, m_gpu);
// [reorder cpu2gpu]
// [Create a ReLU primitive]
// ReLU op descriptor (uses a GPU memory as source memory.
// no engine- or implementation-specific information)
// ReLU primitive descriptor, which corresponds to a particular
// implementation in the library. Specify engine type for the ReLU
// primitive. Use a GPU engine here.
auto relu_pd = eltwise_forward::primitive_desc(relu_d, gpu_engine);
// ReLU primitive
auto relu = eltwise_forward(relu_pd);
// [Create a ReLU primitive]
// [reorder gpu2cpu]
auto r2 = reorder(m_gpu, m_cpu);
// [reorder gpu2cpu]
// [Execute primitives]
// wrap source data from CPU to GPU
r1.execute(stream_gpu, m_cpu, m_gpu);
// Execute ReLU on a GPU stream
relu.execute(stream_gpu,
{ { MKLDNN_ARG_SRC, m_gpu }, { MKLDNN_ARG_DST, m_gpu } });
// Get result data from GPU to CPU
r2.execute(stream_gpu, m_gpu, m_cpu);
stream_gpu.wait();
// [Execute primitives]
// [Check the results]
if (find_negative(m_cpu, tz) != 0) {
std::stringstream ss;
ss << "Unexpected output, find a negative value after the ReLU execution";
throw ss.str();
}
// [Check the results]
}
// [Main]
int main(int argc, char **argv) {
try {
gpu_getting_started_tutorial();
} catch (mkldnn::error &e) {
std::cerr << "Intel MKL-DNN error: " << e.what() << std::endl
<< "Error status: " << mkldnn_status2str(e.status) << std::endl;
return 1;
} catch (std::string &e) {
std::cerr << "Error in the example: " << e << std::endl;
return 2;
}
std::cout << "Example passes" << std::endl;
return 0;
}
// [Main]