// Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN) 1.0.4
// Performance library for Deep Learning
//
// cpu_getting_started.cpp
//
// This C++ API example demonstrates the basics of the Intel MKL-DNN
// programming model.
//
// Annotated version: Getting started
/*******************************************************************************
* Copyright 2019 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <iostream>
#include <sstream>
#include <cmath>
#include <numeric>
#include <string>
#include <vector>
// [Prologue]
#include "mkldnn.hpp"
// Optional header to access debug functions like `mkldnn_status2str()`
#include "mkldnn_debug.h"
using namespace mkldnn;
// [Prologue]
void cpu_getting_started_tutorial() {
// [Initialize engine]
engine cpu_engine(engine::kind::cpu, 0);
// [Initialize engine]
// [Initialize stream]
stream cpu_stream(cpu_engine);
// [Initialize stream]
// [Create user's data]
const int N = 1, H = 13, W = 13, C = 3;
// Compute physical strides for each dimension
const int stride_N = H * W * C;
const int stride_H = W * C;
const int stride_W = C;
const int stride_C = 1;
// An auxiliary function that maps logical index to the physical offset
auto offset = [=](int n, int h, int w, int c)
{ return n * stride_N + h * stride_H + w * stride_W + c * stride_C; };
// The image size
const int image_size = N * H * W * C;
// Allocate a buffer for the image
std::vector<float> image(image_size);
// Initialize the image with some values
for (int n = 0; n < N; ++n)
for (int h = 0; h < H; ++h)
for (int w = 0; w < W; ++w)
for (int c = 0; c < C; ++c) {
int off = offset(n, h, w, c); // Get the physical offset of a pixel
image[off] = -std::cos(off / 10.f);
}
// [Create user's data]
// [Init src_md]
{N, C, H, W}, // logical dims, the order is defined by a primitive
memory::data_type::f32, // tensor's data type
memory::format_tag::nhwc // memory format, NHWC in this case
);
// [Init src_md]
// [Init alt_src_md]
auto alt_src_md = memory::desc(
{N, C, H, W}, // logical dims, the order is defined by a primitive
memory::data_type::f32, // tensor's data type
{stride_N, stride_C, stride_H, stride_W} // the strides
);
// Sanity check: the memory descriptors should be the same
if (src_md != alt_src_md)
throw std::string("memory descriptor initialization mismatch");
// [Init alt_src_md]
// [Create memory objects]
// src_mem refers to a buffer owned by the `image` vector
auto src_mem = memory(src_md, cpu_engine, image.data());
// For dst_mem the library allocates buffer
auto dst_mem = memory(src_md, cpu_engine);
// [Create memory objects]
// [Create a ReLU primitive]
// ReLU op descriptor (no engine- or implementation-specific information)
auto relu_d = eltwise_forward::desc(
src_md, // the memory descriptor for an operation to work on
0.f, // alpha parameter means negative slope in case of ReLU
0.f // beta parameter is ignored in case of ReLU
);
// ReLU primitive descriptor, which corresponds to a particular
// implementation in the library
relu_d, // an operation descriptor
cpu_engine // an engine the primitive will be created for
);
// ReLU primitive
auto relu = eltwise_forward(relu_pd); // !!! this can take quite some time
// [Create a ReLU primitive]
// [Execute ReLU primitive]
// Execute ReLU (out-of-place)
relu.execute(
cpu_stream, // The execution stream
{ // A map with all inputs and outputs
{MKLDNN_ARG_SRC, src_mem}, // Source tag and memory obj
{MKLDNN_ARG_DST, dst_mem}, // Destination tag and memory obj
});
// Wait the stream to complete the execution
cpu_stream.wait();
// [Execute ReLU primitive]
// [Execute ReLU primitive in-place]
// Execute ReLU (in-place)
// relu.execute(cpu_stream, {
// {MKLDNN_ARG_SRC, src_mem},
// {MKLDNN_ARG_DST, src_mem},
// });
// [Execute ReLU primitive in-place]
// [Check the results]
// Obtain a buffer for the `dst_mem` and cast it to `float *`.
// This is safe since we created `dst_mem` as f32 tensor with known
// memory format.
float *relu_image = static_cast<float *>(dst_mem.get_data_handle());
// Check the results
for (int n = 0; n < N; ++n)
for (int h = 0; h < H; ++h)
for (int w = 0; w < W; ++w)
for (int c = 0; c < C; ++c) {
int off = offset(n, h, w, c); // get the physical offset of a pixel
float expected = image[off] < 0 ? 0.f : image[off]; // expected value
if (relu_image[off] != expected) {
std::stringstream ss;
ss << "Unexpected output at index("
<< n << ", " << c << ", " << h << ", " << w << "): "
<< "Expect " << expected << " "
<< "Got " << relu_image[off];
throw ss.str();
}
}
// [Check the results]
}
// [Main]
/// Entry point: runs the tutorial and translates exceptions into exit codes.
/// Returns 0 on success, 1 on an MKL-DNN library error, 2 on an
/// example-level validation error.
int main(int argc, char **argv) {
    try {
        cpu_getting_started_tutorial();
        std::cout << "Example passes" << std::endl;
        return 0;
    } catch (mkldnn::error &e) {
        // Library-reported failure: print both the message and the raw status.
        std::cerr << "Intel MKL-DNN error: " << e.what() << std::endl
                  << "Error status: " << mkldnn_status2str(e.status)
                  << std::endl;
        return 1;
    } catch (std::string &e) {
        // Validation failure thrown by the tutorial itself.
        std::cerr << "Error in the example: " << e << std::endl;
        return 2;
    }
}
// [Main]