// This C++ API example demonstrates how to build an AlexNet neural network topology for forward-pass inference.
#include <assert.h>
#include <chrono>
#include <numeric>
#include <vector>
#include <unordered_map>
#include "example_utils.hpp"
// Folds `dims` with multiplication: std::accumulate starts from (memory::dim)1
// and combines the entries with std::multiplies<memory::dim>, so the result is
// the product of all entries (and 1 for an empty range).
// NOTE(review): the signature line of this function is not visible here;
// callers below invoke it as product(<dims>) to size std::vector<float>
// buffers — confirm the declaration against the full source.
return std::accumulate(dims.begin(), dims.end(), (
memory::dim)1,
std::multiplies<memory::dim>());
}
/// Sets up the stages of the network (conv1..conv5, lrn1/lrn2,
/// pool1/pool2/pool5 and fc6..fc8, going by the variable names), queues the
/// per-pass primitives in `net`, and then runs the whole queue `times` times
/// on stream `s`, waiting for the stream at the end.
///
/// @param engine_kind Engine kind requested by the caller.
///     NOTE(review): not referenced in the visible body; presumably `eng` is
///     constructed from it on a line that is not present here — confirm.
/// @param times Number of passes over the queued primitives (default 100).
///
/// NOTE(review): several statements in this function appear truncated — only
/// their trailing argument lines are present (for example the lines ending in
/// `conv1_padding);` or `negative1_slope);`) — and identifiers such as `eng`,
/// `s`, `batch`, the `*_tz` dims, `*_strides`/`*_padding`/`*_kernel`,
/// `local1_size`/`local2_size` and the `*_prim_desc`/`*_pd` objects are used
/// without a visible declaration. Verify against the complete source.
void simple_net(
engine::kind engine_kind,
int times = 100) {
// `net` holds the primitives executed on every pass; `net_args` is indexed in
// lock-step with it in the execution loop at the bottom of this function.
std::vector<primitive> net;
std::vector<std::unordered_map<int, memory>> net_args;
// Host-side input and output buffers (value-initialized floats), sized
// batch * 3 * 227 * 227 and batch * 1000 respectively.
std::vector<float> user_src(batch * 3 * 227 * 227);
std::vector<float> user_dst(batch * 1000);

// ---- conv1 ----
// Host buffers for weights and bias, sized by the product of their dims.
std::vector<float> conv1_weights(product(conv1_weights_tz));
std::vector<float> conv1_bias(product(conv1_bias_tz));
// User-side memory objects on `eng` (f32; nchw for the source, oihw for the
// weights, x for the bias). Each one is handed to write_to_dnnl_memory
// together with the matching host buffer.
auto user_src_memory =
memory({{conv1_src_tz}, dt::f32, tag::nchw}, eng);
write_to_dnnl_memory(user_src.data(), user_src_memory);
auto user_weights_memory
=
memory({{conv1_weights_tz}, dt::f32, tag::oihw}, eng);
write_to_dnnl_memory(conv1_weights.data(), user_weights_memory);
auto conv1_user_bias_memory
=
memory({{conv1_bias_tz}, dt::f32, tag::x}, eng);
write_to_dnnl_memory(conv1_bias.data(), conv1_user_bias_memory);
// Memory descriptors built with tag::any. The code further down compares the
// descriptors reported by conv1_prim_desc against the user-side ones and
// reorders when they differ.
auto conv1_src_md =
memory::desc({conv1_src_tz}, dt::f32, tag::any);
auto conv1_bias_md =
memory::desc({conv1_bias_tz}, dt::f32, tag::any);
auto conv1_weights_md =
memory::desc({conv1_weights_tz}, dt::f32, tag::any);
auto conv1_dst_md =
memory::desc({conv1_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here — presumably the
// construction of conv1_prim_desc.)
conv1_bias_md, conv1_dst_md, conv1_strides, conv1_padding,
conv1_padding);
// Source: start from the user memory; if the primitive descriptor reports a
// different source descriptor, allocate memory with that descriptor and queue
// a reorder in `net`, so the reorder is part of every pass.
auto conv1_src_memory = user_src_memory;
if (conv1_prim_desc.src_desc() != user_src_memory.get_desc()) {
conv1_src_memory =
memory(conv1_prim_desc.src_desc(), eng);
net.push_back(
reorder(user_src_memory, conv1_src_memory));
}
// Weights: same descriptor check, but the reorder is executed right away on
// stream `s` instead of being queued, so it is not repeated per pass.
auto conv1_weights_memory = user_weights_memory;
if (conv1_prim_desc.weights_desc() != user_weights_memory.get_desc()) {
conv1_weights_memory =
memory(conv1_prim_desc.weights_desc(), eng);
reorder(user_weights_memory, conv1_weights_memory)
.
execute(s, user_weights_memory, conv1_weights_memory);
}
// Destination memory uses whatever descriptor the primitive descriptor reports.
auto conv1_dst_memory =
memory(conv1_prim_desc.dst_desc(), eng);
// NOTE(review): the call consuming this constant is not visible. If it is the
// negative slope of a ReLU eltwise primitive, 1.0f would pass negative inputs
// through unchanged (an identity), whereas 0.0f gives the usual ReLU —
// confirm the intended value. The same applies to negative2..5_slope below.
const float negative1_slope = 1.0f;
// (Tail of a statement whose opening is not visible here.)
negative1_slope);

// ---- lrn1 ----
// Constants passed, together with local1_size, as the trailing arguments of
// the truncated statement below.
const float alpha1 = 0.0001f;
const float beta1 = 0.75f;
const float k1 = 1.0f;
// (Tail of a statement whose opening is not visible here.)
local1_size, alpha1, beta1, k1);
auto lrn1_dst_memory =
memory(lrn1_prim_desc.dst_desc(), eng);

// ---- pool1 ----
auto pool1_dst_md =
memory::desc({pool1_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here. Note that this stage
// uses `pool_padding`, while the later pooling stages use `pool2_padding` and
// `pool5_padding`.)
pool1_strides, pool1_kernel, pool_padding, pool_padding);
auto pool1_dst_memory =
memory(pool1_pd.dst_desc(), eng);

// ---- conv2 ----
// Same pattern as conv1; the user weights use the goihw tag here.
std::vector<float> conv2_weights(product(conv2_weights_tz));
std::vector<float> conv2_bias(product(conv2_bias_tz));
auto conv2_user_weights_memory
=
memory({{conv2_weights_tz}, dt::f32, tag::goihw}, eng);
write_to_dnnl_memory(conv2_weights.data(), conv2_user_weights_memory);
auto conv2_user_bias_memory
=
memory({{conv2_bias_tz}, dt::f32, tag::x}, eng);
write_to_dnnl_memory(conv2_bias.data(), conv2_user_bias_memory);
auto conv2_src_md =
memory::desc({conv2_src_tz}, dt::f32, tag::any);
auto conv2_bias_md =
memory::desc({conv2_bias_tz}, dt::f32, tag::any);
auto conv2_weights_md =
memory::desc({conv2_weights_tz}, dt::f32, tag::any);
auto conv2_dst_md =
memory::desc({conv2_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here.)
conv2_bias_md, conv2_dst_md, conv2_strides, conv2_padding,
conv2_padding);
// Source is the pool1 output; queue a per-pass reorder if descriptors differ.
auto conv2_src_memory = pool1_dst_memory;
if (conv2_prim_desc.src_desc() != conv2_src_memory.get_desc()) {
conv2_src_memory =
memory(conv2_prim_desc.src_desc(), eng);
net.push_back(
reorder(pool1_dst_memory, conv2_src_memory));
}
// Weights are reordered immediately on `s` if descriptors differ.
auto conv2_weights_memory = conv2_user_weights_memory;
if (conv2_prim_desc.weights_desc()
!= conv2_user_weights_memory.get_desc()) {
conv2_weights_memory =
memory(conv2_prim_desc.weights_desc(), eng);
reorder(conv2_user_weights_memory, conv2_weights_memory)
.
execute(s, conv2_user_weights_memory, conv2_weights_memory);
}
auto conv2_dst_memory =
memory(conv2_prim_desc.dst_desc(), eng);
// See the NOTE(review) at negative1_slope.
const float negative2_slope = 1.0f;
// (Tail of a statement whose opening is not visible here.)
negative2_slope);

// ---- lrn2 ----
const float alpha2 = 0.0001f;
const float beta2 = 0.75f;
const float k2 = 1.0f;
// (Tail of a statement whose opening is not visible here.)
local2_size, alpha2, beta2, k2);
auto lrn2_dst_memory =
memory(lrn2_prim_desc.dst_desc(), eng);

// ---- pool2 ----
auto pool2_dst_md =
memory::desc({pool2_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here.)
pool2_strides, pool2_kernel, pool2_padding, pool2_padding);
auto pool2_dst_memory =
memory(pool2_pd.dst_desc(), eng);

// ---- conv3 ----
// Same pattern as conv1; the user weights use the oihw tag.
std::vector<float> conv3_weights(product(conv3_weights_tz));
std::vector<float> conv3_bias(product(conv3_bias_tz));
auto conv3_user_weights_memory
=
memory({{conv3_weights_tz}, dt::f32, tag::oihw}, eng);
write_to_dnnl_memory(conv3_weights.data(), conv3_user_weights_memory);
auto conv3_user_bias_memory
=
memory({{conv3_bias_tz}, dt::f32, tag::x}, eng);
write_to_dnnl_memory(conv3_bias.data(), conv3_user_bias_memory);
auto conv3_src_md =
memory::desc({conv3_src_tz}, dt::f32, tag::any);
auto conv3_bias_md =
memory::desc({conv3_bias_tz}, dt::f32, tag::any);
auto conv3_weights_md =
memory::desc({conv3_weights_tz}, dt::f32, tag::any);
auto conv3_dst_md =
memory::desc({conv3_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here.)
conv3_bias_md, conv3_dst_md, conv3_strides, conv3_padding,
conv3_padding);
// Source is the pool2 output; queue a per-pass reorder if descriptors differ.
auto conv3_src_memory = pool2_dst_memory;
if (conv3_prim_desc.src_desc() != conv3_src_memory.get_desc()) {
conv3_src_memory =
memory(conv3_prim_desc.src_desc(), eng);
net.push_back(
reorder(pool2_dst_memory, conv3_src_memory));
}
// Weights are reordered immediately on `s` if descriptors differ.
auto conv3_weights_memory = conv3_user_weights_memory;
if (conv3_prim_desc.weights_desc()
!= conv3_user_weights_memory.get_desc()) {
conv3_weights_memory =
memory(conv3_prim_desc.weights_desc(), eng);
reorder(conv3_user_weights_memory, conv3_weights_memory)
.
execute(s, conv3_user_weights_memory, conv3_weights_memory);
}
auto conv3_dst_memory =
memory(conv3_prim_desc.dst_desc(), eng);
// See the NOTE(review) at negative1_slope.
const float negative3_slope = 1.0f;
// (Tail of a statement whose opening is not visible here.)
negative3_slope);

// ---- conv4 ----
// Same pattern as conv1; the user weights use the goihw tag.
std::vector<float> conv4_weights(product(conv4_weights_tz));
std::vector<float> conv4_bias(product(conv4_bias_tz));
auto conv4_user_weights_memory
=
memory({{conv4_weights_tz}, dt::f32, tag::goihw}, eng);
write_to_dnnl_memory(conv4_weights.data(), conv4_user_weights_memory);
auto conv4_user_bias_memory
=
memory({{conv4_bias_tz}, dt::f32, tag::x}, eng);
write_to_dnnl_memory(conv4_bias.data(), conv4_user_bias_memory);
auto conv4_src_md =
memory::desc({conv4_src_tz}, dt::f32, tag::any);
auto conv4_bias_md =
memory::desc({conv4_bias_tz}, dt::f32, tag::any);
auto conv4_weights_md =
memory::desc({conv4_weights_tz}, dt::f32, tag::any);
auto conv4_dst_md =
memory::desc({conv4_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here.)
conv4_bias_md, conv4_dst_md, conv4_strides, conv4_padding,
conv4_padding);
// Source is the conv3 destination memory; queue a per-pass reorder if
// descriptors differ.
auto conv4_src_memory = conv3_dst_memory;
if (conv4_prim_desc.src_desc() != conv4_src_memory.get_desc()) {
conv4_src_memory =
memory(conv4_prim_desc.src_desc(), eng);
net.push_back(
reorder(conv3_dst_memory, conv4_src_memory));
}
// Weights are reordered immediately on `s` if descriptors differ.
auto conv4_weights_memory = conv4_user_weights_memory;
if (conv4_prim_desc.weights_desc()
!= conv4_user_weights_memory.get_desc()) {
conv4_weights_memory =
memory(conv4_prim_desc.weights_desc(), eng);
reorder(conv4_user_weights_memory, conv4_weights_memory)
.
execute(s, conv4_user_weights_memory, conv4_weights_memory);
}
auto conv4_dst_memory =
memory(conv4_prim_desc.dst_desc(), eng);
// See the NOTE(review) at negative1_slope.
const float negative4_slope = 1.0f;
// (Tail of a statement whose opening is not visible here.)
negative4_slope);

// ---- conv5 ----
// Same pattern as conv1; the user weights use the goihw tag.
std::vector<float> conv5_weights(product(conv5_weights_tz));
std::vector<float> conv5_bias(product(conv5_bias_tz));
auto conv5_user_weights_memory
=
memory({{conv5_weights_tz}, dt::f32, tag::goihw}, eng);
write_to_dnnl_memory(conv5_weights.data(), conv5_user_weights_memory);
auto conv5_user_bias_memory
=
memory({{conv5_bias_tz}, dt::f32, tag::x}, eng);
write_to_dnnl_memory(conv5_bias.data(), conv5_user_bias_memory);
auto conv5_src_md =
memory::desc({conv5_src_tz}, dt::f32, tag::any);
auto conv5_weights_md =
memory::desc({conv5_weights_tz}, dt::f32, tag::any);
auto conv5_bias_md =
memory::desc({conv5_bias_tz}, dt::f32, tag::any);
auto conv5_dst_md =
memory::desc({conv5_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here.)
conv5_bias_md, conv5_dst_md, conv5_strides, conv5_padding,
conv5_padding);
// Source is the conv4 destination memory; queue a per-pass reorder if
// descriptors differ.
auto conv5_src_memory = conv4_dst_memory;
if (conv5_prim_desc.src_desc() != conv5_src_memory.get_desc()) {
conv5_src_memory =
memory(conv5_prim_desc.src_desc(), eng);
net.push_back(
reorder(conv4_dst_memory, conv5_src_memory));
}
// Weights are reordered immediately on `s` if descriptors differ.
auto conv5_weights_memory = conv5_user_weights_memory;
if (conv5_prim_desc.weights_desc()
!= conv5_user_weights_memory.get_desc()) {
conv5_weights_memory =
memory(conv5_prim_desc.weights_desc(), eng);
reorder(conv5_user_weights_memory, conv5_weights_memory)
.
execute(s, conv5_user_weights_memory, conv5_weights_memory);
}
auto conv5_dst_memory =
memory(conv5_prim_desc.dst_desc(), eng);
// See the NOTE(review) at negative1_slope.
const float negative5_slope = 1.0f;
// (Tail of a statement whose opening is not visible here.)
negative5_slope);

// ---- pool5 ----
// NOTE(review): pool5_dst is not referenced again in the visible code.
std::vector<float> pool5_dst(product(pool5_dst_tz));
auto pool5_dst_md =
memory::desc({pool5_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here.)
pool5_strides, pool5_kernel, pool5_padding, pool5_padding);
auto pool5_dst_memory =
memory(pool5_pd.dst_desc(), eng);

// ---- fc6 ----
// Host buffers plus user-side memory objects (weights tagged oihw, bias x).
std::vector<float> fc6_weights(product(fc6_weights_tz));
std::vector<float> fc6_bias(product(fc6_bias_tz));
auto fc6_user_weights_memory
=
memory({{fc6_weights_tz}, dt::f32, tag::oihw}, eng);
write_to_dnnl_memory(fc6_weights.data(), fc6_user_weights_memory);
auto fc6_user_bias_memory =
memory({{fc6_bias_tz}, dt::f32, tag::x}, eng);
write_to_dnnl_memory(fc6_bias.data(), fc6_user_bias_memory);
auto fc6_src_md =
memory::desc({fc6_src_tz}, dt::f32, tag::any);
auto fc6_bias_md =
memory::desc({fc6_bias_tz}, dt::f32, tag::any);
auto fc6_weights_md =
memory::desc({fc6_weights_tz}, dt::f32, tag::any);
auto fc6_dst_md =
memory::desc({fc6_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here.)
fc6_src_md, fc6_weights_md, fc6_bias_md, fc6_dst_md);
// Source is the pool5 output; queue a per-pass reorder if descriptors differ.
auto fc6_src_memory = pool5_dst_memory;
if (fc6_prim_desc.src_desc() != fc6_src_memory.get_desc()) {
fc6_src_memory =
memory(fc6_prim_desc.src_desc(), eng);
net.push_back(
reorder(pool5_dst_memory, fc6_src_memory));
}
// Weights are reordered immediately on `s` if descriptors differ.
auto fc6_weights_memory = fc6_user_weights_memory;
if (fc6_prim_desc.weights_desc() != fc6_user_weights_memory.get_desc()) {
fc6_weights_memory =
memory(fc6_prim_desc.weights_desc(), eng);
reorder(fc6_user_weights_memory, fc6_weights_memory)
.
execute(s, fc6_user_weights_memory, fc6_weights_memory);
}
auto fc6_dst_memory =
memory(fc6_prim_desc.dst_desc(), eng);

// ---- fc7 ----
// User weights are tagged nc here. No separate source descriptor is declared:
// the truncated statement below passes fc6_dst_memory.get_desc() directly.
std::vector<float> fc7_weights(product(fc7_weights_tz));
std::vector<float> fc7_bias(product(fc7_bias_tz));
auto fc7_user_weights_memory
=
memory({{fc7_weights_tz}, dt::f32, tag::nc}, eng);
write_to_dnnl_memory(fc7_weights.data(), fc7_user_weights_memory);
auto fc7_user_bias_memory =
memory({{fc7_bias_tz}, dt::f32, tag::x}, eng);
write_to_dnnl_memory(fc7_bias.data(), fc7_user_bias_memory);
auto fc7_bias_md =
memory::desc({fc7_bias_tz}, dt::f32, tag::any);
auto fc7_weights_md =
memory::desc({fc7_weights_tz}, dt::f32, tag::any);
auto fc7_dst_md =
memory::desc({fc7_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here.)
fc6_dst_memory.get_desc(), fc7_weights_md, fc7_bias_md, fc7_dst_md);
// Weights are reordered immediately on `s` if descriptors differ.
auto fc7_weights_memory = fc7_user_weights_memory;
if (fc7_prim_desc.weights_desc() != fc7_user_weights_memory.get_desc()) {
fc7_weights_memory =
memory(fc7_prim_desc.weights_desc(), eng);
reorder(fc7_user_weights_memory, fc7_weights_memory)
.
execute(s, fc7_user_weights_memory, fc7_weights_memory);
}
auto fc7_dst_memory =
memory(fc7_prim_desc.dst_desc(), eng);

// ---- fc8 ----
// Same shape as fc7: nc weights, and fc7_dst_memory.get_desc() is passed as the
// first visible argument of the truncated statement below.
std::vector<float> fc8_weights(product(fc8_weights_tz));
std::vector<float> fc8_bias(product(fc8_bias_tz));
auto fc8_user_weights_memory
=
memory({{fc8_weights_tz}, dt::f32, tag::nc}, eng);
write_to_dnnl_memory(fc8_weights.data(), fc8_user_weights_memory);
auto fc8_user_bias_memory =
memory({{fc8_bias_tz}, dt::f32, tag::x}, eng);
write_to_dnnl_memory(fc8_bias.data(), fc8_user_bias_memory);
// User-side output memory (f32, nc), handed to write_to_dnnl_memory together
// with the user_dst host buffer.
auto user_dst_memory =
memory({{fc8_dst_tz}, dt::f32, tag::nc}, eng);
write_to_dnnl_memory(user_dst.data(), user_dst_memory);
auto fc8_bias_md =
memory::desc({fc8_bias_tz}, dt::f32, tag::any);
auto fc8_weights_md =
memory::desc({fc8_weights_tz}, dt::f32, tag::any);
auto fc8_dst_md =
memory::desc({fc8_dst_tz}, dt::f32, tag::any);
// (Tail of a statement whose opening is not visible here.)
fc7_dst_memory.get_desc(), fc8_weights_md, fc8_bias_md, fc8_dst_md);
// Weights are reordered immediately on `s` if descriptors differ.
auto fc8_weights_memory = fc8_user_weights_memory;
if (fc8_prim_desc.weights_desc() != fc8_user_weights_memory.get_desc()) {
fc8_weights_memory =
memory(fc8_prim_desc.weights_desc(), eng);
reorder(fc8_user_weights_memory, fc8_weights_memory)
.
execute(s, fc8_user_weights_memory, fc8_weights_memory);
}
auto fc8_dst_memory =
memory(fc8_prim_desc.dst_desc(), eng);
// Queue a reorder from the fc8 output into the user-side output memory.
// NOTE(review): unlike every other check in this function, this condition
// compares the two memory objects themselves rather than their descriptors.
// Since they are separately constructed objects, the comparison presumably
// always holds and the reorder is always queued — confirm whether a
// descriptor comparison was intended.
if (fc8_dst_memory != user_dst_memory) {
net.push_back(
reorder(fc8_dst_memory, user_dst_memory));
}
// Execute every queued primitive, in order, `times` times on stream `s`.
// NOTE(review): no net_args.push_back is visible in this function although
// net.push_back is; if the argument maps really are not populated, the assert
// below fails as soon as `net` is non-empty — verify against the full source.
for (int j = 0; j < times; ++j) {
assert(net.size() == net_args.size() && "something is missing");
for (size_t i = 0; i < net.size(); ++i)
net.at(i).execute(s, net_args.at(i));
}
// Block until the stream reports completion before returning.
s.wait();
}
// Times one call to simple_net() and prints the average per-iteration cost.
// NOTE(review): the signature line of this function is not visible here
// (main() refers to it as cnn_inference_f32, and `engine_kind` is used below
// without a visible declaration) — confirm against the full source.
//
// Timestamp before the run: steady_clock time since its epoch, in whole
// milliseconds.
auto begin = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now().time_since_epoch())
.count();
// Number of passes requested from simple_net.
int times = 100;
simple_net(engine_kind, times);
// Timestamp after the run. The interval brackets the entire simple_net call,
// so it covers everything that function does, not only its execution loop.
auto end = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now().time_since_epoch())
.count();
// `times + 0.0` turns the divisor into a double so the average is not
// truncated by integer division.
std::cout << "Use time: " << (end - begin) / (times + 0.0)
<< " ms per iteration." << std::endl;
}
/// Program entry point.
///
/// Obtains an engine kind from the command-line arguments via
/// parse_engine_kind, then passes it together with cnn_inference_f32 to
/// handle_example_errors. Whatever that helper returns becomes the process
/// exit status.
///
/// @param argc Number of command-line arguments.
/// @param argv Command-line argument strings.
/// @return The value returned by handle_example_errors.
int main(int argc, char **argv) {
    // Resolve the engine kind first, then let the helper drive the example.
    const auto selected_kind = parse_engine_kind(argc, argv);
    const int exit_status
            = handle_example_errors(cnn_inference_f32, selected_kind);
    return exit_status;
}