This C++ API example demonstrates how to build AlexNet model training using the bfloat16 data type.
The example implements a few layers from the AlexNet model.
#include <assert.h>
#include <iostream>
#include <math.h>
#include <numeric>
#include <string>
// Multiplies together every entry of `dims` and returns the result, i.e. the
// element count of a tensor with those dimensions. An empty `dims` yields 1.
memory::dim product(const memory::dims &dims) {
    memory::dim total = 1;
    for (const auto extent : dims)
        total *= extent;
    return total;
}
// Assembles a forward list (net_fwd) and a backward list (net_bwd) of
// primitives for a short layer chain -- convolution, ReLU, LRN and pooling,
// judging by the variable names -- and executes both lists once.
// User-side buffers are f32; the descriptors built for the primitives use bf16.
//
// NOTE(review): this listing appears to have lost statements. `tag`, `dt`,
// `cpu_engine`, `s`, `conv_pd`, `relu_pd`, `lrn_pd`, `pool_pd`, `pool_bwd_pd`,
// `relu_bwd_pd` and `conv_bwd_weights_pd` are used but never declared in the
// visible text, and several lines are dangling argument lists. Restore the
// missing statements from the original example before compiling.
void simple_net() {
// Primitives to execute, in order, for each pass.
std::vector<primitive> net_fwd, net_bwd;
// Argument maps; entry i is passed to primitive i in the execute loops below.
std::vector<std::unordered_map<int, memory>> net_fwd_args, net_bwd_args;
const int batch = 32;
// f32 user buffers for the network input and the final (pooling) output.
std::vector<float> net_src(batch * 3 * 227 * 227);
std::vector<float> net_dst(batch * 96 * 27 * 27);
// Fill the input with sinf(i).
for (size_t i = 0; i < net_src.size(); ++i)
net_src[i] = sinf((float)i);
// Convolution geometry: 11x11 kernels, stride 4, zero padding.
memory::dims conv_src_tz = { batch, 3, 227, 227 };
memory::dims conv_weights_tz = { 96, 3, 11, 11 };
memory::dims conv_bias_tz = { 96 };
memory::dims conv_dst_tz = { batch, 96, 55, 55 };
memory::dims conv_strides = { 4, 4 };
memory::dims conv_padding = { 0, 0 };
// f32 weights and bias, sized from their dims and filled with sinf(i).
std::vector<float> conv_weights(product(conv_weights_tz));
std::vector<float> conv_bias(product(conv_bias_tz));
for (size_t i = 0; i < conv_weights.size(); ++i)
conv_weights[i] = sinf((float)i);
for (size_t i = 0; i < conv_bias.size(); ++i)
conv_bias[i] = sinf((float)i);
// Memory objects wrapping the user buffers (f32, nchw / oihw / x tags).
auto conv_user_src_memory =
memory({ { conv_src_tz }, dt::f32, tag::nchw },
cpu_engine, net_src.data());
auto conv_user_weights_memory
=
memory({ { conv_weights_tz }, dt::f32, tag::oihw }, cpu_engine,
conv_weights.data());
auto conv_user_bias_memory =
memory({ { conv_bias_tz }, dt::f32, tag::x },
cpu_engine, conv_bias.data());
// bf16 descriptors for the convolution, declared with tag::any.
auto conv_src_md =
memory::desc({ conv_src_tz }, dt::bf16, tag::any);
auto conv_bias_md =
memory::desc({ conv_bias_tz }, dt::bf16, tag::any);
auto conv_weights_md =
memory::desc({ conv_weights_tz }, dt::bf16, tag::any);
auto conv_dst_md =
memory::desc({ conv_dst_tz }, dt::bf16, tag::any);
// NOTE(review): dangling tail of a call whose beginning is missing; the
// declaration of `conv_pd` (used below) is also not visible.
conv_dst_md, conv_strides, conv_padding, conv_padding);
// If conv_pd's source descriptor differs from the user memory's descriptor,
// allocate memory with the former and queue a reorder into it.
auto conv_src_memory = conv_user_src_memory;
if (conv_pd.src_desc() != conv_user_src_memory.get_desc()) {
conv_src_memory =
memory(conv_pd.src_desc(), cpu_engine);
net_fwd.push_back(
reorder(conv_user_src_memory, conv_src_memory));
net_fwd_args.push_back({ { MKLDNN_ARG_FROM, conv_user_src_memory },
{ MKLDNN_ARG_TO, conv_src_memory } });
}
// Same treatment for the weights.
auto conv_weights_memory = conv_user_weights_memory;
if (conv_pd.weights_desc() != conv_user_weights_memory.
get_desc()) {
conv_weights_memory =
memory(conv_pd.weights_desc(), cpu_engine);
net_fwd.push_back(
reorder(conv_user_weights_memory, conv_weights_memory));
net_fwd_args.push_back({ { MKLDNN_ARG_FROM, conv_user_weights_memory },
{ MKLDNN_ARG_TO, conv_weights_memory } });
}
// Destination memory and argument map for the convolution. The bias argument
// is the f32 user memory; no reorder is queued for it.
// NOTE(review): no matching net_fwd.push_back(...) is visible for this
// argument map, so the size asserts at the end would fail as written.
auto conv_dst_memory =
memory(conv_pd.dst_desc(), cpu_engine);
net_fwd_args.push_back({ { MKLDNN_ARG_SRC, conv_src_memory },
{ MKLDNN_ARG_WEIGHTS, conv_weights_memory },
{ MKLDNN_ARG_BIAS, conv_user_bias_memory },
{ MKLDNN_ARG_DST, conv_dst_memory } });
// Slope parameter, also passed on L161's dangling argument list.
// NOTE(review): if this feeds a ReLU, as the name suggests, a slope of 1.0
// would make it an identity -- confirm this is intended.
const float negative_slope = 1.0f;
// NOTE(review): `relu_pd` is not declared in the visible text.
// This stage reads the convolution output and writes relu_dst_memory.
auto relu_dst_memory =
memory(relu_pd.dst_desc(), cpu_engine);
net_fwd_args.push_back({ { MKLDNN_ARG_SRC, conv_dst_memory },
{ MKLDNN_ARG_DST, relu_dst_memory } });
// LRN parameters, reused for the backward LRN descriptor further down.
const uint32_t local_size = 5;
const float alpha = 0.0001f;
const float beta = 0.75f;
const float k = 1.0f;
// NOTE(review): dangling tail of a call; the declaration of `lrn_pd` is missing.
local_size, alpha, beta, k);
// LRN output plus a workspace that is handed to the backward pass as well.
auto lrn_dst_memory =
memory(lrn_pd.dst_desc(), cpu_engine);
auto lrn_workspace_memory =
memory(lrn_pd.workspace_desc(), cpu_engine);
net_fwd_args.push_back({ { MKLDNN_ARG_SRC, relu_dst_memory },
{ MKLDNN_ARG_DST, lrn_dst_memory },
{ MKLDNN_ARG_WORKSPACE, lrn_workspace_memory } });
// Pooling geometry: 3x3 kernel, stride 2, zero padding.
memory::dims pool_dst_tz = { batch, 96, 27, 27 };
memory::dims pool_kernel = { 3, 3 };
memory::dims pool_strides = { 2, 2 };
memory::dims pool_padding = { 0, 0 };
// User-visible f32 output wrapping net_dst.
auto pool_user_dst_memory =
memory({ { pool_dst_tz }, dt::f32, tag::nchw },
cpu_engine, net_dst.data());
auto pool_dst_md =
memory::desc({ pool_dst_tz }, dt::bf16, tag::any);
// NOTE(review): dangling tail of a call; the declaration of `pool_pd` is missing.
pool_dst_md, pool_strides, pool_kernel,
pool_padding, pool_padding);
// Pooling workspace, also passed to the backward pooling below.
auto pool_workspace_memory =
memory(pool_pd.workspace_desc(), cpu_engine);
// The DST entry is inserted afterwards, once the destination memory is chosen.
net_fwd_args.push_back({ { MKLDNN_ARG_SRC, lrn_dst_memory },
{ MKLDNN_ARG_WORKSPACE, pool_workspace_memory } });
// Write straight into the user memory when descriptors match; otherwise write
// to an intermediate memory and queue a reorder back to the user memory.
auto pool_dst_memory = pool_user_dst_memory;
if (pool_pd.dst_desc() != pool_user_dst_memory.get_desc()) {
pool_dst_memory =
memory(pool_pd.dst_desc(), cpu_engine);
net_fwd_args.back().insert({ MKLDNN_ARG_DST, pool_dst_memory });
net_fwd.push_back(
reorder(pool_dst_memory, pool_user_dst_memory));
net_fwd_args.push_back({ { MKLDNN_ARG_FROM, pool_dst_memory },
{ MKLDNN_ARG_TO, pool_user_dst_memory } });
} else {
net_fwd_args.back().insert({ MKLDNN_ARG_DST, pool_dst_memory });
}
// ---- Backward pass ----
// f32 user buffer for the incoming gradient, filled with sinf(i).
std::vector<float> net_diff_dst(batch * 96 * 27 * 27);
for (size_t i = 0; i < net_diff_dst.size(); ++i)
net_diff_dst[i] = sinf((float)i);
auto pool_user_diff_dst_memory
=
memory({ { pool_dst_tz }, dt::f32, tag::nchw }, cpu_engine,
net_diff_dst.data());
// Backward pooling descriptors are taken from the forward memories.
auto pool_diff_src_md = lrn_dst_memory.
get_desc();
auto pool_diff_dst_md = pool_dst_memory.get_desc();
// NOTE(review): two dangling call tails; the declarations of `pool_bwd_desc`
// and `pool_bwd_pd` are not visible.
pool_diff_src_md, pool_diff_dst_md, pool_strides, pool_kernel,
pool_padding, pool_padding);
pool_bwd_desc, cpu_engine, pool_pd);
// Reorder the user gradient into the forward pooling destination's descriptor
// when the two differ.
auto pool_diff_dst_memory = pool_user_diff_dst_memory;
if (pool_dst_memory.get_desc() != pool_user_diff_dst_memory.get_desc()) {
pool_diff_dst_memory =
memory(pool_dst_memory.get_desc(), cpu_engine);
net_bwd.push_back(
reorder(pool_user_diff_dst_memory, pool_diff_dst_memory));
net_bwd_args.push_back({ { MKLDNN_ARG_FROM, pool_user_diff_dst_memory },
{ MKLDNN_ARG_TO, pool_diff_dst_memory } });
}
// Backward pooling: consumes the gradient and the forward workspace.
auto pool_diff_src_memory =
memory(pool_bwd_pd.diff_src_desc(), cpu_engine);
net_bwd.push_back(pooling_backward(pool_bwd_pd));
net_bwd_args.push_back({ { MKLDNN_ARG_DIFF_DST, pool_diff_dst_memory },
{ MKLDNN_ARG_DIFF_SRC, pool_diff_src_memory },
{ MKLDNN_ARG_WORKSPACE, pool_workspace_memory } });
auto lrn_diff_dst_md = lrn_dst_memory.
get_desc();
// NOTE(review): dangling call tail followed by a truncated declaration of
// `lrn_bwd_pd`; no net_bwd.push_back(...) for this stage is visible either.
lrn_pd.src_desc(), lrn_diff_dst_md, local_size, alpha, beta, k);
auto lrn_bwd_pd
// Backward LRN arguments: forward input, gradient from pooling, forward workspace.
auto lrn_diff_src_memory =
memory(lrn_bwd_pd.diff_src_desc(), cpu_engine);
net_bwd_args.push_back({ { MKLDNN_ARG_SRC, relu_dst_memory },
{ MKLDNN_ARG_DIFF_DST, pool_diff_src_memory },
{ MKLDNN_ARG_DIFF_SRC, lrn_diff_src_memory },
{ MKLDNN_ARG_WORKSPACE, lrn_workspace_memory } });
auto relu_diff_dst_md = lrn_diff_src_memory.
get_desc();
auto relu_src_md = conv_pd.dst_desc();
// NOTE(review): two dangling call tails; `relu_bwd_desc` / `relu_bwd_pd`
// declarations and the net_bwd.push_back(...) for this stage are not visible.
relu_diff_dst_md, relu_src_md, negative_slope);
relu_bwd_desc, cpu_engine, relu_pd);
// Backward ReLU-stage arguments: forward input (conv output) and LRN gradient.
auto relu_diff_src_memory =
memory(relu_bwd_pd.diff_src_desc(), cpu_engine);
net_bwd_args.push_back({ { MKLDNN_ARG_SRC, conv_dst_memory },
{ MKLDNN_ARG_DIFF_DST, lrn_diff_src_memory },
{ MKLDNN_ARG_DIFF_SRC, relu_diff_src_memory } });
// f32 user buffers that receive the weight and bias gradients.
std::vector<float> conv_user_diff_weights_buffer(product(conv_weights_tz));
std::vector<float> conv_diff_bias_buffer(product(conv_bias_tz));
// NOTE(review): tagged nchw here, whereas the forward weights use oihw --
// confirm the two tags are interchangeable for this 4-D tensor.
auto conv_user_diff_weights_memory
=
memory({ { conv_weights_tz }, dt::f32, tag::nchw }, cpu_engine,
conv_user_diff_weights_buffer.data());
auto conv_diff_bias_memory =
memory({ { conv_bias_tz }, dt::f32, tag::x },
cpu_engine, conv_diff_bias_buffer.data());
// bf16 descriptors for the backward-weights convolution.
auto conv_bwd_src_md =
memory::desc({ conv_src_tz }, dt::bf16, tag::any);
auto conv_diff_bias_md =
memory::desc({ conv_bias_tz }, dt::bf16, tag::any);
// NOTE(review): the next declaration is truncated (no initializer).
auto conv_diff_weights_md
auto conv_diff_dst_md =
memory::desc({ conv_dst_tz }, dt::bf16, tag::any);
// NOTE(review): dangling call tails; `conv_bwd_weights_desc` and
// `conv_bwd_weights_pd` declarations are not visible.
conv_diff_bias_md, conv_diff_dst_md, conv_strides, conv_padding,
conv_padding);
conv_bwd_weights_desc, cpu_engine, conv_pd);
// Reuse the forward source memory unless the backward primitive descriptor
// reports a different source descriptor, in which case queue a reorder.
auto conv_bwd_src_memory = conv_src_memory;
if (conv_bwd_weights_pd.src_desc() != conv_src_memory.get_desc()) {
conv_bwd_src_memory
=
memory(conv_bwd_weights_pd.src_desc(), cpu_engine);
net_bwd.push_back(
reorder(conv_src_memory, conv_bwd_src_memory));
net_bwd_args.push_back({ { MKLDNN_ARG_FROM, conv_src_memory },
{ MKLDNN_ARG_TO, conv_bwd_src_memory } });
}
// Same check for the incoming gradient produced by the ReLU stage.
auto conv_diff_dst_memory = relu_diff_src_memory;
if (conv_bwd_weights_pd.diff_dst_desc()
!= relu_diff_src_memory.get_desc()) {
conv_diff_dst_memory
=
memory(conv_bwd_weights_pd.diff_dst_desc(), cpu_engine);
net_bwd.push_back(
reorder(relu_diff_src_memory, conv_diff_dst_memory));
net_bwd_args.push_back({ { MKLDNN_ARG_FROM, relu_diff_src_memory },
{ MKLDNN_ARG_TO, conv_diff_dst_memory } });
}
// Arguments for the backward-weights convolution; DIFF_WEIGHTS is inserted
// below once its memory is chosen.
// NOTE(review): no matching net_bwd.push_back(...) is visible for this map.
net_bwd_args.push_back({ { MKLDNN_ARG_SRC, conv_bwd_src_memory },
{ MKLDNN_ARG_DIFF_DST, conv_diff_dst_memory },
{ MKLDNN_ARG_DIFF_BIAS, conv_diff_bias_memory } });
// Write weight gradients directly into the user memory when descriptors
// match; otherwise use an intermediate memory and reorder back to the user.
auto conv_diff_weights_memory = conv_user_diff_weights_memory;
if (conv_bwd_weights_pd.diff_weights_desc()
!= conv_user_diff_weights_memory.get_desc()) {
conv_diff_weights_memory
=
memory(conv_bwd_weights_pd.diff_weights_desc(), cpu_engine);
net_bwd_args.back().insert(
{ MKLDNN_ARG_DIFF_WEIGHTS, conv_diff_weights_memory });
// NOTE(review): dangling tail; presumably the end of a
// net_bwd.push_back(reorder(...)) call -- confirm against the original.
conv_diff_weights_memory, conv_user_diff_weights_memory));
net_bwd_args.push_back({ { MKLDNN_ARG_FROM, conv_diff_weights_memory },
{ MKLDNN_ARG_TO, conv_user_diff_weights_memory } });
} else {
net_bwd_args.back().insert(
{ MKLDNN_ARG_DIFF_WEIGHTS, conv_diff_weights_memory });
}
// Every primitive must have exactly one argument map.
assert(net_fwd.size() == net_fwd_args.size() && "something is missing");
assert(net_bwd.size() == net_bwd_args.size() && "something is missing");
// Run the forward list, then the backward list, n_iter times (once here).
// NOTE(review): the stream `s` is not declared in the visible text.
int n_iter = 1;
while (n_iter) {
for (size_t i = 0; i < net_fwd.size(); ++i)
net_fwd.at(i).execute(s, net_fwd_args.at(i));
for (size_t i = 0; i < net_bwd.size(); ++i)
net_bwd.at(i).execute(s, net_bwd_args.at(i));
--n_iter;
}
}
int main(int argc, char **argv) {
try {
simple_net();
std::cout << "passed" << std::endl;
std::cerr <<
"status: " << e.
status << std::endl;
std::cerr <<
"message: " << e.
message << std::endl;
}
return 0;
}
#include <assert.h>
#include <iostream>
#include <math.h>
#include <numeric>
#include <string>
// Multiplies together every entry of `dims` and returns the result, i.e. the
// element count of a tensor with those dimensions. An empty `dims` yields 1.
memory::dim product(const memory::dims &dims) {
    memory::dim total = 1;
    for (const auto extent : dims)
        total *= extent;
    return total;
}
// Assembles a forward list (net_fwd) and a backward list (net_bwd) of
// primitives for a short layer chain -- convolution, ReLU, LRN and pooling,
// judging by the variable names -- and executes both lists once.
// User-side buffers are f32; the descriptors built for the primitives use bf16.
//
// NOTE(review): this listing appears to have lost statements. `tag`, `dt`,
// `cpu_engine`, `s`, `conv_pd`, `relu_pd`, `lrn_pd`, `pool_pd`, `pool_bwd_pd`,
// `relu_bwd_pd` and `conv_bwd_weights_pd` are used but never declared in the
// visible text, and several lines are dangling argument lists. Restore the
// missing statements from the original example before compiling.
void simple_net() {
// Primitives to execute, in order, for each pass.
std::vector<primitive> net_fwd, net_bwd;
// Argument maps; entry i is passed to primitive i in the execute loops below.
std::vector<std::unordered_map<int, memory>> net_fwd_args, net_bwd_args;
const int batch = 32;
// f32 user buffers for the network input and the final (pooling) output.
std::vector<float> net_src(batch * 3 * 227 * 227);
std::vector<float> net_dst(batch * 96 * 27 * 27);
// Fill the input with sinf(i).
for (size_t i = 0; i < net_src.size(); ++i)
net_src[i] = sinf((float)i);
// Convolution geometry: 11x11 kernels, stride 4, zero padding.
memory::dims conv_src_tz = { batch, 3, 227, 227 };
memory::dims conv_weights_tz = { 96, 3, 11, 11 };
memory::dims conv_bias_tz = { 96 };
memory::dims conv_dst_tz = { batch, 96, 55, 55 };
memory::dims conv_strides = { 4, 4 };
memory::dims conv_padding = { 0, 0 };
// f32 weights and bias, sized from their dims and filled with sinf(i).
std::vector<float> conv_weights(product(conv_weights_tz));
std::vector<float> conv_bias(product(conv_bias_tz));
for (size_t i = 0; i < conv_weights.size(); ++i)
conv_weights[i] = sinf((float)i);
for (size_t i = 0; i < conv_bias.size(); ++i)
conv_bias[i] = sinf((float)i);
// Memory objects wrapping the user buffers (f32, nchw / oihw / x tags).
auto conv_user_src_memory =
memory({ { conv_src_tz }, dt::f32, tag::nchw },
cpu_engine, net_src.data());
auto conv_user_weights_memory
=
memory({ { conv_weights_tz }, dt::f32, tag::oihw }, cpu_engine,
conv_weights.data());
auto conv_user_bias_memory =
memory({ { conv_bias_tz }, dt::f32, tag::x },
cpu_engine, conv_bias.data());
// bf16 descriptors for the convolution, declared with tag::any.
auto conv_src_md =
memory::desc({ conv_src_tz }, dt::bf16, tag::any);
auto conv_bias_md =
memory::desc({ conv_bias_tz }, dt::bf16, tag::any);
auto conv_weights_md =
memory::desc({ conv_weights_tz }, dt::bf16, tag::any);
auto conv_dst_md =
memory::desc({ conv_dst_tz }, dt::bf16, tag::any);
// NOTE(review): dangling tail of a call whose beginning is missing; the
// declaration of `conv_pd` (used below) is also not visible.
conv_dst_md, conv_strides, conv_padding, conv_padding);
// If conv_pd's source descriptor differs from the user memory's descriptor,
// allocate memory with the former and queue a reorder into it.
auto conv_src_memory = conv_user_src_memory;
if (conv_pd.src_desc() != conv_user_src_memory.get_desc()) {
conv_src_memory =
memory(conv_pd.src_desc(), cpu_engine);
net_fwd.push_back(
reorder(conv_user_src_memory, conv_src_memory));
net_fwd_args.push_back({ { MKLDNN_ARG_FROM, conv_user_src_memory },
{ MKLDNN_ARG_TO, conv_src_memory } });
}
// Same treatment for the weights.
auto conv_weights_memory = conv_user_weights_memory;
if (conv_pd.weights_desc() != conv_user_weights_memory.
get_desc()) {
conv_weights_memory =
memory(conv_pd.weights_desc(), cpu_engine);
net_fwd.push_back(
reorder(conv_user_weights_memory, conv_weights_memory));
net_fwd_args.push_back({ { MKLDNN_ARG_FROM, conv_user_weights_memory },
{ MKLDNN_ARG_TO, conv_weights_memory } });
}
// Destination memory and argument map for the convolution. The bias argument
// is the f32 user memory; no reorder is queued for it.
// NOTE(review): no matching net_fwd.push_back(...) is visible for this
// argument map, so the size asserts at the end would fail as written.
auto conv_dst_memory =
memory(conv_pd.dst_desc(), cpu_engine);
net_fwd_args.push_back({ { MKLDNN_ARG_SRC, conv_src_memory },
{ MKLDNN_ARG_WEIGHTS, conv_weights_memory },
{ MKLDNN_ARG_BIAS, conv_user_bias_memory },
{ MKLDNN_ARG_DST, conv_dst_memory } });
// Slope parameter, also passed on L408's dangling argument list.
// NOTE(review): if this feeds a ReLU, as the name suggests, a slope of 1.0
// would make it an identity -- confirm this is intended.
const float negative_slope = 1.0f;
// NOTE(review): `relu_pd` is not declared in the visible text.
// This stage reads the convolution output and writes relu_dst_memory.
auto relu_dst_memory =
memory(relu_pd.dst_desc(), cpu_engine);
net_fwd_args.push_back({ { MKLDNN_ARG_SRC, conv_dst_memory },
{ MKLDNN_ARG_DST, relu_dst_memory } });
// LRN parameters, reused for the backward LRN descriptor further down.
const uint32_t local_size = 5;
const float alpha = 0.0001f;
const float beta = 0.75f;
const float k = 1.0f;
// NOTE(review): dangling tail of a call; the declaration of `lrn_pd` is missing.
local_size, alpha, beta, k);
// LRN output plus a workspace that is handed to the backward pass as well.
auto lrn_dst_memory =
memory(lrn_pd.dst_desc(), cpu_engine);
auto lrn_workspace_memory =
memory(lrn_pd.workspace_desc(), cpu_engine);
net_fwd_args.push_back({ { MKLDNN_ARG_SRC, relu_dst_memory },
{ MKLDNN_ARG_DST, lrn_dst_memory },
{ MKLDNN_ARG_WORKSPACE, lrn_workspace_memory } });
// Pooling geometry: 3x3 kernel, stride 2, zero padding.
memory::dims pool_dst_tz = { batch, 96, 27, 27 };
memory::dims pool_kernel = { 3, 3 };
memory::dims pool_strides = { 2, 2 };
memory::dims pool_padding = { 0, 0 };
// User-visible f32 output wrapping net_dst.
auto pool_user_dst_memory =
memory({ { pool_dst_tz }, dt::f32, tag::nchw },
cpu_engine, net_dst.data());
auto pool_dst_md =
memory::desc({ pool_dst_tz }, dt::bf16, tag::any);
// NOTE(review): dangling tail of a call; the declaration of `pool_pd` is missing.
pool_dst_md, pool_strides, pool_kernel,
pool_padding, pool_padding);
// Pooling workspace, also passed to the backward pooling below.
auto pool_workspace_memory =
memory(pool_pd.workspace_desc(), cpu_engine);
// The DST entry is inserted afterwards, once the destination memory is chosen.
net_fwd_args.push_back({ { MKLDNN_ARG_SRC, lrn_dst_memory },
{ MKLDNN_ARG_WORKSPACE, pool_workspace_memory } });
// Write straight into the user memory when descriptors match; otherwise write
// to an intermediate memory and queue a reorder back to the user memory.
auto pool_dst_memory = pool_user_dst_memory;
if (pool_pd.dst_desc() != pool_user_dst_memory.get_desc()) {
pool_dst_memory =
memory(pool_pd.dst_desc(), cpu_engine);
net_fwd_args.back().insert({ MKLDNN_ARG_DST, pool_dst_memory });
net_fwd.push_back(
reorder(pool_dst_memory, pool_user_dst_memory));
net_fwd_args.push_back({ { MKLDNN_ARG_FROM, pool_dst_memory },
{ MKLDNN_ARG_TO, pool_user_dst_memory } });
} else {
net_fwd_args.back().insert({ MKLDNN_ARG_DST, pool_dst_memory });
}
// ---- Backward pass ----
// f32 user buffer for the incoming gradient, filled with sinf(i).
std::vector<float> net_diff_dst(batch * 96 * 27 * 27);
for (size_t i = 0; i < net_diff_dst.size(); ++i)
net_diff_dst[i] = sinf((float)i);
auto pool_user_diff_dst_memory
=
memory({ { pool_dst_tz }, dt::f32, tag::nchw }, cpu_engine,
net_diff_dst.data());
// Backward pooling descriptors are taken from the forward memories.
auto pool_diff_src_md = lrn_dst_memory.
get_desc();
auto pool_diff_dst_md = pool_dst_memory.get_desc();
// NOTE(review): two dangling call tails; the declarations of `pool_bwd_desc`
// and `pool_bwd_pd` are not visible.
pool_diff_src_md, pool_diff_dst_md, pool_strides, pool_kernel,
pool_padding, pool_padding);
pool_bwd_desc, cpu_engine, pool_pd);
// Reorder the user gradient into the forward pooling destination's descriptor
// when the two differ.
auto pool_diff_dst_memory = pool_user_diff_dst_memory;
if (pool_dst_memory.get_desc() != pool_user_diff_dst_memory.get_desc()) {
pool_diff_dst_memory =
memory(pool_dst_memory.get_desc(), cpu_engine);
net_bwd.push_back(
reorder(pool_user_diff_dst_memory, pool_diff_dst_memory));
net_bwd_args.push_back({ { MKLDNN_ARG_FROM, pool_user_diff_dst_memory },
{ MKLDNN_ARG_TO, pool_diff_dst_memory } });
}
// Backward pooling: consumes the gradient and the forward workspace.
auto pool_diff_src_memory =
memory(pool_bwd_pd.diff_src_desc(), cpu_engine);
net_bwd.push_back(pooling_backward(pool_bwd_pd));
net_bwd_args.push_back({ { MKLDNN_ARG_DIFF_DST, pool_diff_dst_memory },
{ MKLDNN_ARG_DIFF_SRC, pool_diff_src_memory },
{ MKLDNN_ARG_WORKSPACE, pool_workspace_memory } });
auto lrn_diff_dst_md = lrn_dst_memory.
get_desc();
// NOTE(review): dangling call tail followed by a truncated declaration of
// `lrn_bwd_pd`; no net_bwd.push_back(...) for this stage is visible either.
lrn_pd.src_desc(), lrn_diff_dst_md, local_size, alpha, beta, k);
auto lrn_bwd_pd
// Backward LRN arguments: forward input, gradient from pooling, forward workspace.
auto lrn_diff_src_memory =
memory(lrn_bwd_pd.diff_src_desc(), cpu_engine);
net_bwd_args.push_back({ { MKLDNN_ARG_SRC, relu_dst_memory },
{ MKLDNN_ARG_DIFF_DST, pool_diff_src_memory },
{ MKLDNN_ARG_DIFF_SRC, lrn_diff_src_memory },
{ MKLDNN_ARG_WORKSPACE, lrn_workspace_memory } });
auto relu_diff_dst_md = lrn_diff_src_memory.
get_desc();
auto relu_src_md = conv_pd.dst_desc();
// NOTE(review): two dangling call tails; `relu_bwd_desc` / `relu_bwd_pd`
// declarations and the net_bwd.push_back(...) for this stage are not visible.
relu_diff_dst_md, relu_src_md, negative_slope);
relu_bwd_desc, cpu_engine, relu_pd);
// Backward ReLU-stage arguments: forward input (conv output) and LRN gradient.
auto relu_diff_src_memory =
memory(relu_bwd_pd.diff_src_desc(), cpu_engine);
net_bwd_args.push_back({ { MKLDNN_ARG_SRC, conv_dst_memory },
{ MKLDNN_ARG_DIFF_DST, lrn_diff_src_memory },
{ MKLDNN_ARG_DIFF_SRC, relu_diff_src_memory } });
// f32 user buffers that receive the weight and bias gradients.
std::vector<float> conv_user_diff_weights_buffer(product(conv_weights_tz));
std::vector<float> conv_diff_bias_buffer(product(conv_bias_tz));
// NOTE(review): tagged nchw here, whereas the forward weights use oihw --
// confirm the two tags are interchangeable for this 4-D tensor.
auto conv_user_diff_weights_memory
=
memory({ { conv_weights_tz }, dt::f32, tag::nchw }, cpu_engine,
conv_user_diff_weights_buffer.data());
auto conv_diff_bias_memory =
memory({ { conv_bias_tz }, dt::f32, tag::x },
cpu_engine, conv_diff_bias_buffer.data());
// bf16 descriptors for the backward-weights convolution.
auto conv_bwd_src_md =
memory::desc({ conv_src_tz }, dt::bf16, tag::any);
auto conv_diff_bias_md =
memory::desc({ conv_bias_tz }, dt::bf16, tag::any);
// NOTE(review): the next declaration is truncated (no initializer).
auto conv_diff_weights_md
auto conv_diff_dst_md =
memory::desc({ conv_dst_tz }, dt::bf16, tag::any);
// NOTE(review): dangling call tails; `conv_bwd_weights_desc` and
// `conv_bwd_weights_pd` declarations are not visible.
conv_diff_bias_md, conv_diff_dst_md, conv_strides, conv_padding,
conv_padding);
conv_bwd_weights_desc, cpu_engine, conv_pd);
// Reuse the forward source memory unless the backward primitive descriptor
// reports a different source descriptor, in which case queue a reorder.
auto conv_bwd_src_memory = conv_src_memory;
if (conv_bwd_weights_pd.src_desc() != conv_src_memory.get_desc()) {
conv_bwd_src_memory
=
memory(conv_bwd_weights_pd.src_desc(), cpu_engine);
net_bwd.push_back(
reorder(conv_src_memory, conv_bwd_src_memory));
net_bwd_args.push_back({ { MKLDNN_ARG_FROM, conv_src_memory },
{ MKLDNN_ARG_TO, conv_bwd_src_memory } });
}
// Same check for the incoming gradient produced by the ReLU stage.
auto conv_diff_dst_memory = relu_diff_src_memory;
if (conv_bwd_weights_pd.diff_dst_desc()
!= relu_diff_src_memory.get_desc()) {
conv_diff_dst_memory
=
memory(conv_bwd_weights_pd.diff_dst_desc(), cpu_engine);
net_bwd.push_back(
reorder(relu_diff_src_memory, conv_diff_dst_memory));
net_bwd_args.push_back({ { MKLDNN_ARG_FROM, relu_diff_src_memory },
{ MKLDNN_ARG_TO, conv_diff_dst_memory } });
}
// Arguments for the backward-weights convolution; DIFF_WEIGHTS is inserted
// below once its memory is chosen.
// NOTE(review): no matching net_bwd.push_back(...) is visible for this map.
net_bwd_args.push_back({ { MKLDNN_ARG_SRC, conv_bwd_src_memory },
{ MKLDNN_ARG_DIFF_DST, conv_diff_dst_memory },
{ MKLDNN_ARG_DIFF_BIAS, conv_diff_bias_memory } });
// Write weight gradients directly into the user memory when descriptors
// match; otherwise use an intermediate memory and reorder back to the user.
auto conv_diff_weights_memory = conv_user_diff_weights_memory;
if (conv_bwd_weights_pd.diff_weights_desc()
!= conv_user_diff_weights_memory.get_desc()) {
conv_diff_weights_memory
=
memory(conv_bwd_weights_pd.diff_weights_desc(), cpu_engine);
net_bwd_args.back().insert(
{ MKLDNN_ARG_DIFF_WEIGHTS, conv_diff_weights_memory });
// NOTE(review): dangling tail; presumably the end of a
// net_bwd.push_back(reorder(...)) call -- confirm against the original.
conv_diff_weights_memory, conv_user_diff_weights_memory));
net_bwd_args.push_back({ { MKLDNN_ARG_FROM, conv_diff_weights_memory },
{ MKLDNN_ARG_TO, conv_user_diff_weights_memory } });
} else {
net_bwd_args.back().insert(
{ MKLDNN_ARG_DIFF_WEIGHTS, conv_diff_weights_memory });
}
// Every primitive must have exactly one argument map.
assert(net_fwd.size() == net_fwd_args.size() && "something is missing");
assert(net_bwd.size() == net_bwd_args.size() && "something is missing");
// Run the forward list, then the backward list, n_iter times (once here).
// NOTE(review): the stream `s` is not declared in the visible text.
int n_iter = 1;
while (n_iter) {
for (size_t i = 0; i < net_fwd.size(); ++i)
net_fwd.at(i).execute(s, net_fwd_args.at(i));
for (size_t i = 0; i < net_bwd.size(); ++i)
net_bwd.at(i).execute(s, net_bwd_args.at(i));
--n_iter;
}
}
int main(int argc, char **argv) {
try {
simple_net();
std::cout << "passed" << std::endl;
std::cerr << "status: " << e.status << std::endl;
std::cerr << "message: " << e.message << std::endl;
}
return 0;
}