This C++ API example demonstrates how to build GNMT model inference: an encoder-decoder network with a bidirectional LSTM encoder layer, a stack of unidirectional LSTM encoder layers, and an attention-driven LSTM decoder.
#include <assert.h>
#include <cstring>
#include <iostream>
#include <math.h>
#include <numeric>
#include <string>
#include "dnnl.hpp"
#include "example_utils.hpp"
using namespace dnnl;
using dim_t = dnnl::memory::dim;
// MSVC doesn't support the OpenMP collapse clause; expand it to nothing there.
#if defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#define collapse(x)
#endif
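// Model dimensions: batch of 128, source and target sequences up to 28
// tokens, 1024-wide features, one bidirectional encoder layer followed by
// seven unidirectional ones, and an eight-layer decoder. LSTM cells use four
// gates.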
const dim_t batch = 128;
const dim_t src_seq_length_max = 28;
const dim_t tgt_seq_length_max = 28;
const dim_t feature_size = 1024;
const dim_t enc_bidir_n_layers = 1;
const dim_t enc_unidir_n_layers = 7;
const dim_t dec_n_layers = 8;
const int lstm_n_gates = 4;
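// Global scratch buffers reused by every call to compute_attention() below.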
std::vector<float> weighted_src_layer(batch *feature_size, 1.0f);
std::vector<float> alignment_model(
src_seq_length_max *batch *feature_size, 1.0f);
std::vector<float> alignments(src_seq_length_max *batch, 1.0f);
std::vector<float> exp_sums(batch, 1.0f);
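// Computes weighted_annotations = annotations * weights_annot in one SGEMM
// over all (src_seq_length_max * batch) rows. This depends only on the
// encoder output, so it is done once and reused by every decoder step.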
void compute_weighted_annotations(float *weighted_annotations,
dim_t src_seq_length_max, dim_t batch, dim_t feature_size,
float *weights_annot, float *annotations) {
dim_t num_weighted_annotations = src_seq_length_max * batch;
dnnl_sgemm('N', 'N', num_weighted_annotations, feature_size, feature_size,
1.f, annotations, feature_size, weights_annot, feature_size, 0.f,
weighted_annotations, feature_size);
}
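// One additive-attention step: project the decoder input, combine it with
// the weighted annotations through tanh, reduce against the alignment
// weights, softmax over source positions, and write the resulting context
// vectors into the second half of each src_iter entry.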
void compute_attention(float *context_vectors, dim_t src_seq_length_max,
dim_t batch, dim_t feature_size, float *weights_src_layer,
float *dec_src_layer, float *annotations, float *weighted_annotations,
float *weights_alignments) {
dnnl_sgemm('N', 'N', batch, feature_size, feature_size, 1.f, dec_src_layer,
feature_size, weights_src_layer, feature_size, 0.f,
weighted_src_layer.data(), feature_size);
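// Alignment model: tanh of the projected decoder input plus the weighted
// annotation of every source position.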
float *alignment_model_ptr = alignment_model.data();
#ifdef _OPENMP
#pragma omp parallel for collapse(2)
#endif
for (dim_t i = 0; i < src_seq_length_max; i++) {
for (dim_t j = 0; j < batch * feature_size; j++)
alignment_model_ptr[i * batch * feature_size + j] = tanhf(
weighted_src_layer.data()[j]
+ weighted_annotations[i * batch * feature_size + j]);
}
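// Reduce the alignment model against the alignment weights; a GEMV
// expressed as an SGEMM with N = 1.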
dim_t num_weighted_annotations = src_seq_length_max * batch;
dnnl_sgemm('N', 'N', num_weighted_annotations, 1, feature_size, 1.f,
alignment_model_ptr, feature_size, weights_alignments, 1, 0.f,
alignments.data(), 1);
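// Softmax over the source positions. The accumulation into exp_sums is
// parallelized over the batch only, so each sum is owned by a single thread.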
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (dim_t i = 0; i < batch; i++)
exp_sums[i] = 0.0f;
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (dim_t j = 0; j < batch; j++) {
for (dim_t i = 0; i < src_seq_length_max; i++) {
alignments[i * batch + j] = expf(alignments[i * batch + j]);
exp_sums[j] += alignments[i * batch + j];
}
}
#ifdef _OPENMP
#pragma omp parallel for collapse(2)
#endif
for (dim_t i = 0; i < src_seq_length_max; i++)
for (dim_t j = 0; j < batch; j++)
alignments[i * batch + j] /= exp_sums[j];
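// Zero the context-vector half of each state entry, then accumulate the
// alignment-weighted annotations into it.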
#ifdef _OPENMP
#pragma omp parallel for collapse(2)
#endif
for (dim_t i = 0; i < batch; i++)
for (dim_t j = 0; j < feature_size; j++)
context_vectors[i * (feature_size + feature_size) + feature_size
+ j]
= 0.0f;
#ifdef _OPENMP
#pragma omp parallel for collapse(2)
#endif
for (dim_t i = 0; i < batch; i++)
for (dim_t j = 0; j < feature_size; j++)
for (dim_t k = 0; k < src_seq_length_max; k++)
context_vectors[i * (feature_size + feature_size) + feature_size
+ j]
+= alignments[k * batch + i]
* annotations[j + feature_size * (i + batch * k)];
}
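// Broadcasts the context vectors computed for the first decoder layer into
// the src_iter entries of all remaining layers.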
void copy_context(
float *src_iter, dim_t n_layers, dim_t batch, dim_t feature_size) {
#ifdef _OPENMP
#pragma omp parallel for collapse(3)
#endif
for (dim_t k = 1; k < n_layers; k++)
for (dim_t j = 0; j < batch; j++)
for (dim_t i = 0; i < feature_size; i++)
src_iter[(k * batch + j) * (feature_size + feature_size)
+ feature_size + i]
= src_iter[j * (feature_size + feature_size)
+ feature_size + i];
}
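// Builds and runs the whole pipeline: a bidirectional LSTM encoder layer, a
// stack of unidirectional LSTM encoder layers, and an LSTM decoder whose
// recurrent state carries the attention context.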
void simple_net() {
auto cpu_engine = engine(engine::kind::cpu, 0);
stream s(cpu_engine);
std::vector<primitive> encoder_net, decoder_net;
std::vector<std::unordered_map<int, memory>> encoder_net_args,
decoder_net_args;
std::vector<float> net_src(batch * src_seq_length_max * feature_size, 1.0f);
std::vector<float> net_dst(batch * tgt_seq_length_max * feature_size, 1.0f);
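// Encoder: bidirectional LSTM layer. The *_tz names are tensor dimensions;
// the user_* buffers hold data in plain (user) layouts.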
memory::dims enc_bidir_src_layer_tz
= {src_seq_length_max, batch, feature_size};
memory::dims enc_bidir_weights_layer_tz
= {enc_bidir_n_layers, 2, feature_size, lstm_n_gates, feature_size};
memory::dims enc_bidir_weights_iter_tz
= {enc_bidir_n_layers, 2, feature_size, lstm_n_gates, feature_size};
memory::dims enc_bidir_bias_tz
= {enc_bidir_n_layers, 2, lstm_n_gates, feature_size};
memory::dims enc_bidir_dst_layer_tz
= {src_seq_length_max, batch, 2 * feature_size};
std::vector<float> user_enc_bidir_wei_layer(
enc_bidir_n_layers * 2 * feature_size * lstm_n_gates * feature_size,
1.0f);
std::vector<float> user_enc_bidir_wei_iter(
enc_bidir_n_layers * 2 * feature_size * lstm_n_gates * feature_size,
1.0f);
std::vector<float> user_enc_bidir_bias(
enc_bidir_n_layers * 2 * lstm_n_gates * feature_size, 1.0f);
auto user_enc_bidir_src_layer_md
= dnnl::memory::desc({enc_bidir_src_layer_tz},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::tnc);
auto user_enc_bidir_wei_layer_md
= dnnl::memory::desc({enc_bidir_weights_layer_tz},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldigo);
auto user_enc_bidir_wei_iter_md
= dnnl::memory::desc({enc_bidir_weights_iter_tz},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldigo);
auto user_enc_bidir_bias_md = dnnl::memory::desc({enc_bidir_bias_tz},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldgo);
auto user_enc_bidir_src_layer_memory = dnnl::memory(
user_enc_bidir_src_layer_md, cpu_engine, net_src.data());
auto user_enc_bidir_wei_layer_memory
= dnnl::memory(user_enc_bidir_wei_layer_md, cpu_engine,
user_enc_bidir_wei_layer.data());
auto user_enc_bidir_wei_iter_memory
= dnnl::memory(user_enc_bidir_wei_iter_md, cpu_engine,
user_enc_bidir_wei_iter.data());
auto user_enc_bidir_bias_memory = dnnl::memory(
user_enc_bidir_bias_md, cpu_engine, user_enc_bidir_bias.data());
auto enc_bidir_wei_layer_md = memory::desc({enc_bidir_weights_layer_tz},
memory::data_type::f32, memory::format_tag::any);
auto enc_bidir_wei_iter_md = memory::desc({enc_bidir_weights_iter_tz},
memory::data_type::f32, memory::format_tag::any);
auto enc_bidir_dst_layer_md = memory::desc({enc_bidir_dst_layer_tz},
memory::data_type::f32, memory::format_tag::any);
lstm_forward::desc enc_bidir_layer_desc(prop_kind::forward_inference,
rnn_direction::bidirectional_concat, user_enc_bidir_src_layer_md,
memory::desc(), memory::desc(), enc_bidir_wei_layer_md,
enc_bidir_wei_iter_md, user_enc_bidir_bias_md,
enc_bidir_dst_layer_md, memory::desc(), memory::desc());
auto enc_bidir_prim_desc
= lstm_forward::primitive_desc(enc_bidir_layer_desc, cpu_engine);
auto enc_bidir_wei_layer_memory
= memory(enc_bidir_prim_desc.weights_layer_desc(), cpu_engine);
auto enc_bidir_wei_layer_reorder_pd = reorder::primitive_desc(
user_enc_bidir_wei_layer_memory, enc_bidir_wei_layer_memory);
reorder(enc_bidir_wei_layer_reorder_pd)
.execute(s, user_enc_bidir_wei_layer_memory,
enc_bidir_wei_layer_memory);
auto enc_bidir_wei_iter_memory
= memory(enc_bidir_prim_desc.weights_iter_desc(), cpu_engine);
auto enc_bidir_wei_iter_reorder_pd = reorder::primitive_desc(
user_enc_bidir_wei_iter_memory, enc_bidir_wei_iter_memory);
reorder(enc_bidir_wei_iter_reorder_pd)
.execute(s, user_enc_bidir_wei_iter_memory,
enc_bidir_wei_iter_memory);
auto enc_bidir_dst_layer_memory
= dnnl::memory(enc_bidir_prim_desc.dst_layer_desc(), cpu_engine);
encoder_net.push_back(lstm_forward(enc_bidir_prim_desc));
encoder_net_args.push_back(
{{DNNL_ARG_SRC_LAYER, user_enc_bidir_src_layer_memory},
{DNNL_ARG_WEIGHTS_LAYER, enc_bidir_wei_layer_memory},
{DNNL_ARG_WEIGHTS_ITER, enc_bidir_wei_iter_memory},
{DNNL_ARG_BIAS, user_enc_bidir_bias_memory},
{DNNL_ARG_DST_LAYER, enc_bidir_dst_layer_memory}});
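// Encoder: first unidirectional layer. It consumes the concatenated
// bidirectional output, so its layer weights are 2 * feature_size wide on
// the input side.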
std::vector<float> user_enc_uni_first_wei_layer(
1 * 1 * 2 * feature_size * lstm_n_gates * feature_size, 1.0f);
std::vector<float> user_enc_uni_first_wei_iter(
1 * 1 * feature_size * lstm_n_gates * feature_size, 1.0f);
std::vector<float> user_enc_uni_first_bias(
1 * 1 * lstm_n_gates * feature_size, 1.0f);
memory::dims user_enc_uni_first_wei_layer_dims
= {1, 1, 2 * feature_size, lstm_n_gates, feature_size};
memory::dims user_enc_uni_first_wei_iter_dims
= {1, 1, feature_size, lstm_n_gates, feature_size};
memory::dims user_enc_uni_first_bias_dims
= {1, 1, lstm_n_gates, feature_size};
memory::dims enc_uni_first_dst_layer_dims
= {src_seq_length_max, batch, feature_size};
auto user_enc_uni_first_wei_layer_md
= dnnl::memory::desc({user_enc_uni_first_wei_layer_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldigo);
auto user_enc_uni_first_wei_iter_md
= dnnl::memory::desc({user_enc_uni_first_wei_iter_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldigo);
auto user_enc_uni_first_bias_md
= dnnl::memory::desc({user_enc_uni_first_bias_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldgo);
auto user_enc_uni_first_wei_layer_memory
= dnnl::memory(user_enc_uni_first_wei_layer_md, cpu_engine,
user_enc_uni_first_wei_layer.data());
auto user_enc_uni_first_wei_iter_memory
= dnnl::memory(user_enc_uni_first_wei_iter_md, cpu_engine,
user_enc_uni_first_wei_iter.data());
auto user_enc_uni_first_bias_memory
= dnnl::memory(user_enc_uni_first_bias_md, cpu_engine,
user_enc_uni_first_bias.data());
auto enc_uni_first_wei_layer_md
= memory::desc({user_enc_uni_first_wei_layer_dims},
memory::data_type::f32, memory::format_tag::any);
auto enc_uni_first_wei_iter_md
= memory::desc({user_enc_uni_first_wei_iter_dims},
memory::data_type::f32, memory::format_tag::any);
auto enc_uni_first_dst_layer_md
= memory::desc({enc_uni_first_dst_layer_dims},
memory::data_type::f32, memory::format_tag::any);
lstm_forward::desc enc_uni_first_layer_desc(prop_kind::forward_inference,
rnn_direction::unidirectional_left2right, enc_bidir_dst_layer_md,
memory::desc(), memory::desc(), enc_uni_first_wei_layer_md,
enc_uni_first_wei_iter_md, user_enc_uni_first_bias_md,
enc_uni_first_dst_layer_md, memory::desc(), memory::desc());
auto enc_uni_first_prim_desc = lstm_forward::primitive_desc(
enc_uni_first_layer_desc, cpu_engine);
auto enc_uni_first_wei_layer_memory
= memory(enc_uni_first_prim_desc.weights_layer_desc(), cpu_engine);
auto enc_uni_first_wei_layer_reorder_pd
= reorder::primitive_desc(user_enc_uni_first_wei_layer_memory,
enc_uni_first_wei_layer_memory);
reorder(enc_uni_first_wei_layer_reorder_pd)
.execute(s, user_enc_uni_first_wei_layer_memory,
enc_uni_first_wei_layer_memory);
auto enc_uni_first_wei_iter_memory
= memory(enc_uni_first_prim_desc.weights_iter_desc(), cpu_engine);
auto enc_uni_first_wei_iter_reorder_pd = reorder::primitive_desc(
user_enc_uni_first_wei_iter_memory, enc_uni_first_wei_iter_memory);
reorder(enc_uni_first_wei_iter_reorder_pd)
.execute(s, user_enc_uni_first_wei_iter_memory,
enc_uni_first_wei_iter_memory);
auto enc_uni_first_dst_layer_memory = dnnl::memory(
enc_uni_first_prim_desc.dst_layer_desc(), cpu_engine);
encoder_net.push_back(lstm_forward(enc_uni_first_prim_desc));
encoder_net_args.push_back(
{{DNNL_ARG_SRC_LAYER, enc_bidir_dst_layer_memory},
{DNNL_ARG_WEIGHTS_LAYER, enc_uni_first_wei_layer_memory},
{DNNL_ARG_WEIGHTS_ITER, enc_uni_first_wei_iter_memory},
{DNNL_ARG_BIAS, user_enc_uni_first_bias_memory},
{DNNL_ARG_DST_LAYER, enc_uni_first_dst_layer_memory}});
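// Encoder: the remaining enc_unidir_n_layers - 1 unidirectional layers,
// created as one multi-layer LSTM primitive.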
std::vector<float> user_enc_uni_wei_layer((enc_unidir_n_layers - 1) * 1
* feature_size * lstm_n_gates * feature_size,
1.0f);
std::vector<float> user_enc_uni_wei_iter((enc_unidir_n_layers - 1) * 1
* feature_size * lstm_n_gates * feature_size,
1.0f);
std::vector<float> user_enc_uni_bias(
(enc_unidir_n_layers - 1) * 1 * lstm_n_gates * feature_size, 1.0f);
memory::dims user_enc_uni_wei_layer_dims = {(enc_unidir_n_layers - 1), 1,
feature_size, lstm_n_gates, feature_size};
memory::dims user_enc_uni_wei_iter_dims = {(enc_unidir_n_layers - 1), 1,
feature_size, lstm_n_gates, feature_size};
memory::dims user_enc_uni_bias_dims
= {(enc_unidir_n_layers - 1), 1, lstm_n_gates, feature_size};
memory::dims enc_dst_layer_dims = {src_seq_length_max, batch, feature_size};
auto user_enc_uni_wei_layer_md
= dnnl::memory::desc({user_enc_uni_wei_layer_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldigo);
auto user_enc_uni_wei_iter_md
= dnnl::memory::desc({user_enc_uni_wei_iter_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldigo);
auto user_enc_uni_bias_md = dnnl::memory::desc({user_enc_uni_bias_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldgo);
auto user_enc_uni_wei_layer_memory = dnnl::memory(user_enc_uni_wei_layer_md,
cpu_engine, user_enc_uni_wei_layer.data());
auto user_enc_uni_wei_iter_memory = dnnl::memory(
user_enc_uni_wei_iter_md, cpu_engine, user_enc_uni_wei_iter.data());
auto user_enc_uni_bias_memory = dnnl::memory(
user_enc_uni_bias_md, cpu_engine, user_enc_uni_bias.data());
auto enc_uni_wei_layer_md = memory::desc({user_enc_uni_wei_layer_dims},
memory::data_type::f32, memory::format_tag::any);
auto enc_uni_wei_iter_md = memory::desc({user_enc_uni_wei_iter_dims},
memory::data_type::f32, memory::format_tag::any);
auto enc_uni_dst_layer_md = memory::desc({enc_dst_layer_dims},
memory::data_type::f32, memory::format_tag::any);
lstm_forward::desc enc_uni_layer_desc(prop_kind::forward_inference,
rnn_direction::unidirectional_left2right, enc_uni_first_dst_layer_md,
memory::desc(), memory::desc(),
enc_uni_wei_layer_md, enc_uni_wei_iter_md, user_enc_uni_bias_md,
enc_uni_dst_layer_md, memory::desc(), memory::desc());
auto enc_uni_prim_desc = lstm_forward::primitive_desc(
enc_uni_layer_desc, cpu_engine);
auto enc_uni_wei_layer_memory
= memory(enc_uni_prim_desc.weights_layer_desc(), cpu_engine);
auto enc_uni_wei_layer_reorder_pd = reorder::primitive_desc(
user_enc_uni_wei_layer_memory, enc_uni_wei_layer_memory);
reorder(enc_uni_wei_layer_reorder_pd)
.execute(s, user_enc_uni_wei_layer_memory, enc_uni_wei_layer_memory);
auto enc_uni_wei_iter_memory
= memory(enc_uni_prim_desc.weights_iter_desc(), cpu_engine);
auto enc_uni_wei_iter_reorder_pd = reorder::primitive_desc(
user_enc_uni_wei_iter_memory, enc_uni_wei_iter_memory);
reorder(enc_uni_wei_iter_reorder_pd)
.execute(s, user_enc_uni_wei_iter_memory, enc_uni_wei_iter_memory);
auto enc_dst_layer_memory
= dnnl::memory(enc_uni_prim_desc.dst_layer_desc(), cpu_engine);
encoder_net.push_back(lstm_forward(enc_uni_prim_desc));
encoder_net_args.push_back(
{{DNNL_ARG_SRC_LAYER, enc_uni_first_dst_layer_memory},
{DNNL_ARG_WEIGHTS_LAYER, enc_uni_wei_layer_memory},
{DNNL_ARG_WEIGHTS_ITER, enc_uni_wei_iter_memory},
{DNNL_ARG_BIAS, user_enc_uni_bias_memory},
{DNNL_ARG_DST_LAYER, enc_dst_layer_memory}});
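// Decoder with attention. Each src_iter/dst_iter entry is twice
// feature_size wide: the extra half carries the attention context vector,
// which the LSTM consumes as part of its recurrent input.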
std::vector<float> user_dec_wei_layer(
dec_n_layers * 1 * feature_size * lstm_n_gates * feature_size,
1.0f);
std::vector<float> user_dec_wei_iter(dec_n_layers * 1
* (feature_size + feature_size) * lstm_n_gates
* feature_size,
1.0f);
std::vector<float> user_dec_bias(
dec_n_layers * 1 * lstm_n_gates * feature_size, 1.0f);
std::vector<float> user_dec_dst(
tgt_seq_length_max * batch * feature_size, 1.0f);
std::vector<float> user_weights_attention_src_layer(
feature_size * feature_size, 1.0f);
std::vector<float> user_weights_annotation(
feature_size * feature_size, 1.0f);
std::vector<float> user_weights_alignments(feature_size, 1.0f);
memory::dims user_dec_wei_layer_dims
= {dec_n_layers, 1, feature_size, lstm_n_gates, feature_size};
memory::dims user_dec_wei_iter_dims = {dec_n_layers, 1,
feature_size + feature_size, lstm_n_gates, feature_size};
memory::dims user_dec_bias_dims
= {dec_n_layers, 1, lstm_n_gates, feature_size};
memory::dims dec_src_layer_dims = {1, batch, feature_size};
memory::dims dec_dst_layer_dims = {1, batch, feature_size};
memory::dims dec_dst_iter_c_dims = {dec_n_layers, 1, batch, feature_size};
// src_iter and dst_iter share one memory: each entry is wide enough to hold
// the hidden state plus the attention context vector.
memory::dims dec_dst_iter_dims
= {dec_n_layers, 1, batch, feature_size + feature_size};
memory::dims dec_dst_iter_noctx_dims
= {dec_n_layers, 1, batch, feature_size};
auto user_dec_wei_layer_md = dnnl::memory::desc({user_dec_wei_layer_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldigo);
auto user_dec_wei_iter_md = dnnl::memory::desc({user_dec_wei_iter_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldigo);
auto user_dec_bias_md = dnnl::memory::desc({user_dec_bias_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldgo);
auto dec_src_layer_md = dnnl::memory::desc({dec_src_layer_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::tnc);
auto dec_dst_layer_md = dnnl::memory::desc({dec_dst_layer_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::tnc);
auto dec_dst_iter_md = dnnl::memory::desc({dec_dst_iter_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldnc);
auto dec_dst_iter_c_md = dnnl::memory::desc({dec_dst_iter_c_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::ldnc);
auto user_dec_wei_layer_memory = dnnl::memory(
user_dec_wei_layer_md, cpu_engine, user_dec_wei_layer.data());
auto user_dec_wei_iter_memory = dnnl::memory(
user_dec_wei_iter_md, cpu_engine, user_dec_wei_iter.data());
auto user_dec_bias_memory
= dnnl::memory(user_dec_bias_md, cpu_engine, user_dec_bias.data());
auto user_dec_dst_layer_memory
= dnnl::memory(dec_dst_layer_md, cpu_engine, user_dec_dst.data());
auto dec_src_layer_memory = dnnl::memory(dec_src_layer_md, cpu_engine);
auto dec_dst_iter_c_memory = dnnl::memory(dec_dst_iter_c_md, cpu_engine);
auto dec_dst_iter_memory = dnnl::memory(dec_dst_iter_md, cpu_engine);
// The primitive writes its dst_iter through a sub-memory view that skips the
// context part of each entry, leaving the context untouched.
auto dec_dst_iter_noctx_md = dec_dst_iter_md.submemory_desc(
dec_dst_iter_noctx_dims, {0, 0, 0, 0});
auto dec_wei_layer_md = dnnl::memory::desc({user_dec_wei_layer_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::any);
auto dec_wei_iter_md = dnnl::memory::desc({user_dec_wei_iter_dims},
dnnl::memory::data_type::f32, dnnl::memory::format_tag::any);
lstm_forward::desc dec_ctx_desc(prop_kind::forward_inference,
rnn_direction::unidirectional_left2right, dec_src_layer_md,
dec_dst_iter_md, dec_dst_iter_c_md, dec_wei_layer_md,
dec_wei_iter_md, user_dec_bias_md, dec_dst_layer_md,
dec_dst_iter_noctx_md, dec_dst_iter_c_md);
auto dec_ctx_prim_desc
= lstm_forward::primitive_desc(dec_ctx_desc, cpu_engine);
auto dec_wei_layer_memory
= memory(dec_ctx_prim_desc.weights_layer_desc(), cpu_engine);
auto dec_wei_layer_reorder_pd = reorder::primitive_desc(
user_dec_wei_layer_memory, dec_wei_layer_memory);
reorder(dec_wei_layer_reorder_pd)
.execute(s, user_dec_wei_layer_memory, dec_wei_layer_memory);
auto dec_wei_iter_memory
= memory(dec_ctx_prim_desc.weights_iter_desc(), cpu_engine);
auto dec_wei_iter_reorder_pd = reorder::primitive_desc(
user_dec_wei_iter_memory, dec_wei_iter_memory);
reorder(dec_wei_iter_reorder_pd)
.execute(s, user_dec_wei_iter_memory, dec_wei_iter_memory);
decoder_net.push_back(lstm_forward(dec_ctx_prim_desc));
decoder_net_args.push_back({{DNNL_ARG_SRC_LAYER, dec_src_layer_memory},
{DNNL_ARG_SRC_ITER, dec_dst_iter_memory},
{DNNL_ARG_SRC_ITER_C, dec_dst_iter_c_memory},
{DNNL_ARG_WEIGHTS_LAYER, dec_wei_layer_memory},
{DNNL_ARG_WEIGHTS_ITER, dec_wei_iter_memory},
{DNNL_ARG_BIAS, user_dec_bias_memory},
{DNNL_ARG_DST_LAYER, user_dec_dst_layer_memory},
{DNNL_ARG_DST_ITER, dec_dst_iter_memory},
{DNNL_ARG_DST_ITER_C, dec_dst_iter_c_memory}});
std::vector<float> weighted_annotations(
src_seq_length_max * batch * feature_size, 1.0f);
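// Runs one full inference: the encoder executes once, then the decoder
// iterates over the target positions, recomputing attention and feeding its
// own output back as the next input.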
auto execute = [&]() {
assert(encoder_net.size() == encoder_net_args.size()
&& "something is missing");
for (size_t p = 0; p < encoder_net.size(); ++p)
encoder_net.at(p).execute(s, encoder_net_args.at(p));
compute_weighted_annotations(weighted_annotations.data(),
src_seq_length_max, batch, feature_size,
user_weights_annotation.data(),
(float *)enc_dst_layer_memory.get_data_handle());
// Initialize the decoder input with the (all-zero) embedding of the
// end-of-sequence token.
std::memset(dec_src_layer_memory.get_data_handle(), 0,
dec_src_layer_memory.get_desc().get_size());
for (dim_t i = 0; i < tgt_seq_length_max; i++) {
float *src_att_layer_handle
= (float *)dec_src_layer_memory.get_data_handle();
float *src_att_iter_handle
= (float *)dec_dst_iter_memory.get_data_handle();
compute_attention(src_att_iter_handle, src_seq_length_max, batch,
feature_size, user_weights_attention_src_layer.data(),
src_att_layer_handle,
(float *)enc_bidir_dst_layer_memory.get_data_handle(),
weighted_annotations.data(),
user_weights_alignments.data());
copy_context(
src_att_iter_handle, dec_n_layers, batch, feature_size);
assert(decoder_net.size() == decoder_net_args.size()
&& "something is missing");
for (size_t p = 0; p < decoder_net.size(); ++p)
decoder_net.at(p).execute(s, decoder_net_args.at(p));
// Assume an identity output embedding: the decoder output becomes the
// next iteration's input.
auto dst_layer_handle
= (float *)user_dec_dst_layer_memory.get_data_handle();
std::copy(dst_layer_handle, dst_layer_handle + batch * feature_size,
(float *)dec_src_layer_memory.get_data_handle());
}
};
std::cout << "Parameters:" << std::endl
<< " batch = " << batch << std::endl
<< " feature size = " << feature_size << std::endl
<< " maximum source sequence length = " << src_seq_length_max
<< std::endl
<< " maximum target sequence length = " << tgt_seq_length_max
<< std::endl
<< " number of layers of the bidirectional encoder = "
<< enc_bidir_n_layers << std::endl
<< " number of layers of the unidirectional encoder = "
<< enc_unidir_n_layers << std::endl
<< " number of layers of the decoder = " << dec_n_layers
<< std::endl;
execute();
s.wait();
}
int main(int argc, char **argv) {
try {
simple_net();
std::cout << "Simple net example passed!" << std::endl;
} catch (dnnl::error &e) {
std::cerr << "oneDNN error: status = " << e.status
<< ", message: " << e.message << std::endl;
return 1;
}
return 0;
}