/*
 * This C API example demonstrates how to build an AlexNet neural network
 * topology for forward-pass inference. The example implements the AlexNet
 * layers as numbered primitives (for example, conv1, pool1, conv2).
 */
#define _POSIX_C_SOURCE 200112L
#include <stdlib.h>
#include "example_utils.h"
/*
 * Compile-time sizes shared by the size arrays and buffer allocations below.
 */

/* Leading entry of the convolution source/destination and pooling
 * destination size arrays; also scales the net_src/net_dst buffers. */
#define BATCH 8
/* Second entry of the convolution source and weights size arrays
 * (presumably the input-channel count). */
#define IC 3
/* First entry of the weights size array and second entry of the convolution
 * and pooling destination size arrays (presumably the output-channel count). */
#define OC 96
/* Last two entries of the convolution source size array. */
#define CONV_IH 227
#define CONV_IW 227
/* Last two entries of the convolution destination size array. */
#define CONV_OH 55
#define CONV_OW 55
/* Used for both entries of conv_strides. */
#define CONV_STRIDE 4
/* Used for both entries of conv_padding. */
#define CONV_PAD 0
/* Last two entries of the pooling destination size array. */
#define POOL_OH 27
#define POOL_OW 27
/* Used for both entries of pool_strides. */
#define POOL_STRIDE 2
/* Used for both entries of pool_padding. */
#define POOL_PAD 0
/*
 * Multiplies together the first `size` entries of `arr`.
 *
 * arr:  array of dimension values to multiply.
 * size: number of entries of `arr` to include.
 *
 * Returns the accumulated product as a size_t; the result is 1 when `size`
 * is 0 (no entry of `arr` is read in that case).
 */
static size_t product(dnnl_dim_t *arr, size_t size) {
    size_t result = 1;
    size_t idx = 0;

    while (idx < size) {
        result *= arr[idx];
        ++idx;
    }

    return result;
}
typedef struct {
int nargs;
} args_t;
static void prepare_arg_node(args_t *node, int nargs) {
node->nargs = nargs;
}
static void free_arg_node(args_t *node) {
free(node->args);
}
}
static void init_data_memory(uint32_t dim,
const dnnl_dim_t *dims,
&user_md, dim, dims,
dnnl_f32, user_tag));
write_to_dnnl_memory(data, *memory);
}
int dir_is_user_to_prim,
uint32_t *net_index,
DNNL_MEMORY_ALLOCATE));
if (dir_is_user_to_prim) {
user_memory_md, user_mem_engine, prim_memory_md,
prim_engine, NULL));
} else {
prim_memory_md, prim_engine, user_memory_md,
user_mem_engine, NULL));
}
net[*net_index] = *reorder;
prepare_arg_node(&net_args[*net_index], 2);
set_arg(&net_args[*net_index].args[0], DNNL_ARG_FROM,
dir_is_user_to_prim ? *user_memory : *prim_memory);
set_arg(&net_args[*net_index].args[1], DNNL_ARG_TO,
dir_is_user_to_prim ? *prim_memory : *user_memory);
(*net_index)++;
} else {
*prim_memory = NULL;
*reorder = NULL;
}
}
uint32_t n = 0;
args_t net_args[10];
float *net_src
= (float *)malloc(BATCH * IC * CONV_IH * CONV_IW * sizeof(float));
float *net_dst
= (float *)malloc(BATCH * OC * POOL_OH * POOL_OW * sizeof(float));
dnnl_dim_t conv_user_src_sizes[4] = {BATCH, IC, CONV_IH, CONV_IW};
dnnl_dim_t conv_user_weights_sizes[4] = {OC, IC, 11, 11};
dnnl_dim_t conv_user_dst_sizes[4] = {BATCH, OC, CONV_OH, CONV_OW};
dnnl_dim_t conv_strides[2] = {CONV_STRIDE, CONV_STRIDE};
dnnl_dim_t conv_padding[2] = {CONV_PAD, CONV_PAD};
float *conv_src = net_src;
float *conv_weights = (float *)malloc(
product(conv_user_weights_sizes, 4) * sizeof(float));
float *conv_bias
= (float *)malloc(product(conv_bias_sizes, 1) * sizeof(float));
conv_user_bias_memory;
conv_src, &conv_user_src_memory);
conv_weights, &conv_user_weights_memory);
init_data_memory(1, conv_bias_sizes,
dnnl_x,
dnnl_f32, engine, conv_bias,
&conv_user_bias_memory);
&conv_bias_md, &conv_dst_md, conv_strides, conv_padding,
conv_padding));
&conv_pd, &conv_any_desc, NULL, engine, NULL));
dnnl_memory_t conv_internal_src_memory, conv_internal_weights_memory,
conv_internal_dst_memory;
&conv_internal_dst_memory, dst_md, engine, DNNL_MEMORY_ALLOCATE));
CHECK(prepare_reorder(&conv_user_src_memory, src_md, engine, 1,
&conv_internal_src_memory, &conv_reorder_src, &n, net, net_args));
CHECK(prepare_reorder(&conv_user_weights_memory, weights_md, engine, 1,
&conv_internal_weights_memory, &conv_reorder_weights, &n, net,
net_args));
? conv_internal_src_memory
: conv_user_src_memory;
? conv_internal_weights_memory
: conv_user_weights_memory;
net[n] = conv;
prepare_arg_node(&net_args[n], 4);
set_arg(&net_args[n].args[0], DNNL_ARG_SRC, conv_src_memory);
set_arg(&net_args[n].args[1], DNNL_ARG_WEIGHTS, conv_weights_memory);
set_arg(&net_args[n].args[2], DNNL_ARG_BIAS, conv_user_bias_memory);
set_arg(&net_args[n].args[3], DNNL_ARG_DST, conv_internal_dst_memory);
n++;
// Slope applied to negative inputs by the ReLU primitive. It must be 0 for a
// plain ReLU; a slope of 1.0f would make the activation an identity.
float negative_slope = 0.0f;
&relu_dst_memory, relu_dst_md, engine, DNNL_MEMORY_ALLOCATE));
net[n] = relu;
prepare_arg_node(&net_args[n], 2);
set_arg(&net_args[n].args[0], DNNL_ARG_SRC, conv_internal_dst_memory);
set_arg(&net_args[n].args[1], DNNL_ARG_DST, relu_dst_memory);
n++;
uint32_t local_size = 5;
float alpha = 0.0001f;
float beta = 0.75f;
float k = 1.0f;
&lrn_dst_memory, lrn_dst_md, engine, DNNL_MEMORY_ALLOCATE));
&lrn_ws_memory, lrn_ws_md, engine, DNNL_MEMORY_ALLOCATE));
net[n] = lrn;
prepare_arg_node(&net_args[n], 3);
set_arg(&net_args[n].args[0], DNNL_ARG_SRC, relu_dst_memory);
set_arg(&net_args[n].args[1], DNNL_ARG_DST, lrn_dst_memory);
set_arg(&net_args[n].args[2], DNNL_ARG_WORKSPACE, lrn_ws_memory);
n++;
dnnl_dim_t pool_dst_sizes[4] = {BATCH, OC, POOL_OH, POOL_OW};
dnnl_dim_t pool_strides[2] = {POOL_STRIDE, POOL_STRIDE};
dnnl_dim_t pool_padding[2] = {POOL_PAD, POOL_PAD};
&pool_user_dst_memory);
pool_kernel, pool_padding, pool_padding));
&pool_ws_memory, pool_ws_md, engine, DNNL_MEMORY_ALLOCATE));
n += 1;
CHECK(prepare_reorder(&pool_user_dst_memory, pool_dst_md, engine, 0,
&pool_internal_dst_memory, &pool_reorder_dst, &n, net, net_args));
n -= pool_reorder_dst ? 2 : 1;
pool_dst_memory = pool_internal_dst_memory ? pool_internal_dst_memory
: pool_user_dst_memory;
net[n] = pool;
prepare_arg_node(&net_args[n], 3);
set_arg(&net_args[n].args[0], DNNL_ARG_SRC, lrn_dst_memory);
set_arg(&net_args[n].args[1], DNNL_ARG_DST, pool_dst_memory);
set_arg(&net_args[n].args[2], DNNL_ARG_WORKSPACE, pool_ws_memory);
n++;
if (pool_reorder_dst) n += 1;
for (uint32_t i = 0; i < n; ++i) {
net[i], stream, net_args[i].nargs, net_args[i].args));
}
for (uint32_t i = 0; i < n; ++i)
free_arg_node(&net_args[i]);
free(net_src);
free(net_dst);
free(conv_weights);
free(conv_bias);
}
/*
 * Program entry point.
 *
 * Passes the result of parse_engine_kind(argc, argv) to simple_net(), prints
 * a one-line pass/fail message depending on whether the returned status
 * equals dnnl_success, and returns that status as the process exit code.
 */
int main(int argc, char **argv) {
    dnnl_status_t result = simple_net(parse_engine_kind(argc, argv));

    const char *message = "Simple net f32 inference failed!";
    if (result == dnnl_success) {
        message = "Simple net f32 inference passed!";
    }

    printf("%s\n", message);
    return result;
}