/* This C API example demonstrates the programming flow for reordering memory between CPU and GPU engines. */
#include <stdio.h>
#include <stdlib.h>
/*
 * CHECK(f): evaluates the expression `f` exactly once and treats its value
 * as a dnnl_status_t. When the value differs from dnnl_success, prints the
 * source file, line, the text of the expression and the numeric status, then
 * terminates the process with exit code 2. Otherwise it has no effect.
 *
 * The temporary is named `check_status_` so that an argument mentioning a
 * caller variable called `s` is not captured by the macro's own local, and
 * `f` is parenthesized so that any expression can be passed safely. The
 * status is cast to int to match the %d conversion exactly.
 */
#define CHECK(f) \
    do { \
        dnnl_status_t check_status_ = (f); \
        if (check_status_ != dnnl_success) { \
            printf("[%s:%d] error: %s returns %d\n", __FILE__, __LINE__, #f, \
                    (int)check_status_); \
            exit(2); \
        } \
    } while (0)
/*
 * Multiplies together the first n_dims entries of dims and returns the
 * result as a size_t. Returns 1 when n_dims is zero or negative, because
 * the loop body never runs in that case.
 */
size_t product(
        int n_dims,
        const dnnl_dim_t dims[]) {
    size_t total = 1;
    int axis = 0;
    while (axis < n_dims) {
        total = total * (size_t)dims[axis];
        axis++;
    }
    return total;
}
/*
 * NOTE(review): the function header that owns this body is not visible in
 * this view; it is presumably the `fill` routine called from doit() -- confirm.
 * The visible statements write a repeating pattern over n_elems floats:
 * -1.0f at every index that is a multiple of 7 (including 0), 1.0f elsewhere.
 */
float *array;
/* NOTE(review): no visible statement points `array` at storage before the
 * loop writes through it; that step is presumably elided here -- confirm. */
const size_t n_elems = product(n_dims, dims);
for (size_t e = 0; e < n_elems; ++e) {
/* Non-zero remainder selects 1.0f; a zero remainder selects -1.0f. */
array[e] = e % 7 ? 1.0f : -1.0f;
}
}
/*
 * NOTE(review): the function header that owns this body is not visible in
 * this view; it is presumably the `find_negative` routine called from
 * doit() -- confirm.
 * The visible statements count how many of the n_elems floats are strictly
 * below 0.0f and return that count.
 */
int negs = 0;
float *array;
/* NOTE(review): no visible statement points `array` at storage before the
 * loop reads through it; that step is presumably elided here -- confirm. */
const size_t n_elems = product(n_dims, dims);
for (size_t e = 0; e < n_elems; ++e) {
/* The comparison yields 1 for a negative element and 0 otherwise. */
negs += array[e] < 0.0f;
}
return negs;
}
/*
 * Runs the example body. Returns 0 on success, or 2 when the final
 * find_negative() check still reports negative values. Also terminates the
 * process with exit(2) if the data written by fill() contains no negatives.
 *
 * NOTE(review): several statements are truncated in this view -- lines that
 * begin with `&...` or `=` are the tails of calls/declarations whose heads
 * are not visible. The comments below describe only the visible tokens.
 */
int doit() {
/* Tail of a call receiving &m_cpu, &m_cpu_md, engine_cpu and
 * DNNL_MEMORY_ALLOCATE -- presumably creation of the CPU memory; confirm. */
&m_cpu, &m_cpu_md, engine_cpu, DNNL_MEMORY_ALLOCATE));
/* Same argument shape for the GPU side (m_gpu, m_gpu_md, engine_gpu). */
&m_gpu, &m_gpu_md, engine_gpu, DNNL_MEMORY_ALLOCATE));
fill(m_cpu, 4, tz);
/* Sanity check: the freshly filled data must contain at least one negative,
 * otherwise the final check at the end of this function would be vacuous. */
if (find_negative(m_cpu, 4, tz) == 0) {
printf("Please fix filling of data\n");
exit(2);
}
/* Tail of a call producing r1_pd from (m_cpu_md, engine_cpu) to
 * (m_gpu_md, engine_gpu) -- presumably a CPU-to-GPU reorder descriptor; confirm. */
&r1_pd, &m_cpu_md, engine_cpu, &m_gpu_md, engine_gpu, NULL));
/* Tail of a call producing relu_pd from relu_d on engine_gpu. */
&relu_pd, &relu_d, NULL, engine_gpu, NULL));
/* Tail of a call producing r2_pd with the engines in the opposite order
 * (GPU side first, CPU side second). */
&r2_pd, &m_gpu_md, engine_gpu, &m_cpu_md, engine_cpu, NULL));
/* Arguments for the first reorder: read from m_cpu, write to m_gpu. */
dnnl_exec_arg_t r1_args[] = {{DNNL_ARG_FROM, m_cpu}, {DNNL_ARG_TO, m_gpu}};
/* Tail of a declaration whose head is not visible; source and destination
 * are both m_gpu, i.e. the same memory object is used for input and output. */
= {{DNNL_ARG_SRC, m_gpu}, {DNNL_ARG_DST, m_gpu}};
/* Arguments for the second reorder: read from m_gpu, write back to m_cpu. */
dnnl_exec_arg_t r2_args[] = {{DNNL_ARG_FROM, m_gpu}, {DNNL_ARG_TO, m_cpu}};
/* Any negative value remaining in m_cpu is reported as failure (2). */
if (find_negative(m_cpu, 4, tz) != 0) return 2;
return 0;
}
/*
 * Program entry point: runs doit(), prints "passed" when it returns zero and
 * "failed" otherwise, and uses doit()'s return value as the exit status.
 */
int main() {
    const int status = doit();
    if (status == 0) {
        printf("passed\n");
    } else {
        printf("failed\n");
    }
    return status;
}