/* This C API example demonstrates the programming flow for reordering memory between CPU and GPU engines. */
#include <stdio.h>
#include <stdlib.h>
#include "example_utils.h"
/*
 * Multiplies together the first n_dims entries of dims and returns the
 * result as a size_t. Each entry is converted to size_t before it is
 * folded into the running product. When n_dims is zero or negative no
 * entry is read and the result is 1.
 */
size_t product(
        int n_dims,
        const dnnl_dim_t dims[]) {
    size_t total = 1;
    int axis = 0;

    while (axis < n_dims) {
        total *= (size_t)dims[axis];
        ++axis;
    }

    return total;
}
/* NOTE(review): the signature line of this definition is not visible here.
 * It is presumably fill(mem, n_dims, dims), judging by the call
 * fill(m_cpu, 4, tz) in cross_engine_reorder() -- confirm against the full
 * source. The body builds a float pattern and writes it into mem. */
/* Number of floats to generate: product of the n_dims entries of dims. */
const size_t n_elems = product(n_dims, dims);
/* Temporary heap buffer for the pattern.
 * NOTE(review): the malloc() result is not checked before it is written. */
float *array = (float *)malloc(n_elems * sizeof(float));
for (size_t e = 0; e < n_elems; ++e) {
/* Indices divisible by 7 (including 0) get -1.0f; all others get 1.0f. */
array[e] = e % 7 ? 1.0f : -1.0f;
}
/* Pass the pattern to mem through the helper, then release the buffer. */
write_to_dnnl_memory(array, mem);
free(array);
}
/* NOTE(review): the signature line of this definition is not visible here.
 * It is presumably find_negative(mem, n_dims, dims), judging by the calls
 * find_negative(m_cpu, 4, tz) in cross_engine_reorder() -- confirm against
 * the full source. The body returns how many elements read from mem are
 * strictly less than zero. */
/* Number of floats to inspect: product of the n_dims entries of dims. */
const size_t n_elems = product(n_dims, dims);
/* Temporary heap buffer that receives the contents of mem.
 * NOTE(review): the malloc() result is not checked before it is used. */
float *array = (float *)malloc(n_elems * sizeof(float));
read_from_dnnl_memory(array, mem);
int negs = 0;
for (size_t e = 0; e < n_elems; ++e) {
/* The comparison evaluates to 1 for a negative value and 0 otherwise,
 * so negs accumulates the count of negative elements. */
negs += array[e] < 0.0f;
}
free(array);
return negs;
}
/*
 * Driver for the example.
 *
 * NOTE(review): several statements in this body are only partially visible.
 * The lines ending in "));" are the tails of calls whose beginnings are not
 * shown, and the declarations of m_cpu, m_gpu, tz, the engines and the
 * descriptors are not shown either. The comments below describe only what
 * can be read from the visible text; confirm against the full source.
 */
void cross_engine_reorder() {
/* Tail of a call receiving &m_cpu, &m_cpu_md, engine_cpu and
 * DNNL_MEMORY_ALLOCATE -- presumably creates the CPU-side memory object. */
&m_cpu, &m_cpu_md, engine_cpu, DNNL_MEMORY_ALLOCATE));
/* Same argument shape for the GPU side (m_gpu, m_gpu_md, engine_gpu). */
&m_gpu, &m_gpu_md, engine_gpu, DNNL_MEMORY_ALLOCATE));
/* Populate m_cpu, passing 4 as the dimension count and tz as the sizes. */
fill(m_cpu, 4, tz);
/* Sanity check on the input: report an error if the freshly filled data
 * contains no negative values. */
if (find_negative(m_cpu, 4, tz) == 0)
COMPLAIN_EXAMPLE_ERROR_AND_EXIT(
"%s", "incorrect data fill, no negative values found");
/* Tail of a call producing r1_pd from (m_cpu_md, engine_cpu) to
 * (m_gpu_md, engine_gpu) -- presumably the CPU-to-GPU reorder descriptor. */
&r1_pd, &m_cpu_md, engine_cpu, &m_gpu_md, engine_gpu, NULL));
/* Tail of a call producing relu_pd from relu_d on engine_gpu -- presumably
 * the ReLU primitive descriptor. */
&relu_pd, &relu_d, NULL, engine_gpu, NULL));
/* Tail of a call producing r2_pd from (m_gpu_md, engine_gpu) to
 * (m_cpu_md, engine_cpu) -- presumably the GPU-to-CPU reorder descriptor. */
&r2_pd, &m_gpu_md, engine_gpu, &m_cpu_md, engine_cpu, NULL));
/* Final check: m_cpu must no longer contain any negative value; otherwise
 * report that negatives were found after ReLU was applied. */
if (find_negative(m_cpu, 4, tz) != 0)
COMPLAIN_EXAMPLE_ERROR_AND_EXIT(
"%s", "found negative values after ReLU applied");
}
/*
 * Program entry point: runs the cross-engine reorder example, prints a
 * success line once it returns, and reports successful termination.
 */
int main() {
    cross_engine_reorder();

    /* Reached only when the example function returned to the caller. */
    fputs("Example passed on CPU/GPU.\n", stdout);

    return EXIT_SUCCESS;
}