This C++ API example demonstrates the programming flow for reordering memory between CPU and GPU engines.
Annotated version: Reorder between CPU and GPU engines
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <vector>
#include "example_utils.hpp"
// Multiply every entry of `dims` together, seeding the fold with 1 so that an
// empty range yields 1.
// NOTE(review): the signature line of this function is not visible in this
// listing; `dims` is presumably its parameter — confirm against the full file.
return std::accumulate(dims.begin(), dims.end(), static_cast<memory::dim>(1),
        std::multiplies<memory::dim>());
}
std::vector<float> array(product(adims));
for (size_t e = 0; e < array.size(); ++e) {
array[e] = e % 7 ? 1.0f : -1.0f;
}
write_to_dnnl_memory(array.data(), mem);
}
// Count how many values in the buffer obtained from `mem` are negative.
// NOTE(review): the signature line of this function is not visible in this
// listing; `mem` and `adims` are presumably its parameters — confirm.
int negs = 0;
// One float per element described by `adims`.
std::vector<float> array(product(adims));
// read_from_dnnl_memory receives the buffer and `mem`; presumably it copies
// the contents of `mem` into the buffer (helper not visible here — confirm).
read_from_dnnl_memory(array.data(), mem);
// Walk the entire buffer. The bound has to be the element count
// (array.size()), not adims.size(), which is only the number of dimensions:
// using the latter inspected just the first few values and could index past
// the end of `array` when the element count is smaller than the rank.
for (const float value : array) {
    negs += value < 0.0f;
}
return negs;
}
// Tutorial driver: fills m_cpu, executes r1 with (m_cpu, m_gpu) and r2 with
// (m_gpu, m_cpu) on stream_gpu, waits on the stream, and throws
// std::logic_error if find_negative() reports any negative value in m_cpu.
// NOTE(review): this listing looks truncated — cpu_engine, gpu_engine, tz, r1
// and r2 are used without a visible declaration, the m_cpu / m_gpu
// initializers are incomplete, and no ReLU step is visible even though the
// error message refers to one. Restore the missing statements from the
// upstream example before relying on this function.
void cross_engine_reorder_tutorial() {
// Stream built from gpu_engine; both execute() calls below are issued on it.
auto stream_gpu =
stream(gpu_engine);
// NOTE(review): the two declarations below lack their `= ...(` part; only the
// trailing engine argument survived.
auto m_cpu
cpu_engine);
auto m_gpu
gpu_engine);
// Populate m_cpu through fill(), sized by tz.
fill(m_cpu, tz);
// r1 receives m_cpu then m_gpu; r2 receives them in the opposite order —
// presumably a reorder to the GPU memory and back (the definitions of r1/r2
// are not visible; confirm).
r1.execute(stream_gpu, m_cpu, m_gpu);
r2.execute(stream_gpu, m_gpu, m_cpu);
// Presumably blocks until the work queued on stream_gpu has finished —
// confirm against the stream API.
stream_gpu.wait();
// Any non-zero count from find_negative() is treated as a failure.
if (find_negative(m_cpu, tz) != 0)
throw std::logic_error(
"Unexpected output, find a negative value after the ReLU "
"execution.");
}
// Program entry point.
// NOTE(review): the statement below has an unmatched ')' and no call syntax —
// the beginning of the expression that receives cross_engine_reorder_tutorial
// (and any return statement) appears to be missing from this listing. Restore
// it from the upstream example; argc/argv are not referenced in the visible
// lines.
int main(int argc, char **argv) {
cross_engine_reorder_tutorial);
}