9#include <sycl/sycl.hpp>
13template <
typename Selector> sycl::device select_device(Selector &&selector) {
17 d = sycl::device(std::forward<Selector>(selector));
18 std::cout <<
"Running on device \""
19 << d.get_info<sycl::info::device::name>() <<
"\"" << std::endl;
20 }
catch (sycl::exception
const &e) {
21 std::cout <<
"Cannot select an accelerator\n" << e.what() <<
"\n";
22 std::cout <<
"Using a CPU device\n";
23 d = sycl::device(sycl::cpu_selector_v);
// Walks every platform returned by sycl::platform::get_platforms() and, for
// each one, every device returned by its get_devices(), streaming the platform
// name and the device names to std::cout.
//
// NOTE(review): this listing carries stray leading line numbers, the two
// stream statements below have no visible terminator, and the closing braces
// are absent — restore them from the upstream file before compiling.
28inline void list_devices() {
29 auto platforms = sycl::platform::get_platforms();
31 for (
auto &platform : platforms) {
// Platform header: the literal "Platform: " followed by the platform name.
32 std::cout <<
"Platform: " << platform.get_info<sycl::info::platform::name>()
35 auto devices = platform.get_devices();
36 for (
auto &device : devices) {
// One entry per device of the current platform.
37 std::cout <<
" Device: " << device.get_info<sycl::info::device::name>()
// Streams one "Device <id>: <name>" line to std::cout for each device in
// `devices`; each line ends with std::endl, so the stream is flushed per
// device.
//
// NOTE(review): `device_id` starts at 0 but no update to it is visible in this
// listing, and the closing braces are absent — presumably an increment per
// iteration was dropped; confirm against the upstream file.
43inline void print_device_details(std::span<sycl::device> devices) {
44 std::size_t device_id = 0;
45 for (
auto &&device : devices) {
46 std::cout <<
"Device " << device_id <<
": "
47 << device.get_info<sycl::info::device::name>() << std::endl;
52template <
typename Selector>
void list_devices(Selector &&selector) {
53 sycl::platform p(std::forward<Selector>(selector));
54 auto devices = p.get_devices();
56 printf(
"--Platform Info-----------------\n");
58 printf(
"Platform %s has %lu root devices.\n",
59 p.get_info<sycl::info::platform::name>().c_str(), devices.size());
61 for (std::size_t i = 0; i < devices.size(); i++) {
62 auto &&device = devices[i];
64 printf(
" %lu %s\n", i,
65 device.get_info<sycl::info::device::name>().c_str());
67 auto subdevices = device.create_sub_devices<
68 sycl::info::partition_property::partition_by_affinity_domain>(
69 sycl::info::partition_affinity_domain::numa);
71 printf(
" Subdevices:\n");
72 for (std::size_t j = 0; j < subdevices.size(); j++) {
73 auto &&subdevice = subdevices[j];
74 printf(
" %lu.%lu %s\n", i, j,
75 subdevice.get_info<sycl::info::device::name>().c_str());
79 printf(
"--------------------------------\n");
82inline std::vector<sycl::device>
83trim_devices(
const std::vector<sycl::device> &devices, std::size_t n_devices) {
84 std::vector<sycl::device> trimmed_devices = devices;
86 if (n_devices < devices.size()) {
87 trimmed_devices.resize(n_devices);
89 return trimmed_devices;
92template <
typename Selector>
93std::vector<sycl::device> get_numa_devices_impl_(Selector &&selector) {
94 std::vector<sycl::device> devices;
96 sycl::platform p(std::forward<Selector>(selector));
97 auto root_devices = p.get_devices();
99 for (
auto &&root_device : root_devices) {
100 auto subdevices = root_device.create_sub_devices<
101 sycl::info::partition_property::partition_by_affinity_domain>(
102 sycl::info::partition_affinity_domain::numa);
104 for (
auto &&subdevice : subdevices) {
105 devices.push_back(subdevice);
112template <
typename Selector>
113std::vector<sycl::device> get_devices(Selector &&selector) {
114 sycl::platform p(std::forward<Selector>(selector));
115 return p.get_devices();
// Returns the list of devices to use for the platform chosen by `selector`.
//
// Under USE_OMP_INTEROP the devices come from SYCL queues obtained through
// OpenMP interop: a function-local static vector of sycl::queue pointers is
// filled the first time it is found empty, with one queue per device reported
// by omp_get_num_devices(), and the result is each queue's get_device().
// A std::runtime_error is thrown when a queue cannot be obtained.
//
// The other visible path returns get_numa_devices_impl_(selector); when that
// throws a sycl::exception whose code is sycl::errc::feature_not_supported, a
// message is written to std::cerr and get_devices(selector) is returned.
//
// NOTE(review): this listing carries stray leading line numbers and has lost
// lines — the declaration of `result`, the `try` matching the visible `catch`,
// whatever separates the two paths (presumably #else/#endif), and the handling
// of other exception codes are not visible. Confirm against the upstream file.
118template <
typename Selector>
119std::vector<sycl::device> get_numa_devices(Selector &&selector) {
120#ifdef USE_OMP_INTEROP
// Static, so the queues fetched on the first call are reused by later calls.
122 static std::vector<sycl::queue *> ompt_queues_;
123 if (ompt_queues_.empty()) {
124 int num_devices = omp_get_num_devices();
125 for (
int d = 0; d < num_devices; ++d) {
126 omp_interop_t interop =
nullptr;
127#pragma omp interop device(d) init(prefer_type("sycl"), targetsync : interop)
// The targetsync pointer of the interop object is reinterpreted as a
// sycl::queue*; `result` receives the status of that lookup.
129 sycl::queue *omp_queue =
static_cast<sycl::queue *
>(
130 omp_get_interop_ptr(interop, omp_ipr_targetsync, &result));
131 if (result != omp_irc_success)
132 throw std::runtime_error(
133 "Fail to obtain sycl::queue by openmp::interop");
134 ompt_queues_.emplace_back(omp_queue);
// Translate the cached queues into the devices they are bound to.
137 std::vector<sycl::device> devices;
138 for (
auto &&q : ompt_queues_) {
139 devices.push_back(q->get_device());
144 return get_numa_devices_impl_(std::forward<Selector>(selector));
145 }
catch (sycl::exception
const &e) {
// NUMA partitioning is optional: fall back to the platform's own device list.
146 if (e.code() == sycl::errc::feature_not_supported) {
147 std::cerr <<
"NUMA partitioning not supported, returning root devices..."
149 return get_devices(std::forward<Selector>(selector));
// Builds a device list of a requested length `n` from get_numa_devices().
//
// When at least `n` devices are available, the first `n` are returned.
// Otherwise existing entries are appended again, in index order, until the
// list holds `n` devices — so the result may contain duplicates.
//
// NOTE(review): this listing carries stray leading line numbers and has lost
// lines — the second parameter (`n`), the declaration of `i`, the final return
// and the closing braces are not visible. Also, if get_numa_devices() returns
// an empty vector and n > 0, `devices[i++]` would read out of bounds
// (assuming `i` starts at 0 — confirm against the upstream file).
159template <
typename Selector>
160std::vector<sycl::device> get_duplicated_devices(Selector &&selector,
162 auto devices = get_numa_devices(std::forward<Selector>(selector));
// Enough devices: hand back a copy of the leading `n`.
164 if (devices.size() >= n) {
165 return std::vector<sycl::device>(devices.begin(), devices.begin() + n);
// Too few devices: keep appending copies of earlier entries. The element is
// copied into `d` first, so the push_back never reads from storage that it
// may be reallocating.
168 while (devices.size() < n) {
169 auto d = devices[i++];
170 devices.push_back(d);
// Streams the elements of `r` to std::cout, wrapping the output every 10
// elements, optionally preceded by `label` in double quotes.
//
// Each element is converted to rng::range_value_t<Range> before printing. The
// output ends with "]" followed by std::endl.
//
// NOTE(review): this listing carries stray leading line numbers and has lost
// lines — the condition guarding the label output, the declaration of `next`,
// whatever is printed between elements, the update of `count` and the closing
// braces are not visible. Confirm against the upstream file.
176template <
typename Range>
void print_range(Range &&r, std::string label =
"") {
177 std::size_t indent = 1;
// The label is printed as "<label>": — two quotes, a colon and a space, hence
// the extra 4 columns of indentation for continuation lines.
180 std::cout <<
"\"" << label <<
"\": ";
181 indent += label.size() + 4;
// Padding emitted after each line wrap.
184 std::string indent_whitespace(indent,
' ');
187 std::size_t columns = 10;
188 std::size_t count = 1;
189 for (
auto iter = r.begin(); iter != r.end(); ++iter) {
190 std::cout << static_cast<rng::range_value_t<Range>>(*iter);
// Only when another element follows: wrap after every `columns` elements.
194 if (next != r.end()) {
196 if (count % columns == 0) {
197 std::cout <<
"\n" << indent_whitespace;
202 std::cout <<
"]" << std::endl;
// Streams a description of matrix `m` to std::cout: a header of the form
// "<rows> x <cols> matrix with <size> stored values", the label in double
// quotes, and then one "(i, j): value" line per entry obtained by iterating
// `m`.
//
// `m` must provide shape() (indexable with 0 and 1), size(), and iteration
// yielding elements that decompose into (index, value) with index itself
// decomposing into (i, j).
//
// NOTE(review): this listing carries stray leading line numbers and has lost
// lines — the condition guarding the label output (presumably a non-empty
// check) and the closing braces are not visible.
205template <
typename Matrix>
206void print_matrix(Matrix &&m, std::string label =
"") {
207 std::cout << m.shape()[0] <<
" x " << m.shape()[1] <<
" matrix with "
208 << m.size() <<
" stored values";
210 std::cout <<
" \"" << label <<
"\"";
212 std::cout << std::endl;
214 for (
auto &&tuple : m) {
// Unpack the entry into its coordinates and stored value.
215 auto &&[index, value] = tuple;
216 auto &&[i, j] = index;
218 std::cout <<
"(" << i <<
", " << j <<
"): " << value << std::endl;
// Streams a summary of the segments of `r` to std::cout: the label in double
// quotes, the number of segments reported by dr::ranges::segments(r), and
// then one "Seg <idx>, size <n> (rank <rank>)" line per segment.
//
// NOTE(review): this listing carries stray leading line numbers and has lost
// lines — the condition guarding the label output, the declaration of `idx`
// and the closing braces are not visible. Confirm against the upstream file.
222template <
typename R>
void print_range_details(R &&r, std::string label =
"") {
224 std::cout <<
"\"" << label <<
"\" ";
227 std::cout <<
"distributed range with " << rng::size(dr::ranges::segments(r))
228 <<
" segments." << std::endl;
231 for (
auto &&segment : dr::ranges::segments(r)) {
// `idx` is post-incremented, so each line shows the index before the bump.
232 std::cout <<
"Seg " << idx++ <<
", size " << segment.size() <<
" (rank "
233 << dr::ranges::rank(segment) <<
")" << std::endl;
// Draws a one-line picture of how `r` is split into segments: each segment
// becomes a cell whose width is proportional to its share of the whole range
// (scaled to `width` characters, never narrower than 3) with the segment's
// rank placed between left and right padding.
//
// NOTE(review): this listing carries stray leading line numbers and has lost
// lines — the end of the per-segment stream statement (presumably a closing
// bracket) and the closing braces are not visible. Also, when rng::size(r) is
// 0 the ratio below divides by zero before being converted to std::size_t;
// confirm whether callers can pass an empty range.
237template <dr::distributed_range R>
238void range_details(R &&r, std::size_t width = 80) {
239 std::size_t size = rng::size(r);
241 for (
auto &&[idx, segment] :
242 dr::__detail::enumerate(dr::ranges::segments(r))) {
243 std::size_t local_size = rng::size(segment);
// Fraction of the whole range held by this segment.
245 double percent = double(local_size) / size;
// Cell width in characters; the conversion to std::size_t truncates.
247 std::size_t num_chars = percent * width;
248 num_chars = std::max(num_chars, std::size_t(3));
// Three characters of the cell are not padding; the rest is split so that
// any odd leftover goes after the rank.
250 std::size_t whitespace = num_chars - 3;
252 std::size_t initial_whitespace = whitespace / 2;
253 std::size_t after_whitespace = whitespace - initial_whitespace;
255 std::cout <<
"[" << std::string(initial_whitespace,
' ')
256 << dr::ranges::rank(segment) << std::string(after_whitespace,
' ')
259 std::cout << std::endl;
266 { t(device) } -> std::convertible_to<int>;