Distributed Ranges
Loading...
Searching...
No Matches
util.hpp
1// SPDX-FileCopyrightText: Intel Corporation
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#pragma once
6
#include <cstdio>
#include <iostream>
#include <omp.h>
#include <stdexcept>
#include <sycl/sycl.hpp>
10
11namespace dr::sp {
12
13template <typename Selector> sycl::device select_device(Selector &&selector) {
14 sycl::device d;
15
16 try {
17 d = sycl::device(std::forward<Selector>(selector));
18 std::cout << "Running on device \""
19 << d.get_info<sycl::info::device::name>() << "\"" << std::endl;
20 } catch (sycl::exception const &e) {
21 std::cout << "Cannot select an accelerator\n" << e.what() << "\n";
22 std::cout << "Using a CPU device\n";
23 d = sycl::device(sycl::cpu_selector_v);
24 }
25 return d;
26}
27
28inline void list_devices() {
29 auto platforms = sycl::platform::get_platforms();
30
31 for (auto &platform : platforms) {
32 std::cout << "Platform: " << platform.get_info<sycl::info::platform::name>()
33 << std::endl;
34
35 auto devices = platform.get_devices();
36 for (auto &device : devices) {
37 std::cout << " Device: " << device.get_info<sycl::info::device::name>()
38 << std::endl;
39 }
40 }
41}
42
43inline void print_device_details(std::span<sycl::device> devices) {
44 std::size_t device_id = 0;
45 for (auto &&device : devices) {
46 std::cout << "Device " << device_id << ": "
47 << device.get_info<sycl::info::device::name>() << std::endl;
48 device_id++;
49 }
50}
51
52template <typename Selector> void list_devices(Selector &&selector) {
53 sycl::platform p(std::forward<Selector>(selector));
54 auto devices = p.get_devices();
55
56 printf("--Platform Info-----------------\n");
57
58 printf("Platform %s has %lu root devices.\n",
59 p.get_info<sycl::info::platform::name>().c_str(), devices.size());
60
61 for (std::size_t i = 0; i < devices.size(); i++) {
62 auto &&device = devices[i];
63
64 printf(" %lu %s\n", i,
65 device.get_info<sycl::info::device::name>().c_str());
66
67 auto subdevices = device.create_sub_devices<
68 sycl::info::partition_property::partition_by_affinity_domain>(
69 sycl::info::partition_affinity_domain::numa);
70
71 printf(" Subdevices:\n");
72 for (std::size_t j = 0; j < subdevices.size(); j++) {
73 auto &&subdevice = subdevices[j];
74 printf(" %lu.%lu %s\n", i, j,
75 subdevice.get_info<sycl::info::device::name>().c_str());
76 }
77 }
78
79 printf("--------------------------------\n");
80}
81
82inline std::vector<sycl::device>
83trim_devices(const std::vector<sycl::device> &devices, std::size_t n_devices) {
84 std::vector<sycl::device> trimmed_devices = devices;
85
86 if (n_devices < devices.size()) {
87 trimmed_devices.resize(n_devices);
88 }
89 return trimmed_devices;
90}
91
92template <typename Selector>
93std::vector<sycl::device> get_numa_devices_impl_(Selector &&selector) {
94 std::vector<sycl::device> devices;
95
96 sycl::platform p(std::forward<Selector>(selector));
97 auto root_devices = p.get_devices();
98
99 for (auto &&root_device : root_devices) {
100 auto subdevices = root_device.create_sub_devices<
101 sycl::info::partition_property::partition_by_affinity_domain>(
102 sycl::info::partition_affinity_domain::numa);
103
104 for (auto &&subdevice : subdevices) {
105 devices.push_back(subdevice);
106 }
107 }
108
109 return devices;
110}
111
112template <typename Selector>
113std::vector<sycl::device> get_devices(Selector &&selector) {
114 sycl::platform p(std::forward<Selector>(selector));
115 return p.get_devices();
116}
117
118template <typename Selector>
119std::vector<sycl::device> get_numa_devices(Selector &&selector) {
120#ifdef USE_OMP_INTEROP
121 // possible to move ompt_queues_ sp::__detail as a global object
122 static std::vector<sycl::queue *> ompt_queues_;
123 if (ompt_queues_.empty()) {
124 int num_devices = omp_get_num_devices();
125 for (int d = 0; d < num_devices; ++d) {
126 omp_interop_t interop = nullptr;
127#pragma omp interop device(d) init(prefer_type("sycl"), targetsync : interop)
128 int result;
129 sycl::queue *omp_queue = static_cast<sycl::queue *>(
130 omp_get_interop_ptr(interop, omp_ipr_targetsync, &result));
131 if (result != omp_irc_success)
132 throw std::runtime_error(
133 "Fail to obtain sycl::queue by openmp::interop");
134 ompt_queues_.emplace_back(omp_queue);
135 }
136 }
137 std::vector<sycl::device> devices;
138 for (auto &&q : ompt_queues_) {
139 devices.push_back(q->get_device());
140 }
141 return devices;
142#else
143 try {
144 return get_numa_devices_impl_(std::forward<Selector>(selector));
145 } catch (sycl::exception const &e) {
146 if (e.code() == sycl::errc::feature_not_supported) {
147 std::cerr << "NUMA partitioning not supported, returning root devices..."
148 << std::endl;
149 return get_devices(std::forward<Selector>(selector));
150 } else {
151 throw;
152 }
153 }
154#endif
155}
156
157// Return exactly `n` devices obtained using the selector `selector`.
158// May duplicate devices
159template <typename Selector>
160std::vector<sycl::device> get_duplicated_devices(Selector &&selector,
161 std::size_t n) {
162 auto devices = get_numa_devices(std::forward<Selector>(selector));
163
164 if (devices.size() >= n) {
165 return std::vector<sycl::device>(devices.begin(), devices.begin() + n);
166 } else {
167 std::size_t i = 0;
168 while (devices.size() < n) {
169 auto d = devices[i++];
170 devices.push_back(d);
171 }
172 return devices;
173 }
174}
175
176template <typename Range> void print_range(Range &&r, std::string label = "") {
177 std::size_t indent = 1;
178
179 if (label != "") {
180 std::cout << "\"" << label << "\": ";
181 indent += label.size() + 4;
182 }
183
184 std::string indent_whitespace(indent, ' ');
185
186 std::cout << "[";
187 std::size_t columns = 10;
188 std::size_t count = 1;
189 for (auto iter = r.begin(); iter != r.end(); ++iter) {
190 std::cout << static_cast<rng::range_value_t<Range>>(*iter);
191
192 auto next = iter;
193 ++next;
194 if (next != r.end()) {
195 std::cout << ", ";
196 if (count % columns == 0) {
197 std::cout << "\n" << indent_whitespace;
198 }
199 }
200 ++count;
201 }
202 std::cout << "]" << std::endl;
203}
204
// Print a header line with the two extents from `m.shape()`, the value of
// `m.size()` and the optional quoted `label`, followed by one line per entry
// of `m` in the form `(i, j): value`. All output goes to stdout.
template <typename Matrix>
void print_matrix(Matrix &&m, std::string label = "") {
  std::cout << m.shape()[0] << " x " << m.shape()[1] << " matrix with "
            << m.size() << " stored values";
  if (!label.empty()) {
    std::cout << " \"" << label << "\"";
  }
  std::cout << std::endl;

  for (auto &&entry : m) {
    // Each entry unpacks into an index and a value; the index in turn
    // unpacks into two coordinates.
    auto &&[position, stored] = entry;
    auto &&[row, col] = position;

    std::cout << "(" << row << ", " << col << "): " << stored << std::endl;
  }
}
221
222template <typename R> void print_range_details(R &&r, std::string label = "") {
223 if (label != "") {
224 std::cout << "\"" << label << "\" ";
225 }
226
227 std::cout << "distributed range with " << rng::size(dr::ranges::segments(r))
228 << " segments." << std::endl;
229
230 std::size_t idx = 0;
231 for (auto &&segment : dr::ranges::segments(r)) {
232 std::cout << "Seg " << idx++ << ", size " << segment.size() << " (rank "
233 << dr::ranges::rank(segment) << ")" << std::endl;
234 }
235}
236
237template <dr::distributed_range R>
238void range_details(R &&r, std::size_t width = 80) {
239 std::size_t size = rng::size(r);
240
241 for (auto &&[idx, segment] :
242 dr::__detail::enumerate(dr::ranges::segments(r))) {
243 std::size_t local_size = rng::size(segment);
244
245 double percent = double(local_size) / size;
246
247 std::size_t num_chars = percent * width;
248 num_chars = std::max(num_chars, std::size_t(3));
249
250 std::size_t whitespace = num_chars - 3;
251
252 std::size_t initial_whitespace = whitespace / 2;
253 std::size_t after_whitespace = whitespace - initial_whitespace;
254
255 std::cout << "[" << std::string(initial_whitespace, ' ')
256 << dr::ranges::rank(segment) << std::string(after_whitespace, ' ')
257 << "]";
258 }
259 std::cout << std::endl;
260}
261
262namespace __detail {
263
264template <typename T>
265concept sycl_device_selector = requires(T &t, const sycl::device &device) {
266 { t(device) } -> std::convertible_to<int>;
267};
268
269}
270
271} // namespace dr::sp