Distributed Ranges
sparse_matrix.hpp
// SPDX-FileCopyrightText: Intel Corporation
//
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <dr/detail/generate_csr.hpp>
#include <dr/detail/index.hpp>
#include <dr/detail/matrix_entry.hpp>
#include <dr/sp/algorithms/copy.hpp>
#include <dr/sp/containers/matrix_partition.hpp>
#include <dr/sp/device_vector.hpp>
#include <dr/sp/distributed_span.hpp>
#include <dr/sp/init.hpp>
#include <dr/views/csr_matrix_view.hpp>
#include <cassert> // for assert() below
#include <iterator>

namespace dr::sp {

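// Accessor for a range of segments: a position is a (segment_id, index)
// pair addressing one element inside one segment. dr::iterator_adaptor
// turns it into a flat random-access iterator over all segments' elements.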
template <rng::random_access_range Segments>
  requires(rng::viewable_range<Segments>)
class distributed_range_accessor {
public:
  using segment_type = rng::range_value_t<Segments>;

  using value_type = rng::range_value_t<segment_type>;

  using size_type = rng::range_size_t<segment_type>;
  using difference_type = rng::range_difference_t<segment_type>;

  using reference = rng::range_reference_t<segment_type>;

  using iterator_category = std::random_access_iterator_tag;

  using iterator_accessor = distributed_range_accessor;
  using const_iterator_accessor = iterator_accessor;
  using nonconst_iterator_accessor = iterator_accessor;

  constexpr distributed_range_accessor() noexcept = default;
  constexpr ~distributed_range_accessor() noexcept = default;
  constexpr distributed_range_accessor(
      const distributed_range_accessor &) noexcept = default;
  constexpr distributed_range_accessor &
  operator=(const distributed_range_accessor &) noexcept = default;

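  // Point at element `idx` of segment `segment_id`, then normalize so the
  // position never rests inside an empty (or exhausted) segment.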
  constexpr distributed_range_accessor(Segments segments, size_type segment_id,
                                       size_type idx) noexcept
      : segments_(rng::views::all(std::forward<Segments>(segments))),
        segment_id_(segment_id), idx_(idx) {
    while (idx_ >= rng::size((*(segments_.begin() + segment_id_))) &&
           segment_id_ < rng::size(segments_)) {
      segment_id_++;
      idx_ = 0;
    }
  }
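
  // Advance by `offset` elements, positive or negative, stepping across
  // segment boundaries and skipping empty segments in either direction.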
  constexpr distributed_range_accessor &
  operator+=(difference_type offset) noexcept {

    while (offset > 0) {
      difference_type current_offset = std::min(
          offset,
          difference_type(rng::size(*(segments_.begin() + segment_id_))) -
              difference_type(idx_));
      idx_ += current_offset;
      offset -= current_offset;

      while (idx_ >= rng::size((*(segments_.begin() + segment_id_))) &&
             segment_id_ < rng::size(segments_)) {
        segment_id_++;
        idx_ = 0;
      }
    }

    while (offset < 0) {
      difference_type current_offset =
          std::min(-offset, difference_type(idx_) + 1);

      difference_type new_idx = difference_type(idx_) - current_offset;

      while (new_idx < 0 && segment_id_ > 0) {
        segment_id_--;
        new_idx = rng::size(*(segments_.begin() + segment_id_)) - 1;
      }

      idx_ = new_idx;
      offset += current_offset;
    }

    assert(offset == 0);
    return *this;
  }

  constexpr bool operator==(const iterator_accessor &other) const noexcept {
    return segment_id_ == other.segment_id_ && idx_ == other.idx_;
  }

  constexpr difference_type
  operator-(const iterator_accessor &other) const noexcept {
    return difference_type(get_global_idx()) - other.get_global_idx();
  }

  constexpr bool operator<(const iterator_accessor &other) const noexcept {
    if (segment_id_ < other.segment_id_) {
      return true;
    } else if (segment_id_ == other.segment_id_) {
      return idx_ < other.idx_;
    } else {
      return false;
    }
  }

  constexpr reference operator*() const noexcept {
    return *((*(segments_.begin() + segment_id_)).begin() + idx_);
  }

private:
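  // Map (segment_id_, idx_) to a flat global index by summing the sizes of
  // all preceding segments; used by operator- above.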
  size_type get_global_idx() const noexcept {
    size_type cumulative_size = 0;
    for (std::size_t i = 0; i < segment_id_; i++) {
      cumulative_size += segments_[i].size();
    }
    return cumulative_size + idx_;
  }

  rng::views::all_t<Segments> segments_;
  size_type segment_id_ = 0;
  size_type idx_ = 0;
};

template <typename Segments>
using distributed_sparse_matrix_iterator =
    dr::iterator_adaptor<distributed_range_accessor<Segments>>;

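// Sparse matrix distributed over a 2D grid of CSR tiles. Each tile's
// values/rowptr/colind arrays live in device memory on the rank assigned by
// the matrix_partition; segments() exposes the tiles as csr_matrix_view
// segments for distributed algorithms.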
template <typename T, std::integral I = std::int64_t> class sparse_matrix {
public:
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;

  using value_type = dr::matrix_entry<T, I>;

  using scalar_reference = rng::range_reference_t<
      dr::sp::device_vector<T, dr::sp::device_allocator<T>>>;
  using const_scalar_reference = rng::range_reference_t<
      const dr::sp::device_vector<T, dr::sp::device_allocator<T>>>;

  using reference = dr::matrix_ref<T, I, scalar_reference>;
  using const_reference = dr::matrix_ref<T, I, const_scalar_reference>;

  using key_type = dr::index<I>;

  using segment_type = dr::sp::csr_matrix_view<
      T, I,
      rng::iterator_t<dr::sp::device_vector<T, dr::sp::device_allocator<T>>>,
      rng::iterator_t<dr::sp::device_vector<I, dr::sp::device_allocator<I>>>>;

  // using iterator = sparse_matrix_iterator<T, dr::sp::device_vector<T,
  // dr::sp::device_allocator<T>>>;
  using iterator =
      distributed_sparse_matrix_iterator<std::span<segment_type> &&>;

  sparse_matrix(key_type shape)
      : shape_(shape), partition_(default_partition_()) {
    init_();
  }

  sparse_matrix(key_type shape, double density)
      : shape_(shape), partition_(default_partition_()) {
    init_random_(density);
  }

  sparse_matrix(key_type shape, double density,
                const matrix_partition &partition)
      : shape_(shape), partition_(partition.clone()) {
    init_random_(density);
  }

  sparse_matrix(key_type shape, const matrix_partition &partition)
      : shape_(shape), partition_(partition.clone()) {
    init_();
  }

  size_type size() const noexcept { return total_nnz_; }

  key_type shape() const noexcept { return shape_; }

  iterator begin() { return iterator(segments(), 0, 0); }

  iterator end() {
    return iterator(segments(), grid_shape_[0] * grid_shape_[1], 0);
  }

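  // Return a CSR view of the tile at `tile_index`; tiles on the bottom and
  // right edges of the grid may be smaller than `tile_shape_`.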
  segment_type tile(key_type tile_index) {
    std::size_t tile_idx = tile_index[0] * grid_shape_[1] + tile_index[1];
    auto values = values_[tile_idx].begin();
    auto rowptr = rowptr_[tile_idx].begin();
    auto colind = colind_[tile_idx].begin();
    auto nnz = nnz_[tile_idx];

    std::size_t tm =
        std::min(tile_shape_[0], shape()[0] - tile_index[0] * tile_shape_[0]);
    std::size_t tn =
        std::min(tile_shape_[1], shape()[1] - tile_index[1] * tile_shape_[1]);

    return segment_type(values, rowptr, colind, key_type(tm, tn), nnz,
                        values_[tile_idx].rank());
  }

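  // Replace the tile at `tile_index` with `tile_view`'s contents: resize the
  // device buffers, copy values/rowptr/colind, and refresh nnz bookkeeping.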
  // Note: this function is currently *not* asynchronous due to a deadlock
  // in `gemv_benchmark`. I believe this is a SYCL bug.
  template <typename... Args>
  auto copy_tile_async(key_type tile_index,
                       csr_matrix_view<T, I, Args...> tile_view) {
    std::size_t tile_idx = tile_index[0] * grid_shape_[1] + tile_index[1];
    auto &&values = values_[tile_idx];
    auto &&colind = colind_[tile_idx];
    auto &&rowptr = rowptr_[tile_idx];
    auto &&nnz = nnz_[tile_idx];

    total_nnz_ -= nnz;
    nnz = tile_view.size();

    total_nnz_ += tile_view.size();

    values.resize(tile_view.size());
    colind.resize(tile_view.size());
    rowptr.resize(tile_view.shape()[0] + 1);

    auto v_e = dr::sp::copy_async(tile_view.values_data(),
                                  tile_view.values_data() + values.size(),
                                  values.data());

    auto c_e = dr::sp::copy_async(tile_view.colind_data(),
                                  tile_view.colind_data() + colind.size(),
                                  colind.data());

    auto r_e = dr::sp::copy_async(tile_view.rowptr_data(),
                                  tile_view.rowptr_data() + rowptr.size(),
                                  rowptr.data());

    tiles_ = generate_tiles_();
    segments_ = generate_segments_();

    v_e.wait();
    c_e.wait();
    r_e.wait();

    return __detail::combine_events({v_e, c_e, r_e});
  }

  template <typename... Args>
  void copy_tile(key_type tile_index,
                 csr_matrix_view<T, I, Args...> tile_view) {
    copy_tile_async(tile_index, tile_view).wait();
  }

  key_type tile_shape() const noexcept { return tile_shape_; }

  key_type grid_shape() const noexcept { return grid_shape_; }

  std::span<segment_type> tiles() { return std::span(tiles_); }

  std::span<segment_type> segments() { return std::span(segments_); }

private:
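  // Build one csr_matrix_view per grid cell, addressed in tile-local
  // coordinates.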
  std::vector<segment_type> generate_tiles_() {
    std::vector<segment_type> views_;

    for (std::size_t i = 0; i < grid_shape_[0]; i++) {
      for (std::size_t j = 0; j < grid_shape_[1]; j++) {
        std::size_t tm = std::min<std::size_t>(tile_shape_[0],
                                               shape()[0] - i * tile_shape_[0]);
        std::size_t tn = std::min<std::size_t>(tile_shape_[1],
                                               shape()[1] - j * tile_shape_[1]);

        std::size_t tile_idx = i * grid_shape_[1] + j;

        auto values = values_[tile_idx].begin();
        auto rowptr = rowptr_[tile_idx].begin();
        auto colind = colind_[tile_idx].begin();
        auto nnz = nnz_[tile_idx];

        views_.emplace_back(values, rowptr, colind, key_type(tm, tn), nnz,
                            values_[tile_idx].rank());
      }
    }
    return views_;
  }

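  // Like generate_tiles_(), but each view also carries its global
  // (row, column) offset, as required for whole-matrix iteration.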
  std::vector<segment_type> generate_segments_() {
    std::vector<segment_type> views_;

    for (std::size_t i = 0; i < grid_shape_[0]; i++) {
      for (std::size_t j = 0; j < grid_shape_[1]; j++) {
        std::size_t tm = std::min<std::size_t>(tile_shape_[0],
                                               shape()[0] - i * tile_shape_[0]);
        std::size_t tn = std::min<std::size_t>(tile_shape_[1],
                                               shape()[1] - j * tile_shape_[1]);

        std::size_t tile_idx = i * grid_shape_[1] + j;

        auto values = values_[tile_idx].begin();
        auto rowptr = rowptr_[tile_idx].begin();
        auto colind = colind_[tile_idx].begin();
        auto nnz = nnz_[tile_idx];

        std::size_t m_offset = i * tile_shape_[0];
        std::size_t n_offset = j * tile_shape_[1];

        views_.emplace_back(values, rowptr, colind, key_type(tm, tn), nnz,
                            values_[i * grid_shape_[1] + j].rank(),
                            key_type(m_offset, n_offset));
      }
    }
    return views_;
  }

private:
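  // Allocate a placeholder tile (zero nonzeros, rowptr = {0, 0}) for every
  // grid cell, in device memory on the rank that owns the cell.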
  void init_() {
    grid_shape_ = key_type(partition_->grid_shape(shape()));
    tile_shape_ = key_type(partition_->tile_shape(shape()));

    values_.reserve(grid_shape_[0] * grid_shape_[1]);
    rowptr_.reserve(grid_shape_[0] * grid_shape_[1]);
    colind_.reserve(grid_shape_[0] * grid_shape_[1]);
    nnz_.reserve(grid_shape_[0] * grid_shape_[1]);

    for (std::size_t i = 0; i < grid_shape_[0]; i++) {
      for (std::size_t j = 0; j < grid_shape_[1]; j++) {
        std::size_t rank = partition_->tile_rank(shape(), {i, j});

        auto device = dr::sp::devices()[rank];
        dr::sp::device_allocator<T> alloc(dr::sp::context(), device);
        dr::sp::device_allocator<I> i_alloc(dr::sp::context(), device);

        values_.emplace_back(1, alloc, rank);
        rowptr_.emplace_back(2, i_alloc, rank);
        colind_.emplace_back(1, i_alloc, rank);
        nnz_.push_back(0);
        rowptr_.back()[0] = 0;
        rowptr_.back()[1] = 0;
      }
    }
    tiles_ = generate_tiles_();
    segments_ = generate_segments_();
  }

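  // Fill every tile with a random CSR block of the requested density,
  // generated on the host and then copied into the owning device's memory.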
  void init_random_(double density) {
    grid_shape_ = key_type(partition_->grid_shape(shape()));
    tile_shape_ = key_type(partition_->tile_shape(shape()));

    values_.reserve(grid_shape_[0] * grid_shape_[1]);
    rowptr_.reserve(grid_shape_[0] * grid_shape_[1]);
    colind_.reserve(grid_shape_[0] * grid_shape_[1]);
    nnz_.reserve(grid_shape_[0] * grid_shape_[1]);

    for (std::size_t i = 0; i < grid_shape_[0]; i++) {
      for (std::size_t j = 0; j < grid_shape_[1]; j++) {
        std::size_t rank = partition_->tile_rank(shape(), {i, j});

        std::size_t tm = std::min<std::size_t>(tile_shape_[0],
                                               shape()[0] - i * tile_shape_[0]);
        std::size_t tn = std::min<std::size_t>(tile_shape_[1],
                                               shape()[1] - j * tile_shape_[1]);

        auto device = dr::sp::devices()[rank];
        dr::sp::device_allocator<T> alloc(dr::sp::context(), device);
        dr::sp::device_allocator<I> i_alloc(dr::sp::context(), device);

        auto seed = i * grid_shape_[1] + j;

        auto csr = generate_random_csr<T, I>(key_type(tm, tn), density, seed);
        std::size_t nnz = csr.size();

        dr::sp::device_vector<T, dr::sp::device_allocator<T>> values(
            csr.size(), alloc, rank);
        dr::sp::device_vector<I, dr::sp::device_allocator<I>> rowptr(
            tm + 1, i_alloc, rank);

        dr::sp::device_vector<I, dr::sp::device_allocator<I>> colind(
            csr.size(), i_alloc, rank);

        dr::sp::copy(csr.values_data(), csr.values_data() + csr.size(),
                     values.data());
        dr::sp::copy(csr.rowptr_data(), csr.rowptr_data() + tm + 1,
                     rowptr.data());
        dr::sp::copy(csr.colind_data(), csr.colind_data() + csr.size(),
                     colind.data());

        values_.push_back(std::move(values));
        rowptr_.emplace_back(std::move(rowptr));
        colind_.emplace_back(std::move(colind));
        nnz_.push_back(nnz);
        total_nnz_ += nnz;

        delete[] csr.values_data();
        delete[] csr.rowptr_data();
        delete[] csr.colind_data();
      }
    }
    tiles_ = generate_tiles_();
    segments_ = generate_segments_();
  }

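  // Default partition: distribute tile rows cyclically across the available
  // devices (a block row-cyclic distribution).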
  std::unique_ptr<dr::sp::matrix_partition> default_partition_() {
    auto ptr = dr::sp::row_cyclic();
    return std::make_unique<dr::sp::block_cyclic>(ptr);
  }

private:
  key_type shape_;
  key_type grid_shape_;
  key_type tile_shape_;
  std::unique_ptr<dr::sp::matrix_partition> partition_;

  std::vector<dr::sp::device_vector<T, dr::sp::device_allocator<T>>> values_;
  std::vector<dr::sp::device_vector<I, dr::sp::device_allocator<I>>> rowptr_;
  std::vector<dr::sp::device_vector<I, dr::sp::device_allocator<I>>> colind_;

  std::vector<std::size_t> nnz_;
  std::size_t total_nnz_ = 0;

  std::vector<segment_type> tiles_;
  std::vector<segment_type> segments_;
};

} // namespace dr::sp
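
// Usage sketch: constructing and inspecting a randomly initialized matrix.
// The device-discovery calls (`dr::sp::get_numa_devices`, `dr::sp::init`)
// follow the library's examples; treat the exact overloads and the umbrella
// header <dr/sp.hpp> as assumptions rather than guarantees of this file.
//
//   #include <dr/sp.hpp>
//   #include <iostream>
//
//   int main() {
//     auto devices = dr::sp::get_numa_devices(sycl::default_selector_v);
//     dr::sp::init(devices);
//
//     // 1000 x 1000 matrix, ~1% nonzero, default (row-cyclic) partition.
//     dr::sp::sparse_matrix<float> a({1000, 1000}, 0.01);
//
//     std::cout << a.size() << " nonzeros in a " << a.shape()[0] << " x "
//               << a.shape()[1] << " matrix\n";
//
//     // One CSR tile per grid cell, each placed on its owning rank.
//     for (auto &&tile : a.tiles()) {
//       std::cout << tile.shape()[0] << " x " << tile.shape()[1] << " tile\n";
//     }
//   }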