7#include <dr/detail/generate_csr.hpp>
8#include <dr/detail/index.hpp>
9#include <dr/detail/matrix_entry.hpp>
10#include <dr/sp/algorithms/copy.hpp>
11#include <dr/sp/containers/matrix_partition.hpp>
12#include <dr/sp/device_vector.hpp>
13#include <dr/sp/distributed_span.hpp>
14#include <dr/sp/init.hpp>
15#include <dr/views/csr_matrix_view.hpp>
// NOTE(review): this span is a garbled source extraction — the original
// file's line numbers are fused into the text and the numbering gaps show
// that many lines are missing. Code tokens are kept byte-identical below;
// only comments are added.
//
// Fragment of a random-access accessor/iterator over a range of segments
// (per the footer residue: sparse_matrix.hpp:22). It flattens a range of
// ranges: the pair (segment_id_, idx_) addresses element idx_ of segment
// segment_id_.
20template <rng::random_access_range Segments>
21 requires(rng::viewable_range<Segments>)
// -- member type aliases, all derived from the segment range --
24 using segment_type = rng::range_value_t<Segments>;
26 using value_type = rng::range_value_t<segment_type>;
28 using size_type = rng::range_size_t<segment_type>;
29 using difference_type = rng::range_difference_t<segment_type>;
31 using reference = rng::range_reference_t<segment_type>;
33 using iterator_category = std::random_access_iterator_tag;
// Constructor fragment: stores an all-view of the segments plus the
// (segment_id, idx) position, then advances past empty segments.
// NOTE(review): the loop condition reads size(*(begin + segment_id_))
// BEFORE checking segment_id_ < size(segments_); if a position at/past
// the last segment can be constructed this indexes one-past-the-end —
// verify against callers (end() appears to use segment_id_ == grid size).
47 size_type idx) noexcept
48 : segments_(rng::views::all(std::forward<Segments>(segments))),
49 segment_id_(segment_id), idx_(idx) {
50 while (idx_ >= rng::size((*(segments_.begin() + segment_id_))) &&
51 segment_id_ < rng::size(segments_)) {
// operator+= fragment (positive offset): consume as much of the current
// segment as possible, then walk forward across segment boundaries,
// again skipping empty segments.
58 operator+=(difference_type offset)
noexcept {
61 difference_type current_offset = std::min(
63 difference_type(rng::size(*(segments_.begin() + segment_id_))) -
64 difference_type(idx_));
65 idx_ += current_offset;
66 offset -= current_offset;
68 while (idx_ >= rng::size((*(segments_.begin() + segment_id_))) &&
69 segment_id_ < rng::size(segments_)) {
// Negative-offset branch: step backward; when the local index would
// underflow, move to the last element of the previous segment.
76 difference_type current_offset =
77 std::min(-offset, difference_type(idx_) + 1);
79 difference_type new_idx = difference_type(idx_) - current_offset;
81 while (new_idx < 0 && segment_id_ > 0) {
83 new_idx = rng::size(*(segments_.begin() + segment_id_)) - 1;
87 offset += current_offset;
// Equality: identical segment and identical intra-segment index.
95 return segment_id_ == other.segment_id_ && idx_ == other.idx_;
// Iterator difference: computed via the flattened global indices.
98 constexpr difference_type
100 return difference_type(get_global_idx()) - other.get_global_idx();
// Ordering: lexicographic on (segment_id_, idx_).
104 if (segment_id_ < other.segment_id_) {
106 }
else if (segment_id_ == other.segment_id_) {
107 return idx_ < other.idx_;
// Dereference: element idx_ of the current segment.
113 constexpr reference operator*()
const noexcept {
114 return *((*(segments_.begin() + segment_id_)).begin() + idx_);
// Flattened position: sum of the sizes of all preceding segments plus
// idx_. O(segment_id_) per call — note operator- above pays this twice.
118 size_type get_global_idx()
const noexcept {
119 size_type cumulative_size = 0;
120 for (std::size_t i = 0; i < segment_id_; i++) {
121 cumulative_size += segments_[i].size();
123 return cumulative_size + idx_;
// Stored view of the segments and the current (segment, index) position.
126 rng::views::all_t<Segments> segments_;
127 size_type segment_id_ = 0;
// NOTE(review): garbled extraction of a distributed sparse-matrix class
// (footer residue: sparse_matrix.hpp:135). Many original lines are
// missing (see the fused line-number prefixes and their gaps); code
// tokens are kept byte-identical and only comments are added.
131template <
typename Segments>
// -- member type aliases --
137 using size_type = std::size_t;
138 using difference_type = std::ptrdiff_t;
142 using scalar_reference = rng::range_reference_t<
144 using const_scalar_reference = rng::range_reference_t<
// Segment/iterator types are built over per-tile device vectors holding
// values (T) and indices (I).
154 rng::iterator_t<dr::sp::device_vector<T, dr::sp::device_allocator<T>>>,
155 rng::iterator_t<dr::sp::device_vector<I, dr::sp::device_allocator<I>>>>;
// Constructor fragments: each overload stores the global shape and a
// partition (default one, or a clone of the caller's); the overloads
// taking a density also populate the matrix with random entries.
163 : shape_(shape), partition_(default_partition_()) {
168 : shape_(shape), partition_(default_partition_()) {
169 init_random_(density);
174 : shape_(shape), partition_(partition.clone()) {
175 init_random_(density);
179 : shape_(shape), partition_(partition.clone()) {
// size(): total number of stored non-zeros; shape(): global (rows, cols).
183 size_type size()
const noexcept {
return total_nnz_; }
185 key_type shape()
const noexcept {
return shape_; }
// end()-style iterator: positioned one past the last grid segment.
190 return iterator(segments(), grid_shape_[0] * grid_shape_[1], 0);
// tile() fragment: build a CSR view over one grid tile's device buffers.
// The grid is linearized row-major: index = row * grid_cols + col.
194 std::size_t tile_idx = tile_index[0] * grid_shape_[1] + tile_index[1];
195 auto values = values_[tile_idx].begin();
196 auto rowptr = rowptr_[tile_idx].begin();
197 auto colind = colind_[tile_idx].begin();
198 auto nnz = nnz_[tile_idx];
// Edge tiles may be smaller than tile_shape_ — clamp to the matrix edge.
201 std::min(tile_shape_[0], shape()[0] - tile_index[0] * tile_shape_[0]);
203 std::min(tile_shape_[1], shape()[1] - tile_index[1] * tile_shape_[1]);
206 values_[tile_idx].rank());
// copy_tile_async() fragment: replace one tile's contents from a CSR
// view — resize the device buffers, launch three async copies (values,
// colind, rowptr) and return a combined event.
211 template <
typename... Args>
212 auto copy_tile_async(
key_type tile_index,
214 std::size_t tile_idx = tile_index[0] * grid_shape_[1] + tile_index[1];
215 auto &&values = values_[tile_idx];
216 auto &&colind = colind_[tile_idx];
217 auto &&rowptr = rowptr_[tile_idx];
218 auto &&nnz = nnz_[tile_idx];
// NOTE(review): total_nnz_ is only ever incremented here — if this tile
// previously held non-zeros, the old count is overwritten in nnz but
// never subtracted from total_nnz_; verify intended semantics.
221 nnz = tile_view.size();
223 total_nnz_ += tile_view.size();
225 values.resize(tile_view.size());
226 colind.resize(tile_view.size());
227 rowptr.resize(tile_view.shape()[0] + 1);
229 auto v_e = dr::sp::copy_async(tile_view.values_data(),
230 tile_view.values_data() + values.size(),
233 auto c_e = dr::sp::copy_async(tile_view.colind_data(),
234 tile_view.colind_data() + colind.size(),
237 auto r_e = dr::sp::copy_async(tile_view.rowptr_data(),
238 tile_view.rowptr_data() + rowptr.size(),
// Cached views are rebuilt while the async copies may still be in
// flight; the views only reference the (already-resized) buffers.
241 tiles_ = generate_tiles_();
242 segments_ = generate_segments_();
248 return __detail::combine_events({v_e, c_e, r_e});
// copy_tile(): blocking wrapper over copy_tile_async().
251 template <
typename... Args>
254 copy_tile_async(tile_index, tile_view).wait();
// Accessors for tiling geometry and the cached tile/segment views.
257 key_type tile_shape()
const noexcept {
return tile_shape_; }
259 key_type grid_shape()
const noexcept {
return grid_shape_; }
261 std::span<segment_type> tiles() {
return std::span(tiles_); }
263 std::span<segment_type> segments() {
return std::span(segments_); }
// generate_tiles_(): one CSR view per grid tile, in tile-local
// coordinates (no global offset).
266 std::vector<segment_type> generate_tiles_() {
267 std::vector<segment_type> views_;
269 for (std::size_t i = 0; i < grid_shape_[0]; i++) {
270 for (std::size_t j = 0; j < grid_shape_[1]; j++) {
271 std::size_t tm = std::min<std::size_t>(tile_shape_[0],
272 shape()[0] - i * tile_shape_[0]);
273 std::size_t tn = std::min<std::size_t>(tile_shape_[1],
274 shape()[1] - j * tile_shape_[1]);
276 std::size_t tile_idx = i * grid_shape_[1] + j;
278 auto values = values_[tile_idx].begin();
279 auto rowptr = rowptr_[tile_idx].begin();
280 auto colind = colind_[tile_idx].begin();
281 auto nnz = nnz_[tile_idx];
283 views_.emplace_back(values, rowptr, colind,
key_type(tm, tn), nnz,
284 values_[tile_idx].rank());
// generate_segments_(): same per-tile views, additionally carrying the
// tile's global (row, column) offset — m_offset/n_offset below.
290 std::vector<segment_type> generate_segments_() {
291 std::vector<segment_type> views_;
293 for (std::size_t i = 0; i < grid_shape_[0]; i++) {
294 for (std::size_t j = 0; j < grid_shape_[1]; j++) {
295 std::size_t tm = std::min<std::size_t>(tile_shape_[0],
296 shape()[0] - i * tile_shape_[0]);
297 std::size_t tn = std::min<std::size_t>(tile_shape_[1],
298 shape()[1] - j * tile_shape_[1]);
300 std::size_t tile_idx = i * grid_shape_[1] + j;
302 auto values = values_[tile_idx].begin();
303 auto rowptr = rowptr_[tile_idx].begin();
304 auto colind = colind_[tile_idx].begin();
305 auto nnz = nnz_[tile_idx];
307 std::size_t m_offset = i * tile_shape_[0];
308 std::size_t n_offset = j * tile_shape_[1];
310 views_.emplace_back(values, rowptr, colind,
key_type(tm, tn), nnz,
311 values_[i * grid_shape_[1] + j].rank(),
// init fragment (empty matrix): compute the grid/tile geometry from the
// partition, then allocate minimal per-tile buffers on the owning rank
// and zero the two-entry rowptr — an empty CSR tile.
320 grid_shape_ =
key_type(partition_->grid_shape(shape()));
321 tile_shape_ =
key_type(partition_->tile_shape(shape()));
323 values_.reserve(grid_shape_[0] * grid_shape_[1]);
324 rowptr_.reserve(grid_shape_[0] * grid_shape_[1]);
325 colind_.reserve(grid_shape_[0] * grid_shape_[1]);
326 nnz_.reserve(grid_shape_[0] * grid_shape_[1]);
328 for (std::size_t i = 0; i < grid_shape_[0]; i++) {
329 for (std::size_t j = 0; j < grid_shape_[1]; j++) {
330 std::size_t rank = partition_->tile_rank(shape(), {i, j});
332 auto device = dr::sp::devices()[rank];
336 values_.emplace_back(1, alloc, rank);
337 rowptr_.emplace_back(2, i_alloc, rank);
338 colind_.emplace_back(1, i_alloc, rank);
340 rowptr_.back()[0] = 0;
341 rowptr_.back()[1] = 0;
344 tiles_ = generate_tiles_();
345 segments_ = generate_segments_();
// init_random_(): fill each tile with a random CSR of the requested
// density, generated on the host and copied to the owning rank.
348 void init_random_(
double density) {
349 grid_shape_ =
key_type(partition_->grid_shape(shape()));
350 tile_shape_ =
key_type(partition_->tile_shape(shape()));
352 values_.reserve(grid_shape_[0] * grid_shape_[1]);
353 rowptr_.reserve(grid_shape_[0] * grid_shape_[1]);
354 colind_.reserve(grid_shape_[0] * grid_shape_[1]);
355 nnz_.reserve(grid_shape_[0] * grid_shape_[1]);
357 for (std::size_t i = 0; i < grid_shape_[0]; i++) {
358 for (std::size_t j = 0; j < grid_shape_[1]; j++) {
359 std::size_t rank = partition_->tile_rank(shape(), {i, j});
361 std::size_t tm = std::min<std::size_t>(tile_shape_[0],
362 shape()[0] - i * tile_shape_[0]);
363 std::size_t tn = std::min<std::size_t>(tile_shape_[1],
364 shape()[1] - j * tile_shape_[1]);
366 auto device = dr::sp::devices()[rank];
// Deterministic per-tile seed derived from the grid position, so the
// generated matrix is reproducible across runs.
370 auto seed = i * grid_shape_[1] + j;
372 auto csr = generate_random_csr<T, I>(
key_type(tm, tn), density, seed);
373 std::size_t nnz = csr.size();
376 csr.size(), alloc, rank);
378 tm + 1, i_alloc, rank);
381 csr.size(), i_alloc, rank);
383 dr::sp::copy(csr.values_data(), csr.values_data() + csr.size(),
385 dr::sp::copy(csr.rowptr_data(), csr.rowptr_data() + tm + 1,
387 dr::sp::copy(csr.colind_data(), csr.colind_data() + csr.size(),
390 values_.push_back(std::move(values));
391 rowptr_.emplace_back(std::move(rowptr));
392 colind_.emplace_back(std::move(colind));
// NOTE(review): raw delete[] of the generator's buffers — the host CSR
// apparently exposes owning raw pointers; a RAII owner (unique_ptr[] or
// vector inside the generator) would be safer. Confirm ownership
// contract of generate_random_csr before changing.
396 delete[] csr.values_data();
397 delete[] csr.rowptr_data();
398 delete[] csr.colind_data();
401 tiles_ = generate_tiles_();
402 segments_ = generate_segments_();
// Default partitioning: block-cyclic built from dr::sp::row_cyclic().
405 std::unique_ptr<dr::sp::matrix_partition> default_partition_() {
406 auto ptr = dr::sp::row_cyclic();
407 return std::make_unique<dr::sp::block_cyclic>(ptr);
// -- data members --
414 std::unique_ptr<dr::sp::matrix_partition> partition_;
// Per-tile device storage, indexed by the flattened grid index.
416 std::vector<dr::sp::device_vector<T, dr::sp::device_allocator<T>>> values_;
417 std::vector<dr::sp::device_vector<I, dr::sp::device_allocator<I>>> rowptr_;
418 std::vector<dr::sp::device_vector<I, dr::sp::device_allocator<I>>> colind_;
420 std::vector<std::size_t> nnz_;
421 std::size_t total_nnz_ = 0;
// Cached views, rebuilt by generate_tiles_()/generate_segments_().
423 std::vector<segment_type> tiles_;
424 std::vector<segment_type> segments_;
Definition: iterator_adaptor.hpp:23
Definition: matrix_entry.hpp:20
Definition: matrix_entry.hpp:115
Definition: allocators.hpp:20
Definition: device_vector.hpp:13
Definition: sparse_matrix.hpp:22
Definition: matrix_partition.hpp:23
Definition: sparse_matrix.hpp:135
Definition: csr_matrix_view.hpp:126