7#include <dr/mp/algorithms/fill.hpp>
8#include <dr/mp/allocator.hpp>
9#include <dr/mp/containers/distribution.hpp>
10#include <dr/mp/containers/segment.hpp>
// MPI backend allocation (listing lines 18-23; the listing omits some lines,
// including the one that defines `data` and the end of the function).
// Asserts data_size > 0, logs the request together with the `data` pointer,
// calls win_.create() on default_comm() for that buffer, and inserts the
// handle returned by win_.mpi_win() into active_wins().
18 void *allocate(std::size_t data_size) {
19 assert(data_size > 0);
21 DRLOG(
"called MPI allocate({}) -> got:{}", data_size, data);
22 win_.create(default_comm(), data, data_size);
23 active_wins().insert(win_.mpi_win());
// MPI backend deallocation (listing lines 27-30; the remainder of the body is
// not shown). Asserts data_size > 0, logs the call, and erases the handle
// returned by win_.mpi_win() from active_wins().
27 void deallocate(
void *data, std::size_t data_size) {
28 assert(data_size > 0);
29 DRLOG(
"calling MPI deallocate ({}, data_size:{})", data, data_size);
30 active_wins().erase(win_.mpi_win());
// MPI backend read: issues win_.get() for `datalen` bytes into `dst`, using
// `offset` and `segment_index` (logged as "segm_offset" and "peer").
// The listing omits the end of the parameter list, the #else/#endif lines and
// the statements that advance the chunking loop.
36 void getmem(
void *dst, std::size_t offset, std::size_t datalen,
38 DRLOG(
"calling MPI get(dst:{}, "
39 "segm_offset:{}, size:{}, peer:{})",
40 dst, offset, datalen, segment_index);
// Condition: MPI_VERSION >= 4, or Intel MPI with I_MPI_NUMVERSION above
// 20211200000. The call directly below transfers everything in one win_.get().
42#if (MPI_VERSION >= 4) || \
43 (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000))
45 win_.get(dst, datalen, segment_index, offset);
// Chunked transfer: each iteration moves at most INT_MAX bytes. `off` is the
// running byte offset, applied both to dst and to the remote offset.
// The for header has no increment clause; the updates of `off` and
// `remainder` are not visible in this listing.
47 for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) {
48 std::size_t s = std::min(remainder, (std::size_t)INT_MAX);
49 DRLOG(
"{}:{} win_.get total {} now {} bytes at off {}, dst offset {}",
50 default_comm().rank(), __LINE__, datalen, s, off, offset + off);
51 win_.get((uint8_t *)dst + off, s, segment_index, offset + off);
// MPI backend write: issues win_.put() for `datalen` bytes from `src`, using
// `offset` and `segment_index` (logged as "segm_offset" and "peer").
// The listing omits the end of the parameter list, the branch/#else/#endif
// lines that separate the three transfer variants below, and the statements
// that advance the chunking loops.
58 void putmem(
void const *src, std::size_t offset, std::size_t datalen,
60 DRLOG(
"calling MPI put(segm_offset:{}, "
61 "src:{}, size:{}, peer:{})",
62 offset, src, datalen, segment_index);
// Condition: MPI_VERSION >= 4, or Intel MPI with I_MPI_NUMVERSION above
// 20211200000.
64#if (MPI_VERSION >= 4) || \
65 (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000))
// Chunked transfer: at most INT_MAX bytes per win_.put(); `off` is the running
// byte offset applied to both src and the remote offset.
// NOTE(review): (uint8_t *)src drops the const qualifier of `src`.
68 for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) {
69 std::size_t s = std::min(remainder, (std::size_t)INT_MAX);
70 DRLOG(
"{}:{} win_.put {} bytes at off {}, dst offset {}",
71 default_comm().rank(), __LINE__, s, off, offset + off);
72 win_.put((uint8_t *)src + off, s, segment_index, offset + off);
// Single-call transfer of the whole range.
78 win_.put(src, datalen, segment_index, offset);
// Second chunked loop, textually identical to the one above; which
// preprocessor/runtime branch it belongs to is not visible here.
81 for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) {
82 std::size_t s = std::min(remainder, (std::size_t)INT_MAX);
83 DRLOG(
"{}:{} win_.put {} bytes at off {}, dst offset {}",
84 default_comm().rank(), __LINE__, s, off, offset + off);
85 win_.put((uint8_t *)src + off, s, segment_index, offset + off);
// Returns the rank reported by the communicator attached to win_.
92 std::size_t getrank() const {
  const auto my_rank = win_.communicator().rank();
  return my_rank;
}
// Forwards to win_.fence().
94 void fence()
const { win_.fence(); }
// ISHMEM backend allocation (listing lines 102-105; the end of the function
// is not shown). Asserts data_size > 0, stores the result of
// ishmem_malloc(data_size) in shared_mem_, and logs the size and pointer.
102 void *allocate(std::size_t data_size) {
103 assert(data_size > 0);
104 shared_mem_ = ishmem_malloc(data_size);
105 DRLOG(
"called ishmem_malloc({}) -> got:{}", data_size, shared_mem_);
// ISHMEM backend deallocation (listing lines 109-112; the remainder of the
// body is not shown). Asserts data_size > 0 and that `data` is the pointer
// held in shared_mem_, then logs the upcoming ishmem_free.
// NOTE(review): logging goes through drlog.debug with a trailing "\n", while
// the neighbouring functions use the DRLOG macro - confirm this is intended.
109 void deallocate(
void *data, std::size_t data_size) {
110 assert(data_size > 0);
111 assert(data == shared_mem_);
112 drlog.debug(
"calling ishmem_free({})\n", data);
// ISHMEM backend read (the end of the parameter list and the closing brace
// are not shown). Copies `datalen` bytes into `dst` with ishmem_getmem,
// passing `segment_index` as the last argument (logged as "peer").
116 void getmem(
void *dst, std::size_t offset, std::size_t datalen,
// Source address: shared_mem_ advanced by `offset` bytes (std::byte pointer
// arithmetic).
118 void *src =
static_cast<std::byte *
>(shared_mem_) + offset;
120 DRLOG(
"calling ishmem_getmem(dst:{}, src:{} (= dv:{} + "
121 "segm_offset:{}), size:{}, peer:{})",
122 dst, src, shared_mem_, offset, datalen, segment_index);
124 ishmem_getmem(dst, src, datalen, segment_index);
// ISHMEM backend write (the end of the parameter list and the closing brace
// are not shown). Copies `datalen` bytes from `src` with ishmem_putmem,
// passing `segment_index` as the last argument (logged as "peer").
127 void putmem(
void const *src, std::size_t offset, std::size_t datalen,
// Destination address: shared_mem_ advanced by `offset` bytes (std::byte
// pointer arithmetic).
129 void *dst =
static_cast<std::byte *
>(shared_mem_) + offset;
130 DRLOG(
"calling ishmem_putmem(dst:{} (= dv:{} + segm_offset:{}), "
131 "src:{}, size:{}, peer:{})",
132 dst, shared_mem_, offset, src, datalen, segment_index);
133 ishmem_putmem(dst, src, datalen, segment_index);
// Returns the value of ishmem_my_pe(), converted to std::size_t, after
// logging it. The closing brace is not shown in this listing.
136 std::size_t getrank()
const {
137 auto my_process_segment_index = ishmem_my_pe();
138 DRLOG(
"called ishmem_my_pe() -> {}", my_process_segment_index);
139 return my_process_segment_index;
// Member type aliases of the container (listing lines 153-156).
// Element type is the template parameter T.
153 using value_type = T;
154 using size_type = std::size_t;
155 using difference_type = std::ptrdiff_t;
// Backend type is the template parameter BackendT.
156 using backend_type = BackendT;
// Nested iterator traits: tagged as a random access iterator; value_type and
// difference_type are taken from distributed_vector.
160 using iterator_category = std::random_access_iterator_tag;
161 using value_type =
typename distributed_vector::value_type;
162 using difference_type =
typename distributed_vector::difference_type;
// Constructor initializer list (its signature line is not shown): stores the
// given parent and offset in parent_ and offset_.
166 : parent_(parent), offset_(offset) {}
// Iterator arithmetic (closing braces and the friend's body are not shown).
// iterator + n: new iterator on the same parent with the offset advanced by n.
168 auto operator+(difference_type n)
const {
169 return iterator(parent_, offset_ + n);
// n + iterator: header only; the body is omitted from this listing.
171 friend auto operator+(difference_type n,
const iterator &other) {
// iterator - n: new iterator on the same parent with the offset reduced by n.
174 auto operator-(difference_type n)
const {
175 return iterator(parent_, offset_ - n);
// Difference of the two iterators' offsets; parent_ is not consulted.
177 auto operator-(iterator other) const {
  const difference_type distance = offset_ - other.offset_;
  return distance;
}
// Headers of the mutating iterator operators; their bodies are omitted from
// this listing, so only the signatures are documented.
// Compound addition, returns by reference.
179 auto &operator+=(difference_type n) {
// Compound subtraction, returns by reference.
183 auto &operator-=(difference_type n) {
// Post-increment (int dummy parameter), returns by value.
191 auto operator++(
int) {
// Post-decrement (int dummy parameter), returns by value.
200 auto operator--(
int) {
// Equality of two iterators (fragment).
// A separate branch is entered when either iterator has a null parent_; the
// body of that branch is omitted from this listing. The return statement
// shown below compares offsets only.
206 bool operator==(
iterator other)
const {
207 if (parent_ ==
nullptr || other.parent_ ==
nullptr) {
210 return offset_ == other.offset_;
// Three-way comparison by offset. Asserts that both iterators share the same
// parent_. The closing brace is not shown in this listing.
213 auto operator<=>(
iterator other)
const {
214 assert(parent_ == other.parent_);
215 return offset_ <=> other.offset_;
// Dereference (fragment; the line that starts the return expression is not
// shown). Indexes segments() with offset_ / segment_size to pick the segment,
// then with offset_ % segment_size to pick the element inside it.
218 auto operator*()
const {
219 auto segment_size = parent_->segment_size_;
221 ->segments()[offset_ / segment_size][offset_ % segment_size];
// Indexed access relative to this iterator: dereferences the iterator that
// lies n positions away.
223 auto operator[](difference_type n) const {
  const auto target = *this + n;
  return *target;
}
// Body fragments of iterator members whose headers are not shown.
// First fragment: takes begin() of the segment at offset_ / segment_size and
// advances it by offset_ % segment_size; the rest of the expression is
// omitted from this listing.
226 auto segment_size = parent_->segment_size_;
227 return (parent_->segments()[offset_ / segment_size].begin() +
228 offset_ % segment_size)
// Second fragment: passes the parent's segments and offset_ to
// dr::__detail::drop_segments and returns the result.
239 return dr::__detail::drop_segments(parent_->segments(), offset_);
// Iterator position; divided by the parent's segment_size_ to select a
// segment, with the remainder as the index inside that segment.
244 difference_type offset_;
// Body fragments; the enclosing function headers are not shown.
// Fills the whole container with fill_value. The listing's index places the
// (size, fill_value, dist) constructor at line 259, just above this line.
262 mp::fill(*
this, fill_value);
// Hands the buffer back to the backend only when data_ is non-null; the byte
// count passed is data_size_ * sizeof(value_type).
268 if (data_ !=
nullptr) {
269 backend.deallocate(data_, data_size_ *
sizeof(value_type));
// Accessor: returns a copy of the size_ member.
282 auto size() const {
  return this->size_;
}
// Accessor: returns a reference to the object halo_ points to.
285 auto &halo() const {
  auto &halo_ref = *halo_;
  return halo_ref;
}
// Returns rng::views::all applied to the segments_ container.
287 auto segments() const {
  auto all_segments = rng::views::all(segments_);
  return all_segments;
}
// Forwards to backend.fence().
289 void fence() { backend.fence(); }
// Accessor: returns a copy of the segment_size_ member.
291 auto segment_size() const {
  return this->segment_size_;
}
// Returns segment_id multiplied by segment_size_. The closing brace is not
// shown in this listing.
293 auto get_segment_offset(std::size_t segment_id)
const {
294 return segment_id * segment_size_;
// Sets up the container for `size` elements under distribution `dist`:
// computes the segment and buffer sizes, allocates the buffer through the
// backend, creates the halo object and builds the segment list.
// Several lines of the function (listing lines 299, 301-302, 306, 312,
// 314-315, 317-318, 320 and everything after 324) are omitted here.
298 void init(
auto size,
auto dist) {
300 distribution_ = dist;
303 auto comm_size = default_comm().size();
304 auto hb = dist.halo();
305 std::size_t gran = dist.granularity();
// size and both halo widths must be multiples of the granularity.
// NOTE(review): the second and third asserts check hb.prev and hb.next, but
// their messages still say "size" - looks like a copy-paste; confirm.
307 assert(
size % gran == 0 &&
"size must be a multiple of the granularity");
308 assert(hb.prev % gran == 0 &&
"size must be a multiple of the granularity");
309 assert(hb.next % gran == 0 &&
"size must be a multiple of the granularity");
// Segment length in elements: the number of granules is the largest of
// ceil((size / gran) / comm_size), hb.prev / gran and hb.next / gran; that
// count is then multiplied back by gran.
310 segment_size_ = gran * std::max({(
size / gran + comm_size - 1) / comm_size,
311 hb.prev / gran, hb.next / gran});
// Buffer length in elements: one segment plus both halo widths.
313 data_size_ = segment_size_ + hb.prev + hb.next;
// The backend is asked for data_size_ * sizeof(T) bytes.
316 data_ =
static_cast<T *
>(backend.allocate(data_size_ *
sizeof(T)));
// NOTE(review): halo_ is created with a raw `new`; the matching release is
// not visible in this listing.
319 halo_ =
new span_halo<T>(default_comm(), data_, data_size_, hb);
// One segment entry per segment_size_-sized slice of [0, size); std::min
// makes the last slice shorter when size is not a multiple of segment_size_.
321 std::size_t segment_index = 0;
322 for (std::size_t i = 0; i <
size; i += segment_size_) {
323 segments_.emplace_back(
this, segment_index++,
324 std::min(segment_size_,
size - i), data_size_);
// Friend declaration and data members (listing lines 330-339; some member
// declarations in between are omitted from this listing).
330 friend dv_segment_iterator<distributed_vector>;
// Segment length in elements, assigned in init().
332 std::size_t segment_size_ = 0;
// Buffer length in elements (segment plus halo widths), assigned in init().
333 std::size_t data_size_ = 0;
// Copy of the distribution passed to init().
337 distribution distribution_;
// Segment descriptors appended by init().
339 std::vector<dv_segment<distributed_vector>> segments_;
// Free function template halo() taking a const distributed_vector<T, B>
// reference and returning by reference; its body is omitted from this listing.
343template <
typename T,
typename B>
344auto &halo(
const distributed_vector<T, B> &dv) {
Definition: distributed_vector.hpp:14
Definition: allocator.hpp:11
Definition: distributed_vector.hpp:158
distributed vector
Definition: distributed_vector.hpp:150
auto size() const
Returns size.
Definition: distributed_vector.hpp:282
auto operator[](difference_type n) const
Returns reference using index.
Definition: distributed_vector.hpp:284
distributed_vector(std::size_t size, value_type fill_value, distribution dist=distribution())
Constructor.
Definition: distributed_vector.hpp:259
auto end() const
Returns iterator to end.
Definition: distributed_vector.hpp:279
auto begin() const
Returns iterator to beginning.
Definition: distributed_vector.hpp:277
distributed_vector(std::size_t size=0, distribution dist=distribution())
Constructor.
Definition: distributed_vector.hpp:254
Definition: communicator.hpp:242
Definition: distribution.hpp:11