Distributed Ranges
Loading...
Searching...
No Matches
copy.hpp
1// SPDX-FileCopyrightText: Intel Corporation
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#pragma once
6
7#include <memory>
8#include <type_traits>
9
10#include <sycl/sycl.hpp>
11
12#include <dr/concepts/concepts.hpp>
13#include <dr/detail/segments_tools.hpp>
14#include <dr/sp/detail.hpp>
15#include <dr/sp/device_ptr.hpp>
16#include <dr/sp/util.hpp>
17
18namespace dr::sp {
19
20// Copy between contiguous ranges
21template <std::contiguous_iterator InputIt, std::contiguous_iterator OutputIt>
22 requires __detail::is_syclmemcopyable<std::iter_value_t<InputIt>,
23 std::iter_value_t<OutputIt>>
24sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) {
25 // auto &&q = dr::sp::__detail::default_queue();
26 auto &&q = __detail::get_queue_for_pointers(first, d_first);
27 return q.memcpy(std::to_address(d_first), std::to_address(first),
28 sizeof(std::iter_value_t<InputIt>) * (last - first));
29}
30
32template <std::contiguous_iterator InputIt, std::contiguous_iterator OutputIt>
33 requires __detail::is_syclmemcopyable<std::iter_value_t<InputIt>,
34 std::iter_value_t<OutputIt>>
35OutputIt copy(InputIt first, InputIt last, OutputIt d_first) {
36 copy_async(first, last, d_first).wait();
37 return d_first + (last - first);
38}
39
40// Copy from contiguous range to device
41template <std::contiguous_iterator Iter, typename T>
42 requires __detail::is_syclmemcopyable<std::iter_value_t<Iter>, T>
43sycl::event copy_async(Iter first, Iter last, device_ptr<T> d_first) {
44 // auto &&q = dr::sp::__detail::default_queue();
45 auto &&q = __detail::get_queue_for_pointers(first, d_first);
46 return q.memcpy(d_first.get_raw_pointer(), std::to_address(first),
47 sizeof(T) * (last - first));
48}
49
50template <std::contiguous_iterator Iter, typename T>
51 requires __detail::is_syclmemcopyable<std::iter_value_t<Iter>, T>
52device_ptr<T> copy(Iter first, Iter last, device_ptr<T> d_first) {
53 copy_async(first, last, d_first).wait();
54 return d_first + (last - first);
55}
56
57// Copy from device to contiguous range
58template <typename T, std::contiguous_iterator Iter>
59 requires __detail::is_syclmemcopyable<T, std::iter_value_t<Iter>>
60sycl::event copy_async(device_ptr<T> first, device_ptr<T> last, Iter d_first) {
61 // auto &&q = dr::sp::__detail::default_queue();
62 auto &&q = __detail::get_queue_for_pointers(first, d_first);
63 return q.memcpy(std::to_address(d_first), first.get_raw_pointer(),
64 sizeof(T) * (last - first));
65}
66
67template <typename T, std::contiguous_iterator Iter>
68 requires __detail::is_syclmemcopyable<T, std::iter_value_t<Iter>>
69Iter copy(device_ptr<T> first, device_ptr<T> last, Iter d_first) {
70 copy_async(first, last, d_first).wait();
71 return d_first + (last - first);
72}
73
74// Copy from device to device
75template <typename T>
76 requires(!std::is_const_v<T> && std::is_trivially_copyable_v<T>)
77sycl::event copy_async(device_ptr<std::add_const_t<T>> first,
78 device_ptr<std::add_const_t<T>> last,
79 device_ptr<T> d_first) {
80 // auto &&q = dr::sp::__detail::default_queue();
81 auto &&q = __detail::get_queue_for_pointers(first, d_first);
82 return q.memcpy(d_first.get_raw_pointer(), first.get_raw_pointer(),
83 sizeof(T) * (last - first));
84}
85
86template <typename T>
87 requires(!std::is_const_v<T> && std::is_trivially_copyable_v<T>)
88sycl::event copy_async(sycl::queue &q, device_ptr<std::add_const_t<T>> first,
89 device_ptr<std::add_const_t<T>> last,
90 device_ptr<T> d_first) {
91 return q.memcpy(d_first.get_raw_pointer(), first.get_raw_pointer(),
92 sizeof(T) * (last - first));
93}
94
95template <typename T>
96 requires(!std::is_const_v<T> && std::is_trivially_copyable_v<T>)
97device_ptr<T> copy(device_ptr<std::add_const_t<T>> first,
98 device_ptr<std::add_const_t<T>> last,
99 device_ptr<T> d_first) {
100 copy_async(first, last, d_first).wait();
101 return d_first + (last - first);
102}
103
104// Copy from local range to distributed range
105template <std::forward_iterator InputIt, dr::distributed_iterator OutputIt>
106 requires __detail::is_syclmemcopyable<std::iter_value_t<InputIt>,
107 std::iter_value_t<OutputIt>>
108sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) {
109 auto &&segments = dr::ranges::segments(d_first);
110 auto segment_iter = rng::begin(segments);
111
112 std::vector<sycl::event> events;
113
114 while (first != last) {
115 auto &&segment = *segment_iter;
116 auto size = rng::distance(segment);
117
118 std::size_t n_to_copy = std::min<size_t>(size, rng::distance(first, last));
119
120 auto local_last = first;
121 rng::advance(local_last, n_to_copy);
122
123 events.emplace_back(
124 dr::sp::copy_async(first, local_last, rng::begin(segment)));
125
126 ++segment_iter;
127 rng::advance(first, n_to_copy);
128 }
129
130 return dr::sp::__detail::combine_events(events);
131}
132
133auto copy(rng::contiguous_range auto r, dr::distributed_iterator auto d_first) {
134 return copy(rng::begin(r), rng::end(r), d_first);
135}
136
137auto copy(dr::distributed_range auto r, std::contiguous_iterator auto d_first) {
138 return copy(rng::begin(r), rng::end(r), d_first);
139}
140
141template <std::forward_iterator InputIt, dr::distributed_iterator OutputIt>
142 requires __detail::is_syclmemcopyable<std::iter_value_t<InputIt>,
143 std::iter_value_t<OutputIt>>
144OutputIt copy(InputIt first, InputIt last, OutputIt d_first) {
145 copy_async(first, last, d_first).wait();
146 return d_first + (last - first);
147}
148
149// Copy from distributed range to local range
150template <dr::distributed_iterator InputIt, std::forward_iterator OutputIt>
151 requires __detail::is_syclmemcopyable<std::iter_value_t<InputIt>,
152 std::iter_value_t<OutputIt>>
153sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) {
154 auto dist = rng::distance(first, last);
155 auto segments =
156 dr::__detail::take_segments(dr::ranges::segments(first), dist);
157
158 std::vector<sycl::event> events;
159
160 for (auto &&segment : segments) {
161 auto size = rng::distance(segment);
162
163 events.emplace_back(
164 dr::sp::copy_async(rng::begin(segment), rng::end(segment), d_first));
165
166 rng::advance(d_first, size);
167 }
168
169 return dr::sp::__detail::combine_events(events);
170}
171
172template <dr::distributed_iterator InputIt, std::forward_iterator OutputIt>
173 requires __detail::is_syclmemcopyable<std::iter_value_t<InputIt>,
174 std::iter_value_t<OutputIt>>
175OutputIt copy(InputIt first, InputIt last, OutputIt d_first) {
176 copy_async(first, last, d_first).wait();
177 return d_first + (last - first);
178}
179
180// Copy from distributed range to distributed range
181template <dr::distributed_iterator InputIt, dr::distributed_iterator OutputIt>
182 requires __detail::is_syclmemcopyable<std::iter_value_t<InputIt>,
183 std::iter_value_t<OutputIt>>
184sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) {
185 auto dist = rng::distance(first, last);
186 auto segments =
187 dr::__detail::take_segments(dr::ranges::segments(first), dist);
188
189 std::vector<sycl::event> events;
190
191 for (auto &&segment : segments) {
192 auto size = rng::distance(segment);
193
194 events.emplace_back(
195 dr::sp::copy_async(rng::begin(segment), rng::end(segment), d_first));
196
197 rng::advance(d_first, size);
198 }
199
200 return dr::sp::__detail::combine_events(events);
201}
202
203template <dr::distributed_iterator InputIt, dr::distributed_iterator OutputIt>
204 requires __detail::is_syclmemcopyable<std::iter_value_t<InputIt>,
205 std::iter_value_t<OutputIt>>
206OutputIt copy(InputIt first, InputIt last, OutputIt d_first) {
207 copy_async(first, last, d_first).wait();
208 return d_first + (last - first);
209}
210
211// Ranges versions
212
213// Distributed to distributed
214template <dr::distributed_range R, dr::distributed_iterator O>
215 requires __detail::is_syclmemcopyable<rng::range_value_t<R>,
216 std::iter_value_t<O>>
217sycl::event copy_async(R &&r, O result) {
218 return copy_async(rng::begin(r), rng::end(r), result);
219}
220
221template <dr::distributed_range R, dr::distributed_iterator O>
222 requires __detail::is_syclmemcopyable<rng::range_value_t<R>,
223 std::iter_value_t<O>>
224O copy(R &&r, O result) {
225 return copy(rng::begin(r), rng::end(r), result);
226}
227
228} // namespace dr::sp
Definition: concepts.hpp:31
Definition: concepts.hpp:20