7namespace dr::mp::__detail {
// std_reduce: folds the forward range `r` with `binary_op` by calling
// std::reduce with the std::execution::par_unseq policy. The init value is
// the first element of `r`, explicitly converted to the range's value type.
// NOTE(review): this listing appears to have dropped lines (leading digits
// look like listing line numbers, closing braces are absent). The statement
// that advances `skip1` past the first element is not visible here — confirm
// it exists, otherwise the first element would be folded in twice.
9inline auto std_reduce(rng::forward_range
auto &&r,
auto &&binary_op) {
10 using value_type = rng::range_value_t<
decltype(r)>;
// Start iterator for the reduction (named for skipping the first element).
14 auto skip1 = rng::begin(r);
17 return std::reduce(std::execution::par_unseq, skip1, rng::end(r),
18 value_type(*rng::begin(r)), binary_op);
// dpl_reduce: folds the forward range `r` with `binary_op` through
// std::reduce executed with dpl_policy(). The visible code is guarded by
// SYCL_LANGUAGE_VERSION and picks the init value in one of two ways:
//   * sycl::known_identity_v<Fn, T> when SYCL reports a known identity for
//     the (operation, value) type pair, or
//   * sycl_get_deref(rng::begin(r)), logged as "peel 1st value".
// NOTE(review): the iterator arguments of both std::reduce calls, the else
// branch, and the path taken when SYCL_LANGUAGE_VERSION is undefined are not
// visible in this listing — confirm against the full header.
22inline auto dpl_reduce(rng::forward_range
auto &&r,
auto &&binary_op) {
// Value-initialized element of the range's value type.
// NOTE(review): presumably the result for non-SYCL builds; its use is not
// visible here — confirm.
23 rng::range_value_t<
decltype(r)> none{};
24#ifdef SYCL_LANGUAGE_VERSION
28 using T = rng::range_value_t<
decltype(r)>;
29 using Fn =
decltype(binary_op);
// Compile-time selection on whether SYCL knows an identity for (Fn, T).
30 if constexpr (sycl::has_known_identity_v<Fn, T>) {
31 dr::drlog.debug(
" known identity\n");
32 return std::reduce(dpl_policy(),
35 sycl::known_identity_v<Fn, T>, binary_op);
37 dr::drlog.debug(
" peel 1st value\n");
38 return std::reduce(dpl_policy(),
41 sycl_get_deref(rng::begin(r)), binary_op);
// __detail::reduce (no init value): reduces the distributed range `dr` with
// `binary_op` using the communicator returned by default_comm().
//
// Parameters:
//   root          - rank compared against comm.rank() and passed to
//                   comm.gather.
//   root_provided - whether the caller supplied `root`; in the visible
//                   serial path, false means every rank computes the result.
//   dr            - the distributed range to reduce.
//   binary_op     - the combining operation.
//
// Two strategies are visible, logged as "Parallel reduce" and
// "Serial reduce".
// NOTE(review): the conditions selecting between them, the condition
// guarding the early value-initialized return, the declaration of `result`
// and the final return are not visible in this listing — confirm against the
// full header.
58template <dr::distributed_range DR>
59auto reduce(std::size_t root,
bool root_provided, DR &&dr,
auto &&binary_op) {
60 using value_type = rng::range_value_t<DR>;
61 auto comm = default_comm();
// Early exit returning a value-initialized element.
// NOTE(review): presumably taken for an empty range; guard not visible.
64 return rng::range_value_t<DR>{};
68 dr::drlog.debug(
"Parallel reduce\n");
// Per-segment reducer; captures by copy. Asserts the segment is non-empty,
// then reduces it with dpl_reduce or std_reduce.
// NOTE(review): the condition choosing DPL vs CPU is not visible here.
71 auto reduce = [=](
auto &&r) {
72 assert(rng::size(r) > 0);
74 dr::drlog.debug(
" with DPL\n");
75 return dpl_reduce(r, binary_op);
77 dr::drlog.debug(
" with CPU\n");
78 return std_reduce(r, binary_op);
// Reduce each local segment, then fold the per-segment results into `local`.
81 auto locals = rng::views::transform(local_segments(dr), reduce);
82 auto local = std_reduce(locals, binary_op);
// One slot per rank in the communicator.
84 std::vector<value_type> all(comm.size());
// Gather towards `root`; only the rank equal to `root` reduces `all` here.
// NOTE(review): presumably taken when root_provided is true — confirm.
87 comm.gather(local, std::span{all}, root);
88 if (root == comm.rank()) {
89 return std_reduce(all, binary_op);
// all_gather variant: this path reduces `all` without a rank check.
95 comm.all_gather(local, all);
96 return std_reduce(all, binary_op);
99 dr::drlog.debug(
"Serial reduce\n");
// Serial path: reduce the whole distributed range directly, on every rank
// when no root was provided, otherwise only on the root rank.
101 if (!root_provided || root == comm.rank()) {
102 result = std_reduce(dr, binary_op);
// __detail::reduce (with init value): combines `init` with the reduction of
// `dr`, i.e. binary_op(init, reduce(root, root_provided, dr, binary_op)).
// `binary_op` defaults to std::plus<>{}.
// NOTE(review): the body of the rng::empty(dr) branch is not visible in this
// listing; presumably it returns `init` unchanged — confirm.
110template <
typename T, dr::distributed_range DR>
111T reduce(std::size_t root,
bool root_provided, DR &&dr, T init,
112 auto &&binary_op = std::plus<>{}) {
// Special-case an empty range before delegating to the no-init overload.
114 if (rng::empty(dr)) {
117 return binary_op(init, reduce(root, root_provided, dr, binary_op));
// no_optimize: helper taking a single value `x`. When compiled with GCC
// (and not clang) it carries __attribute__((optimize(0))).
// NOTE(review): the return type, the matching #endif and the function body
// are not visible in this listing, so what it does with `x` cannot be stated
// here — confirm against the full header.
121#if defined(__GNUC__) && !defined(__clang__)
122 __attribute__((optimize(0)))
124 no_optimize(
auto x) {
// reduce(root, dr, init, binary_op): range overload with an explicit root.
// Forwards to __detail::reduce with root_provided = true, perfectly
// forwarding `dr` and passing `init` and `binary_op` through.
138template <
typename T, dr::distributed_range DR>
139auto reduce(std::size_t root, DR &&dr, T init,
auto &&binary_op) {
140 return __detail::reduce(root,
true, std::forward<DR>(dr), init, binary_op);
// reduce(dr, init, binary_op): range overload without a root.
// Forwards to __detail::reduce with root = 0 and root_provided = false,
// perfectly forwarding `dr` and passing `init` and `binary_op` through.
143template <
typename T, dr::distributed_range DR>
144auto reduce(DR &&dr, T init,
auto &&binary_op) {
145 return __detail::reduce(0,
false, std::forward<DR>(dr), init, binary_op);
// reduce(root, dr, init): range overload with an explicit root and no
// caller-supplied operation. Forwards to __detail::reduce with
// root_provided = true.
// NOTE(review): the trailing argument(s) of the call are cut off in this
// listing; presumably std::plus<>{} as in the sibling overloads — confirm.
151template <
typename T, dr::distributed_range DR>
152auto reduce(std::size_t root, DR &&dr, T init) {
153 return __detail::reduce(root,
true, std::forward<DR>(dr), init,
// reduce(dr, init): range overload without a root or caller-supplied
// operation. Forwards to __detail::reduce with root = 0,
// root_provided = false and std::plus<>{} as the operation.
157template <
typename T, dr::distributed_range DR>
auto reduce(DR &&dr, T init) {
158 return __detail::reduce(0,
false, std::forward<DR>(dr), init, std::plus<>{});
// reduce(root, dr): range overload with an explicit root, no init value and
// no caller-supplied operation. Forwards to the no-init __detail::reduce with
// root_provided = true and std::plus<>{} as the operation.
164template <dr::distributed_range DR>
auto reduce(std::size_t root, DR &&dr) {
165 return __detail::reduce(root,
true, std::forward<DR>(dr), std::plus<>{});
// reduce(dr): range overload with no root, init value or caller-supplied
// operation. Calls the no-init __detail::reduce with root = 0,
// root_provided = false and std::plus<>{}, stores the result in `x`, and
// passes `x` to __detail::no_optimize.
// NOTE(review): the reason for the no_optimize call and the return statement
// are not visible in this listing — confirm against the full header.
169template <dr::distributed_range DR>
auto reduce(DR &&dr) {
170 auto x = __detail::reduce(0,
false, std::forward<DR>(dr), std::plus<>{});
178 __detail::no_optimize(x);
// reduce(root, first, last, init, binary_op): iterator-pair overload with an
// explicit root. Wraps [first, last) in rng::subrange and forwards to
// __detail::reduce with root_provided = true.
// NOTE(review): the trailing argument(s) of the call are cut off in this
// listing; presumably `binary_op` — confirm.
190template <
typename T, dr::distributed_iterator DI>
191auto reduce(std::size_t root, DI first, DI last, T init,
auto &&binary_op) {
192 return __detail::reduce(root,
true, rng::subrange(first, last), init,
// reduce(first, last, init, binary_op): iterator-pair overload without a
// root. Wraps [first, last) in rng::subrange and forwards to
// __detail::reduce with root = 0 and root_provided = false.
// NOTE(review): the trailing argument(s) of the call are cut off in this
// listing; presumably `binary_op` — confirm.
196template <
typename T, dr::distributed_iterator DI>
197auto reduce(DI first, DI last, T init,
auto &&binary_op) {
198 return __detail::reduce(0,
false, rng::subrange(first, last), init,
// reduce(root, first, last, init): iterator-pair overload with an explicit
// root and no caller-supplied operation. Wraps [first, last) in rng::subrange
// and forwards to __detail::reduce with root_provided = true.
// NOTE(review): the trailing argument(s) of the call are cut off in this
// listing; presumably std::plus<>{} — confirm.
205template <
typename T, dr::distributed_iterator DI>
206auto reduce(std::size_t root, DI first, DI last, T init) {
207 return __detail::reduce(root,
true, rng::subrange(first, last), init,
// reduce(first, last, init): iterator-pair overload without a root or
// caller-supplied operation. Wraps [first, last) in rng::subrange and
// forwards to __detail::reduce with root = 0 and root_provided = false.
// NOTE(review): the trailing argument(s) of the call are cut off in this
// listing; presumably std::plus<>{} — confirm.
211template <
typename T, dr::distributed_iterator DI>
212auto reduce(DI first, DI last, T init) {
213 return __detail::reduce(0,
false, rng::subrange(first, last), init,
// reduce(root, first, last): iterator-pair overload with an explicit root
// and no init value. Wraps [first, last) in rng::subrange and forwards to
// __detail::reduce with root_provided = true.
// NOTE(review): the trailing argument(s) of the call are cut off in this
// listing; presumably std::plus<>{} — confirm.
220template <dr::distributed_iterator DI>
221auto reduce(std::size_t root, DI first, DI last) {
222 return __detail::reduce(root,
true, rng::subrange(first, last),
// reduce(first, last): iterator-pair overload with no root, init value or
// caller-supplied operation. Wraps [first, last) in rng::subrange and
// forwards to the no-init __detail::reduce with root = 0,
// root_provided = false and std::plus<>{} as the operation.
226template <dr::distributed_iterator DI>
auto reduce(DI first, DI last) {
227 return __detail::reduce(0,
false, rng::subrange(first, last), std::plus<>{});
Definition: onedpl_direct_iterator.hpp:15