Ginkgo 1.7.0 documentation, generated from a branch based on master.
A numerical linear algebra library targeting many-core architectures
mpi.hpp
1/*******************************<GINKGO LICENSE>******************************
2Copyright (c) 2017-2023, the Ginkgo authors
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions
7are met:
8
91. Redistributions of source code must retain the above copyright
10notice, this list of conditions and the following disclaimer.
11
122. Redistributions in binary form must reproduce the above copyright
13notice, this list of conditions and the following disclaimer in the
14documentation and/or other materials provided with the distribution.
15
163. Neither the name of the copyright holder nor the names of its
17contributors may be used to endorse or promote products derived from
18this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31******************************<GINKGO LICENSE>*******************************/
32
33#ifndef GKO_PUBLIC_CORE_BASE_MPI_HPP_
34#define GKO_PUBLIC_CORE_BASE_MPI_HPP_
35
36
37#include <memory>
38#include <type_traits>
39#include <utility>
40
41
42#include <ginkgo/config.hpp>
43#include <ginkgo/core/base/exception.hpp>
44#include <ginkgo/core/base/exception_helpers.hpp>
45#include <ginkgo/core/base/executor.hpp>
46#include <ginkgo/core/base/types.hpp>
47#include <ginkgo/core/base/utils_helper.hpp>
48
49
50#if GINKGO_BUILD_MPI
51
52
53#include <mpi.h>
54
55
56namespace gko {
57namespace experimental {
64namespace mpi {
65
66
70inline constexpr bool is_gpu_aware()
71{
72#if GINKGO_HAVE_GPU_AWARE_MPI
73 return true;
74#else
75 return false;
76#endif
77}
78
79
88
89
90#define GKO_REGISTER_MPI_TYPE(input_type, mpi_type) \
91 template <> \
92 struct type_impl<input_type> { \
93 static MPI_Datatype get_type() { return mpi_type; } \
94 }
95
104template <typename T>
105struct type_impl {};
106
107
108GKO_REGISTER_MPI_TYPE(char, MPI_CHAR);
109GKO_REGISTER_MPI_TYPE(unsigned char, MPI_UNSIGNED_CHAR);
110GKO_REGISTER_MPI_TYPE(unsigned, MPI_UNSIGNED);
111GKO_REGISTER_MPI_TYPE(int, MPI_INT);
112GKO_REGISTER_MPI_TYPE(unsigned short, MPI_UNSIGNED_SHORT);
113GKO_REGISTER_MPI_TYPE(unsigned long, MPI_UNSIGNED_LONG);
114GKO_REGISTER_MPI_TYPE(long, MPI_LONG);
115GKO_REGISTER_MPI_TYPE(long long, MPI_LONG_LONG_INT);
116GKO_REGISTER_MPI_TYPE(unsigned long long, MPI_UNSIGNED_LONG_LONG);
117GKO_REGISTER_MPI_TYPE(float, MPI_FLOAT);
118GKO_REGISTER_MPI_TYPE(double, MPI_DOUBLE);
119GKO_REGISTER_MPI_TYPE(long double, MPI_LONG_DOUBLE);
120GKO_REGISTER_MPI_TYPE(std::complex<float>, MPI_C_FLOAT_COMPLEX);
121GKO_REGISTER_MPI_TYPE(std::complex<double>, MPI_C_DOUBLE_COMPLEX);
122
123
130class contiguous_type {
131public:
138    contiguous_type(int count, MPI_Datatype old_type) : type_(MPI_DATATYPE_NULL)
139    {
140 GKO_ASSERT_NO_MPI_ERRORS(MPI_Type_contiguous(count, old_type, &type_));
141 GKO_ASSERT_NO_MPI_ERRORS(MPI_Type_commit(&type_));
142 }
143
147    contiguous_type() : type_(MPI_DATATYPE_NULL) {}
148
152    contiguous_type(const contiguous_type&) = delete;
153
157    contiguous_type& operator=(const contiguous_type&) = delete;
158
164    contiguous_type(contiguous_type&& other) noexcept : type_(MPI_DATATYPE_NULL)
165    {
166 *this = std::move(other);
167 }
168
176    contiguous_type& operator=(contiguous_type&& other) noexcept
177    {
178 if (this != &other) {
179 this->type_ = std::exchange(other.type_, MPI_DATATYPE_NULL);
180 }
181 return *this;
182 }
183
187    ~contiguous_type()
188    {
189 if (type_ != MPI_DATATYPE_NULL) {
190 MPI_Type_free(&type_);
191 }
192 }
193
199 MPI_Datatype get() const { return type_; }
200
201private:
202 MPI_Datatype type_;
203};
204
205
210enum class thread_type {
211 serialized = MPI_THREAD_SERIALIZED,
212 funneled = MPI_THREAD_FUNNELED,
213 single = MPI_THREAD_SINGLE,
214 multiple = MPI_THREAD_MULTIPLE
215};
216
217
227class environment {
228public:
229 static bool is_finalized()
230 {
231 int flag = 0;
232 GKO_ASSERT_NO_MPI_ERRORS(MPI_Finalized(&flag));
233 return flag;
234 }
235
236 static bool is_initialized()
237 {
238 int flag = 0;
239 GKO_ASSERT_NO_MPI_ERRORS(MPI_Initialized(&flag));
240 return flag;
241 }
242
248 int get_provided_thread_support() const { return provided_thread_support_; }
249
258 environment(int& argc, char**& argv,
259 const thread_type thread_t = thread_type::serialized)
260 {
261 this->required_thread_support_ = static_cast<int>(thread_t);
262 GKO_ASSERT_NO_MPI_ERRORS(
263 MPI_Init_thread(&argc, &argv, this->required_thread_support_,
264 &(this->provided_thread_support_)));
265 }
266
270    ~environment() { MPI_Finalize(); }
271
272 environment(const environment&) = delete;
273 environment(environment&&) = delete;
274 environment& operator=(const environment&) = delete;
275 environment& operator=(environment&&) = delete;
276
277private:
278 int required_thread_support_;
279 int provided_thread_support_;
280};
281
282
283namespace {
284
285
290class comm_deleter {
291public:
292 using pointer = MPI_Comm*;
293 void operator()(pointer comm) const
294 {
295 GKO_ASSERT(*comm != MPI_COMM_NULL);
296 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_free(comm));
297 delete comm;
298 }
299};
300
301
302} // namespace
303
304
308struct status {
312 status() : status_(MPI_Status{}) {}
313
319 MPI_Status* get() { return &this->status_; }
320
331 template <typename T>
332 int get_count(const T* data) const
333 {
334 int count;
335 MPI_Get_count(&status_, type_impl<T>::get_type(), &count);
336 return count;
337 }
338
339private:
340 MPI_Status status_;
341};
342
343
348class request {
349public:
354    request() : req_(MPI_REQUEST_NULL) {}
355
356 request(const request&) = delete;
357
358 request& operator=(const request&) = delete;
359
360 request(request&& o) noexcept { *this = std::move(o); }
361
362 request& operator=(request&& o) noexcept
363 {
364 if (this != &o) {
365 this->req_ = std::exchange(o.req_, MPI_REQUEST_NULL);
366 }
367 return *this;
368 }
369
370 ~request()
371 {
372 if (req_ != MPI_REQUEST_NULL) {
373 if (MPI_Request_free(&req_) != MPI_SUCCESS) {
374 std::terminate(); // since we can't throw in destructors, we
375 // have to terminate the program
376 }
377 }
378 }
379
385 MPI_Request* get() { return &this->req_; }
386
393    status wait()
394    {
395        status status;
396        GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(&req_, status.get()));
397 return status;
398 }
399
400
401private:
402 MPI_Request req_;
403};
404
405
413inline std::vector<status> wait_all(std::vector<request>& req)
414{
415 std::vector<status> stat;
416 for (std::size_t i = 0; i < req.size(); ++i) {
417 stat.emplace_back(req[i].wait());
418 }
419 return stat;
420}
421
422
437class communicator {
438public:
449 communicator(const MPI_Comm& comm, bool force_host_buffer = false)
450 : comm_(), force_host_buffer_(force_host_buffer)
451 {
452 this->comm_.reset(new MPI_Comm(comm));
453 }
454
463    communicator(const MPI_Comm& comm, int color, int key)
464    {
465        MPI_Comm comm_out;
466        GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split(comm, color, key, &comm_out));
467 this->comm_.reset(new MPI_Comm(comm_out), comm_deleter{});
468 }
469
478    communicator(const communicator& comm, int color, int key)
479    {
480        MPI_Comm comm_out;
481        GKO_ASSERT_NO_MPI_ERRORS(
482            MPI_Comm_split(comm.get(), color, key, &comm_out));
483        this->comm_.reset(new MPI_Comm(comm_out), comm_deleter{});
484 }
485
491 const MPI_Comm& get() const { return *(this->comm_.get()); }
492
493 bool force_host_buffer() const { return force_host_buffer_; }
494
500 int size() const { return get_num_ranks(); }
501
507 int rank() const { return get_my_rank(); };
508
514 int node_local_rank() const { return get_node_local_rank(); };
515
521 bool operator==(const communicator& rhs) const
522 {
523 return compare(rhs.get());
524 }
525
531 bool operator!=(const communicator& rhs) const { return !(*this == rhs); }
532
537 void synchronize() const
538 {
539 GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(this->get()));
540 }
541
555 template <typename SendType>
556 void send(std::shared_ptr<const Executor> exec, const SendType* send_buffer,
557 const int send_count, const int destination_rank,
558 const int send_tag) const
559 {
560 auto guard = exec->get_scoped_device_id_guard();
561 GKO_ASSERT_NO_MPI_ERRORS(
562            MPI_Send(send_buffer, send_count, type_impl<SendType>::get_type(),
563                     destination_rank, send_tag, this->get()));
564 }
565
582 template <typename SendType>
583 request i_send(std::shared_ptr<const Executor> exec,
584 const SendType* send_buffer, const int send_count,
585 const int destination_rank, const int send_tag) const
586 {
587 auto guard = exec->get_scoped_device_id_guard();
588 request req;
589 GKO_ASSERT_NO_MPI_ERRORS(
590            MPI_Isend(send_buffer, send_count, type_impl<SendType>::get_type(),
591                      destination_rank, send_tag, this->get(), req.get()));
592 return req;
593 }
594
610 template <typename RecvType>
611 status recv(std::shared_ptr<const Executor> exec, RecvType* recv_buffer,
612 const int recv_count, const int source_rank,
613 const int recv_tag) const
614 {
615 auto guard = exec->get_scoped_device_id_guard();
616 status st;
617 GKO_ASSERT_NO_MPI_ERRORS(
618            MPI_Recv(recv_buffer, recv_count, type_impl<RecvType>::get_type(),
619                     source_rank, recv_tag, this->get(), st.get()));
620 return st;
621 }
622
638 template <typename RecvType>
639 request i_recv(std::shared_ptr<const Executor> exec, RecvType* recv_buffer,
640 const int recv_count, const int source_rank,
641 const int recv_tag) const
642 {
643 auto guard = exec->get_scoped_device_id_guard();
644 request req;
645 GKO_ASSERT_NO_MPI_ERRORS(
646            MPI_Irecv(recv_buffer, recv_count, type_impl<RecvType>::get_type(),
647                      source_rank, recv_tag, this->get(), req.get()));
648 return req;
649 }
650
663 template <typename BroadcastType>
664 void broadcast(std::shared_ptr<const Executor> exec, BroadcastType* buffer,
665 int count, int root_rank) const
666 {
667 auto guard = exec->get_scoped_device_id_guard();
668 GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast(buffer, count,
669                                           type_impl<BroadcastType>::get_type(),
670                                           root_rank, this->get()));
671 }
672
688 template <typename BroadcastType>
689 request i_broadcast(std::shared_ptr<const Executor> exec,
690 BroadcastType* buffer, int count, int root_rank) const
691 {
692 auto guard = exec->get_scoped_device_id_guard();
693 request req;
694 GKO_ASSERT_NO_MPI_ERRORS(
695            MPI_Ibcast(buffer, count, type_impl<BroadcastType>::get_type(),
696                       root_rank, this->get(), req.get()));
697 return req;
698 }
699
714 template <typename ReduceType>
715 void reduce(std::shared_ptr<const Executor> exec,
716                const ReduceType* send_buffer, ReduceType* recv_buffer,
717                int count, MPI_Op operation, int root_rank) const
718 {
719 auto guard = exec->get_scoped_device_id_guard();
720 GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count,
721                                         type_impl<ReduceType>::get_type(),
722                                         operation, root_rank, this->get()));
723 }
724
741 template <typename ReduceType>
742 request i_reduce(std::shared_ptr<const Executor> exec,
743                     const ReduceType* send_buffer, ReduceType* recv_buffer,
744                     int count, MPI_Op operation, int root_rank) const
745 {
746 auto guard = exec->get_scoped_device_id_guard();
747 request req;
748 GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce(
749            send_buffer, recv_buffer, count, type_impl<ReduceType>::get_type(),
750            operation, root_rank, this->get(), req.get()));
751 return req;
752 }
753
767 template <typename ReduceType>
768 void all_reduce(std::shared_ptr<const Executor> exec,
769 ReduceType* recv_buffer, int count, MPI_Op operation) const
770 {
771 auto guard = exec->get_scoped_device_id_guard();
772 GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(
773            MPI_IN_PLACE, recv_buffer, count, type_impl<ReduceType>::get_type(),
774            operation, this->get()));
775 }
776
792 template <typename ReduceType>
793 request i_all_reduce(std::shared_ptr<const Executor> exec,
794 ReduceType* recv_buffer, int count,
795 MPI_Op operation) const
796 {
797 auto guard = exec->get_scoped_device_id_guard();
798 request req;
799 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(
800            MPI_IN_PLACE, recv_buffer, count, type_impl<ReduceType>::get_type(),
801            operation, this->get(), req.get()));
802 return req;
803 }
804
819 template <typename ReduceType>
820 void all_reduce(std::shared_ptr<const Executor> exec,
821                    const ReduceType* send_buffer, ReduceType* recv_buffer,
822                    int count, MPI_Op operation) const
823 {
824 auto guard = exec->get_scoped_device_id_guard();
825 GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(
826            send_buffer, recv_buffer, count, type_impl<ReduceType>::get_type(),
827            operation, this->get()));
828 }
829
846 template <typename ReduceType>
847 request i_all_reduce(std::shared_ptr<const Executor> exec,
848                         const ReduceType* send_buffer, ReduceType* recv_buffer,
849                         int count, MPI_Op operation) const
850 {
851 auto guard = exec->get_scoped_device_id_guard();
852 request req;
853 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(
854            send_buffer, recv_buffer, count, type_impl<ReduceType>::get_type(),
855            operation, this->get(), req.get()));
856 return req;
857 }
858
875 template <typename SendType, typename RecvType>
876 void gather(std::shared_ptr<const Executor> exec,
877 const SendType* send_buffer, const int send_count,
878 RecvType* recv_buffer, const int recv_count,
879 int root_rank) const
880 {
881 auto guard = exec->get_scoped_device_id_guard();
882 GKO_ASSERT_NO_MPI_ERRORS(
883            MPI_Gather(send_buffer, send_count, type_impl<SendType>::get_type(),
884                       recv_buffer, recv_count, type_impl<RecvType>::get_type(),
885                       root_rank, this->get()));
886 }
887
907 template <typename SendType, typename RecvType>
908 request i_gather(std::shared_ptr<const Executor> exec,
909 const SendType* send_buffer, const int send_count,
910 RecvType* recv_buffer, const int recv_count,
911 int root_rank) const
912 {
913 auto guard = exec->get_scoped_device_id_guard();
914 request req;
915 GKO_ASSERT_NO_MPI_ERRORS(MPI_Igather(
916            send_buffer, send_count, type_impl<SendType>::get_type(),
917            recv_buffer, recv_count, type_impl<RecvType>::get_type(), root_rank,
918            this->get(), req.get()));
919 return req;
920 }
921
940 template <typename SendType, typename RecvType>
941 void gather_v(std::shared_ptr<const Executor> exec,
942 const SendType* send_buffer, const int send_count,
943 RecvType* recv_buffer, const int* recv_counts,
944 const int* displacements, int root_rank) const
945 {
946 auto guard = exec->get_scoped_device_id_guard();
947 GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv(
948            send_buffer, send_count, type_impl<SendType>::get_type(),
949            recv_buffer, recv_counts, displacements,
950            type_impl<RecvType>::get_type(), root_rank, this->get()));
951    }
952
973 template <typename SendType, typename RecvType>
974 request i_gather_v(std::shared_ptr<const Executor> exec,
975 const SendType* send_buffer, const int send_count,
976 RecvType* recv_buffer, const int* recv_counts,
977 const int* displacements, int root_rank) const
978 {
979 auto guard = exec->get_scoped_device_id_guard();
980 request req;
981 GKO_ASSERT_NO_MPI_ERRORS(MPI_Igatherv(
982            send_buffer, send_count, type_impl<SendType>::get_type(),
983            recv_buffer, recv_counts, displacements,
984            type_impl<RecvType>::get_type(), root_rank, this->get(),
985            req.get()));
986 return req;
987 }
988
1004 template <typename SendType, typename RecvType>
1005 void all_gather(std::shared_ptr<const Executor> exec,
1006 const SendType* send_buffer, const int send_count,
1007 RecvType* recv_buffer, const int recv_count) const
1008 {
1009 auto guard = exec->get_scoped_device_id_guard();
1010 GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather(
1011            send_buffer, send_count, type_impl<SendType>::get_type(),
1012            recv_buffer, recv_count, type_impl<RecvType>::get_type(),
1013            this->get()));
1014 }
1015
1034 template <typename SendType, typename RecvType>
1035 request i_all_gather(std::shared_ptr<const Executor> exec,
1036 const SendType* send_buffer, const int send_count,
1037 RecvType* recv_buffer, const int recv_count) const
1038 {
1039 auto guard = exec->get_scoped_device_id_guard();
1040 request req;
1041 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallgather(
1042            send_buffer, send_count, type_impl<SendType>::get_type(),
1043            recv_buffer, recv_count, type_impl<RecvType>::get_type(),
1044            this->get(), req.get()));
1045 return req;
1046 }
1047
1063 template <typename SendType, typename RecvType>
1064 void scatter(std::shared_ptr<const Executor> exec,
1065 const SendType* send_buffer, const int send_count,
1066 RecvType* recv_buffer, const int recv_count,
1067 int root_rank) const
1068 {
1069 auto guard = exec->get_scoped_device_id_guard();
1070 GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter(
1071            send_buffer, send_count, type_impl<SendType>::get_type(),
1072            recv_buffer, recv_count, type_impl<RecvType>::get_type(), root_rank,
1073            this->get()));
1074 }
1075
1094 template <typename SendType, typename RecvType>
1095 request i_scatter(std::shared_ptr<const Executor> exec,
1096 const SendType* send_buffer, const int send_count,
1097 RecvType* recv_buffer, const int recv_count,
1098 int root_rank) const
1099 {
1100 auto guard = exec->get_scoped_device_id_guard();
1101 request req;
1102 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscatter(
1103            send_buffer, send_count, type_impl<SendType>::get_type(),
1104            recv_buffer, recv_count, type_impl<RecvType>::get_type(), root_rank,
1105            this->get(), req.get()));
1106 return req;
1107 }
1108
1127 template <typename SendType, typename RecvType>
1128 void scatter_v(std::shared_ptr<const Executor> exec,
1129 const SendType* send_buffer, const int* send_counts,
1130 const int* displacements, RecvType* recv_buffer,
1131 const int recv_count, int root_rank) const
1132 {
1133 auto guard = exec->get_scoped_device_id_guard();
1134 GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv(
1135            send_buffer, send_counts, displacements,
1136            type_impl<SendType>::get_type(), recv_buffer, recv_count,
1137            type_impl<RecvType>::get_type(), root_rank, this->get()));
1138    }
1139
1160 template <typename SendType, typename RecvType>
1161 request i_scatter_v(std::shared_ptr<const Executor> exec,
1162 const SendType* send_buffer, const int* send_counts,
1163 const int* displacements, RecvType* recv_buffer,
1164 const int recv_count, int root_rank) const
1165 {
1166 auto guard = exec->get_scoped_device_id_guard();
1167 request req;
1168 GKO_ASSERT_NO_MPI_ERRORS(
1169            MPI_Iscatterv(send_buffer, send_counts, displacements,
1170                          type_impl<SendType>::get_type(), recv_buffer,
1171                          recv_count, type_impl<RecvType>::get_type(),
1172                          root_rank, this->get(), req.get()));
1173 return req;
1174 }
1175
1192 template <typename RecvType>
1193 void all_to_all(std::shared_ptr<const Executor> exec, RecvType* recv_buffer,
1194 const int recv_count) const
1195 {
1196 auto guard = exec->get_scoped_device_id_guard();
1197 GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(
1198            MPI_IN_PLACE, recv_count, type_impl<RecvType>::get_type(),
1199            recv_buffer, recv_count, type_impl<RecvType>::get_type(),
1200            this->get()));
1201 }
1202
1221 template <typename RecvType>
1222 request i_all_to_all(std::shared_ptr<const Executor> exec,
1223 RecvType* recv_buffer, const int recv_count) const
1224 {
1225 auto guard = exec->get_scoped_device_id_guard();
1226 request req;
1227 GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(
1228            MPI_IN_PLACE, recv_count, type_impl<RecvType>::get_type(),
1229            recv_buffer, recv_count, type_impl<RecvType>::get_type(),
1230            this->get(), req.get()));
1231 return req;
1232 }
1233
1250 template <typename SendType, typename RecvType>
1251 void all_to_all(std::shared_ptr<const Executor> exec,
1252 const SendType* send_buffer, const int send_count,
1253 RecvType* recv_buffer, const int recv_count) const
1254 {
1255 auto guard = exec->get_scoped_device_id_guard();
1256 GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(
1257            send_buffer, send_count, type_impl<SendType>::get_type(),
1258            recv_buffer, recv_count, type_impl<RecvType>::get_type(),
1259            this->get()));
1260 }
1261
1280 template <typename SendType, typename RecvType>
1281 request i_all_to_all(std::shared_ptr<const Executor> exec,
1282 const SendType* send_buffer, const int send_count,
1283 RecvType* recv_buffer, const int recv_count) const
1284 {
1285 auto guard = exec->get_scoped_device_id_guard();
1286 request req;
1287 GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(
1288            send_buffer, send_count, type_impl<SendType>::get_type(),
1289            recv_buffer, recv_count, type_impl<RecvType>::get_type(),
1290            this->get(), req.get()));
1291 return req;
1292 }
1293
1313 template <typename SendType, typename RecvType>
1314 void all_to_all_v(std::shared_ptr<const Executor> exec,
1315 const SendType* send_buffer, const int* send_counts,
1316 const int* send_offsets, RecvType* recv_buffer,
1317 const int* recv_counts, const int* recv_offsets) const
1318 {
1319 this->all_to_all_v(std::move(exec), send_buffer, send_counts,
1320                           send_offsets, type_impl<SendType>::get_type(),
1321                           recv_buffer, recv_counts, recv_offsets,
1322                           type_impl<RecvType>::get_type());
1323    }
1324
1340 void all_to_all_v(std::shared_ptr<const Executor> exec,
1341 const void* send_buffer, const int* send_counts,
1342                      const int* send_offsets, MPI_Datatype send_type,
1343                      void* recv_buffer, const int* recv_counts,
1344 const int* recv_offsets, MPI_Datatype recv_type) const
1345 {
1346 auto guard = exec->get_scoped_device_id_guard();
1347 GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv(
1348            send_buffer, send_counts, send_offsets, send_type, recv_buffer,
1349            recv_counts, recv_offsets, recv_type, this->get()));
1350    }
1351
1371 request i_all_to_all_v(std::shared_ptr<const Executor> exec,
1372 const void* send_buffer, const int* send_counts,
1373                           const int* send_offsets, MPI_Datatype send_type,
1374                           void* recv_buffer, const int* recv_counts,
1375 const int* recv_offsets,
1376 MPI_Datatype recv_type) const
1377 {
1378 auto guard = exec->get_scoped_device_id_guard();
1379 request req;
1380 GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv(
1381            send_buffer, send_counts, send_offsets, send_type, recv_buffer,
1382            recv_counts, recv_offsets, recv_type, this->get(), req.get()));
1383 return req;
1384 }
1385
1406 template <typename SendType, typename RecvType>
1407 request i_all_to_all_v(std::shared_ptr<const Executor> exec,
1408 const SendType* send_buffer, const int* send_counts,
1409 const int* send_offsets, RecvType* recv_buffer,
1410 const int* recv_counts,
1411 const int* recv_offsets) const
1412 {
1413 return this->i_all_to_all_v(
1414 std::move(exec), send_buffer, send_counts, send_offsets,
1415            type_impl<SendType>::get_type(), recv_buffer, recv_counts,
1416            recv_offsets, type_impl<RecvType>::get_type());
1417    }
1418
1433 template <typename ScanType>
1434 void scan(std::shared_ptr<const Executor> exec, const ScanType* send_buffer,
1435 ScanType* recv_buffer, int count, MPI_Op operation) const
1436 {
1437 auto guard = exec->get_scoped_device_id_guard();
1438 GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count,
1439                                          type_impl<ScanType>::get_type(),
1440                                          operation, this->get()));
1441 }
1442
1459 template <typename ScanType>
1460 request i_scan(std::shared_ptr<const Executor> exec,
1461                   const ScanType* send_buffer, ScanType* recv_buffer,
1462                   int count, MPI_Op operation) const
1463 {
1464 auto guard = exec->get_scoped_device_id_guard();
1465 request req;
1466 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count,
1467                                           type_impl<ScanType>::get_type(),
1468                                           operation, this->get(), req.get()));
1469 return req;
1470 }
1471
1472private:
1473 std::shared_ptr<MPI_Comm> comm_;
1474 bool force_host_buffer_;
1475
1476 int get_my_rank() const
1477 {
1478 int my_rank = 0;
1479 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(get(), &my_rank));
1480 return my_rank;
1481 }
1482
1483 int get_node_local_rank() const
1484 {
1485        MPI_Comm local_comm;
1486        int rank;
1487 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split_type(
1488 this->get(), MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local_comm));
1489 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(local_comm, &rank));
1490        MPI_Comm_free(&local_comm);
1491        return rank;
1492 }
1493
1494 int get_num_ranks() const
1495 {
1496 int size = 1;
1497 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(this->get(), &size));
1498 return size;
1499 }
1500
1501 bool compare(const MPI_Comm& other) const
1502 {
1503 int flag;
1504 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_compare(get(), other, &flag));
1505 return flag == MPI_IDENT;
1506 }
1507};
1508
1509
1514bool requires_host_buffer(const std::shared_ptr<const Executor>& exec,
1515 const communicator& comm);
1516
1517
1523inline double get_walltime() { return MPI_Wtime(); }
1524
1525
1534template <typename ValueType>
1535class window {
1536public:
1540 enum class create_type { allocate = 1, create = 2, dynamic_create = 3 };
1541
1545 enum class lock_type { shared = 1, exclusive = 2 };
1546
1550 window() : window_(MPI_WIN_NULL) {}
1551
1552 window(const window& other) = delete;
1553
1554 window& operator=(const window& other) = delete;
1555
1562 window(window&& other) : window_{std::exchange(other.window_, MPI_WIN_NULL)}
1563 {}
1564
1571    window& operator=(window&& other)
1572    {
1573 window_ = std::exchange(other.window_, MPI_WIN_NULL);
1574 }
1575
1588 window(std::shared_ptr<const Executor> exec, ValueType* base, int num_elems,
1589 const communicator& comm, const int disp_unit = sizeof(ValueType),
1590           MPI_Info input_info = MPI_INFO_NULL,
1591           create_type c_type = create_type::create)
1592 {
1593 auto guard = exec->get_scoped_device_id_guard();
1594 unsigned size = num_elems * sizeof(ValueType);
1595 if (c_type == create_type::create) {
1596 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create(
1597 base, size, disp_unit, input_info, comm.get(), &this->window_));
1598 } else if (c_type == create_type::dynamic_create) {
1599 GKO_ASSERT_NO_MPI_ERRORS(
1600 MPI_Win_create_dynamic(input_info, comm.get(), &this->window_));
1601 } else if (c_type == create_type::allocate) {
1602 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_allocate(
1603 size, disp_unit, input_info, comm.get(), base, &this->window_));
1604 } else {
1605 GKO_NOT_IMPLEMENTED;
1606 }
1607 }
1608
1614 MPI_Win get_window() const { return this->window_; }
1615
1622 void fence(int assert = 0) const
1623 {
1624 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_fence(assert, this->window_));
1625 }
1626
1635 void lock(int rank, lock_type lock_t = lock_type::shared,
1636 int assert = 0) const
1637 {
1638 if (lock_t == lock_type::shared) {
1639 GKO_ASSERT_NO_MPI_ERRORS(
1640 MPI_Win_lock(MPI_LOCK_SHARED, rank, assert, this->window_));
1641 } else if (lock_t == lock_type::exclusive) {
1642 GKO_ASSERT_NO_MPI_ERRORS(
1643 MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, assert, this->window_));
1644 } else {
1645 GKO_NOT_IMPLEMENTED;
1646 }
1647 }
1648
1655 void unlock(int rank) const
1656 {
1657 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock(rank, this->window_));
1658 }
1659
1666 void lock_all(int assert = 0) const
1667 {
1668 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_lock_all(assert, this->window_));
1669 }
1670
1675 void unlock_all() const
1676 {
1677 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock_all(this->window_));
1678 }
1679
1686 void flush(int rank) const
1687 {
1688 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush(rank, this->window_));
1689 }
1690
1697 void flush_local(int rank) const
1698 {
1699 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local(rank, this->window_));
1700 }
1701
1706 void flush_all() const
1707 {
1708 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_all(this->window_));
1709 }
1710
1715 void flush_all_local() const
1716 {
1717 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local_all(this->window_));
1718 }
1719
1723 void sync() const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_sync(this->window_)); }
1724
1728    ~window()
1729    {
1730 if (this->window_ && this->window_ != MPI_WIN_NULL) {
1731 MPI_Win_free(&this->window_);
1732 }
1733 }
1734
1745 template <typename PutType>
1746 void put(std::shared_ptr<const Executor> exec, const PutType* origin_buffer,
1747 const int origin_count, const int target_rank,
1748 const unsigned int target_disp, const int target_count) const
1749 {
1750 auto guard = exec->get_scoped_device_id_guard();
1751 GKO_ASSERT_NO_MPI_ERRORS(
1752            MPI_Put(origin_buffer, origin_count, type_impl<PutType>::get_type(),
1753                    target_rank, target_disp, target_count,
1754                    type_impl<PutType>::get_type(), this->get_window()));
1755    }
1756
1769 template <typename PutType>
1770 request r_put(std::shared_ptr<const Executor> exec,
1771 const PutType* origin_buffer, const int origin_count,
1772 const int target_rank, const unsigned int target_disp,
1773 const int target_count) const
1774 {
1775 auto guard = exec->get_scoped_device_id_guard();
1776 request req;
1777 GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput(
1778            origin_buffer, origin_count, type_impl<PutType>::get_type(),
1779            target_rank, target_disp, target_count,
1780            type_impl<PutType>::get_type(), this->get_window(), req.get()));
1781 return req;
1782 }
1783
1795 template <typename PutType>
1796 void accumulate(std::shared_ptr<const Executor> exec,
1797 const PutType* origin_buffer, const int origin_count,
1798 const int target_rank, const unsigned int target_disp,
1799 const int target_count, MPI_Op operation) const
1800 {
1801 auto guard = exec->get_scoped_device_id_guard();
1802 GKO_ASSERT_NO_MPI_ERRORS(MPI_Accumulate(
1803            origin_buffer, origin_count, type_impl<PutType>::get_type(),
1804            target_rank, target_disp, target_count,
1805            type_impl<PutType>::get_type(), operation, this->get_window()));
1806 }
1807
1821 template <typename PutType>
1822 request r_accumulate(std::shared_ptr<const Executor> exec,
1823 const PutType* origin_buffer, const int origin_count,
1824 const int target_rank, const unsigned int target_disp,
1825 const int target_count, MPI_Op operation) const
1826 {
1827 auto guard = exec->get_scoped_device_id_guard();
1828 request req;
1829 GKO_ASSERT_NO_MPI_ERRORS(MPI_Raccumulate(
1830            origin_buffer, origin_count, type_impl<PutType>::get_type(),
1831            target_rank, target_disp, target_count,
1832            type_impl<PutType>::get_type(), operation, this->get_window(),
1833 req.get()));
1834 return req;
1835 }
1836
1847 template <typename GetType>
1848 void get(std::shared_ptr<const Executor> exec, GetType* origin_buffer,
1849 const int origin_count, const int target_rank,
1850 const unsigned int target_disp, const int target_count) const
1851 {
1852 auto guard = exec->get_scoped_device_id_guard();
1853 GKO_ASSERT_NO_MPI_ERRORS(
1854            MPI_Get(origin_buffer, origin_count, type_impl<GetType>::get_type(),
1855                    target_rank, target_disp, target_count,
1856                    type_impl<GetType>::get_type(), this->get_window()));
1857    }
1858
1871 template <typename GetType>
1872 request r_get(std::shared_ptr<const Executor> exec, GetType* origin_buffer,
1873 const int origin_count, const int target_rank,
1874 const unsigned int target_disp, const int target_count) const
1875 {
1876 auto guard = exec->get_scoped_device_id_guard();
1877 request req;
1878 GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget(
1879            origin_buffer, origin_count, type_impl<GetType>::get_type(),
1880            target_rank, target_disp, target_count,
1881            type_impl<GetType>::get_type(), this->get_window(), req.get()));
1882 return req;
1883 }
1884
1898 template <typename GetType>
1899 void get_accumulate(std::shared_ptr<const Executor> exec,
1900                        GetType* origin_buffer, const int origin_count,
1901                        GetType* result_buffer, const int result_count,
1902                        const int target_rank, const unsigned int target_disp,
1903 const int target_count, MPI_Op operation) const
1904 {
1905 auto guard = exec->get_scoped_device_id_guard();
1906 GKO_ASSERT_NO_MPI_ERRORS(MPI_Get_accumulate(
1907            origin_buffer, origin_count, type_impl<GetType>::get_type(),
1908            result_buffer, result_count, type_impl<GetType>::get_type(),
1909            target_rank, target_disp, target_count,
1910            type_impl<GetType>::get_type(), operation, this->get_window()));
1911 }
1912
1928 template <typename GetType>
1929 request r_get_accumulate(std::shared_ptr<const Executor> exec,
1930                             GetType* origin_buffer, const int origin_count,
1931                             GetType* result_buffer, const int result_count,
1932                             const int target_rank,
1933 const unsigned int target_disp,
1934 const int target_count, MPI_Op operation) const
1935 {
1936 auto guard = exec->get_scoped_device_id_guard();
1937 request req;
1938 GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget_accumulate(
1939            origin_buffer, origin_count, type_impl<GetType>::get_type(),
1940            result_buffer, result_count, type_impl<GetType>::get_type(),
1941            target_rank, target_disp, target_count,
1942            type_impl<GetType>::get_type(), operation, this->get_window(),
1943 req.get()));
1944 return req;
1945 }
1946
1957 template <typename GetType>
1958 void fetch_and_op(std::shared_ptr<const Executor> exec,
1959                      GetType* origin_buffer, GetType* result_buffer,
1960                      const int target_rank, const unsigned int target_disp,
1961 MPI_Op operation) const
1962 {
1963 auto guard = exec->get_scoped_device_id_guard();
1964 GKO_ASSERT_NO_MPI_ERRORS(MPI_Fetch_and_op(
1965            origin_buffer, result_buffer, type_impl<GetType>::get_type(),
1966            target_rank, target_disp, operation, this->get_window()));
1967 }
1968
1969private:
1970 MPI_Win window_;
1971};
1972
1973
1974} // namespace mpi
1975} // namespace experimental
1976} // namespace gko
1977
1978
1979#endif  // GINKGO_BUILD_MPI
1980
1981
1982#endif // GKO_PUBLIC_CORE_BASE_MPI_HPP_
A thin wrapper of MPI_Comm that supports most MPI calls.
Definition mpi.hpp:437
status recv(std::shared_ptr< const Executor > exec, RecvType *recv_buffer, const int recv_count, const int source_rank, const int recv_tag) const
Receive data from source rank.
Definition mpi.hpp:611
void scatter_v(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int *send_counts, const int *displacements, RecvType *recv_buffer, const int recv_count, int root_rank) const
Scatter data from root rank to all ranks in the communicator with offsets.
Definition mpi.hpp:1128
request i_broadcast(std::shared_ptr< const Executor > exec, BroadcastType *buffer, int count, int root_rank) const
(Non-blocking) Broadcast data from calling process to all ranks in the communicator
Definition mpi.hpp:689
void gather(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int recv_count, int root_rank) const
Gather data onto the root rank from all ranks in the communicator.
Definition mpi.hpp:876
request i_recv(std::shared_ptr< const Executor > exec, RecvType *recv_buffer, const int recv_count, const int source_rank, const int recv_tag) const
Receive (Non-blocking, Immediate return) data from source rank.
Definition mpi.hpp:639
request i_scatter_v(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int *send_counts, const int *displacements, RecvType *recv_buffer, const int recv_count, int root_rank) const
(Non-blocking) Scatter data from root rank to all ranks in the communicator with offsets.
Definition mpi.hpp:1161
void all_to_all(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int recv_count) const
Communicate data from all ranks to all other ranks (MPI_Alltoall).
Definition mpi.hpp:1251
request i_all_to_all(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int recv_count) const
(Non-blocking) Communicate data from all ranks to all other ranks (MPI_Ialltoall).
Definition mpi.hpp:1281
request i_all_to_all_v(std::shared_ptr< const Executor > exec, const void *send_buffer, const int *send_counts, const int *send_offsets, MPI_Datatype send_type, void *recv_buffer, const int *recv_counts, const int *recv_offsets, MPI_Datatype recv_type) const
Communicate data from all ranks to all other ranks with offsets (MPI_Ialltoallv).
Definition mpi.hpp:1371
bool operator!=(const communicator &rhs) const
Compare two communicator objects for non-equality.
Definition mpi.hpp:531
void scatter(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int recv_count, int root_rank) const
Scatter data from root rank to all ranks in the communicator.
Definition mpi.hpp:1064
void synchronize() const
This function is used to synchronize the ranks in the communicator.
Definition mpi.hpp:537
int rank() const
Return the rank of the calling process in the communicator.
Definition mpi.hpp:507
request i_reduce(std::shared_ptr< const Executor > exec, const ReduceType *send_buffer, ReduceType *recv_buffer, int count, MPI_Op operation, int root_rank) const
(Non-blocking) Reduce data into root from all calling processes on the same communicator.
Definition mpi.hpp:742
int size() const
Return the size of the communicator (number of ranks).
Definition mpi.hpp:500
void send(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, const int destination_rank, const int send_tag) const
Send (Blocking) data from calling process to destination rank.
Definition mpi.hpp:556
request i_all_to_all_v(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int *send_counts, const int *send_offsets, RecvType *recv_buffer, const int *recv_counts, const int *recv_offsets) const
Communicate data from all ranks to all other ranks with offsets (MPI_Ialltoallv).
Definition mpi.hpp:1407
request i_gather(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int recv_count, int root_rank) const
(Non-blocking) Gather data onto the root rank from all ranks in the communicator.
Definition mpi.hpp:908
void all_to_all(std::shared_ptr< const Executor > exec, RecvType *recv_buffer, const int recv_count) const
(In-place) Communicate data from all ranks to all other ranks in place (MPI_Alltoall).
Definition mpi.hpp:1193
void all_to_all_v(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int *send_counts, const int *send_offsets, RecvType *recv_buffer, const int *recv_counts, const int *recv_offsets) const
Communicate data from all ranks to all other ranks with offsets (MPI_Alltoallv).
Definition mpi.hpp:1314
request i_all_reduce(std::shared_ptr< const Executor > exec, ReduceType *recv_buffer, int count, MPI_Op operation) const
(In-place, non-blocking) Reduce data from all calling processes on the same communicator.
Definition mpi.hpp:793
request i_all_to_all(std::shared_ptr< const Executor > exec, RecvType *recv_buffer, const int recv_count) const
(In-place, Non-blocking) Communicate data from all ranks to all other ranks in place (MPI_Ialltoall).
Definition mpi.hpp:1222
void all_to_all_v(std::shared_ptr< const Executor > exec, const void *send_buffer, const int *send_counts, const int *send_offsets, MPI_Datatype send_type, void *recv_buffer, const int *recv_counts, const int *recv_offsets, MPI_Datatype recv_type) const
Communicate data from all ranks to all other ranks with offsets (MPI_Alltoallv).
Definition mpi.hpp:1340
int node_local_rank() const
Return the node local rank of the calling process in the communicator.
Definition mpi.hpp:514
void broadcast(std::shared_ptr< const Executor > exec, BroadcastType *buffer, int count, int root_rank) const
Broadcast data from calling process to all ranks in the communicator.
Definition mpi.hpp:664
const MPI_Comm & get() const
Return the underlying MPI_Comm object.
Definition mpi.hpp:491
communicator(const MPI_Comm &comm, int color, int key)
Create a communicator object from an existing MPI_Comm object using color and key.
Definition mpi.hpp:463
void all_reduce(std::shared_ptr< const Executor > exec, ReduceType *recv_buffer, int count, MPI_Op operation) const
(In-place) Reduce data from all calling processes on the same communicator.
Definition mpi.hpp:768
void all_gather(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int recv_count) const
Gather data onto all ranks from all ranks in the communicator.
Definition mpi.hpp:1005
request i_all_gather(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int recv_count) const
(Non-blocking) Gather data onto all ranks from all ranks in the communicator.
Definition mpi.hpp:1035
bool operator==(const communicator &rhs) const
Compare two communicator objects for equality.
Definition mpi.hpp:521
void all_reduce(std::shared_ptr< const Executor > exec, const ReduceType *send_buffer, ReduceType *recv_buffer, int count, MPI_Op operation) const
Reduce data from all calling processes on the same communicator.
Definition mpi.hpp:820
request i_gather_v(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int *recv_counts, const int *displacements, int root_rank) const
(Non-blocking) Gather data onto the root rank from all ranks in the communicator with offsets.
Definition mpi.hpp:974
request i_all_reduce(std::shared_ptr< const Executor > exec, const ReduceType *send_buffer, ReduceType *recv_buffer, int count, MPI_Op operation) const
(Non-blocking) Reduce data from all calling processes on the same communicator.
Definition mpi.hpp:847
communicator(const MPI_Comm &comm, bool force_host_buffer=false)
Non-owning constructor for an existing communicator of type MPI_Comm.
Definition mpi.hpp:449
request i_scan(std::shared_ptr< const Executor > exec, const ScanType *send_buffer, ScanType *recv_buffer, int count, MPI_Op operation) const
Does a scan operation with the given operator.
Definition mpi.hpp:1460
void reduce(std::shared_ptr< const Executor > exec, const ReduceType *send_buffer, ReduceType *recv_buffer, int count, MPI_Op operation, int root_rank) const
Reduce data into root from all calling processes on the same communicator.
Definition mpi.hpp:715
request i_scatter(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int recv_count, int root_rank) const
(Non-blocking) Scatter data from root rank to all ranks in the communicator.
Definition mpi.hpp:1095
void scan(std::shared_ptr< const Executor > exec, const ScanType *send_buffer, ScanType *recv_buffer, int count, MPI_Op operation) const
Does a scan operation with the given operator.
Definition mpi.hpp:1434
void gather_v(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, RecvType *recv_buffer, const int *recv_counts, const int *displacements, int root_rank) const
Gather data onto the root rank from all ranks in the communicator with offsets.
Definition mpi.hpp:941
request i_send(std::shared_ptr< const Executor > exec, const SendType *send_buffer, const int send_count, const int destination_rank, const int send_tag) const
Send (Non-blocking, Immediate return) data from calling process to destination rank.
Definition mpi.hpp:583
communicator(const communicator &comm, int color, int key)
Create a communicator object from an existing MPI_Comm object using color and key.
Definition mpi.hpp:478
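The entries above cover the communicator wrapper. Below is a minimal sketch of blocking point-to-point use; the include path, the choice of ReferenceExecutor, and the buffer size are illustrative assumptions, not part of this header.

#include <vector>
#include <ginkgo/ginkgo.hpp>

int main(int argc, char* argv[])
{
    namespace mpi = gko::experimental::mpi;
    mpi::environment env(argc, argv);        // RAII MPI_Init_thread / MPI_Finalize
    mpi::communicator comm(MPI_COMM_WORLD);  // non-owning wrapper around MPI_COMM_WORLD
    auto exec = gko::ReferenceExecutor::create();

    std::vector<double> buf(4, static_cast<double>(comm.rank()));
    if (comm.rank() == 0 && comm.size() > 1) {
        comm.send(exec, buf.data(), 4, 1, 0);  // blocking send of 4 doubles to rank 1, tag 0
    } else if (comm.rank() == 1) {
        comm.recv(exec, buf.data(), 4, 0, 0);  // blocking receive from rank 0, tag 0
    }
    comm.synchronize();                        // MPI_Barrier on the wrapped communicator
}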
A move-only wrapper for a contiguous MPI_Datatype.
Definition mpi.hpp:130
MPI_Datatype get() const
Access the underlying MPI_Datatype.
Definition mpi.hpp:199
contiguous_type(int count, MPI_Datatype old_type)
Constructs a wrapper for a contiguous MPI_Datatype.
Definition mpi.hpp:138
contiguous_type()
Constructs empty wrapper with MPI_DATATYPE_NULL.
Definition mpi.hpp:147
contiguous_type(const contiguous_type &)=delete
Disallow copying of wrapper type.
contiguous_type(contiguous_type &&other) noexcept
Move constructor, leaves other with MPI_DATATYPE_NULL.
Definition mpi.hpp:164
contiguous_type & operator=(contiguous_type &&other) noexcept
Move assignment, leaves other with MPI_DATATYPE_NULL.
Definition mpi.hpp:176
contiguous_type & operator=(const contiguous_type &)=delete
Disallow copying of wrapper type.
~contiguous_type()
Destructs object by freeing wrapped MPI_Datatype.
Definition mpi.hpp:187
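As a small usage sketch for the wrapper above (the variable names are illustrative), a datatype describing three contiguous doubles can be built once and handed to raw MPI calls through get(); the destructor frees it.

gko::experimental::mpi::contiguous_type vec3(3, MPI_DOUBLE);
MPI_Datatype t = vec3.get();  // committed contiguous type, freed when vec3 is destroyed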
Class that sets up and finalizes the MPI environment.
Definition mpi.hpp:227
~environment()
Call MPI_Finalize at the end of the scope of this class.
Definition mpi.hpp:270
int get_provided_thread_support() const
Return the provided thread support.
Definition mpi.hpp:248
environment(int &argc, char **&argv, const thread_type thread_t=thread_type::serialized)
Call MPI_Init_thread and initialize the MPI environment.
Definition mpi.hpp:258
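A short sketch of the RAII environment, assuming an application that asks for MPI_THREAD_MULTIPLE and falls back if the library provides less:

int main(int argc, char* argv[])
{
    using namespace gko::experimental::mpi;
    environment env(argc, argv, thread_type::multiple);  // calls MPI_Init_thread
    if (env.get_provided_thread_support() <
        static_cast<int>(thread_type::multiple)) {
        // weaker level provided; restrict MPI calls to a single thread
    }
    // ... application code ...
}  // MPI_Finalize runs here when env leaves scope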
The request class is a light, move-only wrapper around the MPI_Request handle.
Definition mpi.hpp:348
request()
The default constructor.
Definition mpi.hpp:354
MPI_Request * get()
Get a pointer to the underlying MPI_Request handle.
Definition mpi.hpp:385
status wait()
Allows a rank to wait on a particular request handle.
Definition mpi.hpp:393
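A sketch of a non-blocking exchange using these handles; comm, exec, the neighbor ranks, and the buffers are assumed to exist in the surrounding code:

std::vector<gko::experimental::mpi::request> reqs;
reqs.push_back(comm.i_recv(exec, recv_buf.data(), n, left, 0));   // post the receive first
reqs.push_back(comm.i_send(exec, send_buf.data(), n, right, 0));  // then the matching send
auto statuses = gko::experimental::mpi::wait_all(reqs);           // blocks until both complete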
This class wraps the MPI_Window class with RAII functionality.
Definition mpi.hpp:1535
void get(std::shared_ptr< const Executor > exec, GetType *origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count) const
Get data from the target window.
Definition mpi.hpp:1848
request r_put(std::shared_ptr< const Executor > exec, const PutType *origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count) const
Put data into the target window.
Definition mpi.hpp:1770
window()
The default constructor.
Definition mpi.hpp:1550
void get_accumulate(std::shared_ptr< const Executor > exec, GetType *origin_buffer, const int origin_count, GetType *result_buffer, const int result_count, const int target_rank, const unsigned int target_disp, const int target_count, MPI_Op operation) const
Get Accumulate data from the target window.
Definition mpi.hpp:1899
void put(std::shared_ptr< const Executor > exec, const PutType *origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count) const
Put data into the target window.
Definition mpi.hpp:1746
~window()
The deleter which calls MPI_Win_free when the window leaves its scope.
Definition mpi.hpp:1728
lock_type
The lock type for passive target synchronization of the windows.
Definition mpi.hpp:1545
window & operator=(window &&other)
The move assignment operator.
Definition mpi.hpp:1571
request r_accumulate(std::shared_ptr< const Executor > exec, const PutType *origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count, MPI_Op operation) const
(Non-blocking) Accumulate data into the target window.
Definition mpi.hpp:1822
request r_get_accumulate(std::shared_ptr< const Executor > exec, GetType *origin_buffer, const int origin_count, GetType *result_buffer, const int result_count, const int target_rank, const unsigned int target_disp, const int target_count, MPI_Op operation) const
(Non-blocking) Get Accumulate data (with handle) from the target window.
Definition mpi.hpp:1929
void fetch_and_op(std::shared_ptr< const Executor > exec, GetType *origin_buffer, GetType *result_buffer, const int target_rank, const unsigned int target_disp, MPI_Op operation) const
Fetch and operate on data from the target window (An optimized version of Get_accumulate).
Definition mpi.hpp:1958
void sync() const
Synchronize the public and private buffers for the window object.
Definition mpi.hpp:1723
void unlock(int rank) const
Close the epoch using MPI_Win_unlock for the window object.
Definition mpi.hpp:1655
void fence(int assert=0) const
The active target synchronization using MPI_Win_fence for the window object.
Definition mpi.hpp:1622
void flush(int rank) const
Flush the existing RDMA operations on the target rank for the calling process for the window object.
Definition mpi.hpp:1686
void unlock_all() const
Close the epoch on all ranks using MPI_Win_unlock_all for the window object.
Definition mpi.hpp:1675
create_type
The create type for the window object.
Definition mpi.hpp:1540
window(std::shared_ptr< const Executor > exec, ValueType *base, int num_elems, const communicator &comm, const int disp_unit=sizeof(ValueType), MPI_Info input_info=MPI_INFO_NULL, create_type c_type=create_type::create)
Create a window object with a given data pointer and type.
Definition mpi.hpp:1588
void accumulate(std::shared_ptr< const Executor > exec, const PutType *origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count, MPI_Op operation) const
Accumulate data into the target window.
Definition mpi.hpp:1796
void lock_all(int assert=0) const
Create the epoch on all ranks using MPI_Win_lock_all for the window object.
Definition mpi.hpp:1666
void lock(int rank, lock_type lock_t=lock_type::shared, int assert=0) const
Create an epoch using MPI_Win_lock for the window object.
Definition mpi.hpp:1635
void flush_all_local() const
Flush all the local existing RDMA operations on the calling rank for the window object.
Definition mpi.hpp:1715
window(window &&other)
The move constructor.
Definition mpi.hpp:1562
void flush_local(int rank) const
Flush the existing RDMA operations on the calling rank from the target rank for the window object.
Definition mpi.hpp:1697
MPI_Win get_window() const
Get the underlying window object of MPI_Win type.
Definition mpi.hpp:1614
request r_get(std::shared_ptr< const Executor > exec, GetType *origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count) const
Get data (with handle) from the target window.
Definition mpi.hpp:1872
void flush_all() const
Flush all the existing RDMA operations for the calling process for the window object.
Definition mpi.hpp:1706
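A fence-synchronized one-sided sketch for the window wrapper above; the buffer size, ranks, comm, and exec are assumptions:

std::vector<double> local(16, 0.0);
gko::experimental::mpi::window<double> win(exec, local.data(), 16, comm);
win.fence();                                    // open the access epoch on all ranks
if (comm.rank() == 0 && comm.size() > 1) {
    win.put(exec, local.data(), 16, 1, 0, 16);  // write 16 doubles at displacement 0 of rank 1
}
win.fence();                                    // complete the epoch; ~window frees the MPI_Win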
int map_rank_to_device_id(MPI_Comm comm, int num_devices)
Maps each MPI rank to a single device id in a round robin manner.
bool requires_host_buffer(const std::shared_ptr< const Executor > &exec, const communicator &comm)
Checks if the combination of Executor and communicator requires passing MPI buffers from the host mem...
double get_walltime()
Get the current wall-clock time via MPI_Wtime.
Definition mpi.hpp:1523
constexpr bool is_gpu_aware()
Return if GPU aware functionality is available.
Definition mpi.hpp:70
thread_type
This enum specifies the threading type to be used when creating an MPI environment.
Definition mpi.hpp:210
std::vector< status > wait_all(std::vector< request > &req)
Allows a rank to wait on multiple request handles.
Definition mpi.hpp:413
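For the free functions above, a common pattern is to pick one GPU per rank before building a device executor. CudaExecutor and its device query are assumptions about the wider Ginkgo API, not part of this header:

int num_devices = gko::CudaExecutor::get_num_devices();
int device_id = gko::experimental::mpi::map_rank_to_device_id(MPI_COMM_WORLD, num_devices);
auto exec = gko::CudaExecutor::create(device_id, gko::ReferenceExecutor::create());
// is_gpu_aware() reports whether device buffers may be handed to MPI directly
bool direct = gko::experimental::mpi::is_gpu_aware();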
The Ginkgo namespace.
Definition abstract_factory.hpp:48
The status struct is a light wrapper around the MPI_Status struct.
Definition mpi.hpp:308
int get_count(const T *data) const
Get the number of elements received by the communication call.
Definition mpi.hpp:332
status()
The default constructor.
Definition mpi.hpp:312
MPI_Status * get()
Get a pointer to the underlying MPI_Status object.
Definition mpi.hpp:319
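A sketch of checking the delivered element count after a blocking receive; comm, exec, the buffer, and the ranks are assumed:

auto st = comm.recv(exec, buf.data(), max_count, source, tag);
int received = st.get_count(buf.data());  // number of elements of the buffer's type, not bytes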
A struct that is used to determine the MPI_Datatype of a specified type.
Definition mpi.hpp:105
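The mapping is resolved at compile time; a type without a registration does not compile. A short illustrative sketch:

MPI_Datatype d = gko::experimental::mpi::type_impl<double>::get_type();  // MPI_DOUBLE
// a project-specific type would need its own GKO_REGISTER_MPI_TYPE(...)
// specialization inside namespace gko::experimental::mpi (assumption, not provided by Ginkgo)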