14#include "arccore/message_passing_mpi/internal/MpiMachineShMemWinBaseInternal.h"
16#include "arccore/base/FatalErrorException.h"
20namespace Arcane::MessagePassing::Mpi
// Constructor of MpiMachineShMemWinBaseInternal. The line carrying its name and
// parameter list is not part of this excerpt; the code below uses the values
// comm_machine, comm_machine_size, comm_machine_rank, sizeof_type,
// machine_ranks and sizeof_segment.
// It creates one shared-memory window per rank of the machine communicator
// (the data segments) plus three small bookkeeping windows, then synchronises
// all ranks with a barrier.
26MpiMachineShMemWinBaseInternal::
29, m_win_actual_sizeof()
30, m_win_target_segments()
31, m_comm_machine(comm_machine)
32, m_comm_machine_size(comm_machine_size)
33, m_comm_machine_rank(comm_machine_rank)
34, m_sizeof_type(sizeof_type)
35, m_machine_ranks(machine_ranks)
// Guard on the element size: it must be strictly positive (the statement run
// when the guard fires is not visible in this excerpt).
37 if (m_sizeof_type <= 0) {
// Guard on the requested segment size: it must be non-negative and a whole
// number of elements.
40 if (sizeof_segment < 0 || sizeof_segment % m_sizeof_type != 0) {
// One MPI window handle per rank of the machine communicator.
43 m_all_mpi_win.resize(m_comm_machine_size);
// Info object with alloc_shared_noncontig=true; it is passed to the per-rank
// data windows allocated in the loop below.
45 MPI_Info win_info_true;
46 MPI_Info_create(&win_info_true);
47 MPI_Info_set(win_info_true,
"alloc_shared_noncontig",
"true");
// Info object with alloc_shared_noncontig=false; it is passed to the three
// bookkeeping windows, whose per-rank slots are later addressed as one array
// starting at the base pointer queried for rank 0.
49 MPI_Info win_info_false;
50 MPI_Info_create(&win_info_false);
51 MPI_Info_set(win_info_false,
"alloc_shared_noncontig",
"false");
// Create one shared window per rank i; the handle is stored in m_all_mpi_win[i].
54 for (Integer i = 0; i < m_comm_machine_size; ++i) {
// NOTE(review): the declaration of size_seg and the line between the two
// assignments are not visible here. Presumably a request of 0 bytes is
// replaced by one element (m_sizeof_type) and any other value is used as is,
// only on the rank that owns window i -- confirm against the full source.
56 if (m_comm_machine_rank == i) {
57 if (sizeof_segment == 0)
58 size_seg = m_sizeof_type;
60 size_seg = sizeof_segment;
63 std::byte* ptr_seg =
nullptr;
// m_sizeof_type is passed as the displacement unit of the window.
64 int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info_true, m_comm_machine, &ptr_seg, &m_all_mpi_win[i]);
66 if (error != MPI_SUCCESS) {
// Query this rank's own window to obtain the base address and byte size of
// its segment.
73 std::byte* ptr_seg =
nullptr;
74 int error = MPI_Win_shared_query(m_all_mpi_win[m_comm_machine_rank], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
76 if (error != MPI_SUCCESS) {
// Byte view over the whole segment reported for this rank.
83 m_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
// Bookkeeping window m_win_need_resize: every rank contributes one Int64.
88 Int64* ptr_seg =
nullptr;
89 Int64* ptr_win =
nullptr;
91 int error = MPI_Win_allocate_shared(
sizeof(Int64),
sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_need_resize);
93 if (error != MPI_SUCCESS) {
// The base address is queried for rank 0 so the span below can index the
// slots of all ranks.
100 int error = MPI_Win_shared_query(m_win_need_resize, 0, &size_seg, &size_type, &ptr_win);
102 if (error != MPI_SUCCESS) {
106 m_need_resize = Span<Int64>{ ptr_win, m_comm_machine_size };
// Initial value of this rank's slot.
107 m_need_resize[m_comm_machine_rank] = -1;
// Sanity check: this rank's own slot must sit m_comm_machine_rank entries
// after the base returned for rank 0 (handling of a mismatch is not visible).
109 if (ptr_win + m_comm_machine_rank != ptr_seg) {
// Bookkeeping window m_win_actual_sizeof: one Int64 per rank, exposed through
// m_sizeof_used_part.
115 Int64* ptr_seg =
nullptr;
116 Int64* ptr_win =
nullptr;
118 int error = MPI_Win_allocate_shared(
sizeof(Int64),
sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_actual_sizeof);
120 if (error != MPI_SUCCESS) {
127 int error = MPI_Win_shared_query(m_win_actual_sizeof, 0, &size_seg, &size_type, &ptr_win);
129 if (error != MPI_SUCCESS) {
133 m_sizeof_used_part = Span<Int64>{ ptr_win, m_comm_machine_size };
// This rank's slot starts at the requested segment size (which may be 0 even
// though at least one element was reserved above).
134 m_sizeof_used_part[m_comm_machine_rank] = sizeof_segment;
// Same layout check as for m_need_resize.
136 if (ptr_win + m_comm_machine_rank != ptr_seg) {
// Bookkeeping window m_win_target_segments: one Int32 per rank, exposed
// through m_target_segments.
142 Int32* ptr_seg =
nullptr;
143 Int32* ptr_win =
nullptr;
145 int error = MPI_Win_allocate_shared(
sizeof(Int32),
sizeof(Int32), win_info_false, m_comm_machine, &ptr_seg, &m_win_target_segments);
147 if (error != MPI_SUCCESS) {
154 int error = MPI_Win_shared_query(m_win_target_segments, 0, &size_seg, &size_type, &ptr_win);
156 if (error != MPI_SUCCESS) {
160 m_target_segments = Span<Int32>{ ptr_win, m_comm_machine_size };
// Initial value of this rank's slot.
161 m_target_segments[m_comm_machine_rank] = -1;
// Same layout check as for m_need_resize.
163 if (ptr_win + m_comm_machine_rank != ptr_seg) {
// The info objects are only needed while the windows are being created.
168 MPI_Info_free(&win_info_false);
169 MPI_Info_free(&win_info_true);
// All ranks wait here before the constructor returns.
171 MPI_Barrier(m_comm_machine);
// Destructor: releases every MPI window created by this object.
// The return codes of MPI_Win_free are not inspected here.
177MpiMachineShMemWinBaseInternal::
178~MpiMachineShMemWinBaseInternal()
// Per-rank data windows (one handle per rank of the machine communicator).
180 for (Integer i = 0; i < m_comm_machine_size; ++i) {
181 MPI_Win_free(&m_all_mpi_win[i]);
// The three bookkeeping windows.
183 MPI_Win_free(&m_win_need_resize);
184 MPI_Win_free(&m_win_actual_sizeof);
185 MPI_Win_free(&m_win_target_segments);
191Int32 MpiMachineShMemWinBaseInternal::
194 return m_sizeof_type;
203 return m_machine_ranks;
209void MpiMachineShMemWinBaseInternal::
212 MPI_Barrier(m_comm_machine);
// segmentView(rank): looks up the segment owned by the given rank.
// The return type line and the return statement are not visible in this
// excerpt.
228segmentView(
Int32 rank)
// The argument is converted with _worldToMachine before being used as an
// index into m_all_mpi_win.
230 const Int32 machine_rank = _worldToMachine(rank);
// Ask MPI for the base address and size of that rank's segment in the window
// stored for it.
234 std::byte* ptr_seg =
nullptr;
235 int error = MPI_Win_shared_query(
m_all_mpi_win[machine_rank], machine_rank, &size_seg, &
size_type, &ptr_seg);
// Handling of a failed query is not visible in this excerpt.
237 if (error != MPI_SUCCESS) {
248segmentConstView()
const
// segmentConstView(rank) const: const-qualified lookup of the segment owned by
// the given rank. The return type line and the return statement are not
// visible in this excerpt.
257segmentConstView(
Int32 rank)
const
// The argument is converted with _worldToMachine before being used as an
// index into m_all_mpi_win.
259 const Int32 machine_rank = _worldToMachine(rank);
// Ask MPI for the base address and size of that rank's segment in the window
// stored for it.
263 std::byte* ptr_seg =
nullptr;
264 int error = MPI_Win_shared_query(
m_all_mpi_win[machine_rank], machine_rank, &size_seg, &
size_type, &ptr_seg);
// Handling of a failed query is not visible in this excerpt.
266 if (error != MPI_SUCCESS) {
// Member function taking elem; the line with its name and parameter list is
// missing from this excerpt.
// NOTE(review): presumably this appends elem to the calling rank's own
// segment -- confirm against the full source.
276void MpiMachineShMemWinBaseInternal::
// elem must hold a whole number of elements of m_sizeof_type bytes (the
// statement run otherwise is not visible).
279 if (elem.
size() % m_sizeof_type) {
// actual_sizeof_win and old_reserved are set on lines not shown here.
284 const Int64 future_sizeof_win = actual_sizeof_win + elem.
size();
// Request a reallocation when the size after the addition exceeds
// old_reserved. _reallocBarrier contains barriers on m_comm_machine.
287 if (future_sizeof_win > old_reserved) {
288 _reallocBarrier(future_sizeof_win);
// Walks window positions [actual_sizeof_win, future_sizeof_win) and positions
// of elem from 0 in lockstep (the loop body is not visible).
297 for (
Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
// All ranks of the machine communicator synchronise before returning.
305 MPI_Barrier(m_comm_machine);
311void MpiMachineShMemWinBaseInternal::
315 MPI_Barrier(m_comm_machine);
// Member function using rank and elem; its name line is missing from this
// excerpt, but the error message below refers to addToAnotherSegment().
// Visible behaviour: bytes of elem are written into the segment of the rank
// designated by rank, growing that segment first when needed.
321void MpiMachineShMemWinBaseInternal::
// elem must hold a whole number of elements of m_sizeof_type bytes (the
// statement run otherwise is not visible).
324 if (elem.
size() % m_sizeof_type) {
// rank is converted with _worldToMachine before being used as an index.
328 const Int32 machine_rank = _worldToMachine(rank);
331 MPI_Barrier(m_comm_machine);
// NOTE(review): the loop that yields rank_asked is not visible. From the
// visible lines: a fatal error is raised with the message below about two
// subdomains asking for the same rank (the exact condition involves is_found
// and is only partly visible), and is_my_seg_edited is set when some
// rank_asked equals this rank's m_comm_machine_rank -- confirm.
335 bool is_my_seg_edited =
false;
337 bool is_found =
false;
339 if (rank_asked == machine_rank) {
344 ARCCORE_FATAL(
"Two subdomains ask same rank for addToAnotherSegment()");
347 if (rank_asked == m_comm_machine_rank) {
348 is_my_seg_edited =
true;
// Query the target rank's window for its current base address and size.
356 std::byte* ptr_seg =
nullptr;
358 int error = MPI_Win_shared_query(
m_all_mpi_win[machine_rank], machine_rank, &size_seg, &
size_type, &ptr_seg);
360 if (error != MPI_SUCCESS) {
// actual_sizeof_win and rank_reserved_part_span are set on lines not shown.
367 const Int64 future_sizeof_win = actual_sizeof_win + elem.
size();
368 const Int64 old_reserved = rank_reserved_part_span.
size();
// Request a reallocation of the target rank's segment when the size after the
// addition exceeds the currently reserved size.
370 if (future_sizeof_win > old_reserved) {
371 _reallocBarrier(machine_rank, future_sizeof_win);
// Query the target window again after the reallocation step.
375 std::byte* ptr_seg =
nullptr;
377 int error = MPI_Win_shared_query(
m_all_mpi_win[machine_rank], machine_rank, &size_seg, &
size_type, &ptr_seg);
379 if (error != MPI_SUCCESS) {
// Fatal error if the segment is still too small for the addition.
385 if (rank_reserved_part_span.
size() < future_sizeof_win) {
386 ARCCORE_FATAL(
"Bad realloc -- Old size : {0} -- New size : {1} -- Needed size : {2}", old_reserved, rank_reserved_part_span.
size(), future_sizeof_win);
// Copy elem element by element into positions
// [actual_sizeof_win, future_sizeof_win) of the target segment.
393 for (
Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
394 rank_reserved_part_span[pos_win] = elem[pos_elem];
401 MPI_Barrier(m_comm_machine);
// When the flag was set above, this rank queries its own window again.
// NOTE(review): presumably to refresh m_reserved_part_span; the assignment
// is not visible -- confirm.
405 if (is_my_seg_edited) {
407 std::byte* ptr_seg =
nullptr;
409 int error = MPI_Win_shared_query(
m_all_mpi_win[m_comm_machine_rank], m_comm_machine_rank, &size_seg, &
size_type, &ptr_seg);
411 if (error != MPI_SUCCESS) {
// Member function whose name line is missing from this excerpt.
// NOTE(review): its visible structure (barrier, is_my_seg_edited flag,
// barrier, re-query of the own window) suggests it is the counterpart called
// by ranks that take part in a collective step without adding data
// themselves -- confirm against the full source.
421void MpiMachineShMemWinBaseInternal::
426 MPI_Barrier(m_comm_machine);
// Set when a value named rank (obtained on lines not shown) equals this
// rank's index in the machine communicator.
428 bool is_my_seg_edited =
false;
430 if (rank == m_comm_machine_rank) {
431 is_my_seg_edited =
true;
437 MPI_Barrier(m_comm_machine);
// When the flag is set, this rank queries its own window again (what is done
// with the result is not visible).
439 if (is_my_seg_edited) {
441 std::byte* ptr_seg =
nullptr;
443 int error = MPI_Win_shared_query(
m_all_mpi_win[m_comm_machine_rank], m_comm_machine_rank, &size_seg, &
size_type, &ptr_seg);
445 if (error != MPI_SUCCESS) {
// reserve(new_capacity): forwards the requested capacity to _reallocBarrier.
// Any validation or condition around the call is not visible in this excerpt.
455void MpiMachineShMemWinBaseInternal::
456reserve(
Int64 new_capacity)
462 _reallocBarrier(new_capacity);
469void MpiMachineShMemWinBaseInternal::
// resize(new_size): changes the size of this rank's segment; several
// statements of the body are not visible in this excerpt.
478void MpiMachineShMemWinBaseInternal::
479resize(
Int64 new_size)
// -1 is handled separately: in that branch the visible code only synchronises
// on the machine communicator.
481 if (new_size == -1) {
483 MPI_Barrier(m_comm_machine);
// Any other value must be non-negative and a whole number of elements (the
// statement run otherwise is not visible).
487 if (new_size < 0 || new_size % m_sizeof_type) {
// Request a reallocation when the new size exceeds old_reserved, which is set
// on a line not shown here.
493 if (new_size > old_reserved) {
494 _reallocBarrier(new_size);
507 MPI_Barrier(m_comm_machine);
513void MpiMachineShMemWinBaseInternal::
517 MPI_Barrier(m_comm_machine);
523void MpiMachineShMemWinBaseInternal::
// _reallocBarrier(new_sizeof): requests a reallocation of the calling rank's
// own segment and runs the reallocation step together with the other ranks.
537void MpiMachineShMemWinBaseInternal::
538_reallocBarrier(
Int64 new_sizeof)
// Publish the requested size in this rank's slot of m_need_resize.
540 m_need_resize[m_comm_machine_rank] = new_sizeof;
// Barrier placed between writing the request and running the reallocation.
544 MPI_Barrier(m_comm_machine);
546 _reallocCollective();
// Reset the slot to -1, the value it was given at construction.
550 m_need_resize[m_comm_machine_rank] = -1;
557 MPI_Barrier(m_comm_machine);
// _reallocBarrier(machine_rank, new_sizeof): same sequence as the
// single-argument overload, but the request is written into the slot of
// machine_rank, which is used directly as an index into m_need_resize.
563void MpiMachineShMemWinBaseInternal::
564_reallocBarrier(
Int32 machine_rank,
Int64 new_sizeof)
// Publish the requested size in the slot of the designated rank.
566 m_need_resize[machine_rank] = new_sizeof;
// Barrier placed between writing the request and running the reallocation.
570 MPI_Barrier(m_comm_machine);
572 _reallocCollective();
// Reset that slot to -1 once the reallocation step is done.
576 m_need_resize[machine_rank] = -1;
583 MPI_Barrier(m_comm_machine);
// Member function whose name line is missing from this excerpt.
// It runs the same barrier / _reallocCollective / barrier sequence as the
// _reallocBarrier overloads, without writing any request into m_need_resize.
// NOTE(review): presumably the argument-less _reallocBarrier overload --
// confirm against the full source.
589void MpiMachineShMemWinBaseInternal::
592 MPI_Barrier(m_comm_machine);
593 _reallocCollective();
594 MPI_Barrier(m_comm_machine);
// Member function whose name line is missing from this excerpt.
// NOTE(review): presumably _reallocCollective, which the _reallocBarrier
// overloads call -- confirm.
// Visible behaviour: for each rank whose m_need_resize slot holds a request,
// a new shared window replaces the one stored in m_all_mpi_win, the owning
// rank copies its data into the new segment, and the old window is freed.
600void MpiMachineShMemWinBaseInternal::
// Info object with alloc_shared_noncontig=true, passed to every allocation
// below (its declaration is on a line not shown).
604 MPI_Info_create(&win_info);
605 MPI_Info_set(win_info,
"alloc_shared_noncontig",
"true");
607 for (Integer i = 0; i < m_comm_machine_size; ++i) {
// NOTE(review): -1 is the value slots hold when no request is pending; the
// statement executed here is not visible, presumably the rank is skipped.
608 if (m_need_resize[i] == -1)
611 ARCCORE_ASSERT(m_need_resize[i] >= 0, (
"New size must be >= 0"));
612 ARCCORE_ASSERT(m_need_resize[i] % m_sizeof_type == 0, (
"New size must be % sizeof type"));
// Only rank i contributes bytes to the new window: the requested size, or one
// element (m_sizeof_type) when 0 was requested. Every other rank contributes 0.
614 const Int64 size_seg = (m_comm_machine_rank == i ? (m_need_resize[i] == 0 ? m_sizeof_type : m_need_resize[i]) : 0);
// Keep the old handle: its memory is still read below and it is freed last.
616 MPI_Win old_win = m_all_mpi_win[i];
618 std::byte* ptr_seg =
nullptr;
// The new handle overwrites the entry in m_all_mpi_win.
621 int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info, m_comm_machine, &ptr_seg, &m_all_mpi_win[i]);
// On failure the info object is released (the following statement is not
// visible).
622 if (error != MPI_SUCCESS) {
623 MPI_Info_free(&win_info);
// Only the owner of segment i moves data and updates its own view.
627 if (m_comm_machine_rank == i) {
629 MPI_Aint mpi_reserved_size_seg;
634 error = MPI_Win_shared_query(m_all_mpi_win[m_comm_machine_rank], m_comm_machine_rank, &mpi_reserved_size_seg, &size_type, &ptr_seg);
// On failure or a null base address, the old window and the info object are
// released (the following statement is not visible).
635 if (error != MPI_SUCCESS || ptr_seg ==
nullptr) {
636 MPI_Win_free(&old_win);
637 MPI_Info_free(&win_info);
// Copy the smaller of the requested size and the value stored in this rank's
// m_sizeof_used_part slot, reading from the old segment through
// m_reserved_part_span.
641 const Int64 min_size = std::min(m_need_resize[i], m_sizeof_used_part[m_comm_machine_rank]);
642 memcpy(ptr_seg, m_reserved_part_span.data(), min_size);
// From here on the view refers to the new segment, with the size MPI reported.
644 m_reserved_part_span = Span<std::byte>{ ptr_seg, mpi_reserved_size_seg };
// The old window is released after the copy above.
646 MPI_Win_free(&old_win);
648 MPI_Info_free(&win_info);
// _worldToMachine(world): linear search of m_machine_ranks for the value
// world over indices [0, m_comm_machine_size).
// NOTE(review): the statement executed on a match and the handling of a
// missing value are not visible; presumably the matching index i is
// returned -- confirm.
654Int32 MpiMachineShMemWinBaseInternal::
655_worldToMachine(Int32 world)
const
657 for (Int32 i = 0; i < m_comm_machine_size; ++i) {
658 if (m_machine_ranks[i] == world) {
// _machineToWorld(machine): returns the entry of m_machine_ranks at index
// machine, i.e. the inverse lookup of _worldToMachine.
// No range check on machine is visible in this excerpt.
668Int32 MpiMachineShMemWinBaseInternal::
669_machineToWorld(Int32 machine)
const
671 return m_machine_ranks[machine];
#define ARCCORE_FATAL(...)
Macro throwing a FatalErrorException.
Constant view of an array of type T.
Span< Int64 > m_sizeof_used_part
Global view on contiguous window with the size of the primary windows.
Span< std::byte > m_reserved_part_span
Span< Int32 > m_target_segments
UniqueArray< MPI_Win > m_all_mpi_win
constexpr __host__ __device__ SizeType size() const noexcept
Returns the size of the array.
View of an array of elements of type T.
std::int64_t Int64
Signed integer type of 64 bits.
std::int32_t Int32
Signed integer type of 32 bits.