15#include "arccore/message_passing_mpi/internal/MpiDynamicMachineMemoryWindowBaseInternal.h"
17#include "arccore/base/FatalErrorException.h"
22namespace Arcane::MessagePassing::Mpi
// Constructor. Validates sizeof_type and sizeof_segment, then creates on
// comm_machine one shared-memory window per rank (the data segments) plus
// three small bookkeeping windows: resize requests, used sizes, target ranks.
//
// sizeof_segment    : initial used size of this rank's segment, in bytes; must
//                     be >= 0 and a multiple of sizeof_type.
// sizeof_type       : size of one element, in bytes; must be > 0. It is also
//                     passed as the displacement unit of the segment windows.
// comm_machine      : communicator on which every window is created.
// comm_machine_rank : rank of this process in comm_machine; used as the index
//                     of this process in the shared arrays.
// comm_machine_size : number of ranks in comm_machine (number of windows/slots).
// machine_ranks     : stored in m_machine_ranks.
//
// Raises a fatal error (ARCCORE_FATAL) on invalid sizes, on any MPI failure, or
// if a bookkeeping window turns out not to be contiguous.
//
// NOTE(review): this listing is a line-numbered extraction with gaps (for
// example source lines 30, 38, 41, 57, 61 and 70-74 are absent), so braces, the
// first member initializer and the declarations of size_seg / size_type are not
// visible here.
28MpiDynamicMachineMemoryWindowBaseInternal::
29MpiDynamicMachineMemoryWindowBaseInternal(
Int64 sizeof_segment,
Int32 sizeof_type,
const MPI_Comm& comm_machine,
Int32 comm_machine_rank,
Int32 comm_machine_size, ConstArrayView<Int32> machine_ranks)
31, m_win_actual_sizeof()
32, m_win_target_segments()
33, m_comm_machine(comm_machine)
34, m_comm_machine_size(comm_machine_size)
35, m_comm_machine_rank(comm_machine_rank)
36, m_sizeof_type(sizeof_type)
37, m_machine_ranks(machine_ranks)
// The element size is used below as a modulus, so it must be strictly positive.
39 if (m_sizeof_type <= 0) {
40 ARCCORE_FATAL(
"Invalid sizeof_type");
// The initial size must hold a whole number of elements.
42 if (sizeof_segment < 0 || sizeof_segment % m_sizeof_type != 0) {
43 ARCCORE_FATAL(
"Invalid initial sizeof_segment");
// One window handle per rank of the communicator.
45 m_all_mpi_win.resize(m_comm_machine_size);
// Info object with alloc_shared_noncontig=true, used for the per-rank segment windows.
47 MPI_Info win_info_true;
48 MPI_Info_create(&win_info_true);
49 MPI_Info_set(win_info_true,
"alloc_shared_noncontig",
"true");
// Info object with alloc_shared_noncontig=false, used for the three
// bookkeeping windows whose contiguity is verified further down.
51 MPI_Info win_info_false;
52 MPI_Info_create(&win_info_false);
53 MPI_Info_set(win_info_false,
"alloc_shared_noncontig",
"false");
// Create one window per rank. On the iteration where i is this rank, the
// requested size is sizeof_segment, or m_sizeof_type when sizeof_segment is 0.
// NOTE(review): the size used on the other iterations is set on a line that is
// missing from this listing - presumably 0; confirm against the full source.
56 for (Integer i = 0; i < m_comm_machine_size; ++i) {
58 if (m_comm_machine_rank == i) {
59 if (sizeof_segment == 0)
60 size_seg = m_sizeof_type;
62 size_seg = sizeof_segment;
65 std::byte* ptr_seg =
nullptr;
66 int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info_true, m_comm_machine, &ptr_seg, &m_all_mpi_win[i]);
68 if (error != MPI_SUCCESS) {
69 ARCCORE_FATAL(
"Error with MPI_Win_allocate_shared() call");
// Query this rank's own window for the address and size of its segment; the
// result becomes the reserved span of this rank.
75 std::byte* ptr_seg =
nullptr;
76 int error = MPI_Win_shared_query(m_all_mpi_win[m_comm_machine_rank], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
78 if (error != MPI_SUCCESS) {
79 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
85 m_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
// Window m_win_need_resize: every rank contributes one Int64. It is viewed from
// the base address of rank 0 as an array of m_comm_machine_size entries; this
// rank's entry starts at -1.
90 Int64* ptr_seg =
nullptr;
91 Int64* ptr_win =
nullptr;
93 int error = MPI_Win_allocate_shared(
sizeof(Int64),
sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_need_resize);
95 if (error != MPI_SUCCESS) {
96 ARCCORE_FATAL(
"Error with MPI_Win_allocate_shared() call");
102 int error = MPI_Win_shared_query(m_win_need_resize, 0, &size_seg, &size_type, &ptr_win);
104 if (error != MPI_SUCCESS) {
105 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
108 m_need_resize = Span<Int64>{ ptr_win, m_comm_machine_size };
109 m_need_resize[m_comm_machine_rank] = -1;
// Contiguity check: the slot allocated by this rank must sit exactly
// m_comm_machine_rank entries after the base address of rank 0.
111 if (ptr_win + m_comm_machine_rank != ptr_seg) {
112 ARCCORE_FATAL(
"m_win_need_resize is noncontig");
// Window m_win_actual_sizeof: same layout, one Int64 per rank; this rank's
// entry is initialised with sizeof_segment (the used size of its segment).
117 Int64* ptr_seg =
nullptr;
118 Int64* ptr_win =
nullptr;
120 int error = MPI_Win_allocate_shared(
sizeof(Int64),
sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_actual_sizeof);
122 if (error != MPI_SUCCESS) {
123 ARCCORE_FATAL(
"Error with MPI_Win_allocate_shared() call");
129 int error = MPI_Win_shared_query(m_win_actual_sizeof, 0, &size_seg, &size_type, &ptr_win);
131 if (error != MPI_SUCCESS) {
132 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
135 m_sizeof_used_part = Span<Int64>{ ptr_win, m_comm_machine_size };
136 m_sizeof_used_part[m_comm_machine_rank] = sizeof_segment;
138 if (ptr_win + m_comm_machine_rank != ptr_seg) {
139 ARCCORE_FATAL(
"m_win_actual_sizeof is noncontig");
// Window m_win_target_segments: one Int32 per rank; this rank's entry starts
// at -1.
144 Int32* ptr_seg =
nullptr;
145 Int32* ptr_win =
nullptr;
147 int error = MPI_Win_allocate_shared(
sizeof(Int32),
sizeof(Int32), win_info_false, m_comm_machine, &ptr_seg, &m_win_target_segments);
149 if (error != MPI_SUCCESS) {
150 ARCCORE_FATAL(
"Error with MPI_Win_allocate_shared() call");
156 int error = MPI_Win_shared_query(m_win_target_segments, 0, &size_seg, &size_type, &ptr_win);
158 if (error != MPI_SUCCESS) {
159 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
162 m_target_segments = Span<Int32>{ ptr_win, m_comm_machine_size };
163 m_target_segments[m_comm_machine_rank] = -1;
// NOTE(review): the message below names m_win_owner_segments although the
// window being checked is m_win_target_segments.
165 if (ptr_win + m_comm_machine_rank != ptr_seg) {
166 ARCCORE_FATAL(
"m_win_owner_segments is noncontig");
// The info objects are only needed while creating the windows.
170 MPI_Info_free(&win_info_false);
171 MPI_Info_free(&win_info_true);
// Wait for all ranks, so that every rank has written its initial slot values
// before anyone returns from the constructor.
173 MPI_Barrier(m_comm_machine);
// Destructor. Frees every per-rank segment window stored in m_all_mpi_win,
// then the three bookkeeping windows.
// NOTE(review): the return codes of MPI_Win_free are not checked here.
179MpiDynamicMachineMemoryWindowBaseInternal::
180~MpiDynamicMachineMemoryWindowBaseInternal()
182 for (Integer i = 0; i < m_comm_machine_size; ++i) {
183 MPI_Win_free(&m_all_mpi_win[i]);
185 MPI_Win_free(&m_win_need_resize);
186 MPI_Win_free(&m_win_actual_sizeof);
187 MPI_Win_free(&m_win_target_segments);
// Accessor returning m_sizeof_type (the element size, in bytes).
// NOTE(review): the line carrying the member name (source line 194) is missing
// from this listing.
193Int32 MpiDynamicMachineMemoryWindowBaseInternal::
196 return m_sizeof_type;
// Accessor returning a read-only view on m_machine_ranks.
// NOTE(review): the line carrying the member name (source line 203) is missing
// from this listing.
202ConstArrayView<Int32> MpiDynamicMachineMemoryWindowBaseInternal::
205 return m_machine_ranks;
// Blocks until every rank of m_comm_machine has reached this call.
// NOTE(review): the line carrying the member name (source line 212) is missing
// from this listing.
211void MpiDynamicMachineMemoryWindowBaseInternal::
214 MPI_Barrier(m_comm_machine);
// Returns a mutable view on the used part of this rank's own segment: the
// first m_sizeof_used_part[m_comm_machine_rank] bytes of the reserved span.
// NOTE(review): the line carrying the member name (source line 221) is missing
// from this listing.
220Span<std::byte> MpiDynamicMachineMemoryWindowBaseInternal::
223 return m_reserved_part_span.subSpan(0, m_sizeof_used_part[m_comm_machine_rank]);
// Returns a mutable view on the used part of the segment owned by `rank`.
//
// rank : rank to look up; it is converted with _worldToMachine(), which raises
//        a fatal error when the rank is not found in m_machine_ranks.
//
// The base address is obtained by querying the window of that rank; the length
// of the returned span is the used size recorded in m_sizeof_used_part, not the
// size reported by the query.
// NOTE(review): the declarations of size_seg / size_type (source lines 233-235)
// are missing from this listing.
229Span<std::byte> MpiDynamicMachineMemoryWindowBaseInternal::
230segmentView(Int32 rank)
232 const Int32 machine_rank = _worldToMachine(rank);
236 std::byte* ptr_seg =
nullptr;
237 int error = MPI_Win_shared_query(m_all_mpi_win[machine_rank], machine_rank, &size_seg, &size_type, &ptr_seg);
239 if (error != MPI_SUCCESS) {
240 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
243 return Span<std::byte>{ ptr_seg, m_sizeof_used_part[machine_rank] };
// Read-only variant: returns a const view on the used part of this rank's own
// segment (the first m_sizeof_used_part[m_comm_machine_rank] bytes of the
// reserved span).
249Span<const std::byte> MpiDynamicMachineMemoryWindowBaseInternal::
250segmentConstView()
const
252 return m_reserved_part_span.subSpan(0, m_sizeof_used_part[m_comm_machine_rank]);
// Read-only variant: returns a const view on the used part of the segment
// owned by `rank`.
//
// rank : rank to look up; converted with _worldToMachine(), which raises a
//        fatal error when the rank is not found in m_machine_ranks.
//
// The base address comes from querying that rank's window; the length is the
// used size recorded in m_sizeof_used_part[machine_rank].
// NOTE(review): the declarations of size_seg / size_type (source lines 262-264)
// are missing from this listing.
258Span<const std::byte> MpiDynamicMachineMemoryWindowBaseInternal::
259segmentConstView(Int32 rank)
const
261 const Int32 machine_rank = _worldToMachine(rank);
265 std::byte* ptr_seg =
nullptr;
266 int error = MPI_Win_shared_query(m_all_mpi_win[machine_rank], machine_rank, &size_seg, &size_type, &ptr_seg);
268 if (error != MPI_SUCCESS) {
269 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
272 return Span<const std::byte>{ ptr_seg, m_sizeof_used_part[machine_rank] };
278void MpiDynamicMachineMemoryWindowBaseInternal::
279add(Span<const std::byte> elem)
281 if (elem.size() % m_sizeof_type) {
282 ARCCORE_FATAL(
"Sizeof elem not valid");
285 const Int64 actual_sizeof_win = m_sizeof_used_part[m_comm_machine_rank];
286 const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
287 const Int64 old_reserved = m_reserved_part_span.size();
289 if (future_sizeof_win > old_reserved) {
290 _reallocBarrier(future_sizeof_win);
291 if (m_reserved_part_span.size() < future_sizeof_win) {
292 ARCCORE_FATAL(
"Bad realloc -- Old size : {0} -- New size : {1} -- Needed size : {2}", old_reserved, m_reserved_part_span.size(), future_sizeof_win);
299 for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
300 m_reserved_part_span[pos_win] = elem[pos_elem];
302 m_sizeof_used_part[m_comm_machine_rank] = future_sizeof_win;
307 MPI_Barrier(m_comm_machine);
// Member whose only visible statement is a barrier on m_comm_machine.
// NOTE(review): source lines 314-316 (member name and possibly other
// statements) are missing from this listing - presumably this is the
// argument-less counterpart of add(elem), called by ranks that have nothing to
// append; confirm against the full source.
313void MpiDynamicMachineMemoryWindowBaseInternal::
317 MPI_Barrier(m_comm_machine);
323void MpiDynamicMachineMemoryWindowBaseInternal::
324addToAnotherSegment(Int32 rank, Span<const std::byte> elem)
326 if (elem.size() % m_sizeof_type) {
327 ARCCORE_FATAL(
"Sizeof elem not valid");
330 const Int32 machine_rank = _worldToMachine(rank);
332 m_target_segments[m_comm_machine_rank] = machine_rank;
333 MPI_Barrier(m_comm_machine);
337 bool is_my_seg_edited =
false;
339 bool is_found =
false;
340 for (
const Int32 rank_asked : m_target_segments) {
341 if (rank_asked == machine_rank) {
346 ARCCORE_FATAL(
"Two subdomains ask same rank for addToAnotherSegment()");
349 if (rank_asked == m_comm_machine_rank) {
350 is_my_seg_edited =
true;
355 Span<std::byte> rank_reserved_part_span;
358 std::byte* ptr_seg =
nullptr;
360 int error = MPI_Win_shared_query(m_all_mpi_win[machine_rank], machine_rank, &size_seg, &size_type, &ptr_seg);
362 if (error != MPI_SUCCESS) {
363 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
365 rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
368 const Int64 actual_sizeof_win = m_sizeof_used_part[machine_rank];
369 const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
370 const Int64 old_reserved = rank_reserved_part_span.size();
372 if (future_sizeof_win > old_reserved) {
373 _reallocBarrier(machine_rank, future_sizeof_win);
377 std::byte* ptr_seg =
nullptr;
379 int error = MPI_Win_shared_query(m_all_mpi_win[machine_rank], machine_rank, &size_seg, &size_type, &ptr_seg);
381 if (error != MPI_SUCCESS) {
382 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
384 rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
387 if (rank_reserved_part_span.size() < future_sizeof_win) {
388 ARCCORE_FATAL(
"Bad realloc -- Old size : {0} -- New size : {1} -- Needed size : {2}", old_reserved, rank_reserved_part_span.size(), future_sizeof_win);
395 for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
396 rank_reserved_part_span[pos_win] = elem[pos_elem];
398 m_sizeof_used_part[machine_rank] = future_sizeof_win;
403 MPI_Barrier(m_comm_machine);
404 m_target_segments[m_comm_machine_rank] = -1;
407 if (is_my_seg_edited) {
409 std::byte* ptr_seg =
nullptr;
411 int error = MPI_Win_shared_query(m_all_mpi_win[m_comm_machine_rank], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
413 if (error != MPI_SUCCESS) {
414 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
416 m_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
// After a barrier, checks whether any rank has published this rank as its
// target in m_target_segments; after a second barrier, if so, queries this
// rank's own window again to refresh m_reserved_part_span.
// NOTE(review): source lines 424-427 (member name and possibly other
// statements) and 434-438 are missing from this listing - presumably this is
// the argument-less counterpart of addToAnotherSegment(rank, elem), called by
// ranks that have nothing to append; confirm against the full source.
423void MpiDynamicMachineMemoryWindowBaseInternal::
428 MPI_Barrier(m_comm_machine);
430 bool is_my_seg_edited =
false;
431 for (
const Int32 rank : m_target_segments) {
432 if (rank == m_comm_machine_rank) {
433 is_my_seg_edited =
true;
439 MPI_Barrier(m_comm_machine);
441 if (is_my_seg_edited) {
443 std::byte* ptr_seg =
nullptr;
445 int error = MPI_Win_shared_query(m_all_mpi_win[m_comm_machine_rank], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
447 if (error != MPI_SUCCESS) {
448 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
450 m_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
// Requests that the reserved area of this rank's segment be reallocated to
// new_capacity bytes via _reallocBarrier().
//
// new_capacity : requested reserved size, compared first with the current
//                reserved size.
// NOTE(review): the body of the `new_capacity <= reserved size` branch (source
// lines 461-463) is missing from this listing, so what happens when no growth
// is needed is not visible here.
457void MpiDynamicMachineMemoryWindowBaseInternal::
458reserve(Int64 new_capacity)
460 if (new_capacity <= m_reserved_part_span.size()) {
464 _reallocBarrier(new_capacity);
// NOTE(review): only the first line of this member definition is present in
// this listing; its name and body (source lines 472-479) are missing.
471void MpiDynamicMachineMemoryWindowBaseInternal::
// Sets the used size of this rank's own segment to new_size, growing the
// reserved area first when it is too small.
//
// new_size : new used size in bytes. Must be >= 0 and a multiple of
//            m_sizeof_type, otherwise a fatal error is raised. The value -1 is
//            handled separately before that check.
//
// The function contains barriers on m_comm_machine, so it has to be called by
// all ranks together.
// NOTE(review): source lines 484 and 486-488 (rest of the -1 branch) and
// 499-503 are missing from this listing; only a barrier is visible in the -1
// branch.
480void MpiDynamicMachineMemoryWindowBaseInternal::
481resize(Int64 new_size)
483 if (new_size == -1) {
485 MPI_Barrier(m_comm_machine);
489 if (new_size < 0 || new_size % m_sizeof_type) {
490 ARCCORE_FATAL(
"new_size not valid");
493 Int64 old_reserved = m_reserved_part_span.size();
// Not enough reserved room: request a reallocation, then verify the result.
495 if (new_size > old_reserved) {
496 _reallocBarrier(new_size);
497 if (m_reserved_part_span.size() < new_size) {
498 ARCCORE_FATAL(
"Bad realloc -- Old size : {0} -- New size : {1} -- Needed size : {2}", old_reserved, m_reserved_part_span.size(), new_size);
// Publish the new used size in the shared array.
504 m_sizeof_used_part[m_comm_machine_rank] = new_size;
509 MPI_Barrier(m_comm_machine);
// Member whose only visible statement is a barrier on m_comm_machine.
// NOTE(review): source lines 516-518 (member name and possibly other
// statements) are missing from this listing - presumably this is the
// argument-less counterpart of resize(new_size); confirm against the full
// source.
515void MpiDynamicMachineMemoryWindowBaseInternal::
519 MPI_Barrier(m_comm_machine);
// Requests a reallocation of this rank's segment to exactly its used size
// (m_sizeof_used_part[m_comm_machine_rank]).
// NOTE(review): the member name (source lines 526-527) and the body of the
// `reserved == used` branch (source lines 529-531) are missing from this
// listing, so what happens when the sizes already match is not visible here.
525void MpiDynamicMachineMemoryWindowBaseInternal::
528 if (m_reserved_part_span.size() == m_sizeof_used_part[m_comm_machine_rank]) {
532 _reallocBarrier(m_sizeof_used_part[m_comm_machine_rank]);
// Requests a reallocation of this rank's own segment and takes part in the
// collective reallocation.
//
// new_sizeof : requested reserved size, written into this rank's slot of
//              m_need_resize.
//
// Sequence: publish the request, barrier so that every rank sees all requests,
// run _reallocCollective(), reset the slot to -1 (no request), final barrier.
539void MpiDynamicMachineMemoryWindowBaseInternal::
540_reallocBarrier(Int64 new_sizeof)
542 m_need_resize[m_comm_machine_rank] = new_sizeof;
546 MPI_Barrier(m_comm_machine);
548 _reallocCollective();
552 m_need_resize[m_comm_machine_rank] = -1;
559 MPI_Barrier(m_comm_machine);
// Requests a reallocation of the segment owned by another rank and takes part
// in the collective reallocation.
//
// machine_rank : index of the slot of m_need_resize to write, i.e. the owner of
//                the segment to reallocate.
// new_sizeof   : requested reserved size for that segment.
//
// Sequence: publish the request in the owner's slot, barrier, run
// _reallocCollective(), reset that slot to -1 (no request), final barrier.
565void MpiDynamicMachineMemoryWindowBaseInternal::
566_reallocBarrier(Int32 machine_rank, Int64 new_sizeof)
568 m_need_resize[machine_rank] = new_sizeof;
572 MPI_Barrier(m_comm_machine);
574 _reallocCollective();
578 m_need_resize[machine_rank] = -1;
585 MPI_Barrier(m_comm_machine);
// Takes part in the collective reallocation without writing any request into
// m_need_resize: barrier, _reallocCollective(), barrier.
// NOTE(review): the line carrying the member name (source line 592) is missing
// from this listing.
591void MpiDynamicMachineMemoryWindowBaseInternal::
594 MPI_Barrier(m_comm_machine);
595 _reallocCollective();
596 MPI_Barrier(m_comm_machine);
// Collective re-creation of the segment windows for which a resize request is
// pending in m_need_resize.
//
// For each rank i with a request, a new window replaces m_all_mpi_win[i]. Only
// the owner (m_comm_machine_rank == i) asks for a non-zero size: the requested
// size, or m_sizeof_type when the request is 0. The owner then copies
// min(requested size, used size) bytes from its old reserved span into the new
// memory and updates m_reserved_part_span. The previous window is freed.
//
// NOTE(review): the member name (source lines 603-605), the body of the
// `== -1` test (source lines 611-612, presumably skipping ranks without a
// request) and several declarations are missing from this listing. Judging by
// the calls at source lines 548, 574 and 595 this is likely
// _reallocCollective(); confirm against the full source.
602void MpiDynamicMachineMemoryWindowBaseInternal::
606 MPI_Info_create(&win_info);
607 MPI_Info_set(win_info,
"alloc_shared_noncontig",
"true");
609 for (Integer i = 0; i < m_comm_machine_size; ++i) {
610 if (m_need_resize[i] == -1)
613 ARCCORE_ASSERT(m_need_resize[i] >= 0, (
"New size must be >= 0"));
614 ARCCORE_ASSERT(m_need_resize[i] % m_sizeof_type == 0, (
"New size must be % sizeof type"));
// Non-owners request 0 bytes; the owner requests at least one element.
616 const Int64 size_seg = (m_comm_machine_rank == i ? (m_need_resize[i] == 0 ? m_sizeof_type : m_need_resize[i]) : 0);
// Keep the previous handle: its memory is still needed as the copy source.
618 MPI_Win old_win = m_all_mpi_win[i];
620 std::byte* ptr_seg =
nullptr;
623 int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info, m_comm_machine, &ptr_seg, &m_all_mpi_win[i]);
// NOTE(review): unlike the query error path below, old_win is not freed on
// this error path.
624 if (error != MPI_SUCCESS) {
625 MPI_Info_free(&win_info);
626 ARCCORE_FATAL(
"Error with MPI_Win_allocate_shared() call");
// Owner only: fetch the address and size of the new segment and migrate the data.
629 if (m_comm_machine_rank == i) {
631 MPI_Aint mpi_reserved_size_seg;
636 error = MPI_Win_shared_query(m_all_mpi_win[m_comm_machine_rank], m_comm_machine_rank, &mpi_reserved_size_seg, &size_type, &ptr_seg);
637 if (error != MPI_SUCCESS || ptr_seg ==
nullptr) {
638 MPI_Win_free(&old_win);
639 MPI_Info_free(&win_info);
640 ARCCORE_FATAL(
"Error with MPI_Win_shared_query() call");
// Copy no more than what fits in the new area and no more than what was in use.
643 const Int64 min_size = std::min(m_need_resize[i], m_sizeof_used_part[m_comm_machine_rank]);
644 memcpy(ptr_seg, m_reserved_part_span.data(), min_size);
646 m_reserved_part_span = Span<std::byte>{ ptr_seg, mpi_reserved_size_seg };
648 MPI_Win_free(&old_win);
650 MPI_Info_free(&win_info);
// Looks up `world` in m_machine_ranks with a linear scan over the first
// m_comm_machine_size entries.
//
// world : rank value to search for.
//
// Raises a fatal error ("Rank is not in machine") when no entry matches.
// NOTE(review): the body of the match branch (source lines 661-663) is missing
// from this listing - presumably it returns the matching index i; confirm
// against the full source.
656Int32 MpiDynamicMachineMemoryWindowBaseInternal::
657_worldToMachine(Int32 world)
const
659 for (Int32 i = 0; i < m_comm_machine_size; ++i) {
660 if (m_machine_ranks[i] == world) {
664 ARCCORE_FATAL(
"Rank is not in machine");
// Inverse lookup of _worldToMachine(): returns the entry of m_machine_ranks at
// index `machine`.
//
// machine : index into m_machine_ranks; no range check is visible in this
//           function.
670Int32 MpiDynamicMachineMemoryWindowBaseInternal::
671_machineToWorld(Int32 machine)
const
673 return m_machine_ranks[machine];
std::int64_t Int64
Type entier signé sur 64 bits.
std::int32_t Int32
Type entier signé sur 32 bits.