Arcane  v3.16.7.0
Documentation utilisateur
Chargement...
Recherche...
Aucune correspondance
MpiDynamicMultiMachineMemoryWindowBaseInternal.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* MpiDynamicMultiMachineMemoryWindowBaseInternal.cc (C) 2000-2025 */
9/* */
10/* Classe permettant de créer des fenêtres mémoires pour un noeud de calcul. */
11/* Les segments de ces fenêtres ne sont pas contigües en mémoire et peuvent */
12/* être redimensionnées. Un processus peut posséder plusieurs segments. */
13/*---------------------------------------------------------------------------*/
14
15#include "arccore/message_passing_mpi/internal/MpiDynamicMultiMachineMemoryWindowBaseInternal.h"
16
17#include "arccore/base/FatalErrorException.h"
18
19/*---------------------------------------------------------------------------*/
20/*---------------------------------------------------------------------------*/
21
22namespace Arcane::MessagePassing::Mpi
23{
24
25/*---------------------------------------------------------------------------*/
26/*---------------------------------------------------------------------------*/
27
28//! Le sizeof_segments ne doit pas être conservé !
29MpiDynamicMultiMachineMemoryWindowBaseInternal::
30MpiDynamicMultiMachineMemoryWindowBaseInternal(SmallSpan<Int64> sizeof_segments, Int32 nb_segments_per_proc, Int32 sizeof_type, const MPI_Comm& comm_machine, Int32 comm_machine_rank, Int32 comm_machine_size, ConstArrayView<Int32> machine_ranks)
31: m_win_need_resize()
32, m_win_actual_sizeof()
33, m_win_target_segments()
34, m_comm_machine(comm_machine)
35, m_comm_machine_size(comm_machine_size)
36, m_comm_machine_rank(comm_machine_rank)
37, m_sizeof_type(sizeof_type)
38, m_nb_segments_per_proc(nb_segments_per_proc)
39, m_machine_ranks(machine_ranks)
40, m_add_requests(nb_segments_per_proc)
41, m_resize_requests(nb_segments_per_proc)
42{
43 if (m_sizeof_type <= 0) {
44 ARCCORE_FATAL("Invalid sizeof_type");
45 }
46 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
47 if (sizeof_segments[i] < 0 || sizeof_segments[i] % m_sizeof_type != 0) {
48 ARCCORE_FATAL("Invalid initial sizeof_segment");
49 }
50 }
51 if (m_nb_segments_per_proc <= 0) {
52 ARCCORE_FATAL("Invalid nb_segments_per_proc");
53 }
54 m_all_mpi_win.resize(m_comm_machine_size * m_nb_segments_per_proc);
55 m_reserved_part_span.resize(m_nb_segments_per_proc);
56
57 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
58 m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
59 m_resize_requests[num_seg] = -1;
60 }
61
62 MPI_Info win_info_true;
63 MPI_Info_create(&win_info_true);
64 MPI_Info_set(win_info_true, "alloc_shared_noncontig", "true");
65
66 MPI_Info win_info_false;
67 MPI_Info_create(&win_info_false);
68 MPI_Info_set(win_info_false, "alloc_shared_noncontig", "false");
69
70 const Int32 pos_my_wins = m_comm_machine_rank * m_nb_segments_per_proc;
71
72 {
73 // On crée tous les segments de tous les processus.
74 for (Integer i = 0; i < m_comm_machine_size; ++i) {
75 for (Integer j = 0; j < m_nb_segments_per_proc; ++j) {
76 Int64 size_seg = 0;
77 if (m_comm_machine_rank == i) {
78 if (sizeof_segments[j] == 0)
79 size_seg = m_sizeof_type;
80 else
81 size_seg = sizeof_segments[j];
82 }
83 std::byte* ptr_seg = nullptr;
84 int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info_true, m_comm_machine, &ptr_seg, &m_all_mpi_win[j + i * m_nb_segments_per_proc]);
85
86 if (error != MPI_SUCCESS) {
87 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
88 }
89 }
90 }
91
92
93 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
94 MPI_Aint size_seg;
95 int size_type;
96 std::byte* ptr_seg = nullptr;
97 int error = MPI_Win_shared_query(m_all_mpi_win[i + pos_my_wins], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
98
99 if (error != MPI_SUCCESS) {
100 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
101 }
102
103 // Attention : L'utilisateur demande un nombre minimum d'éléments réservés.
104 // Mais MPI réserve la taille qu'il veut (effet du alloc_shared_noncontig=true).
105 // On est juste sûr que la taille qu'il a réservée est supérieure ou égale à sizeof_segment.
106 m_reserved_part_span[i] = Span<std::byte>{ ptr_seg, size_seg };
107 }
108 }
109
110 {
111 Int64* ptr_seg = nullptr;
112 Int64* ptr_win = nullptr;
113 {
114 int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int64)) * m_nb_segments_per_proc, sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_need_resize);
115
116 if (error != MPI_SUCCESS) {
117 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
118 }
119 }
120 {
121 MPI_Aint size_seg;
122 int size_type;
123 int error = MPI_Win_shared_query(m_win_need_resize, 0, &size_seg, &size_type, &ptr_win);
124
125 if (error != MPI_SUCCESS) {
126 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
127 }
128
129 m_need_resize = Span<Int64>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };
130
131 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
132 m_need_resize[i + pos_my_wins] = -1;
133 }
134 }
135 if (ptr_win + pos_my_wins != ptr_seg) {
136 ARCCORE_FATAL("m_win_need_resize is noncontig");
137 }
138 }
139
140 {
141 Int64* ptr_seg = nullptr;
142 Int64* ptr_win = nullptr;
143 {
144 int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int64)) * m_nb_segments_per_proc, sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_actual_sizeof);
145
146 if (error != MPI_SUCCESS) {
147 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
148 }
149 }
150 {
151 MPI_Aint size_seg;
152 int size_type;
153 int error = MPI_Win_shared_query(m_win_actual_sizeof, 0, &size_seg, &size_type, &ptr_win);
154
155 if (error != MPI_SUCCESS) {
156 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
157 }
158
159 m_sizeof_used_part = Span<Int64>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };
160
161 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
162 m_sizeof_used_part[i + pos_my_wins] = sizeof_segments[i];
163 }
164 }
165 if (ptr_win + pos_my_wins != ptr_seg) {
166 ARCCORE_FATAL("m_win_actual_sizeof is noncontig");
167 }
168 }
169
170 {
171 Int32* ptr_seg = nullptr;
172 Int32* ptr_win = nullptr;
173 {
174 int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int32)) * m_nb_segments_per_proc, sizeof(Int32), win_info_false, m_comm_machine, &ptr_seg, &m_win_target_segments);
175
176 if (error != MPI_SUCCESS) {
177 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
178 }
179 }
180 {
181 MPI_Aint size_seg;
182 int size_type;
183 int error = MPI_Win_shared_query(m_win_target_segments, 0, &size_seg, &size_type, &ptr_win);
184
185 if (error != MPI_SUCCESS) {
186 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
187 }
188
189 m_target_segments = Span<Int32>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };
190
191 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
192 m_target_segments[i + pos_my_wins] = -1;
193 }
194 }
195 if (ptr_win + pos_my_wins != ptr_seg) {
196 ARCCORE_FATAL("m_win_owner_segments is noncontig");
197 }
198 }
199
200 MPI_Info_free(&win_info_false);
201 MPI_Info_free(&win_info_true);
202
203 MPI_Barrier(m_comm_machine);
204}
205
206/*---------------------------------------------------------------------------*/
207/*---------------------------------------------------------------------------*/
208
209MpiDynamicMultiMachineMemoryWindowBaseInternal::
210~MpiDynamicMultiMachineMemoryWindowBaseInternal()
211{
212 for (Integer i = 0; i < m_comm_machine_size * m_nb_segments_per_proc; ++i) {
213 MPI_Win_free(&m_all_mpi_win[i]);
214 }
215 MPI_Win_free(&m_win_need_resize);
216 MPI_Win_free(&m_win_actual_sizeof);
217 MPI_Win_free(&m_win_target_segments);
218}
219
220/*---------------------------------------------------------------------------*/
221/*---------------------------------------------------------------------------*/
222
223Int32 MpiDynamicMultiMachineMemoryWindowBaseInternal::
224sizeofOneElem() const
225{
226 return m_sizeof_type;
227}
228
229/*---------------------------------------------------------------------------*/
230/*---------------------------------------------------------------------------*/
231
232ConstArrayView<Int32> MpiDynamicMultiMachineMemoryWindowBaseInternal::
233machineRanks() const
234{
235 return m_machine_ranks;
236}
237
238/*---------------------------------------------------------------------------*/
239/*---------------------------------------------------------------------------*/
240
241void MpiDynamicMultiMachineMemoryWindowBaseInternal::
242barrier() const
243{
244 MPI_Barrier(m_comm_machine);
245}
246
247/*---------------------------------------------------------------------------*/
248/*---------------------------------------------------------------------------*/
249
250Span<std::byte> MpiDynamicMultiMachineMemoryWindowBaseInternal::
251segmentView(Int32 num_seg)
252{
253 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
254 return m_reserved_part_span[num_seg].subSpan(0, m_sizeof_used_part[segment_infos_pos]);
255}
256
257/*---------------------------------------------------------------------------*/
258/*---------------------------------------------------------------------------*/
259
260Span<std::byte> MpiDynamicMultiMachineMemoryWindowBaseInternal::
261segmentView(Int32 rank, Int32 num_seg)
262{
263 const Int32 segment_infos_pos = num_seg + _worldToMachine(rank) * m_nb_segments_per_proc;
264
265 MPI_Aint size_seg;
266 int size_type;
267 std::byte* ptr_seg = nullptr;
268 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], rank, &size_seg, &size_type, &ptr_seg);
269
270 if (error != MPI_SUCCESS) {
271 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
272 }
273
274 return Span<std::byte>{ ptr_seg, m_sizeof_used_part[segment_infos_pos] };
275}
276
277/*---------------------------------------------------------------------------*/
278/*---------------------------------------------------------------------------*/
279
280Span<const std::byte> MpiDynamicMultiMachineMemoryWindowBaseInternal::
281segmentConstView(Int32 num_seg) const
282{
283 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
284 return m_reserved_part_span[num_seg].subSpan(0, m_sizeof_used_part[segment_infos_pos]);
285}
286
287/*---------------------------------------------------------------------------*/
288/*---------------------------------------------------------------------------*/
289
290Span<const std::byte> MpiDynamicMultiMachineMemoryWindowBaseInternal::
291segmentConstView(Int32 rank, Int32 num_seg) const
292{
293 const Int32 segment_infos_pos = num_seg + _worldToMachine(rank) * m_nb_segments_per_proc;
294
295 MPI_Aint size_seg;
296 int size_type;
297 std::byte* ptr_seg = nullptr;
298 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], rank, &size_seg, &size_type, &ptr_seg);
299
300 if (error != MPI_SUCCESS) {
301 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
302 }
303
304 return Span<const std::byte>{ ptr_seg, m_sizeof_used_part[segment_infos_pos] };
305}
306
307/*---------------------------------------------------------------------------*/
308/*---------------------------------------------------------------------------*/
309
310void MpiDynamicMultiMachineMemoryWindowBaseInternal::
311requestAdd(Int32 num_seg, Span<const std::byte> elem)
312{
313 if (elem.size() % m_sizeof_type) {
314 ARCCORE_FATAL("Sizeof elem not valid");
315 }
316 if (elem.empty() || elem.data() == nullptr) {
317 return;
318 }
319
320 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
321
322 const Int64 actual_sizeof_win = m_sizeof_used_part[segment_infos_pos];
323 const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
324 const Int64 old_reserved = m_reserved_part_span[num_seg].size();
325
326 if (future_sizeof_win > old_reserved) {
327 _requestRealloc(segment_infos_pos, future_sizeof_win);
328 }
329 else {
330 _requestRealloc(segment_infos_pos);
331 }
332
333 m_add_requests[num_seg] = elem;
334 m_add_requested = true; // TODO Atomic ?
335}
336
337/*---------------------------------------------------------------------------*/
338/*---------------------------------------------------------------------------*/
339
340void MpiDynamicMultiMachineMemoryWindowBaseInternal::
341executeAdd()
342{
343 _executeRealloc();
344
345 if (!m_add_requested) {
346 return;
347 }
348 m_add_requested = false;
349
350 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
351 if (m_add_requests[num_seg].empty() || m_add_requests[num_seg].data() == nullptr) {
352 continue;
353 }
354
355 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
356
357 const Int64 actual_sizeof_win = m_sizeof_used_part[segment_infos_pos];
358 const Int64 future_sizeof_win = actual_sizeof_win + m_add_requests[num_seg].size();
359
360 if (m_reserved_part_span[num_seg].size() < future_sizeof_win) {
361 ARCCORE_FATAL("Bad realloc -- New size : {1} -- Needed size : {2}", m_reserved_part_span[num_seg].size(), future_sizeof_win);
362 }
363
364 for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
365 m_reserved_part_span[num_seg][pos_win] = m_add_requests[num_seg][pos_elem];
366 }
367 m_sizeof_used_part[segment_infos_pos] = future_sizeof_win;
368
369 m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
370 }
371}
372
373/*---------------------------------------------------------------------------*/
374/*---------------------------------------------------------------------------*/
375
376void MpiDynamicMultiMachineMemoryWindowBaseInternal::
377requestAddToAnotherSegment(Int32 thread, Int32 rank, Int32 num_seg, Span<const std::byte> elem)
378{
379 if (elem.size() % m_sizeof_type) {
380 ARCCORE_FATAL("Sizeof elem not valid");
381 }
382 if (elem.empty() || elem.data() == nullptr) {
383 return;
384 }
385
386 const Int32 machine_rank = _worldToMachine(rank);
387 const Int32 target_segment_infos_pos = num_seg + machine_rank * m_nb_segments_per_proc;
388
389 {
390 const Int32 segment_infos_pos = thread + m_comm_machine_rank * m_nb_segments_per_proc;
391 m_target_segments[segment_infos_pos] = target_segment_infos_pos;
392 }
393
394 Span<std::byte> rank_reserved_part_span;
395 {
396 MPI_Aint size_seg;
397 std::byte* ptr_seg = nullptr;
398 int size_type;
399 int error = MPI_Win_shared_query(m_all_mpi_win[target_segment_infos_pos], machine_rank, &size_seg, &size_type, &ptr_seg);
400
401 if (error != MPI_SUCCESS) {
402 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
403 }
404 rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
405 }
406
407 const Int64 actual_sizeof_win = m_sizeof_used_part[target_segment_infos_pos];
408 const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
409 const Int64 old_reserved = rank_reserved_part_span.size();
410
411 if (future_sizeof_win > old_reserved) {
412 _requestRealloc(target_segment_infos_pos, future_sizeof_win);
413 }
414 else {
415 _requestRealloc(target_segment_infos_pos);
416 }
417
418 m_add_requests[thread] = elem;
419 m_add_requested = true; // TODO Atomic ?
420}
421
422/*---------------------------------------------------------------------------*/
423/*---------------------------------------------------------------------------*/
424
425void MpiDynamicMultiMachineMemoryWindowBaseInternal::
426executeAddToAnotherSegment()
427{
428 MPI_Barrier(m_comm_machine);
429
430 auto is_my_seg_edited = std::make_unique<bool[]>(m_comm_machine_size);
431 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
432 for (const Int32 rank_asked : m_target_segments) {
433 if (rank_asked == m_comm_machine_rank) {
434 is_my_seg_edited[num_seg] = true;
435 break;
436 }
437 }
438 }
439
440 if (!m_add_requested) {
441 _executeRealloc();
442 }
443
444 else {
445 m_add_requested = false;
446 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
447 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
448 const Int32 seg_needs_to_edit = m_target_segments[segment_infos_pos];
449 if (seg_needs_to_edit == -1)
450 continue;
451
452 bool is_found = false;
453 for (const Int32 rank_asked : m_target_segments) {
454 if (rank_asked == seg_needs_to_edit) {
455 if (!is_found) {
456 is_found = true;
457 }
458 else {
459 ARCCORE_FATAL("Two subdomains ask same rank for addToAnotherSegment()");
460 }
461 }
462 }
463 }
464
465 _executeRealloc();
466
467 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
468 if (m_add_requests[num_seg].empty() || m_add_requests[num_seg].data() == nullptr) {
469 continue;
470 }
471
472 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
473 const Int32 target_segment_infos_pos = m_target_segments[segment_infos_pos];
474 if (target_segment_infos_pos == -1) {
475 ARCCORE_FATAL("Ne devrait pas aller ici");
476 }
477
478 const Int64 actual_sizeof_win = m_sizeof_used_part[target_segment_infos_pos];
479 const Int64 future_sizeof_win = actual_sizeof_win + m_add_requests[num_seg].size();
480
481 Span<std::byte> rank_reserved_part_span;
482 {
483 MPI_Aint size_seg;
484 std::byte* ptr_seg = nullptr;
485 int size_type;
486 int error = MPI_Win_shared_query(m_all_mpi_win[target_segment_infos_pos], target_segment_infos_pos / m_nb_segments_per_proc, &size_seg, &size_type, &ptr_seg);
487
488 if (error != MPI_SUCCESS) {
489 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
490 }
491 rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
492 }
493
494 if (rank_reserved_part_span.size() < future_sizeof_win) {
495 ARCCORE_FATAL("Bad realloc -- New size : {1} -- Needed size : {2}", rank_reserved_part_span.size(), future_sizeof_win);
496 }
497
498 for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
499 rank_reserved_part_span[pos_win] = m_add_requests[num_seg][pos_elem];
500 }
501 m_sizeof_used_part[target_segment_infos_pos] = future_sizeof_win;
502
503 m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
504 m_target_segments[segment_infos_pos] = -1;
505 }
506 }
507 MPI_Barrier(m_comm_machine);
508
509 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
510 if (is_my_seg_edited[num_seg]) {
511 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
512
513 MPI_Aint size_seg;
514 std::byte* ptr_seg = nullptr;
515 int size_type;
516 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
517
518 if (error != MPI_SUCCESS) {
519 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
520 }
521 m_reserved_part_span[num_seg] = Span<std::byte>{ ptr_seg, size_seg };
522 }
523 }
524}
525
526/*---------------------------------------------------------------------------*/
527/*---------------------------------------------------------------------------*/
528
529void MpiDynamicMultiMachineMemoryWindowBaseInternal::
530requestReserve(Int32 num_seg, Int64 new_capacity)
531{
532 if (new_capacity % m_sizeof_type) {
533 ARCCORE_FATAL("new_capacity not valid");
534 }
535
536 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
537
538 if (new_capacity <= m_reserved_part_span[num_seg].size()) {
539 _requestRealloc(segment_infos_pos);
540 return;
541 }
542 _requestRealloc(segment_infos_pos, new_capacity);
543}
544
545/*---------------------------------------------------------------------------*/
546/*---------------------------------------------------------------------------*/
547
548void MpiDynamicMultiMachineMemoryWindowBaseInternal::
549executeReserve()
550{
551 _executeRealloc();
552}
553
554/*---------------------------------------------------------------------------*/
555/*---------------------------------------------------------------------------*/
556
557void MpiDynamicMultiMachineMemoryWindowBaseInternal::
558requestResize(Int32 num_seg, Int64 new_size)
559{
560 if (new_size == -1) {
561 return;
562 }
563 if (new_size < 0 || new_size % m_sizeof_type) {
564 ARCCORE_FATAL("new_size not valid");
565 }
566
567 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
568
569 if (new_size > m_reserved_part_span[num_seg].size()) {
570 _requestRealloc(segment_infos_pos, new_size);
571 }
572 else {
573 _requestRealloc(segment_infos_pos);
574 }
575
576 m_resize_requests[num_seg] = new_size;
577 m_resize_requested = true; // TODO Atomic ?
578}
579
580/*---------------------------------------------------------------------------*/
581/*---------------------------------------------------------------------------*/
582
583void MpiDynamicMultiMachineMemoryWindowBaseInternal::
584executeResize()
585{
586 _executeRealloc();
587
588 if (!m_resize_requested) {
589 return;
590 }
591 m_resize_requested = false;
592
593 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
594 if (m_resize_requests[num_seg] == -1) {
595 continue;
596 }
597
598 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
599
600 if (m_reserved_part_span[num_seg].size() < m_resize_requests[num_seg]) {
601 ARCCORE_FATAL("Bad realloc -- New size : {0} -- Needed size : {1}", m_reserved_part_span[num_seg].size(), m_resize_requests[num_seg]);
602 }
603
604 m_sizeof_used_part[segment_infos_pos] = m_resize_requests[num_seg];
605 m_resize_requests[num_seg] = -1;
606 }
607}
608
609/*---------------------------------------------------------------------------*/
610/*---------------------------------------------------------------------------*/
611
612void MpiDynamicMultiMachineMemoryWindowBaseInternal::
613executeShrink()
614{
615 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
616 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
617
618 if (m_reserved_part_span[num_seg].size() == m_sizeof_used_part[segment_infos_pos]) {
619 _requestRealloc(segment_infos_pos);
620 }
621 else {
622 _requestRealloc(segment_infos_pos, m_sizeof_used_part[segment_infos_pos]);
623 }
624 }
625 _executeRealloc();
626}
627
628/*---------------------------------------------------------------------------*/
629/*---------------------------------------------------------------------------*/
630
631void MpiDynamicMultiMachineMemoryWindowBaseInternal::
632_requestRealloc(Int32 owner_pos_segment, Int64 new_capacity) const
633{
634 m_need_resize[owner_pos_segment] = new_capacity;
635}
636
637/*---------------------------------------------------------------------------*/
638/*---------------------------------------------------------------------------*/
639
640void MpiDynamicMultiMachineMemoryWindowBaseInternal::
641_requestRealloc(Int32 owner_pos_segment) const
642{
643 m_need_resize[owner_pos_segment] = -1;
644}
645
646/*---------------------------------------------------------------------------*/
647/*---------------------------------------------------------------------------*/
648
649void MpiDynamicMultiMachineMemoryWindowBaseInternal::
650_executeRealloc()
651{
652 // Barrière importante car tout le monde doit savoir que l'on doit
653 // redimensionner un des segments que nous possédons.
654 MPI_Barrier(m_comm_machine);
655
656 // Pas besoin de barrière car MPI_Win_allocate_shared() de _realloc() est
657 // bloquant.
658 _realloc();
659
660 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
661 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
662 m_need_resize[segment_infos_pos] = -1;
663 }
664
665 // Barrière importante dans le cas où un MPI_Win_shared_query() de
666 // _reallocCollective() durerait trop longtemps (un autre processus pourrait
667 // rappeler cette méthode et remettre m_need_resize[m_owner_segment] à
668 // true => deadlock dans _reallocCollective() sur MPI_Win_allocate_shared()
669 // à cause du continue).
670 MPI_Barrier(m_comm_machine);
671}
672
673/*---------------------------------------------------------------------------*/
674/*---------------------------------------------------------------------------*/
675
676void MpiDynamicMultiMachineMemoryWindowBaseInternal::
677_realloc()
678{
679 MPI_Info win_info;
680 MPI_Info_create(&win_info);
681 MPI_Info_set(win_info, "alloc_shared_noncontig", "true");
682
683 // Chacun réalloc ses segments, si demandé.
684 for (Integer rank = 0; rank < m_comm_machine_size; ++rank) {
685 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
686
687 const Int32 local_segment_infos_pos = num_seg + rank * m_nb_segments_per_proc;
688
689 if (m_need_resize[local_segment_infos_pos] == -1)
690 continue;
691
692 ARCCORE_ASSERT(m_need_resize[local_segment_infos_pos] >= 0, ("New size must be >= 0"));
693 ARCCORE_ASSERT(m_need_resize[local_segment_infos_pos] % m_sizeof_type == 0, ("New size must be % sizeof type"));
694
695 // Si on doit realloc notre segment, on alloue au moins une taille de m_sizeof_type.
696 // Si ce n'est pas notre segment, taille 0 pour que MPI n'alloue rien.
697 const Int64 size_seg = (m_comm_machine_rank == rank ? (m_need_resize[local_segment_infos_pos] == 0 ? m_sizeof_type : m_need_resize[local_segment_infos_pos]) : 0);
698
699 // On sauvegarde l'ancien segment pour déplacer les données.
700 MPI_Win old_win = m_all_mpi_win[local_segment_infos_pos];
701 std::byte* ptr_new_seg = nullptr;
702
703 // Si size_seg == 0 alors ptr_seg == nullptr.
704 int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info, m_comm_machine, &ptr_new_seg, &m_all_mpi_win[local_segment_infos_pos]);
705 if (error != MPI_SUCCESS) {
706 MPI_Win_free(&old_win);
707 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
708 }
709
710 // Il n'y a que si c'est notre segment que l'on déplace les données.
711 if (m_comm_machine_rank == rank) {
712 // On a besoin de deux infos supplémentaires :
713 // - le pointeur vers l'ancien segment (pas possible de le récupérer
714 // via m_reserved_part_span à cause des échanges),
715 // - la taille du nouveau segment (MPI peut allouer plus que la taille
716 // que l'on a demandée).
717 std::byte* ptr_old_seg = nullptr;
718 MPI_Aint mpi_reserved_size_new_seg;
719
720 // Ancien segment.
721 {
722 MPI_Aint size_old_seg;
723 int size_type;
724 // Ici, ptr_seg n'est jamais == nullptr vu que l'on fait toujours un
725 // segment d'une taille d'au moins m_sizeof_type.
726 error = MPI_Win_shared_query(old_win, m_comm_machine_rank, &size_old_seg, &size_type, &ptr_old_seg);
727 if (error != MPI_SUCCESS || ptr_old_seg == nullptr) {
728 MPI_Win_free(&old_win);
729 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
730 }
731 }
732
733 // Nouveau segment.
734 {
735 std::byte* ptr_seg = nullptr;
736 int size_type;
737 // Ici, ptr_seg n'est jamais == nullptr vu que l'on fait toujours un
738 // segment d'une taille d'au moins m_sizeof_type.
739 error = MPI_Win_shared_query(m_all_mpi_win[local_segment_infos_pos], m_comm_machine_rank, &mpi_reserved_size_new_seg, &size_type, &ptr_seg);
740 if (error != MPI_SUCCESS || ptr_seg == nullptr || ptr_seg != ptr_new_seg) {
741 MPI_Win_free(&old_win);
742 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
743 }
744 }
745
746 // Si le realloc est une réduction de la taille du segment (espace
747 // utilisé par l'utilisateur, si resize par exemple), on ne peut pas
748 // copier toutes les anciennes données.
749 const Int64 min_size = std::min(m_need_resize[local_segment_infos_pos], m_sizeof_used_part[local_segment_infos_pos]);
750
751 memcpy(ptr_new_seg, ptr_old_seg, min_size);
752 }
753
754 MPI_Win_free(&old_win);
755 }
756 }
757 MPI_Info_free(&win_info);
758
759 // On reconstruit les spans des segments que l'on possède.
760 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
761 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
762
763 MPI_Aint size_seg;
764 int size_type;
765 std::byte* ptr_seg = nullptr;
766 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
767
768 if (error != MPI_SUCCESS) {
769 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
770 }
771
772 m_reserved_part_span[num_seg] = Span<std::byte>{ ptr_seg, size_seg };
773 }
774}
775
776/*---------------------------------------------------------------------------*/
777/*---------------------------------------------------------------------------*/
778
779Int32 MpiDynamicMultiMachineMemoryWindowBaseInternal::
780_worldToMachine(Int32 world) const
781{
782 for (Int32 i = 0; i < m_comm_machine_size; ++i) {
783 if (m_machine_ranks[i] == world) {
784 return i;
785 }
786 }
787 ARCCORE_FATAL("Rank is not in machine");
788}
789
790/*---------------------------------------------------------------------------*/
791/*---------------------------------------------------------------------------*/
792
793Int32 MpiDynamicMultiMachineMemoryWindowBaseInternal::
794_machineToWorld(Int32 machine) const
795{
796 return m_machine_ranks[machine];
797}
798
799/*---------------------------------------------------------------------------*/
800/*---------------------------------------------------------------------------*/
801
802} // namespace Arcane::MessagePassing::Mpi
803
804/*---------------------------------------------------------------------------*/
805/*---------------------------------------------------------------------------*/
std::int32_t Int32
Type entier signé sur 32 bits.