Arcane  v3.16.8.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
MpiDynamicMultiMachineMemoryWindowBaseInternal.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* MpiDynamicMultiMachineMemoryWindowBaseInternal.h (C) 2000-2025 */
9/* */
10/* Classe permettant de créer des fenêtres mémoires pour un noeud de calcul. */
11/* Les segments de ces fenêtres ne sont pas contigües en mémoire et peuvent */
12/* être redimensionnées. Un processus peut posséder plusieurs segments. */
13/*---------------------------------------------------------------------------*/
14
15#include "arccore/message_passing_mpi/internal/MpiDynamicMultiMachineMemoryWindowBaseInternal.h"
16
17#include "arccore/base/FatalErrorException.h"
18
19/*---------------------------------------------------------------------------*/
20/*---------------------------------------------------------------------------*/
21
22namespace Arcane::MessagePassing::Mpi
23{
24
25/*---------------------------------------------------------------------------*/
26/*---------------------------------------------------------------------------*/
27
// Builds one MPI shared-memory window per (process, segment) pair plus three
// bookkeeping windows (resize requests, used sizes, target segments).
// Collective over comm_machine: every process of the node must call it.
// NOTE: sizeof_segments must not be kept by the caller (values are copied
// into the bookkeeping window below).
MpiDynamicMultiMachineMemoryWindowBaseInternal(SmallSpan<Int64> sizeof_segments, Int32 nb_segments_per_proc, Int32 sizeof_type, const MPI_Comm& comm_machine, Int32 comm_machine_rank, Int32 comm_machine_size, ConstArrayView<Int32> machine_ranks)
: m_win_need_resize()
, m_win_actual_sizeof()
, m_win_target_segments()
, m_comm_machine(comm_machine)
, m_comm_machine_size(comm_machine_size)
, m_comm_machine_rank(comm_machine_rank)
, m_sizeof_type(sizeof_type)
, m_nb_segments_per_proc(nb_segments_per_proc)
, m_machine_ranks(machine_ranks)
, m_add_requests(nb_segments_per_proc)
, m_resize_requests(nb_segments_per_proc)
{
  if (m_sizeof_type <= 0) {
    ARCCORE_FATAL("Invalid sizeof_type");
  }
  // NOTE(review): this loop uses m_nb_segments_per_proc as a bound (and
  // indexes sizeof_segments with it) before the <= 0 check below -- the two
  // checks look swapped; confirm sizeof_segments always has at least
  // nb_segments_per_proc entries.
  for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
    if (sizeof_segments[i] < 0 || sizeof_segments[i] % m_sizeof_type != 0) {
      ARCCORE_FATAL("Invalid initial sizeof_segment");
    }
  }
  if (m_nb_segments_per_proc <= 0) {
    ARCCORE_FATAL("Invalid nb_segments_per_proc");
  }
  // One MPI window per (process, segment); spans only for our own segments.
  m_all_mpi_win.resize(m_comm_machine_size * m_nb_segments_per_proc);
  m_reserved_part_span.resize(m_nb_segments_per_proc);

  // No pending add/resize request initially.
  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
    m_resize_requests[num_seg] = -1;
  }

  // Data segments may be non-contiguous (they are reallocated independently)...
  MPI_Info win_info_true;
  MPI_Info_create(&win_info_true);
  MPI_Info_set(win_info_true, "alloc_shared_noncontig", "true");

  // ...but the bookkeeping windows must be contiguous (spans cover the whole
  // node-wide table through rank 0's pointer).
  MPI_Info win_info_false;
  MPI_Info_create(&win_info_false);
  MPI_Info_set(win_info_false, "alloc_shared_noncontig", "false");

  // Index of our first window/entry in the flattened (rank, segment) tables.
  const Int32 pos_my_wins = m_comm_machine_rank * m_nb_segments_per_proc;

  {
    // Create every segment of every process. The allocation call is
    // collective, so all processes participate in all windows; only the
    // owner passes a non-zero size.
    for (Integer i = 0; i < m_comm_machine_size; ++i) {
      for (Integer j = 0; j < m_nb_segments_per_proc; ++j) {
        Int64 size_seg = 0;
        if (m_comm_machine_rank == i) {
          // Always allocate at least one element so the pointer is valid.
          if (sizeof_segments[j] == 0)
            size_seg = m_sizeof_type;
          else
            size_seg = sizeof_segments[j];
        }
        std::byte* ptr_seg = nullptr;
        int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info_true, m_comm_machine, &ptr_seg, &m_all_mpi_win[j + i * m_nb_segments_per_proc]);

        if (error != MPI_SUCCESS) {
          ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
        }
      }
    }


    // Query back our own segments to learn the actually reserved sizes.
    for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
      MPI_Aint size_seg;
      int size_type;
      std::byte* ptr_seg = nullptr;
      int error = MPI_Win_shared_query(m_all_mpi_win[i + pos_my_wins], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
      }

      // Caution: the user asks for a minimum number of reserved elements,
      // but MPI may reserve more (effect of alloc_shared_noncontig=true).
      // We are only guaranteed that the reserved size is >= sizeof_segment.
      m_reserved_part_span[i] = Span<std::byte>{ ptr_seg, size_seg };
    }
  }

  // Node-wide "need resize" table: one Int64 per (rank, segment).
  {
    Int64* ptr_seg = nullptr;
    Int64* ptr_win = nullptr;
    {
      int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int64)) * m_nb_segments_per_proc, sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_need_resize);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
      }
    }
    {
      MPI_Aint size_seg;
      int size_type;
      // Rank 0's pointer gives the base of the whole contiguous table.
      int error = MPI_Win_shared_query(m_win_need_resize, 0, &size_seg, &size_type, &ptr_win);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
      }

      m_need_resize = Span<Int64>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };

      // -1 == "no resize requested" for our entries.
      for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
        m_need_resize[i + pos_my_wins] = -1;
      }
    }
    // Sanity check: with alloc_shared_noncontig=false our part must sit
    // exactly at offset pos_my_wins from rank 0's base.
    if (ptr_win + pos_my_wins != ptr_seg) {
      ARCCORE_FATAL("m_win_need_resize is noncontig");
    }
  }

  // Node-wide "used size" table: bytes actually used in each segment.
  {
    Int64* ptr_seg = nullptr;
    Int64* ptr_win = nullptr;
    {
      int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int64)) * m_nb_segments_per_proc, sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_actual_sizeof);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
      }
    }
    {
      MPI_Aint size_seg;
      int size_type;
      int error = MPI_Win_shared_query(m_win_actual_sizeof, 0, &size_seg, &size_type, &ptr_win);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
      }

      m_sizeof_used_part = Span<Int64>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };

      // Initial used size is the size requested by the user (may be 0 even
      // though at least m_sizeof_type bytes were reserved above).
      for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
        m_sizeof_used_part[i + pos_my_wins] = sizeof_segments[i];
      }
    }
    if (ptr_win + pos_my_wins != ptr_seg) {
      ARCCORE_FATAL("m_win_actual_sizeof is noncontig");
    }
  }

  // Node-wide "target segment" table used by requestAddToAnotherSegment().
  {
    Int32* ptr_seg = nullptr;
    Int32* ptr_win = nullptr;
    {
      int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int32)) * m_nb_segments_per_proc, sizeof(Int32), win_info_false, m_comm_machine, &ptr_seg, &m_win_target_segments);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
      }
    }
    {
      MPI_Aint size_seg;
      int size_type;
      int error = MPI_Win_shared_query(m_win_target_segments, 0, &size_seg, &size_type, &ptr_win);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
      }

      m_target_segments = Span<Int32>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };

      // -1 == "no target segment" for our entries.
      for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
        m_target_segments[i + pos_my_wins] = -1;
      }
    }
    if (ptr_win + pos_my_wins != ptr_seg) {
      ARCCORE_FATAL("m_win_owner_segments is noncontig");
    }
  }

  MPI_Info_free(&win_info_false);
  MPI_Info_free(&win_info_true);

  // Everyone must finish initializing its entries before any use.
  MPI_Barrier(m_comm_machine);
}
205
206/*---------------------------------------------------------------------------*/
207/*---------------------------------------------------------------------------*/
208
209MpiDynamicMultiMachineMemoryWindowBaseInternal::
210~MpiDynamicMultiMachineMemoryWindowBaseInternal()
211{
212 for (Integer i = 0; i < m_comm_machine_size * m_nb_segments_per_proc; ++i) {
213 MPI_Win_free(&m_all_mpi_win[i]);
214 }
215 MPI_Win_free(&m_win_need_resize);
216 MPI_Win_free(&m_win_actual_sizeof);
217 MPI_Win_free(&m_win_target_segments);
218}
219
220/*---------------------------------------------------------------------------*/
221/*---------------------------------------------------------------------------*/
222
// Returns the size in bytes of one element of the window (m_sizeof_type,
// validated strictly positive by the constructor).
sizeofOneElem() const
{
  return m_sizeof_type;
}
228
229/*---------------------------------------------------------------------------*/
230/*---------------------------------------------------------------------------*/
231
237
238/*---------------------------------------------------------------------------*/
239/*---------------------------------------------------------------------------*/
240
// Blocks until every process of the machine communicator reaches this point.
barrier() const
{
  MPI_Barrier(m_comm_machine);
}
246
247/*---------------------------------------------------------------------------*/
248/*---------------------------------------------------------------------------*/
249
// Returns a view on our own segment num_seg, limited to the used part
// (m_sizeof_used_part), not the whole reserved part.
segmentView(Int32 num_seg)
{
  const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
  return m_reserved_part_span[num_seg].subSpan(0, m_sizeof_used_part[segment_infos_pos]);
}
256
257/*---------------------------------------------------------------------------*/
258/*---------------------------------------------------------------------------*/
259
// Returns a view on segment num_seg owned by the given rank, limited to the
// used part. The pointer is re-queried because the owner may have
// reallocated its segment.
// NOTE(review): the table index uses _worldToMachine(rank) but the raw
// `rank` is passed to MPI_Win_shared_query(), whose rank argument belongs to
// the window's communicator (comm_machine). This looks inconsistent whenever
// world and machine ranks differ -- confirm against callers.
segmentView(Int32 rank, Int32 num_seg)
{
  const Int32 segment_infos_pos = num_seg + _worldToMachine(rank) * m_nb_segments_per_proc;

  MPI_Aint size_seg;
  int size_type;
  std::byte* ptr_seg = nullptr;
  int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], rank, &size_seg, &size_type, &ptr_seg);

  if (error != MPI_SUCCESS) {
    ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
  }

  // Expose only the used part, not the MPI-reserved size.
  return Span<std::byte>{ ptr_seg, m_sizeof_used_part[segment_infos_pos] };
}
276
277/*---------------------------------------------------------------------------*/
278/*---------------------------------------------------------------------------*/
279
// Const counterpart of segmentView(num_seg): view on the used part of our
// own segment num_seg.
segmentConstView(Int32 num_seg) const
{
  const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
  return m_reserved_part_span[num_seg].subSpan(0, m_sizeof_used_part[segment_infos_pos]);
}
286
287/*---------------------------------------------------------------------------*/
288/*---------------------------------------------------------------------------*/
289
// Const counterpart of segmentView(rank, num_seg).
// NOTE(review): same concern as segmentView(rank, num_seg) -- the raw world
// `rank` is passed to MPI_Win_shared_query() while the index uses
// _worldToMachine(rank); confirm both should not use the machine rank.
segmentConstView(Int32 rank, Int32 num_seg) const
{
  const Int32 segment_infos_pos = num_seg + _worldToMachine(rank) * m_nb_segments_per_proc;

  MPI_Aint size_seg;
  int size_type;
  std::byte* ptr_seg = nullptr;
  int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], rank, &size_seg, &size_type, &ptr_seg);

  if (error != MPI_SUCCESS) {
    ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
  }

  return Span<const std::byte>{ ptr_seg, m_sizeof_used_part[segment_infos_pos] };
}
306
307/*---------------------------------------------------------------------------*/
308/*---------------------------------------------------------------------------*/
309
// Records a request to append `elem` to our own segment num_seg. Nothing is
// copied here; the copy happens in the collective execute phase. If the used
// size plus the new bytes exceed the reserved size, a realloc is requested.
{
  if (elem.size() % m_sizeof_type) {
    ARCCORE_FATAL("Sizeof elem not valid");
  }
  // Empty request: nothing to do (not an error).
  if (elem.empty() || elem.data() == nullptr) {
    return;
  }

  const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;

  const Int64 actual_sizeof_win = m_sizeof_used_part[segment_infos_pos];
  const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
  const Int64 old_reserved = m_reserved_part_span[num_seg].size();

  // Either ask for a grow, or explicitly mark "no realloc needed" (-1) so
  // the collective exchange sees a defined value for this segment.
  if (future_sizeof_win > old_reserved) {
    _requestRealloc(segment_infos_pos, future_sizeof_win);
  }
  else {
    _requestRealloc(segment_infos_pos);
  }

  // The span must stay valid until executeAdd() runs.
  m_add_requests[num_seg] = elem;
  m_add_requested = true; // TODO Atomic ?
}
336
337/*---------------------------------------------------------------------------*/
338/*---------------------------------------------------------------------------*/
339
// Collective: performs the reallocation round, then copies the bytes saved
// by requestAdd() into our own segments and updates the used sizes.
{
  _executeRealloc();

  if (!m_add_requested) {
    return;
  }
  m_add_requested = false;

  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    // Skip segments with no pending add request.
    if (m_add_requests[num_seg].empty() || m_add_requests[num_seg].data() == nullptr) {
      continue;
    }

    const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;

    const Int64 actual_sizeof_win = m_sizeof_used_part[segment_infos_pos];
    const Int64 future_sizeof_win = actual_sizeof_win + m_add_requests[num_seg].size();

    // NOTE(review): the format placeholders {1}/{2} look off-by-one for two
    // arguments -- executeResize() uses {0}/{1} for the same message.
    if (m_reserved_part_span[num_seg].size() < future_sizeof_win) {
      ARCCORE_FATAL("Bad realloc -- New size : {1} -- Needed size : {2}", m_reserved_part_span[num_seg].size(), future_sizeof_win);
    }

    // Append the requested bytes after the currently used part.
    for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
      m_reserved_part_span[num_seg][pos_win] = m_add_requests[num_seg][pos_elem];
    }
    m_sizeof_used_part[segment_infos_pos] = future_sizeof_win;

    // Clear the request slot for the next round.
    m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
  }
}
372
373/*---------------------------------------------------------------------------*/
374/*---------------------------------------------------------------------------*/
375
// Records a request to append `elem` to segment num_seg of another process
// (`rank` is a world rank). The request is stored in our slot `thread` and
// the target segment index is published in the shared m_target_segments
// table; the copy happens in executeAddToAnotherSegment().
{
  if (elem.size() % m_sizeof_type) {
    ARCCORE_FATAL("Sizeof elem not valid");
  }
  if (elem.empty() || elem.data() == nullptr) {
    return;
  }

  const Int32 machine_rank = _worldToMachine(rank);
  const Int32 target_segment_infos_pos = num_seg + machine_rank * m_nb_segments_per_proc;

  {
    // Publish which segment our slot `thread` wants to edit.
    const Int32 segment_infos_pos = thread + m_comm_machine_rank * m_nb_segments_per_proc;
    m_target_segments[segment_infos_pos] = target_segment_infos_pos;
  }

  // Query the target owner's reserved size to decide whether a grow is
  // needed (the owner may have reserved more than its used size).
  Span<std::byte> rank_reserved_part_span;
  {
    MPI_Aint size_seg;
    std::byte* ptr_seg = nullptr;
    int size_type;
    int error = MPI_Win_shared_query(m_all_mpi_win[target_segment_infos_pos], machine_rank, &size_seg, &size_type, &ptr_seg);

    if (error != MPI_SUCCESS) {
      ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
    }
    rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
  }

  const Int64 actual_sizeof_win = m_sizeof_used_part[target_segment_infos_pos];
  const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
  const Int64 old_reserved = rank_reserved_part_span.size();

  if (future_sizeof_win > old_reserved) {
    _requestRealloc(target_segment_infos_pos, future_sizeof_win);
  }
  else {
    _requestRealloc(target_segment_infos_pos);
  }

  // The span must stay valid until executeAddToAnotherSegment() runs.
  m_add_requests[thread] = elem;
  m_add_requested = true; // TODO Atomic ?
}
421
422/*---------------------------------------------------------------------------*/
423/*---------------------------------------------------------------------------*/
424
// Collective: executes the cross-process add requests published by
// requestAddToAnotherSegment(). Each process writes into the target
// segments it claimed, then owners refresh their own reserved spans.
{
  MPI_Barrier(m_comm_machine);

  // Mark which of our own segments will be edited by someone, so we can
  // refresh our spans afterwards.
  // NOTE(review): the buffer is sized with m_comm_machine_size but indexed
  // by num_seg in [0, m_nb_segments_per_proc); a size mismatch is possible
  // when the two differ -- confirm.
  // NOTE(review): m_target_segments holds flattened segment positions
  // (num_seg + machine_rank * m_nb_segments_per_proc), yet rank_asked is
  // compared to m_comm_machine_rank (a rank) and the result is indexed by
  // num_seg regardless of which segment matched -- this looks wrong; verify
  // the intended comparison.
  auto is_my_seg_edited = std::make_unique<bool[]>(m_comm_machine_size);
  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    for (const Int32 rank_asked : m_target_segments) {
      if (rank_asked == m_comm_machine_rank) {
        is_my_seg_edited[num_seg] = true;
        break;
      }
    }
  }

  if (!m_add_requested) {
    _executeRealloc();
  }

  else {
    m_add_requested = false;
    // Check that no two slots target the same segment (each entry must match
    // at most once across the whole shared table, itself included).
    for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
      const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
      const Int32 seg_needs_to_edit = m_target_segments[segment_infos_pos];
      if (seg_needs_to_edit == -1)
        continue;

      bool is_found = false;
      for (const Int32 rank_asked : m_target_segments) {
        if (rank_asked == seg_needs_to_edit) {
          if (!is_found) {
            is_found = true;
          }
          else {
            ARCCORE_FATAL("Two subdomains ask same rank for addToAnotherSegment()");
          }
        }
      }
    }

    _executeRealloc();

    // Perform the actual copies into the target segments.
    for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
      if (m_add_requests[num_seg].empty() || m_add_requests[num_seg].data() == nullptr) {
        continue;
      }

      const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
      const Int32 target_segment_infos_pos = m_target_segments[segment_infos_pos];
      if (target_segment_infos_pos == -1) {
        ARCCORE_FATAL("Ne devrait pas aller ici");
      }

      const Int64 actual_sizeof_win = m_sizeof_used_part[target_segment_infos_pos];
      const Int64 future_sizeof_win = actual_sizeof_win + m_add_requests[num_seg].size();

      // Re-query the target pointer: the owner may have reallocated it
      // during _executeRealloc(). The owner's machine rank is recovered from
      // the flattened position.
      Span<std::byte> rank_reserved_part_span;
      {
        MPI_Aint size_seg;
        std::byte* ptr_seg = nullptr;
        int size_type;
        int error = MPI_Win_shared_query(m_all_mpi_win[target_segment_infos_pos], target_segment_infos_pos / m_nb_segments_per_proc, &size_seg, &size_type, &ptr_seg);

        if (error != MPI_SUCCESS) {
          ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
        }
        rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
      }

      if (rank_reserved_part_span.size() < future_sizeof_win) {
        ARCCORE_FATAL("Bad realloc -- New size : {1} -- Needed size : {2}", rank_reserved_part_span.size(), future_sizeof_win);
      }

      // Append after the target's used part and publish the new used size.
      for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
        rank_reserved_part_span[pos_win] = m_add_requests[num_seg][pos_elem];
      }
      m_sizeof_used_part[target_segment_infos_pos] = future_sizeof_win;

      // Clear request and target slots for the next round.
      m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
      m_target_segments[segment_infos_pos] = -1;
    }
  }
  MPI_Barrier(m_comm_machine);

  // Owners whose segments were (possibly) edited refresh their reserved
  // spans, since an edit may have gone through a reallocation.
  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    if (is_my_seg_edited[num_seg]) {
      const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;

      MPI_Aint size_seg;
      std::byte* ptr_seg = nullptr;
      int size_type;
      int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
      }
      m_reserved_part_span[num_seg] = Span<std::byte>{ ptr_seg, size_seg };
    }
  }
}
525
526/*---------------------------------------------------------------------------*/
527/*---------------------------------------------------------------------------*/
528
// Records a capacity-reservation request for our own segment num_seg.
// If the current reserved size already covers new_capacity, only a "no
// realloc" marker is written (the entry must always hold a defined value
// before the collective exchange).
requestReserve(Int32 num_seg, Int64 new_capacity)
{
  if (new_capacity % m_sizeof_type) {
    ARCCORE_FATAL("new_capacity not valid");
  }

  const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;

  if (new_capacity <= m_reserved_part_span[num_seg].size()) {
    _requestRealloc(segment_infos_pos);
    return;
  }
  _requestRealloc(segment_infos_pos, new_capacity);
}
544
545/*---------------------------------------------------------------------------*/
546/*---------------------------------------------------------------------------*/
547
// Collective: runs the shared reallocation round, applying any pending
// reserve requests (participates even if this process requested nothing).
{
  _executeRealloc();
}
553
554/*---------------------------------------------------------------------------*/
555/*---------------------------------------------------------------------------*/
556
// Records a resize request for our own segment num_seg. new_size == -1 is a
// no-op; otherwise new_size must be a non-negative multiple of the element
// size. A realloc is requested only when growing past the reserved size.
requestResize(Int32 num_seg, Int64 new_size)
{
  if (new_size == -1) {
    return;
  }
  if (new_size < 0 || new_size % m_sizeof_type) {
    ARCCORE_FATAL("new_size not valid");
  }

  const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;

  if (new_size > m_reserved_part_span[num_seg].size()) {
    _requestRealloc(segment_infos_pos, new_size);
  }
  else {
    // Mark "no realloc needed" so the exchange sees a defined value.
    _requestRealloc(segment_infos_pos);
  }

  m_resize_requests[num_seg] = new_size;
  m_resize_requested = true; // TODO Atomic ?
}
579
580/*---------------------------------------------------------------------------*/
581/*---------------------------------------------------------------------------*/
582
// Collective: performs the reallocation round, then applies the pending
// resize requests by updating the used sizes of our own segments.
{
  _executeRealloc();

  if (!m_resize_requested) {
    return;
  }
  m_resize_requested = false;

  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    // -1 == no resize requested for this segment.
    if (m_resize_requests[num_seg] == -1) {
      continue;
    }

    const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;

    if (m_reserved_part_span[num_seg].size() < m_resize_requests[num_seg]) {
      ARCCORE_FATAL("Bad realloc -- New size : {0} -- Needed size : {1}", m_reserved_part_span[num_seg].size(), m_resize_requests[num_seg]);
    }

    // Only the used size changes; the reserved part keeps its capacity.
    m_sizeof_used_part[segment_infos_pos] = m_resize_requests[num_seg];
    m_resize_requests[num_seg] = -1;
  }
}
608
609/*---------------------------------------------------------------------------*/
610/*---------------------------------------------------------------------------*/
611
// Collective: shrinks the reserved part of every own segment down to its
// used size (no-op marker when they already match), then runs the shared
// reallocation round.
{
  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;

    if (m_reserved_part_span[num_seg].size() == m_sizeof_used_part[segment_infos_pos]) {
      _requestRealloc(segment_infos_pos);
    }
    else {
      _requestRealloc(segment_infos_pos, m_sizeof_used_part[segment_infos_pos]);
    }
  }
  _executeRealloc();
}
627
628/*---------------------------------------------------------------------------*/
629/*---------------------------------------------------------------------------*/
630
631void MpiDynamicMultiMachineMemoryWindowBaseInternal::
632_requestRealloc(Int32 owner_pos_segment, Int64 new_capacity) const
633{
634 m_need_resize[owner_pos_segment] = new_capacity;
635}
636
637/*---------------------------------------------------------------------------*/
638/*---------------------------------------------------------------------------*/
639
640void MpiDynamicMultiMachineMemoryWindowBaseInternal::
641_requestRealloc(Int32 owner_pos_segment) const
642{
643 m_need_resize[owner_pos_segment] = -1;
644}
645
646/*---------------------------------------------------------------------------*/
647/*---------------------------------------------------------------------------*/
648
649void MpiDynamicMultiMachineMemoryWindowBaseInternal::
650_executeRealloc()
651{
652 // Barrière importante car tout le monde doit savoir que l'on doit
653 // redimensionner un des segments que nous possédons.
654 MPI_Barrier(m_comm_machine);
655
656 // Pas besoin de barrière car MPI_Win_allocate_shared() de _realloc() est
657 // bloquant.
658 _realloc();
659
660 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
661 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
662 m_need_resize[segment_infos_pos] = -1;
663 }
664
665 // Barrière importante dans le cas où un MPI_Win_shared_query() de
666 // _reallocCollective() durerait trop longtemps (un autre processus pourrait
667 // rappeler cette méthode et remettre m_need_resize[m_owner_segment] à
668 // true => deadlock dans _reallocCollective() sur MPI_Win_allocate_shared()
669 // à cause du continue).
670 MPI_Barrier(m_comm_machine);
671}
672
673/*---------------------------------------------------------------------------*/
674/*---------------------------------------------------------------------------*/
675
// Collective reallocation round: for every (rank, segment) entry of the
// shared resize table that is not -1, a fresh shared window replaces the old
// one (only the owner passes a non-zero size), the owner copies its data
// over, and the old window is freed. Finally our own spans are rebuilt.
void MpiDynamicMultiMachineMemoryWindowBaseInternal::
_realloc()
{
  MPI_Info win_info;
  MPI_Info_create(&win_info);
  MPI_Info_set(win_info, "alloc_shared_noncontig", "true");

  // Each process reallocates its own segments, when requested. The loops
  // run over ALL entries because window (re)allocation is collective.
  for (Integer rank = 0; rank < m_comm_machine_size; ++rank) {
    for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {

      const Int32 local_segment_infos_pos = num_seg + rank * m_nb_segments_per_proc;

      if (m_need_resize[local_segment_infos_pos] == -1)
        continue;

      ARCCORE_ASSERT(m_need_resize[local_segment_infos_pos] >= 0, ("New size must be >= 0"));
      ARCCORE_ASSERT(m_need_resize[local_segment_infos_pos] % m_sizeof_type == 0, ("New size must be % sizeof type"));

      // If our own segment must be reallocated, allocate at least
      // m_sizeof_type bytes. If the segment belongs to another process,
      // size 0 so that MPI allocates nothing on our side.
      const Int64 size_seg = (m_comm_machine_rank == rank ? (m_need_resize[local_segment_infos_pos] == 0 ? m_sizeof_type : m_need_resize[local_segment_infos_pos]) : 0);

      // Keep the old window so the data can be moved afterwards.
      MPI_Win old_win = m_all_mpi_win[local_segment_infos_pos];
      std::byte* ptr_new_seg = nullptr;

      // If size_seg == 0 then ptr_seg == nullptr.
      int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info, m_comm_machine, &ptr_new_seg, &m_all_mpi_win[local_segment_infos_pos]);
      if (error != MPI_SUCCESS) {
        MPI_Win_free(&old_win);
        ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
      }

      // Data is moved only when the segment is ours.
      if (m_comm_machine_rank == rank) {
        // Two extra pieces of information are needed:
        // - the pointer to the old segment (cannot be taken from
        //   m_reserved_part_span because of the exchanges),
        // - the size of the new segment (MPI may allocate more than the
        //   size we asked for).
        std::byte* ptr_old_seg = nullptr;
        MPI_Aint mpi_reserved_size_new_seg;

        // Old segment.
        {
          MPI_Aint size_old_seg;
          int size_type;
          // Here ptr_seg is never nullptr since a segment of at least
          // m_sizeof_type bytes is always allocated.
          error = MPI_Win_shared_query(old_win, m_comm_machine_rank, &size_old_seg, &size_type, &ptr_old_seg);
          if (error != MPI_SUCCESS || ptr_old_seg == nullptr) {
            MPI_Win_free(&old_win);
            ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
          }
        }

        // New segment.
        {
          std::byte* ptr_seg = nullptr;
          int size_type;
          // Here ptr_seg is never nullptr since a segment of at least
          // m_sizeof_type bytes is always allocated.
          error = MPI_Win_shared_query(m_all_mpi_win[local_segment_infos_pos], m_comm_machine_rank, &mpi_reserved_size_new_seg, &size_type, &ptr_seg);
          if (error != MPI_SUCCESS || ptr_seg == nullptr || ptr_seg != ptr_new_seg) {
            MPI_Win_free(&old_win);
            ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
          }
        }

        // If the realloc shrinks the segment (user-used space, e.g. via a
        // resize), not all the old data can be copied.
        const Int64 min_size = std::min(m_need_resize[local_segment_infos_pos], m_sizeof_used_part[local_segment_infos_pos]);

        memcpy(ptr_new_seg, ptr_old_seg, min_size);
      }

      MPI_Win_free(&old_win);
    }
  }
  MPI_Info_free(&win_info);

  // Rebuild the spans of the segments we own (reserved sizes may differ
  // from what was requested, and pointers may have changed).
  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;

    MPI_Aint size_seg;
    int size_type;
    std::byte* ptr_seg = nullptr;
    int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);

    if (error != MPI_SUCCESS) {
      ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
    }

    m_reserved_part_span[num_seg] = Span<std::byte>{ ptr_seg, size_seg };
  }
}
775
776/*---------------------------------------------------------------------------*/
777/*---------------------------------------------------------------------------*/
778
779Int32 MpiDynamicMultiMachineMemoryWindowBaseInternal::
780_worldToMachine(Int32 world) const
781{
782 for (Int32 i = 0; i < m_comm_machine_size; ++i) {
783 if (m_machine_ranks[i] == world) {
784 return i;
785 }
786 }
787 ARCCORE_FATAL("Rank is not in machine");
788}
789
790/*---------------------------------------------------------------------------*/
791/*---------------------------------------------------------------------------*/
792
793Int32 MpiDynamicMultiMachineMemoryWindowBaseInternal::
794_machineToWorld(Int32 machine) const
795{
796 return m_machine_ranks[machine];
797}
798
799/*---------------------------------------------------------------------------*/
800/*---------------------------------------------------------------------------*/
801
802} // namespace Arcane::MessagePassing::Mpi
803
804/*---------------------------------------------------------------------------*/
805/*---------------------------------------------------------------------------*/
Vue constante d'un tableau de type T.
void executeResize()
Méthode permettant d'exécuter les requêtes de redimensionnement.
Span< const std::byte > segmentConstView(Int32 num_seg) const
Méthode permettant d'obtenir une vue sur l'un de nos segments.
Int32 sizeofOneElem() const
Méthode permettant d'obtenir la taille d'un élement de la fenêtre.
void requestReserve(Int32 num_seg, Int64 new_capacity)
Méthode permettant de demander la réservation d'espace mémoire pour un de nos segments.
ConstArrayView< Int32 > machineRanks() const
Méthode permettant d'obtenir les rangs qui possèdent un segment dans la fenêtre.
void requestResize(Int32 num_seg, Int64 new_size)
Méthode permettant de demander le redimensionnement d'un de nos segments.
void requestAdd(Int32 num_seg, Span< const std::byte > elem)
Méthode permettant de demander l'ajout d'éléments dans l'un de nos segments.
void executeShrink()
Méthode permettant de réduire l'espace mémoire réservé pour les segments au minimum nécessaire.
void executeAddToAnotherSegment()
Méthode permettant d'exécuter les requêtes d'ajout dans les segments d'autres processus.
void requestAddToAnotherSegment(Int32 thread, Int32 rank, Int32 num_seg, Span< const std::byte > elem)
Méthode permettant de demander l'ajout d'éléments dans un des segments de la fenêtre.
MpiDynamicMultiMachineMemoryWindowBaseInternal(SmallSpan< Int64 > sizeof_segments, Int32 nb_segments_per_proc, Int32 sizeof_type, const MPI_Comm &comm_machine, Int32 comm_machine_rank, Int32 comm_machine_size, ConstArrayView< Int32 > machine_ranks)
Le sizeof_segments ne doit pas être conservé !
Span< std::byte > segmentView(Int32 num_seg)
Méthode permettant d'obtenir une vue sur l'un de nos segments.
void barrier() const
Méthode permettant d'attendre que tous les processus du noeud appellent cette méthode pour continuer ...
Vue d'un tableau d'éléments de type T.
Definition Span.h:673
constexpr __host__ __device__ bool empty() const noexcept
Retourne true si le tableau est vide (dimension nulle)
Definition Span.h:375
constexpr __host__ __device__ SizeType size() const noexcept
Retourne la taille du tableau.
Definition Span.h:212
constexpr __host__ __device__ pointer data() const noexcept
Pointeur sur le début de la vue.
Definition Span.h:422
Vue d'un tableau d'éléments de type T.
Definition Span.h:513
std::int64_t Int64
Type entier signé sur 64 bits.
Int32 Integer
Type représentant un entier.
std::int32_t Int32
Type entier signé sur 32 bits.