Arcane  v4.1.7.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
MpiMultiMachineShMemWinBaseInternal.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
/* MpiMultiMachineShMemWinBaseInternal.cc                      (C) 2000-2026 */
9/* */
10/* Classe permettant de créer des fenêtres mémoires pour un noeud de calcul. */
11/* Les segments de ces fenêtres ne sont pas contigües en mémoire et peuvent */
12/* être redimensionnées. Un processus peut posséder plusieurs segments. */
13/*---------------------------------------------------------------------------*/
14
15#include "arccore/message_passing_mpi/internal/MpiMultiMachineShMemWinBaseInternal.h"
16
17#include "arccore/base/FatalErrorException.h"
18
19/*---------------------------------------------------------------------------*/
20/*---------------------------------------------------------------------------*/
21
22namespace Arcane::MessagePassing::Mpi
23{
24
25/*---------------------------------------------------------------------------*/
26/*---------------------------------------------------------------------------*/
27
//! \brief Builds the shared-memory windows of the compute node.
//!
//! Collective over \a comm_machine: every rank of the node must call this
//! constructor and executes the same sequence of MPI_Win_allocate_shared()
//! calls. One window is created per (rank, segment) pair so each segment can
//! later be reallocated independently ("alloc_shared_noncontig=true"), plus
//! three contiguous metadata windows ("alloc_shared_noncontig=false"):
//! resize requests, used sizes and add-to-another-segment targets.
//!
//! \param sizeof_segments Initial size in bytes of each of our segments
//!        (each value a non-negative multiple of \a sizeof_type; the span
//!        must not be kept after the call).
//! \param nb_segments_per_proc Number of segments owned by each process (> 0).
//! \param sizeof_type Size in bytes of one element (> 0).
//! \param comm_machine Communicator grouping the processes of this node.
//! \param comm_machine_rank Our rank in \a comm_machine.
//! \param comm_machine_size Number of ranks in \a comm_machine.
//! \param machine_ranks World ranks of the processes of \a comm_machine.
MpiMultiMachineShMemWinBaseInternal(SmallSpan<Int64> sizeof_segments, Int32 nb_segments_per_proc, Int32 sizeof_type, const MPI_Comm& comm_machine, Int32 comm_machine_rank, Int32 comm_machine_size, ConstArrayView<Int32> machine_ranks)
, m_comm_machine(comm_machine)
, m_comm_machine_size(comm_machine_size)
, m_comm_machine_rank(comm_machine_rank)
, m_sizeof_type(sizeof_type)
, m_nb_segments_per_proc(nb_segments_per_proc)
, m_machine_ranks(machine_ranks)
, m_add_requests(nb_segments_per_proc)
, m_resize_requests(nb_segments_per_proc)
{
  if (m_sizeof_type <= 0) {
    ARCCORE_FATAL("Invalid sizeof_type");
  }
  // NOTE(review): this loop indexes sizeof_segments[0..m_nb_segments_per_proc)
  // before the m_nb_segments_per_proc > 0 check below — assumes the span holds
  // at least nb_segments_per_proc entries; verify callers.
  for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
    if (sizeof_segments[i] < 0 || sizeof_segments[i] % m_sizeof_type != 0) {
      ARCCORE_FATAL("Invalid initial sizeof_segment");
    }
  }
  if (m_nb_segments_per_proc <= 0) {
    ARCCORE_FATAL("Invalid nb_segments_per_proc");
  }
  // One window per (rank, segment) pair; one reserved-part view per owned segment.
  m_all_mpi_win.resize(m_comm_machine_size * m_nb_segments_per_proc);
  m_reserved_part_span.resize(m_nb_segments_per_proc);

  // Initialize the per-segment request slots to "no pending request".
  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
    m_resize_requests[num_seg] = -1;
  }

  // Data segments: non-contiguous placement so each one can be reallocated.
  MPI_Info win_info_true;
  MPI_Info_create(&win_info_true);
  MPI_Info_set(win_info_true, "alloc_shared_noncontig", "true");

  // Metadata windows: contiguous placement, required by the pointer checks below.
  MPI_Info win_info_false;
  MPI_Info_create(&win_info_false);
  MPI_Info_set(win_info_false, "alloc_shared_noncontig", "false");

  // Global position of our first segment in the flattened (rank, segment) arrays.
  const Int32 pos_my_wins = m_comm_machine_rank * m_nb_segments_per_proc;

  {
    // Create every segment of every process. The calls are collective, so
    // all ranks run the same (i, j) loop; only the owner allocates a
    // non-zero size (at least m_sizeof_type so the pointer is never null).
    for (Integer i = 0; i < m_comm_machine_size; ++i) {
      for (Integer j = 0; j < m_nb_segments_per_proc; ++j) {
        Int64 size_seg = 0;
        if (m_comm_machine_rank == i) {
          if (sizeof_segments[j] == 0)
            size_seg = m_sizeof_type;
          else
            size_seg = sizeof_segments[j];
        }
        std::byte* ptr_seg = nullptr;
        int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info_true, m_comm_machine, &ptr_seg, &m_all_mpi_win[j + i * m_nb_segments_per_proc]);

        if (error != MPI_SUCCESS) {
          ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
        }
      }
    }

    // Build the reserved-part views of our own segments.
    for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
      MPI_Aint size_seg;
      int size_type;
      std::byte* ptr_seg = nullptr;
      int error = MPI_Win_shared_query(m_all_mpi_win[i + pos_my_wins], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
      }

      // Caution: the user asks for a minimum number of reserved elements,
      // but MPI may reserve more (effect of alloc_shared_noncontig=true).
      // We only know the reserved size is >= sizeof_segment.
      m_reserved_part_span[i] = Span<std::byte>{ ptr_seg, size_seg };
    }
  }

  // Metadata window 1: per-segment pending realloc sizes (-1 = no request).
  {
    Int64* ptr_seg = nullptr;
    Int64* ptr_win = nullptr;
    {
      int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int64)) * m_nb_segments_per_proc, sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_need_resize);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
      }
    }
    {
      MPI_Aint size_seg;
      int size_type;
      int error = MPI_Win_shared_query(m_win_need_resize, 0, &size_seg, &size_type, &ptr_win);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
      }

      m_need_resize = Span<Int64>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };

      for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
        m_need_resize[i + pos_my_wins] = -1;
      }
    }
    // Check that our slice really starts at our offset (contiguous layout).
    if (ptr_win + pos_my_wins != ptr_seg) {
      ARCCORE_FATAL("m_win_need_resize is noncontig");
    }
  }

  // Metadata window 2: used size (in bytes) of each segment.
  {
    Int64* ptr_seg = nullptr;
    Int64* ptr_win = nullptr;
    {
      int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int64)) * m_nb_segments_per_proc, sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_actual_sizeof);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
      }
    }
    {
      MPI_Aint size_seg;
      int size_type;
      int error = MPI_Win_shared_query(m_win_actual_sizeof, 0, &size_seg, &size_type, &ptr_win);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
      }

      m_sizeof_used_part = Span<Int64>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };

      for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
        m_sizeof_used_part[i + pos_my_wins] = sizeof_segments[i];
      }
    }
    if (ptr_win + pos_my_wins != ptr_seg) {
      ARCCORE_FATAL("m_win_actual_sizeof is noncontig");
    }
  }

  // Metadata window 3: target segment of each pending
  // requestAddToAnotherSegment() (-1 = no target).
  {
    Int32* ptr_seg = nullptr;
    Int32* ptr_win = nullptr;
    {
      int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int32)) * m_nb_segments_per_proc, sizeof(Int32), win_info_false, m_comm_machine, &ptr_seg, &m_win_target_segments);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
      }
    }
    {
      MPI_Aint size_seg;
      int size_type;
      int error = MPI_Win_shared_query(m_win_target_segments, 0, &size_seg, &size_type, &ptr_win);

      if (error != MPI_SUCCESS) {
        ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
      }

      m_target_segments = Span<Int32>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };

      for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
        m_target_segments[i + pos_my_wins] = -1;
      }
    }
    if (ptr_win + pos_my_wins != ptr_seg) {
      // NOTE(review): the message names "m_win_owner_segments" but the window
      // checked is m_win_target_segments — looks like an outdated name; verify.
      ARCCORE_FATAL("m_win_owner_segments is noncontig");
    }
  }

  MPI_Info_free(&win_info_false);
  MPI_Info_free(&win_info_true);

  MPI_Barrier(m_comm_machine);
}
204
205/*---------------------------------------------------------------------------*/
206/*---------------------------------------------------------------------------*/
207
208MpiMultiMachineShMemWinBaseInternal::
209~MpiMultiMachineShMemWinBaseInternal()
210{
211 for (Integer i = 0; i < m_comm_machine_size * m_nb_segments_per_proc; ++i) {
212 MPI_Win_free(&m_all_mpi_win[i]);
213 }
214 MPI_Win_free(&m_win_need_resize);
215 MPI_Win_free(&m_win_actual_sizeof);
216 MPI_Win_free(&m_win_target_segments);
217}
218
219/*---------------------------------------------------------------------------*/
220/*---------------------------------------------------------------------------*/
221
223sizeofOneElem() const
224{
225 return m_sizeof_type;
226}
227
228/*---------------------------------------------------------------------------*/
229/*---------------------------------------------------------------------------*/
230
232machineRanks() const
233{
234 return m_machine_ranks;
235}
236
237/*---------------------------------------------------------------------------*/
238/*---------------------------------------------------------------------------*/
239
241barrier() const
242{
243 MPI_Barrier(m_comm_machine);
244}
245
246/*---------------------------------------------------------------------------*/
247/*---------------------------------------------------------------------------*/
248
250segmentView(Int32 num_seg)
251{
252 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
253 return m_reserved_part_span[num_seg].subSpan(0, m_sizeof_used_part[segment_infos_pos]);
254}
255
256/*---------------------------------------------------------------------------*/
257/*---------------------------------------------------------------------------*/
258
260segmentView(Int32 rank, Int32 num_seg)
261{
262 const Int32 segment_infos_pos = num_seg + _worldToMachine(rank) * m_nb_segments_per_proc;
263
264 MPI_Aint size_seg;
265 int size_type;
266 std::byte* ptr_seg = nullptr;
267 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], rank, &size_seg, &size_type, &ptr_seg);
268
269 if (error != MPI_SUCCESS) {
270 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
271 }
272
273 return Span<std::byte>{ ptr_seg, m_sizeof_used_part[segment_infos_pos] };
274}
275
276/*---------------------------------------------------------------------------*/
277/*---------------------------------------------------------------------------*/
278
280segmentConstView(Int32 num_seg) const
281{
282 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
283 return m_reserved_part_span[num_seg].subSpan(0, m_sizeof_used_part[segment_infos_pos]);
284}
285
286/*---------------------------------------------------------------------------*/
287/*---------------------------------------------------------------------------*/
288
290segmentConstView(Int32 rank, Int32 num_seg) const
291{
292 const Int32 segment_infos_pos = num_seg + _worldToMachine(rank) * m_nb_segments_per_proc;
293
294 MPI_Aint size_seg;
295 int size_type;
296 std::byte* ptr_seg = nullptr;
297 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], rank, &size_seg, &size_type, &ptr_seg);
298
299 if (error != MPI_SUCCESS) {
300 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
301 }
302
303 return Span<const std::byte>{ ptr_seg, m_sizeof_used_part[segment_infos_pos] };
304}
305
306/*---------------------------------------------------------------------------*/
307/*---------------------------------------------------------------------------*/
308
311{
312 if (elem.size() % m_sizeof_type) {
313 ARCCORE_FATAL("Sizeof elem not valid");
314 }
315 if (elem.empty() || elem.data() == nullptr) {
316 return;
317 }
318
319 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
320
321 const Int64 actual_sizeof_win = m_sizeof_used_part[segment_infos_pos];
322 const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
323 const Int64 old_reserved = m_reserved_part_span[num_seg].size();
324
325 if (future_sizeof_win > old_reserved) {
326 _requestRealloc(segment_infos_pos, future_sizeof_win);
327 }
328 else {
329 _requestRealloc(segment_infos_pos);
330 }
331
332 m_add_requests[num_seg] = elem;
333 m_add_requested = true; // TODO Atomic ?
334}
335
336/*---------------------------------------------------------------------------*/
337/*---------------------------------------------------------------------------*/
338
341{
342 _executeRealloc();
343
344 if (!m_add_requested) {
345 return;
346 }
347 m_add_requested = false;
348
349 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
350 if (m_add_requests[num_seg].empty() || m_add_requests[num_seg].data() == nullptr) {
351 continue;
352 }
353
354 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
355
356 const Int64 actual_sizeof_win = m_sizeof_used_part[segment_infos_pos];
357 const Int64 future_sizeof_win = actual_sizeof_win + m_add_requests[num_seg].size();
358
359 if (m_reserved_part_span[num_seg].size() < future_sizeof_win) {
360 ARCCORE_FATAL("Bad realloc -- New size : {1} -- Needed size : {2}", m_reserved_part_span[num_seg].size(), future_sizeof_win);
361 }
362
363 for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
364 m_reserved_part_span[num_seg][pos_win] = m_add_requests[num_seg][pos_elem];
365 }
366 m_sizeof_used_part[segment_infos_pos] = future_sizeof_win;
367
368 m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
369 }
370}
371
372/*---------------------------------------------------------------------------*/
373/*---------------------------------------------------------------------------*/
374
377{
378 if (elem.size() % m_sizeof_type) {
379 ARCCORE_FATAL("Sizeof elem not valid");
380 }
381 if (elem.empty() || elem.data() == nullptr) {
382 return;
383 }
384
385 const Int32 machine_rank = _worldToMachine(rank);
386 const Int32 target_segment_infos_pos = num_seg + machine_rank * m_nb_segments_per_proc;
387
388 {
389 const Int32 segment_infos_pos = thread + m_comm_machine_rank * m_nb_segments_per_proc;
390 m_target_segments[segment_infos_pos] = target_segment_infos_pos;
391 }
392
393 Span<std::byte> rank_reserved_part_span;
394 {
395 MPI_Aint size_seg;
396 std::byte* ptr_seg = nullptr;
397 int size_type;
398 int error = MPI_Win_shared_query(m_all_mpi_win[target_segment_infos_pos], machine_rank, &size_seg, &size_type, &ptr_seg);
399
400 if (error != MPI_SUCCESS) {
401 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
402 }
403 rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
404 }
405
406 const Int64 actual_sizeof_win = m_sizeof_used_part[target_segment_infos_pos];
407 const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
408 const Int64 old_reserved = rank_reserved_part_span.size();
409
410 if (future_sizeof_win > old_reserved) {
411 _requestRealloc(target_segment_infos_pos, future_sizeof_win);
412 }
413 else {
414 _requestRealloc(target_segment_infos_pos);
415 }
416
417 m_add_requests[thread] = elem;
418 m_add_requested = true; // TODO Atomic ?
419}
420
421/*---------------------------------------------------------------------------*/
422/*---------------------------------------------------------------------------*/
423
426{
427 MPI_Barrier(m_comm_machine);
428
429 // Pour chaque segment appartenant à mon processus, je regarde si quelqu'un
430 // veut le modifier.
431 // is_my_seg_edited sera utilisé à la fin de la méthode.
432 auto is_my_seg_edited = std::make_unique<bool[]>(m_comm_machine_size);
433 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
434 for (const Int32 rank_asked : m_target_segments) {
435 if (rank_asked == m_comm_machine_rank) {
436 is_my_seg_edited[num_seg] = true;
437 break;
438 }
439 }
440 }
441
442 // Si je n'ai pas demandé de modification de segment, j'effectue uniquement
443 // les réallocations si besoin (d'autres processus peuvent me demander des
444 // réallocations).
445 if (!m_add_requested) {
446 _executeRealloc();
447 }
448
449 else {
450 m_add_requested = false;
451
452 // Un segment ne peut être modifié que par un seul thread à la fois. On vérifie cela ici :
453 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
454 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
455 const Int32 seg_needs_to_edit = m_target_segments[segment_infos_pos];
456 if (seg_needs_to_edit == -1)
457 continue;
458
459 bool is_found = false;
460 for (const Int32 rank_asked : m_target_segments) {
461 if (rank_asked == seg_needs_to_edit) {
462 if (!is_found) {
463 is_found = true;
464 }
465 else {
466 ARCCORE_FATAL("Two subdomains ask same rank for addToAnotherSegment()");
467 }
468 }
469 }
470 }
471
472 _executeRealloc();
473
474 // On ajoute les éléments dans les segments.
475 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
476 if (m_add_requests[num_seg].empty() || m_add_requests[num_seg].data() == nullptr) {
477 continue;
478 }
479
480 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
481 const Int32 target_segment_infos_pos = m_target_segments[segment_infos_pos];
482 if (target_segment_infos_pos == -1) {
483 ARCCORE_FATAL("Ne devrait pas aller ici");
484 }
485
486 const Int64 actual_sizeof_win = m_sizeof_used_part[target_segment_infos_pos];
487 const Int64 future_sizeof_win = actual_sizeof_win + m_add_requests[num_seg].size();
488
489 // On récupère le segment à modifier.
490 Span<std::byte> rank_reserved_part_span;
491 {
492 MPI_Aint size_seg;
493 std::byte* ptr_seg = nullptr;
494 int size_type;
495 int error = MPI_Win_shared_query(m_all_mpi_win[target_segment_infos_pos], target_segment_infos_pos / m_nb_segments_per_proc, &size_seg, &size_type, &ptr_seg);
496
497 if (error != MPI_SUCCESS) {
498 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
499 }
500 rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
501 }
502
503 if (rank_reserved_part_span.size() < future_sizeof_win) {
504 ARCCORE_FATAL("Bad realloc -- New size : {1} -- Needed size : {2}", rank_reserved_part_span.size(), future_sizeof_win);
505 }
506
507 // On le modifie avec les éléments donnés dans la méthode requestAddToAnotherSegment().
508 for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
509 rank_reserved_part_span[pos_win] = m_add_requests[num_seg][pos_elem];
510 }
511 m_sizeof_used_part[target_segment_infos_pos] = future_sizeof_win;
512
513 m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
514 m_target_segments[segment_infos_pos] = -1;
515 }
516 }
517 MPI_Barrier(m_comm_machine);
518
519 // Vu que d'autres ont pu modifier nos segments, on recrée les vues.
520 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
521 if (is_my_seg_edited[num_seg]) {
522 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
523
524 MPI_Aint size_seg;
525 std::byte* ptr_seg = nullptr;
526 int size_type;
527 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
528
529 if (error != MPI_SUCCESS) {
530 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
531 }
532 m_reserved_part_span[num_seg] = Span<std::byte>{ ptr_seg, size_seg };
533 }
534 }
535}
536
537/*---------------------------------------------------------------------------*/
538/*---------------------------------------------------------------------------*/
539
541requestReserve(Int32 num_seg, Int64 new_capacity)
542{
543 if (new_capacity % m_sizeof_type) {
544 ARCCORE_FATAL("new_capacity not valid");
545 }
546
547 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
548
549 if (new_capacity > m_reserved_part_span[num_seg].size()) {
550 _requestRealloc(segment_infos_pos, new_capacity);
551 }
552 else {
553 _requestRealloc(segment_infos_pos);
554 }
555}
556
557/*---------------------------------------------------------------------------*/
558/*---------------------------------------------------------------------------*/
559
562{
563 _executeRealloc();
564}
565
566/*---------------------------------------------------------------------------*/
567/*---------------------------------------------------------------------------*/
568
570requestResize(Int32 num_seg, Int64 new_size)
571{
572 if (new_size == -1) {
573 return;
574 }
575 if (new_size < 0 || new_size % m_sizeof_type) {
576 ARCCORE_FATAL("new_size not valid");
577 }
578
579 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
580
581 if (new_size > m_reserved_part_span[num_seg].size()) {
582 _requestRealloc(segment_infos_pos, new_size);
583 }
584 else {
585 _requestRealloc(segment_infos_pos);
586 }
587
588 m_resize_requests[num_seg] = new_size;
589 m_resize_requested = true; // TODO Atomic ?
590}
591
592/*---------------------------------------------------------------------------*/
593/*---------------------------------------------------------------------------*/
594
597{
598 _executeRealloc();
599
600 if (!m_resize_requested) {
601 return;
602 }
603 m_resize_requested = false;
604
605 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
606 if (m_resize_requests[num_seg] == -1) {
607 continue;
608 }
609
610 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
611
612 if (m_reserved_part_span[num_seg].size() < m_resize_requests[num_seg]) {
613 ARCCORE_FATAL("Bad realloc -- New size : {0} -- Needed size : {1}", m_reserved_part_span[num_seg].size(), m_resize_requests[num_seg]);
614 }
615
616 m_sizeof_used_part[segment_infos_pos] = m_resize_requests[num_seg];
617 m_resize_requests[num_seg] = -1;
618 }
619}
620
621/*---------------------------------------------------------------------------*/
622/*---------------------------------------------------------------------------*/
623
626{
627 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
628 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
629
630 if (m_reserved_part_span[num_seg].size() == m_sizeof_used_part[segment_infos_pos]) {
631 _requestRealloc(segment_infos_pos);
632 }
633 else {
634 _requestRealloc(segment_infos_pos, m_sizeof_used_part[segment_infos_pos]);
635 }
636 }
637 _executeRealloc();
638}
639
640/*---------------------------------------------------------------------------*/
641/*---------------------------------------------------------------------------*/
642
644_requestRealloc(Int32 owner_pos_segment, Int64 new_capacity) const
645{
646 m_need_resize[owner_pos_segment] = new_capacity;
647}
648
649/*---------------------------------------------------------------------------*/
650/*---------------------------------------------------------------------------*/
651
653_requestRealloc(Int32 owner_pos_segment) const
654{
655 m_need_resize[owner_pos_segment] = -1;
656}
657
658/*---------------------------------------------------------------------------*/
659/*---------------------------------------------------------------------------*/
660
//! \brief Performs the collective reallocation step, then clears our
//! pending requests. Surrounded by two barriers (see comments below).
void MpiMultiMachineShMemWinBaseInternal::
_executeRealloc()
{
  // Important barrier: everybody must know that one of the segments we own
  // has to be resized before the collective allocations start.
  MPI_Barrier(m_comm_machine);

  // No barrier needed here because the MPI_Win_allocate_shared() calls
  // inside _realloc() are blocking (collective).
  _realloc();

  // Clear our own request slots for the next round.
  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
    m_need_resize[segment_infos_pos] = -1;
  }

  // Important barrier for the case where an MPI_Win_shared_query() of
  // _reallocCollective() takes too long (another process could call this
  // method again and set m_need_resize[m_owner_segment] back to a request
  // => deadlock in _reallocCollective() on MPI_Win_allocate_shared()
  // because of the `continue`).
  // NOTE(review): "_reallocCollective()" appears to be the old name of
  // _realloc() — verify against history.
  MPI_Barrier(m_comm_machine);
}
684
685/*---------------------------------------------------------------------------*/
686/*---------------------------------------------------------------------------*/
687
//! \brief Reallocates every segment whose m_need_resize entry is set.
//!
//! Collective: every rank iterates the same (rank, segment) pairs because
//! MPI_Win_allocate_shared() must be called by all ranks of comm_machine.
//! m_need_resize lives in shared memory, so all ranks take the same branch
//! on the `continue` below. Only the owner copies its data to the new
//! segment; at the end, the owner's reserved-part views are rebuilt.
void MpiMultiMachineShMemWinBaseInternal::
_realloc()
{
  MPI_Info win_info;
  MPI_Info_create(&win_info);
  MPI_Info_set(win_info, "alloc_shared_noncontig", "true");

  // Each rank reallocates its segments, if requested.
  for (Integer rank = 0; rank < m_comm_machine_size; ++rank) {
    for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {

      const Int32 local_segment_infos_pos = num_seg + rank * m_nb_segments_per_proc;

      // No pending request for this segment: skip (same decision on every
      // rank since m_need_resize is shared).
      if (m_need_resize[local_segment_infos_pos] == -1)
        continue;

      ARCCORE_ASSERT(m_need_resize[local_segment_infos_pos] >= 0, ("New size must be >= 0"));
      ARCCORE_ASSERT(m_need_resize[local_segment_infos_pos] % m_sizeof_type == 0, ("New size must be % sizeof type"));

      // If we must realloc our own segment, allocate at least m_sizeof_type
      // bytes. If it is not our segment, size 0 so that MPI allocates nothing.
      const Int64 size_seg = (m_comm_machine_rank == rank ? (m_need_resize[local_segment_infos_pos] == 0 ? m_sizeof_type : m_need_resize[local_segment_infos_pos]) : 0);

      // Save the old window to be able to move the data.
      MPI_Win old_win = m_all_mpi_win[local_segment_infos_pos];
      std::byte* ptr_new_seg = nullptr;

      // If size_seg == 0 then ptr_seg == nullptr.
      int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info, m_comm_machine, &ptr_new_seg, &m_all_mpi_win[local_segment_infos_pos]);
      if (error != MPI_SUCCESS) {
        MPI_Win_free(&old_win);
        ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
      }

      // Only the owner of the segment moves the data.
      if (m_comm_machine_rank == rank) {
        // Two extra pieces of information are needed:
        // - the pointer to the old segment (cannot be taken from
        //   m_reserved_part_span because of the exchanges),
        // - the size of the new segment (MPI may allocate more than the
        //   size we asked for).
        std::byte* ptr_old_seg = nullptr;
        MPI_Aint mpi_reserved_size_new_seg;

        // Old segment.
        {
          MPI_Aint size_old_seg;
          int size_type;
          // Here ptr_seg is never nullptr since we always create a segment
          // of at least m_sizeof_type bytes.
          error = MPI_Win_shared_query(old_win, m_comm_machine_rank, &size_old_seg, &size_type, &ptr_old_seg);
          if (error != MPI_SUCCESS || ptr_old_seg == nullptr) {
            MPI_Win_free(&old_win);
            ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
          }
        }

        // New segment.
        {
          std::byte* ptr_seg = nullptr;
          int size_type;
          // Here ptr_seg is never nullptr since we always create a segment
          // of at least m_sizeof_type bytes.
          error = MPI_Win_shared_query(m_all_mpi_win[local_segment_infos_pos], m_comm_machine_rank, &mpi_reserved_size_new_seg, &size_type, &ptr_seg);
          if (error != MPI_SUCCESS || ptr_seg == nullptr || ptr_seg != ptr_new_seg) {
            MPI_Win_free(&old_win);
            ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
          }
        }

        // If the realloc shrinks the segment (user-used space, e.g. after a
        // resize), not all of the old data can be copied.
        const Int64 min_size = std::min(m_need_resize[local_segment_infos_pos], m_sizeof_used_part[local_segment_infos_pos]);

        memcpy(ptr_new_seg, ptr_old_seg, min_size);
      }

      MPI_Win_free(&old_win);
    }
  }
  MPI_Info_free(&win_info);

  // Rebuild the reserved-part views of the segments we own.
  for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
    const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;

    MPI_Aint size_seg;
    int size_type;
    std::byte* ptr_seg = nullptr;
    int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);

    if (error != MPI_SUCCESS) {
      ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
    }

    m_reserved_part_span[num_seg] = Span<std::byte>{ ptr_seg, size_seg };
  }
}
787
788/*---------------------------------------------------------------------------*/
789/*---------------------------------------------------------------------------*/
790
791Int32 MpiMultiMachineShMemWinBaseInternal::
792_worldToMachine(Int32 world) const
793{
794 for (Int32 i = 0; i < m_comm_machine_size; ++i) {
795 if (m_machine_ranks[i] == world) {
796 return i;
797 }
798 }
799 ARCCORE_FATAL("Rank is not in machine");
800}
801
802/*---------------------------------------------------------------------------*/
803/*---------------------------------------------------------------------------*/
804
805Int32 MpiMultiMachineShMemWinBaseInternal::
806_machineToWorld(Int32 machine) const
807{
808 return m_machine_ranks[machine];
809}
810
811/*---------------------------------------------------------------------------*/
812/*---------------------------------------------------------------------------*/
813
814} // namespace Arcane::MessagePassing::Mpi
815
816/*---------------------------------------------------------------------------*/
817/*---------------------------------------------------------------------------*/
#define ARCCORE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Vue constante d'un tableau de type T.
void requestAdd(Int32 num_seg, Span< const std::byte > elem)
Méthode permettant de demander l'ajout d'éléments dans l'un de nos segments.
Span< const std::byte > segmentConstView(Int32 num_seg) const
Méthode permettant d'obtenir une vue sur l'un de nos segments.
void executeResize()
Méthode permettant d'exécuter les requêtes de redimensionnement.
void executeShrink()
Méthode permettant de réduire l'espace mémoire réservé pour les segments au minimum nécessaire.
void requestReserve(Int32 num_seg, Int64 new_capacity)
Méthode permettant de demander la réservation d'espace mémoire pour un de nos segments.
void requestAddToAnotherSegment(Int32 thread, Int32 rank, Int32 num_seg, Span< const std::byte > elem)
Méthode permettant de demander l'ajout d'éléments dans un des segments de la fenêtre.
void executeReserve()
Méthode permettant d'exécuter les requêtes de réservation.
void _requestRealloc(Int32 owner_pos_segment, Int64 new_capacity) const
Méthode permettant de demander une réallocation.
MpiMultiMachineShMemWinBaseInternal(SmallSpan< Int64 > sizeof_segments, Int32 nb_segments_per_proc, Int32 sizeof_type, const MPI_Comm &comm_machine, Int32 comm_machine_rank, Int32 comm_machine_size, ConstArrayView< Int32 > machine_ranks)
Le sizeof_segments ne doit pas être conservé !
void executeAdd()
Méthode permettant d'exécuter les requêtes d'ajout.
void requestResize(Int32 num_seg, Int64 new_size)
Méthode permettant de demander le redimensionnement d'un de nos segments.
void barrier() const
Méthode permettant d'attendre que tous les processus du noeud appellent cette méthode pour continuer ...
void executeAddToAnotherSegment()
Méthode permettant d'exécuter les requêtes d'ajout dans les segments d'autres processus.
MPI_Win m_win_actual_sizeof
Fenêtre contiguë avec taille des fenêtres principales.
Int32 sizeofOneElem() const
Méthode permettant d'obtenir la taille d'un élement de la fenêtre.
Span< std::byte > segmentView(Int32 num_seg)
Méthode permettant d'obtenir une vue sur l'un de nos segments.
ConstArrayView< Int32 > machineRanks() const
Méthode permettant d'obtenir les rangs qui possèdent un segment dans la fenêtre.
Vue d'un tableau d'éléments de type T.
Definition Span.h:801
constexpr __host__ __device__ pointer data() const noexcept
Pointeur sur le début de la vue.
Definition Span.h:537
constexpr __host__ __device__ bool empty() const noexcept
Retourne true si le tableau est vide (dimension nulle)
Definition Span.h:490
constexpr __host__ __device__ SizeType size() const noexcept
Retourne la taille du tableau.
Definition Span.h:325
Vue d'un tableau d'éléments de type T.
Definition Span.h:633
std::int64_t Int64
Type entier signé sur 64 bits.
Int32 Integer
Type représentant un entier.
std::int32_t Int32
Type entier signé sur 32 bits.