Arcane  v4.1.5.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
MpiMultiMachineShMemWinBaseInternal.cc
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
//-----------------------------------------------------------------------------
// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
// See the top-level COPYRIGHT file for details.
// SPDX-License-Identifier: Apache-2.0
//-----------------------------------------------------------------------------
/*---------------------------------------------------------------------------*/
/* MpiMultiMachineShMemWinBaseInternal.cc                      (C) 2000-2026 */
/*                                                                           */
/* Classe permettant de créer des fenêtres mémoires pour un noeud de calcul. */
/* Les segments de ces fenêtres ne sont pas contigus en mémoire et peuvent   */
/* être redimensionnés. Un processus peut posséder plusieurs segments.       */
/*---------------------------------------------------------------------------*/
14
#include "arccore/message_passing_mpi/internal/MpiMultiMachineShMemWinBaseInternal.h"

#include "arccore/base/FatalErrorException.h"

#include <algorithm>
#include <cstring>
#include <memory>
18
19/*---------------------------------------------------------------------------*/
20/*---------------------------------------------------------------------------*/
21
22namespace Arcane::MessagePassing::Mpi
23{
24
25/*---------------------------------------------------------------------------*/
26/*---------------------------------------------------------------------------*/
27
30MpiMultiMachineShMemWinBaseInternal(SmallSpan<Int64> sizeof_segments, Int32 nb_segments_per_proc, Int32 sizeof_type, const MPI_Comm& comm_machine, Int32 comm_machine_rank, Int32 comm_machine_size, ConstArrayView<Int32> machine_ranks)
31: m_win_need_resize()
32, m_win_actual_sizeof()
33, m_win_target_segments()
34, m_comm_machine(comm_machine)
35, m_comm_machine_size(comm_machine_size)
36, m_comm_machine_rank(comm_machine_rank)
37, m_sizeof_type(sizeof_type)
38, m_nb_segments_per_proc(nb_segments_per_proc)
39, m_machine_ranks(machine_ranks)
40, m_add_requests(nb_segments_per_proc)
41, m_resize_requests(nb_segments_per_proc)
42{
43 if (m_sizeof_type <= 0) {
44 ARCCORE_FATAL("Invalid sizeof_type");
45 }
46 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
47 if (sizeof_segments[i] < 0 || sizeof_segments[i] % m_sizeof_type != 0) {
48 ARCCORE_FATAL("Invalid initial sizeof_segment");
49 }
50 }
51 if (m_nb_segments_per_proc <= 0) {
52 ARCCORE_FATAL("Invalid nb_segments_per_proc");
53 }
54 m_all_mpi_win.resize(m_comm_machine_size * m_nb_segments_per_proc);
55 m_reserved_part_span.resize(m_nb_segments_per_proc);
56
57 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
58 m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
59 m_resize_requests[num_seg] = -1;
60 }
61
62 MPI_Info win_info_true;
63 MPI_Info_create(&win_info_true);
64 MPI_Info_set(win_info_true, "alloc_shared_noncontig", "true");
65
66 MPI_Info win_info_false;
67 MPI_Info_create(&win_info_false);
68 MPI_Info_set(win_info_false, "alloc_shared_noncontig", "false");
69
70 const Int32 pos_my_wins = m_comm_machine_rank * m_nb_segments_per_proc;
71
72 {
73 // On crée tous les segments de tous les processus.
74 for (Integer i = 0; i < m_comm_machine_size; ++i) {
75 for (Integer j = 0; j < m_nb_segments_per_proc; ++j) {
76 Int64 size_seg = 0;
77 if (m_comm_machine_rank == i) {
78 if (sizeof_segments[j] == 0)
79 size_seg = m_sizeof_type;
80 else
81 size_seg = sizeof_segments[j];
82 }
83 std::byte* ptr_seg = nullptr;
84 int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info_true, m_comm_machine, &ptr_seg, &m_all_mpi_win[j + i * m_nb_segments_per_proc]);
85
86 if (error != MPI_SUCCESS) {
87 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
88 }
89 }
90 }
91
92 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
93 MPI_Aint size_seg;
94 int size_type;
95 std::byte* ptr_seg = nullptr;
96 int error = MPI_Win_shared_query(m_all_mpi_win[i + pos_my_wins], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
97
98 if (error != MPI_SUCCESS) {
99 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
100 }
101
102 // Attention : L'utilisateur demande un nombre minimum d'éléments réservés.
103 // Mais MPI réserve la taille qu'il veut (effet du alloc_shared_noncontig=true).
104 // On est juste sûr que la taille qu'il a réservée est supérieure ou égale à sizeof_segment.
105 m_reserved_part_span[i] = Span<std::byte>{ ptr_seg, size_seg };
106 }
107 }
108
109 {
110 Int64* ptr_seg = nullptr;
111 Int64* ptr_win = nullptr;
112 {
113 int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int64)) * m_nb_segments_per_proc, sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_need_resize);
114
115 if (error != MPI_SUCCESS) {
116 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
117 }
118 }
119 {
120 MPI_Aint size_seg;
121 int size_type;
122 int error = MPI_Win_shared_query(m_win_need_resize, 0, &size_seg, &size_type, &ptr_win);
123
124 if (error != MPI_SUCCESS) {
125 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
126 }
127
128 m_need_resize = Span<Int64>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };
129
130 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
131 m_need_resize[i + pos_my_wins] = -1;
132 }
133 }
134 if (ptr_win + pos_my_wins != ptr_seg) {
135 ARCCORE_FATAL("m_win_need_resize is noncontig");
136 }
137 }
138
139 {
140 Int64* ptr_seg = nullptr;
141 Int64* ptr_win = nullptr;
142 {
143 int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int64)) * m_nb_segments_per_proc, sizeof(Int64), win_info_false, m_comm_machine, &ptr_seg, &m_win_actual_sizeof);
144
145 if (error != MPI_SUCCESS) {
146 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
147 }
148 }
149 {
150 MPI_Aint size_seg;
151 int size_type;
152 int error = MPI_Win_shared_query(m_win_actual_sizeof, 0, &size_seg, &size_type, &ptr_win);
153
154 if (error != MPI_SUCCESS) {
155 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
156 }
157
158 m_sizeof_used_part = Span<Int64>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };
159
160 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
161 m_sizeof_used_part[i + pos_my_wins] = sizeof_segments[i];
162 }
163 }
164 if (ptr_win + pos_my_wins != ptr_seg) {
165 ARCCORE_FATAL("m_win_actual_sizeof is noncontig");
166 }
167 }
168
169 {
170 Int32* ptr_seg = nullptr;
171 Int32* ptr_win = nullptr;
172 {
173 int error = MPI_Win_allocate_shared(static_cast<Int64>(sizeof(Int32)) * m_nb_segments_per_proc, sizeof(Int32), win_info_false, m_comm_machine, &ptr_seg, &m_win_target_segments);
174
175 if (error != MPI_SUCCESS) {
176 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
177 }
178 }
179 {
180 MPI_Aint size_seg;
181 int size_type;
182 int error = MPI_Win_shared_query(m_win_target_segments, 0, &size_seg, &size_type, &ptr_win);
183
184 if (error != MPI_SUCCESS) {
185 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
186 }
187
188 m_target_segments = Span<Int32>{ ptr_win, m_comm_machine_size * m_nb_segments_per_proc };
189
190 for (Integer i = 0; i < m_nb_segments_per_proc; ++i) {
191 m_target_segments[i + pos_my_wins] = -1;
192 }
193 }
194 if (ptr_win + pos_my_wins != ptr_seg) {
195 ARCCORE_FATAL("m_win_owner_segments is noncontig");
196 }
197 }
198
199 MPI_Info_free(&win_info_false);
200 MPI_Info_free(&win_info_true);
201
202 MPI_Barrier(m_comm_machine);
203}
204
205/*---------------------------------------------------------------------------*/
206/*---------------------------------------------------------------------------*/
207
208MpiMultiMachineShMemWinBaseInternal::
209~MpiMultiMachineShMemWinBaseInternal()
210{
211 for (Integer i = 0; i < m_comm_machine_size * m_nb_segments_per_proc; ++i) {
212 MPI_Win_free(&m_all_mpi_win[i]);
213 }
214 MPI_Win_free(&m_win_need_resize);
215 MPI_Win_free(&m_win_actual_sizeof);
216 MPI_Win_free(&m_win_target_segments);
217}
218
219/*---------------------------------------------------------------------------*/
220/*---------------------------------------------------------------------------*/
221
223sizeofOneElem() const
224{
225 return m_sizeof_type;
226}
227
228/*---------------------------------------------------------------------------*/
229/*---------------------------------------------------------------------------*/
230
232machineRanks() const
233{
234 return m_machine_ranks;
235}
236
237/*---------------------------------------------------------------------------*/
238/*---------------------------------------------------------------------------*/
239
241barrier() const
242{
243 MPI_Barrier(m_comm_machine);
244}
245
246/*---------------------------------------------------------------------------*/
247/*---------------------------------------------------------------------------*/
248
250segmentView(Int32 num_seg)
251{
252 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
253 return m_reserved_part_span[num_seg].subSpan(0, m_sizeof_used_part[segment_infos_pos]);
254}
255
256/*---------------------------------------------------------------------------*/
257/*---------------------------------------------------------------------------*/
258
260segmentView(Int32 rank, Int32 num_seg)
261{
262 const Int32 segment_infos_pos = num_seg + _worldToMachine(rank) * m_nb_segments_per_proc;
263
264 MPI_Aint size_seg;
265 int size_type;
266 std::byte* ptr_seg = nullptr;
267 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], rank, &size_seg, &size_type, &ptr_seg);
268
269 if (error != MPI_SUCCESS) {
270 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
271 }
272
273 return Span<std::byte>{ ptr_seg, m_sizeof_used_part[segment_infos_pos] };
274}
275
276/*---------------------------------------------------------------------------*/
277/*---------------------------------------------------------------------------*/
278
280segmentConstView(Int32 num_seg) const
281{
282 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
283 return m_reserved_part_span[num_seg].subSpan(0, m_sizeof_used_part[segment_infos_pos]);
284}
285
286/*---------------------------------------------------------------------------*/
287/*---------------------------------------------------------------------------*/
288
290segmentConstView(Int32 rank, Int32 num_seg) const
291{
292 const Int32 segment_infos_pos = num_seg + _worldToMachine(rank) * m_nb_segments_per_proc;
293
294 MPI_Aint size_seg;
295 int size_type;
296 std::byte* ptr_seg = nullptr;
297 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], rank, &size_seg, &size_type, &ptr_seg);
298
299 if (error != MPI_SUCCESS) {
300 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
301 }
302
303 return Span<const std::byte>{ ptr_seg, m_sizeof_used_part[segment_infos_pos] };
304}
305
306/*---------------------------------------------------------------------------*/
307/*---------------------------------------------------------------------------*/
308
311{
312 if (elem.size() % m_sizeof_type) {
313 ARCCORE_FATAL("Sizeof elem not valid");
314 }
315 if (elem.empty() || elem.data() == nullptr) {
316 return;
317 }
318
319 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
320
321 const Int64 actual_sizeof_win = m_sizeof_used_part[segment_infos_pos];
322 const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
323 const Int64 old_reserved = m_reserved_part_span[num_seg].size();
324
325 if (future_sizeof_win > old_reserved) {
326 _requestRealloc(segment_infos_pos, future_sizeof_win);
327 }
328 else {
329 _requestRealloc(segment_infos_pos);
330 }
331
332 m_add_requests[num_seg] = elem;
333 m_add_requested = true; // TODO Atomic ?
334}
335
336/*---------------------------------------------------------------------------*/
337/*---------------------------------------------------------------------------*/
338
341{
342 _executeRealloc();
343
344 if (!m_add_requested) {
345 return;
346 }
347 m_add_requested = false;
348
349 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
350 if (m_add_requests[num_seg].empty() || m_add_requests[num_seg].data() == nullptr) {
351 continue;
352 }
353
354 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
355
356 const Int64 actual_sizeof_win = m_sizeof_used_part[segment_infos_pos];
357 const Int64 future_sizeof_win = actual_sizeof_win + m_add_requests[num_seg].size();
358
359 if (m_reserved_part_span[num_seg].size() < future_sizeof_win) {
360 ARCCORE_FATAL("Bad realloc -- New size : {1} -- Needed size : {2}", m_reserved_part_span[num_seg].size(), future_sizeof_win);
361 }
362
363 for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
364 m_reserved_part_span[num_seg][pos_win] = m_add_requests[num_seg][pos_elem];
365 }
366 m_sizeof_used_part[segment_infos_pos] = future_sizeof_win;
367
368 m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
369 }
370}
371
372/*---------------------------------------------------------------------------*/
373/*---------------------------------------------------------------------------*/
374
377{
378 if (elem.size() % m_sizeof_type) {
379 ARCCORE_FATAL("Sizeof elem not valid");
380 }
381 if (elem.empty() || elem.data() == nullptr) {
382 return;
383 }
384
385 const Int32 machine_rank = _worldToMachine(rank);
386 const Int32 target_segment_infos_pos = num_seg + machine_rank * m_nb_segments_per_proc;
387
388 {
389 const Int32 segment_infos_pos = thread + m_comm_machine_rank * m_nb_segments_per_proc;
390 m_target_segments[segment_infos_pos] = target_segment_infos_pos;
391 }
392
393 Span<std::byte> rank_reserved_part_span;
394 {
395 MPI_Aint size_seg;
396 std::byte* ptr_seg = nullptr;
397 int size_type;
398 int error = MPI_Win_shared_query(m_all_mpi_win[target_segment_infos_pos], machine_rank, &size_seg, &size_type, &ptr_seg);
399
400 if (error != MPI_SUCCESS) {
401 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
402 }
403 rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
404 }
405
406 const Int64 actual_sizeof_win = m_sizeof_used_part[target_segment_infos_pos];
407 const Int64 future_sizeof_win = actual_sizeof_win + elem.size();
408 const Int64 old_reserved = rank_reserved_part_span.size();
409
410 if (future_sizeof_win > old_reserved) {
411 _requestRealloc(target_segment_infos_pos, future_sizeof_win);
412 }
413 else {
414 _requestRealloc(target_segment_infos_pos);
415 }
416
417 m_add_requests[thread] = elem;
418 m_add_requested = true; // TODO Atomic ?
419}
420
421/*---------------------------------------------------------------------------*/
422/*---------------------------------------------------------------------------*/
423
426{
427 MPI_Barrier(m_comm_machine);
428
429 auto is_my_seg_edited = std::make_unique<bool[]>(m_comm_machine_size);
430 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
431 for (const Int32 rank_asked : m_target_segments) {
432 if (rank_asked == m_comm_machine_rank) {
433 is_my_seg_edited[num_seg] = true;
434 break;
435 }
436 }
437 }
438
439 if (!m_add_requested) {
440 _executeRealloc();
441 }
442
443 else {
444 m_add_requested = false;
445 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
446 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
447 const Int32 seg_needs_to_edit = m_target_segments[segment_infos_pos];
448 if (seg_needs_to_edit == -1)
449 continue;
450
451 bool is_found = false;
452 for (const Int32 rank_asked : m_target_segments) {
453 if (rank_asked == seg_needs_to_edit) {
454 if (!is_found) {
455 is_found = true;
456 }
457 else {
458 ARCCORE_FATAL("Two subdomains ask same rank for addToAnotherSegment()");
459 }
460 }
461 }
462 }
463
464 _executeRealloc();
465
466 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
467 if (m_add_requests[num_seg].empty() || m_add_requests[num_seg].data() == nullptr) {
468 continue;
469 }
470
471 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
472 const Int32 target_segment_infos_pos = m_target_segments[segment_infos_pos];
473 if (target_segment_infos_pos == -1) {
474 ARCCORE_FATAL("Ne devrait pas aller ici");
475 }
476
477 const Int64 actual_sizeof_win = m_sizeof_used_part[target_segment_infos_pos];
478 const Int64 future_sizeof_win = actual_sizeof_win + m_add_requests[num_seg].size();
479
480 Span<std::byte> rank_reserved_part_span;
481 {
482 MPI_Aint size_seg;
483 std::byte* ptr_seg = nullptr;
484 int size_type;
485 int error = MPI_Win_shared_query(m_all_mpi_win[target_segment_infos_pos], target_segment_infos_pos / m_nb_segments_per_proc, &size_seg, &size_type, &ptr_seg);
486
487 if (error != MPI_SUCCESS) {
488 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
489 }
490 rank_reserved_part_span = Span<std::byte>{ ptr_seg, size_seg };
491 }
492
493 if (rank_reserved_part_span.size() < future_sizeof_win) {
494 ARCCORE_FATAL("Bad realloc -- New size : {1} -- Needed size : {2}", rank_reserved_part_span.size(), future_sizeof_win);
495 }
496
497 for (Int64 pos_win = actual_sizeof_win, pos_elem = 0; pos_win < future_sizeof_win; ++pos_win, ++pos_elem) {
498 rank_reserved_part_span[pos_win] = m_add_requests[num_seg][pos_elem];
499 }
500 m_sizeof_used_part[target_segment_infos_pos] = future_sizeof_win;
501
502 m_add_requests[num_seg] = Span<const std::byte>{ nullptr, 0 };
503 m_target_segments[segment_infos_pos] = -1;
504 }
505 }
506 MPI_Barrier(m_comm_machine);
507
508 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
509 if (is_my_seg_edited[num_seg]) {
510 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
511
512 MPI_Aint size_seg;
513 std::byte* ptr_seg = nullptr;
514 int size_type;
515 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
516
517 if (error != MPI_SUCCESS) {
518 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
519 }
520 m_reserved_part_span[num_seg] = Span<std::byte>{ ptr_seg, size_seg };
521 }
522 }
523}
524
525/*---------------------------------------------------------------------------*/
526/*---------------------------------------------------------------------------*/
527
529requestReserve(Int32 num_seg, Int64 new_capacity)
530{
531 if (new_capacity % m_sizeof_type) {
532 ARCCORE_FATAL("new_capacity not valid");
533 }
534
535 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
536
537 if (new_capacity <= m_reserved_part_span[num_seg].size()) {
538 _requestRealloc(segment_infos_pos);
539 return;
540 }
541 _requestRealloc(segment_infos_pos, new_capacity);
542}
543
544/*---------------------------------------------------------------------------*/
545/*---------------------------------------------------------------------------*/
546
549{
550 _executeRealloc();
551}
552
553/*---------------------------------------------------------------------------*/
554/*---------------------------------------------------------------------------*/
555
557requestResize(Int32 num_seg, Int64 new_size)
558{
559 if (new_size == -1) {
560 return;
561 }
562 if (new_size < 0 || new_size % m_sizeof_type) {
563 ARCCORE_FATAL("new_size not valid");
564 }
565
566 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
567
568 if (new_size > m_reserved_part_span[num_seg].size()) {
569 _requestRealloc(segment_infos_pos, new_size);
570 }
571 else {
572 _requestRealloc(segment_infos_pos);
573 }
574
575 m_resize_requests[num_seg] = new_size;
576 m_resize_requested = true; // TODO Atomic ?
577}
578
579/*---------------------------------------------------------------------------*/
580/*---------------------------------------------------------------------------*/
581
584{
585 _executeRealloc();
586
587 if (!m_resize_requested) {
588 return;
589 }
590 m_resize_requested = false;
591
592 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
593 if (m_resize_requests[num_seg] == -1) {
594 continue;
595 }
596
597 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
598
599 if (m_reserved_part_span[num_seg].size() < m_resize_requests[num_seg]) {
600 ARCCORE_FATAL("Bad realloc -- New size : {0} -- Needed size : {1}", m_reserved_part_span[num_seg].size(), m_resize_requests[num_seg]);
601 }
602
603 m_sizeof_used_part[segment_infos_pos] = m_resize_requests[num_seg];
604 m_resize_requests[num_seg] = -1;
605 }
606}
607
608/*---------------------------------------------------------------------------*/
609/*---------------------------------------------------------------------------*/
610
613{
614 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
615 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
616
617 if (m_reserved_part_span[num_seg].size() == m_sizeof_used_part[segment_infos_pos]) {
618 _requestRealloc(segment_infos_pos);
619 }
620 else {
621 _requestRealloc(segment_infos_pos, m_sizeof_used_part[segment_infos_pos]);
622 }
623 }
624 _executeRealloc();
625}
626
627/*---------------------------------------------------------------------------*/
628/*---------------------------------------------------------------------------*/
629
630void MpiMultiMachineShMemWinBaseInternal::
631_requestRealloc(Int32 owner_pos_segment, Int64 new_capacity) const
632{
633 m_need_resize[owner_pos_segment] = new_capacity;
634}
635
636/*---------------------------------------------------------------------------*/
637/*---------------------------------------------------------------------------*/
638
639void MpiMultiMachineShMemWinBaseInternal::
640_requestRealloc(Int32 owner_pos_segment) const
641{
642 m_need_resize[owner_pos_segment] = -1;
643}
644
645/*---------------------------------------------------------------------------*/
646/*---------------------------------------------------------------------------*/
647
648void MpiMultiMachineShMemWinBaseInternal::
649_executeRealloc()
650{
651 // Barrière importante car tout le monde doit savoir que l'on doit
652 // redimensionner un des segments que nous possédons.
653 MPI_Barrier(m_comm_machine);
654
655 // Pas besoin de barrière car MPI_Win_allocate_shared() de _realloc() est
656 // bloquant.
657 _realloc();
658
659 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
660 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
661 m_need_resize[segment_infos_pos] = -1;
662 }
663
664 // Barrière importante dans le cas où un MPI_Win_shared_query() de
665 // _reallocCollective() durerait trop longtemps (un autre processus pourrait
666 // rappeler cette méthode et remettre m_need_resize[m_owner_segment] à
667 // true => deadlock dans _reallocCollective() sur MPI_Win_allocate_shared()
668 // à cause du continue).
669 MPI_Barrier(m_comm_machine);
670}
671
672/*---------------------------------------------------------------------------*/
673/*---------------------------------------------------------------------------*/
674
675void MpiMultiMachineShMemWinBaseInternal::
676_realloc()
677{
678 MPI_Info win_info;
679 MPI_Info_create(&win_info);
680 MPI_Info_set(win_info, "alloc_shared_noncontig", "true");
681
682 // Chacun réalloc ses segments, si demandé.
683 for (Integer rank = 0; rank < m_comm_machine_size; ++rank) {
684 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
685
686 const Int32 local_segment_infos_pos = num_seg + rank * m_nb_segments_per_proc;
687
688 if (m_need_resize[local_segment_infos_pos] == -1)
689 continue;
690
691 ARCCORE_ASSERT(m_need_resize[local_segment_infos_pos] >= 0, ("New size must be >= 0"));
692 ARCCORE_ASSERT(m_need_resize[local_segment_infos_pos] % m_sizeof_type == 0, ("New size must be % sizeof type"));
693
694 // Si on doit realloc notre segment, on alloue au moins une taille de m_sizeof_type.
695 // Si ce n'est pas notre segment, taille 0 pour que MPI n'alloue rien.
696 const Int64 size_seg = (m_comm_machine_rank == rank ? (m_need_resize[local_segment_infos_pos] == 0 ? m_sizeof_type : m_need_resize[local_segment_infos_pos]) : 0);
697
698 // On sauvegarde l'ancien segment pour déplacer les données.
699 MPI_Win old_win = m_all_mpi_win[local_segment_infos_pos];
700 std::byte* ptr_new_seg = nullptr;
701
702 // Si size_seg == 0 alors ptr_seg == nullptr.
703 int error = MPI_Win_allocate_shared(size_seg, m_sizeof_type, win_info, m_comm_machine, &ptr_new_seg, &m_all_mpi_win[local_segment_infos_pos]);
704 if (error != MPI_SUCCESS) {
705 MPI_Win_free(&old_win);
706 ARCCORE_FATAL("Error with MPI_Win_allocate_shared() call");
707 }
708
709 // Il n'y a que si c'est notre segment que l'on déplace les données.
710 if (m_comm_machine_rank == rank) {
711 // On a besoin de deux infos supplémentaires :
712 // - le pointeur vers l'ancien segment (pas possible de le récupérer
713 // via m_reserved_part_span à cause des échanges),
714 // - la taille du nouveau segment (MPI peut allouer plus que la taille
715 // que l'on a demandée).
716 std::byte* ptr_old_seg = nullptr;
717 MPI_Aint mpi_reserved_size_new_seg;
718
719 // Ancien segment.
720 {
721 MPI_Aint size_old_seg;
722 int size_type;
723 // Ici, ptr_seg n'est jamais == nullptr vu que l'on fait toujours un
724 // segment d'une taille d'au moins m_sizeof_type.
725 error = MPI_Win_shared_query(old_win, m_comm_machine_rank, &size_old_seg, &size_type, &ptr_old_seg);
726 if (error != MPI_SUCCESS || ptr_old_seg == nullptr) {
727 MPI_Win_free(&old_win);
728 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
729 }
730 }
731
732 // Nouveau segment.
733 {
734 std::byte* ptr_seg = nullptr;
735 int size_type;
736 // Ici, ptr_seg n'est jamais == nullptr vu que l'on fait toujours un
737 // segment d'une taille d'au moins m_sizeof_type.
738 error = MPI_Win_shared_query(m_all_mpi_win[local_segment_infos_pos], m_comm_machine_rank, &mpi_reserved_size_new_seg, &size_type, &ptr_seg);
739 if (error != MPI_SUCCESS || ptr_seg == nullptr || ptr_seg != ptr_new_seg) {
740 MPI_Win_free(&old_win);
741 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
742 }
743 }
744
745 // Si le realloc est une réduction de la taille du segment (espace
746 // utilisé par l'utilisateur, si resize par exemple), on ne peut pas
747 // copier toutes les anciennes données.
748 const Int64 min_size = std::min(m_need_resize[local_segment_infos_pos], m_sizeof_used_part[local_segment_infos_pos]);
749
750 memcpy(ptr_new_seg, ptr_old_seg, min_size);
751 }
752
753 MPI_Win_free(&old_win);
754 }
755 }
756 MPI_Info_free(&win_info);
757
758 // On reconstruit les spans des segments que l'on possède.
759 for (Integer num_seg = 0; num_seg < m_nb_segments_per_proc; ++num_seg) {
760 const Int32 segment_infos_pos = num_seg + m_comm_machine_rank * m_nb_segments_per_proc;
761
762 MPI_Aint size_seg;
763 int size_type;
764 std::byte* ptr_seg = nullptr;
765 int error = MPI_Win_shared_query(m_all_mpi_win[segment_infos_pos], m_comm_machine_rank, &size_seg, &size_type, &ptr_seg);
766
767 if (error != MPI_SUCCESS) {
768 ARCCORE_FATAL("Error with MPI_Win_shared_query() call");
769 }
770
771 m_reserved_part_span[num_seg] = Span<std::byte>{ ptr_seg, size_seg };
772 }
773}
774
775/*---------------------------------------------------------------------------*/
776/*---------------------------------------------------------------------------*/
777
778Int32 MpiMultiMachineShMemWinBaseInternal::
779_worldToMachine(Int32 world) const
780{
781 for (Int32 i = 0; i < m_comm_machine_size; ++i) {
782 if (m_machine_ranks[i] == world) {
783 return i;
784 }
785 }
786 ARCCORE_FATAL("Rank is not in machine");
787}
788
789/*---------------------------------------------------------------------------*/
790/*---------------------------------------------------------------------------*/
791
792Int32 MpiMultiMachineShMemWinBaseInternal::
793_machineToWorld(Int32 machine) const
794{
795 return m_machine_ranks[machine];
796}
797
798/*---------------------------------------------------------------------------*/
799/*---------------------------------------------------------------------------*/
800
801} // namespace Arcane::MessagePassing::Mpi
802
803/*---------------------------------------------------------------------------*/
804/*---------------------------------------------------------------------------*/
#define ARCCORE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Vue constante d'un tableau de type T.
void requestAdd(Int32 num_seg, Span< const std::byte > elem)
Méthode permettant de demander l'ajout d'éléments dans l'un de nos segments.
Span< const std::byte > segmentConstView(Int32 num_seg) const
Méthode permettant d'obtenir une vue sur l'un de nos segments.
void executeResize()
Méthode permettant d'exécuter les requêtes de redimensionnement.
void executeShrink()
Méthode permettant de réduire l'espace mémoire réservé pour les segments au minimum nécessaire.
void requestReserve(Int32 num_seg, Int64 new_capacity)
Méthode permettant de demander la réservation d'espace mémoire pour un de nos segments.
void requestAddToAnotherSegment(Int32 thread, Int32 rank, Int32 num_seg, Span< const std::byte > elem)
Méthode permettant de demander l'ajout d'éléments dans un des segments de la fenêtre.
void executeReserve()
Méthode permettant d'exécuter les requêtes de réservation.
MpiMultiMachineShMemWinBaseInternal(SmallSpan< Int64 > sizeof_segments, Int32 nb_segments_per_proc, Int32 sizeof_type, const MPI_Comm &comm_machine, Int32 comm_machine_rank, Int32 comm_machine_size, ConstArrayView< Int32 > machine_ranks)
Le sizeof_segments ne doit pas être conservé !
void executeAdd()
Méthode permettant d'exécuter les requêtes d'ajout.
void requestResize(Int32 num_seg, Int64 new_size)
Méthode permettant de demander le redimensionnement d'un de nos segments.
void barrier() const
Méthode permettant d'attendre que tous les processus du noeud appellent cette méthode pour continuer ...
void executeAddToAnotherSegment()
Méthode permettant d'exécuter les requêtes d'ajout dans les segments d'autres processus.
Int32 sizeofOneElem() const
Méthode permettant d'obtenir la taille d'un élément de la fenêtre.
Span< std::byte > segmentView(Int32 num_seg)
Méthode permettant d'obtenir une vue sur l'un de nos segments.
ConstArrayView< Int32 > machineRanks() const
Méthode permettant d'obtenir les rangs qui possèdent un segment dans la fenêtre.
Vue d'un tableau d'éléments de type T.
Definition Span.h:801
constexpr __host__ __device__ pointer data() const noexcept
Pointeur sur le début de la vue.
Definition Span.h:537
constexpr __host__ __device__ bool empty() const noexcept
Retourne true si le tableau est vide (dimension nulle)
Definition Span.h:490
constexpr __host__ __device__ SizeType size() const noexcept
Retourne la taille du tableau.
Definition Span.h:325
Vue d'un tableau d'éléments de type T.
Definition Span.h:633
std::int64_t Int64
Type entier signé sur 64 bits.
Int32 Integer
Type représentant un entier.
std::int32_t Int32
Type entier signé sur 32 bits.