Arcane  4.1.12.0
Developer documentation
Loading...
Searching...
No Matches
MpiParallelSuperMng.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* MpiParallelSuperMng.cc (C) 2000-2025 */
9/* */
10/* Parallelism manager using MPI. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/utils/ApplicationInfo.h"
15#include "arcane/utils/NullThreadMng.h"
16#include "arcane/utils/ITraceMng.h"
17#include "arcane/utils/PlatformUtils.h"
18#include "arcane/utils/ValueConvert.h"
19#include "arcane/utils/NotImplementedException.h"
20#include "arcane/utils/ArgumentException.h"
21#include "arcane/utils/FatalErrorException.h"
22#include "arcane/utils/CommandLineArguments.h"
23
24#include "arcane/parallel/IStat.h"
25
26#include "arccore/message_passing_mpi/internal/MpiAdapter.h"
27#include "arcane/parallel/mpi/MpiParallelMng.h"
28#include "arcane/parallel/mpi/MpiParallelDispatch.h"
29#include "arcane/parallel/mpi/MpiErrorHandler.h"
30
31#include "arcane/core/FactoryService.h"
32#include "arcane/core/IApplication.h"
33#include "arcane/core/ParallelSuperMngDispatcher.h"
34
35#include "arcane/impl/SequentialParallelSuperMng.h"
36
37/*---------------------------------------------------------------------------*/
38/*---------------------------------------------------------------------------*/
39
40namespace Arcane
41{
42
43/*---------------------------------------------------------------------------*/
44/*---------------------------------------------------------------------------*/
45
// Parallelism manager using MPI, implemented on top of
// ParallelSuperMngDispatcher.
//
// NOTE(review): the listing numbers embedded in this extract skip 46-48,
// 67-68, 72, 81 and 84-88. Several members used by the inline accessors
// below (m_application, m_is_parallel, m_rank, m_nb_rank,
// m_mpi_main_communicator) are therefore not declared in the visible text;
// presumably they are declared on the skipped lines -- confirm against the
// real file.
49class MpiParallelSuperMng
50: public ParallelSuperMngDispatcher
51{
52 public:
53
 // Stores the application taken from `sbi`; MPI is set up later by build().
54 explicit MpiParallelSuperMng(const ServiceBuildInfo& sbi);
55 ~MpiParallelSuperMng() override;
56
57 void initialize() override;
 // Initializes MPI, duplicates the communicators and creates the dispatchers.
58 void build() override;
59
 // Simple accessors: each one returns the corresponding member unchanged.
60 IApplication* application() const override { return m_application; }
61 IThreadMng* threadMng() const override { return m_thread_mng; }
62 bool isParallel() const override { return m_is_parallel; }
63 Int32 commRank() const override { return m_rank; }
64 Int32 commSize() const override { return m_nb_rank; }
 // The trace rank is the same value as commRank().
65 Int32 traceRank() const override { return m_rank; }
 // Returns the ADDRESS of the communicator member, not the handle itself.
66 void* getMPICommunicator() override { return &m_mpi_main_communicator; }
 // Logs the rank, flushes the traces and calls MPI_Abort.
69 void tryAbort() override;
 // Rank 0 is always the master I/O rank.
70 bool isMasterIO() const override { return commRank() == 0; }
71 Integer masterIORank() const override { return 0; }
 // MPI_Barrier on the main communicator.
73 void barrier() override;
74
75 public:
76
 // Initializes the MPI runtime for `app`; static so that it can be called
 // without an instance (MpiSequentialParallelSuperMng::build() does so).
77 static void initMPI(IApplication* app);
78
79 public:
80
 // Thread manager; a NullThreadMng is allocated by the constructor.
82 IThreadMng* m_thread_mng;
 // Statistics object; deleted by the destructor.
83 Parallel::IStat* m_stat;
 // Communicator used for the error handler, the adapter, barrier() and
 // tryAbort().
89 MP::Communicator m_main_communicator;
 // Communicator obtained in build() from MPI_Comm_split_type with
 // MPI_COMM_TYPE_SHARED.
90 MP::Communicator m_machine_communicator;
91 MpiErrorHandler m_error_handler;
 // Created in build(); released through destroy() in the destructor.
92 MpiAdapter* m_adapter;
 // Created in build(); deleted in the destructor.
93 MpiDatatypeList* m_datatype_list;
94
95 private:
96
97 // Error handler
 // Formats the MPI error text and raises a fatal error.
98 static void _ErrorHandler(MPI_Comm*, int*, ...);
99};
100
101/*---------------------------------------------------------------------------*/
102/*---------------------------------------------------------------------------*/
103
// Builds the manager in its "not yet built" state: no rank information,
// every communicator set to MPI_COMM_NULL, no adapter and no datatype list.
// No MPI function is called in the visible part of this constructor; the
// MPI setup is done by build().
//
// NOTE(review): listing line 120 is missing from this extract. build()
// dereferences m_stat, which is only set to nullptr here, so m_stat is
// presumably created on that missing line -- confirm against the real file.
104MpiParallelSuperMng::
105MpiParallelSuperMng(const ServiceBuildInfo& sbi)
106: m_application(sbi.application())
107, m_thread_mng(nullptr)
108, m_stat(nullptr)
109, m_is_parallel(false)
110, m_rank(0)
111, m_nb_rank(0)
112, m_nb_local_sub_domain(1)
113, m_mpi_main_communicator(MPI_COMM_NULL)
114, m_main_communicator(MPI_COMM_NULL)
115, m_machine_communicator(MPI_COMM_NULL)
116, m_adapter(nullptr)
117, m_datatype_list(nullptr)
118{
 // Owned by this instance; deleted in the destructor.
119 m_thread_mng = new NullThreadMng();
121}
122
123/*---------------------------------------------------------------------------*/
124/*---------------------------------------------------------------------------*/
125
// Releases the helper objects owned by this instance, then synchronizes the
// ranks and frees the MPI communicators.
//
// NOTE(review): MPI_Barrier and MPI_Comm_free are called unconditionally.
// The constructor leaves both communicators at MPI_COMM_NULL until build()
// runs, so this destructor appears to assume build() was called -- confirm.
// NOTE(review): listing line 150 is missing from this extract; the file
// references arcaneFinalizeMPI(), which may be called there -- confirm.
126MpiParallelSuperMng::
127~MpiParallelSuperMng()
128{
 // _finalize() is not defined in this extract (presumably inherited from
 // ParallelSuperMngDispatcher).
129 _finalize();
130
 // An Arcane Exception raised during the release is logged through the
 // application trace manager instead of leaving the destructor.
131 try {
132 delete m_datatype_list;
 // The adapter is released through destroy(), not through delete.
133 if (m_adapter)
134 m_adapter->destroy();
135 delete m_stat;
136 delete m_thread_mng;
137 }
138 catch (const Exception& ex) {
139 m_application->traceMng()->error() << ex;
140 }
141
 // Synchronize all ranks before removing the error handler.
142 MPI_Barrier(m_mpi_main_communicator);
143 m_error_handler.removeHandler();
144
145 MPI_Comm_free(&m_mpi_main_communicator);
146 m_mpi_main_communicator = MPI_COMM_NULL;
147
 // The machine communicator is freed through the address of its underlying
 // MPI_Comm handle.
148 MPI_Comm_free(static_cast<MPI_Comm*>(m_machine_communicator.communicatorAddress()));
149
151}
152
153/*---------------------------------------------------------------------------*/
154/*---------------------------------------------------------------------------*/
155
160
161/*---------------------------------------------------------------------------*/
162/*---------------------------------------------------------------------------*/
163
// Initializes the MPI runtime for the application `app`.
//
// Registers the null request and null communicator values, forwards the
// command-line argc/argv to arcaneInitializeMPI() while requesting
// MPI_THREAD_SERIALIZED, then prints a message (on rank 0 of MPI_COMM_WORLD
// only) when the thread level provided is lower than the one requested.
//
// NOTE(review): listing line 173 is missing from this extract. `app_args`
// is used below without a visible declaration; it is presumably obtained
// from app->applicationInfo().commandLineArguments() on that line -- confirm.
164void MpiParallelSuperMng::
165initMPI(IApplication* app)
166{
167 int* argc = nullptr;
168 char*** argv = nullptr;
169
 // Values used to represent "no request" / "no communicator".
170 Request::setNullRequest(Request(0, nullptr, MPI_REQUEST_NULL));
171 Communicator::setNullCommunicator(Communicator(MPI_COMM_NULL));
172
174 argc = app_args.commandLineArgc();
175 argv = app_args.commandLineArgv();
176
177 // TODO:
178 // Be able to use a communicator other than MPI_COMM_WORLD
179 int thread_wanted = MPI_THREAD_SERIALIZED;
180 int thread_provided = 0;
181 arcaneInitializeMPI(argc, argv, thread_wanted);
182
183#ifndef ARCANE_USE_MPC
184 // MPC (v 2.4.1) does not know this function
185 MPI_Query_thread(&thread_provided);
186#else
 // With MPC the level is not queried; MPI_THREAD_MULTIPLE is assumed.
187 thread_provided = MPI_THREAD_MULTIPLE;
188#endif
189
 // A lower thread level is only reported, it is not treated as an error.
190 if (thread_provided < thread_wanted) {
191 int my_rank = 0;
192 // Display a message but only on one processor.
193 MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
194 if (my_rank == 0)
195 app->traceMng()->info() << "WARNING: MPI thread level provided!=wanted ("
196 << thread_provided << "!=" << thread_wanted << ")";
197 }
198}
199
200/*---------------------------------------------------------------------------*/
201/*---------------------------------------------------------------------------*/
202
203void MpiParallelSuperMng::
204_ErrorHandler(MPI_Comm* comm, int* error_code, ...)
205{
206 ARCANE_UNUSED(comm);
207
208 char error_buf[MPI_MAX_ERROR_STRING + 1];
209 int error_len = 0;
210 int e = *error_code;
211 // int MPI_Error_string(int errorcode, char *string, int *resultlen);
212 MPI_Error_string(e, error_buf, &error_len);
213 error_buf[error_len] = '\0';
214 error_buf[MPI_MAX_ERROR_STRING] = '\0';
215
216 // int MPI_Error_class(int errorcode, int *errorclass);
217
218 ARCANE_FATAL("Error in MPI call code={0} msg={1}", *error_code, error_buf);
219}
220
221/*---------------------------------------------------------------------------*/
222/*---------------------------------------------------------------------------*/
223
225build()
226{
 // Builds the instance: initializes MPI, duplicates MPI_COMM_WORLD, creates
 // the machine communicator, registers the error handler, records rank and
 // size, and creates the adapter plus one dispatcher per basic type.
 //
 // NOTE(review): listing lines 224 (start of this definition) and 235 are
 // missing from this extract. m_main_communicator is used below but never
 // assigned in the visible text; it is presumably built from
 // m_mpi_main_communicator on line 235 -- confirm against the real file.
227 ITraceMng* tm = m_application->traceMng();
228
229 // TODO: Check if a reduction needs to be done over all times.
 // The two timestamps bracket initMPI() so its duration can be printed below.
230 Real start_time = platform::getRealTime();
231 initMPI(m_application);
232 Real end_time = platform::getRealTime();
233
 // Work on a duplicate of MPI_COMM_WORLD rather than on the world itself.
234 MPI_Comm_dup(MPI_COMM_WORLD, &m_mpi_main_communicator);
236 int rank, size;
237 MPI_Comm_rank(m_mpi_main_communicator, &rank);
238 MPI_Comm_size(m_mpi_main_communicator, &size);
239
 // Sub-communicator of the ranks that can share memory
 // (MPI_COMM_TYPE_SHARED); `rank` is used as the ordering key.
 // NOTE(review): the split is done on MPI_COMM_WORLD, not on the duplicate
 // created above -- confirm this is intended.
240 MPI_Comm mpi_machine_communicator = MPI_COMM_NULL;
241 MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &mpi_machine_communicator);
242
243 m_machine_communicator = MP::Communicator(mpi_machine_communicator);
244
 // The error handler is not registered when building with MPC.
245#ifndef ARCANE_USE_MPC
246 m_error_handler.registerHandler(m_main_communicator);
247#endif
248
 // Informational output is printed by rank 0 only.
249 if (rank == 0) {
250 tm->info() << "MPI has non blocking collective";
251 tm->info() << "MPI: sizeof(MPI_Count)=" << sizeof(MPI_Count);
252 tm->info() << "MPI: is GPU Aware?=" << arcaneIsAcceleratorAwareMPI();
253 tm->info() << "MPI: init_time (seconds)=" << (end_time - start_time);
254 }
255
256 m_rank = rank;
257 m_nb_rank = size;
 // Set to true whatever the value of `size`.
258 m_is_parallel = true;
 // m_stat must be non-null here; the constructor initializer sets it to
 // nullptr, so it has to be assigned before build() is called.
259 auto astat = m_stat->toArccoreStat();
260 m_datatype_list = new MpiDatatypeList(false);
261 m_adapter = new MpiAdapter(tm, astat, m_main_communicator, nullptr);
 // One dispatcher per basic type: Byte, Int32, Int64 and Real.
262 auto c = createBuiltInDispatcher<Byte>(tm, nullptr, m_adapter, m_datatype_list);
263 auto i32 = createBuiltInDispatcher<Int32>(tm, nullptr, m_adapter, m_datatype_list);
264 auto i64 = createBuiltInDispatcher<Int64>(tm, nullptr, m_adapter, m_datatype_list);
265 auto r = createBuiltInDispatcher<Real>(tm, nullptr, m_adapter, m_datatype_list);
266 _setDispatchers(c, i32, i64, r);
267}
268
269/*---------------------------------------------------------------------------*/
270/*---------------------------------------------------------------------------*/
271
274{
 // Creates a parallelism manager spanning all the ranks of the main
 // communicator. Only `local_rank == 0` is accepted; any other value raises
 // an ArgumentException.
 //
 // NOTE(review): listing lines 272-273 (the header of this definition) and
 // 302 are missing from this extract. `pm` is used below without a visible
 // declaration; it is presumably created from `bi` on line 302 -- confirm
 // against the real file.
275 ITraceMng* tm = m_application->traceMng();
276 tm->debug() << "[MpiParallelSuperMng::internalCreateWorldParallelMng]";
277 if (local_rank != 0)
278 ARCANE_THROW(ArgumentException, "local_rank has to be '0'");
279
 // Both communicators are duplicated; the duplicates are the ones handed to
 // the build information below.
280 MPI_Comm comm = MPI_COMM_NULL;
281 MPI_Comm_dup(m_main_communicator, &comm);
282
283 MPI_Comm machine_comm = MPI_COMM_NULL;
284 MPI_Comm_dup(m_machine_communicator, &machine_comm);
285
 // Rank and size are read back from the duplicated communicator.
286 int rank = -1;
287 int nb_rank = -1;
288 MPI_Comm_rank(comm, &rank);
289 MPI_Comm_size(comm, &nb_rank);
290
 // Flagged as parallel only when there is more than one rank.
291 bool is_parallel = nb_rank > 1;
292
 // The statistics, trace manager and thread manager of this instance are
 // passed on; no timer manager and no MPI lock are provided.
293 MpiParallelMngBuildInfo bi(comm, machine_comm);
294 bi.is_parallel = is_parallel;
295 bi.stat = m_stat;
296 bi.trace_mng = tm;
297 bi.timer_mng = nullptr;
298 bi.thread_mng = m_thread_mng;
299 bi.mpi_lock = nullptr;
300
301 tm->debug() << "[MpiParallelSuperMng::internalCreateWorldParallelMng] pm->build()";
303 pm->build();
304 return pm;
305}
306
307/*---------------------------------------------------------------------------*/
308/*---------------------------------------------------------------------------*/
309
311tryAbort()
312{
 // Logs which rank is aborting, flushes the traces so that the message is
 // written out, then calls MPI_Abort on the main communicator with error
 // code 2.
 // NOTE(review): listing line 310 (start of this definition) is missing
 // from this extract.
313 m_application->traceMng()->info() << "MpiParallelSuperMng: rank " << m_rank << " calling MPI_Abort";
314 m_application->traceMng()->flush();
315 MPI_Abort(m_main_communicator, 2);
316}
317
318/*---------------------------------------------------------------------------*/
319/*---------------------------------------------------------------------------*/
320
322barrier()
323{
 // Calls MPI_Barrier on the main communicator.
 // NOTE(review): listing line 321 (start of this definition) is missing
 // from this extract.
324 MPI_Barrier(m_main_communicator);
325}
326
327/*---------------------------------------------------------------------------*/
328/*---------------------------------------------------------------------------*/
329
// Variant of SequentialParallelSuperMng that still initializes MPI: the base
// class receives MPI_COMM_WORLD wrapped in a Parallel::Communicator, and
// build() calls MpiParallelSuperMng::initMPI().
//
// NOTE(review): listing lines 347 and 353 are missing from this extract, so
// the destructor body and the end of build() are incomplete here. The file
// references arcaneFinalizeMPI() and the base-class build(); presumably they
// are called on those lines -- confirm against the real file.
335class MpiSequentialParallelSuperMng
336: public SequentialParallelSuperMng
337{
338 public:
339
340 explicit MpiSequentialParallelSuperMng(const ServiceBuildInfo& sbi)
341 : SequentialParallelSuperMng(sbi, Parallel::Communicator(MPI_COMM_WORLD))
342 {
343 }
344
345 ~MpiSequentialParallelSuperMng() override
346 {
348 }
349
350 void build() override
351 {
 // Reuses the static MPI initialization of the parallel manager.
352 MpiParallelSuperMng::initMPI(application());
354 }
355};
356
357/*---------------------------------------------------------------------------*/
358/*---------------------------------------------------------------------------*/
359
// Registers MpiParallelSuperMng as an application-level service
// (ST_Application) named "MpiParallelSuperMng" that implements
// IParallelSuperMng.
360ARCANE_REGISTER_SERVICE(MpiParallelSuperMng,
361 ServiceProperty("MpiParallelSuperMng", ST_Application),
362 ARCANE_SERVICE_INTERFACE(IParallelSuperMng));
363
364/*---------------------------------------------------------------------------*/
365/*---------------------------------------------------------------------------*/
366
// Registers MpiSequentialParallelSuperMng as an application-level service
// (ST_Application) named "MpiSequentialParallelSuperMng" that implements
// IParallelSuperMng.
367ARCANE_REGISTER_SERVICE(MpiSequentialParallelSuperMng,
368 ServiceProperty("MpiSequentialParallelSuperMng", ST_Application),
369 ARCANE_SERVICE_INTERFACE(IParallelSuperMng));
370
371/*---------------------------------------------------------------------------*/
372/*---------------------------------------------------------------------------*/
373
374} // End namespace Arcane
375
376/*---------------------------------------------------------------------------*/
377/*---------------------------------------------------------------------------*/
#define ARCANE_THROW(exception_class,...)
Macro for throwing an exception with formatting.
#define ARCANE_FATAL(...)
Macro throwing a FatalErrorException.
#define ARCANE_SERVICE_INTERFACE(ainterface)
Macro to declare an interface when registering a service.
const CommandLineArguments & commandLineArguments() const
Command line arguments.
Application interface.
virtual const ApplicationInfo & applicationInfo() const =0
Executable information.
virtual ITraceMng * traceMng() const =0
Trace manager.
Interface of a thread manager.
Definition IThreadMng.h:32
virtual TraceMessage info()=0
Stream for an information message.
virtual TraceMessageDbg debug(Trace::eDebugLevel=Trace::Medium)=0
Stream for a debug message.
Communicator for message exchange.
void destroy()
Destroys the instance. It should no longer be used afterward.
Manages the MPI_Datatypes associated with Arcane types.
Message interface for the Type.
bool isParallel() const override
Returns true if the execution is parallel.
void initialize() override
Initializes the instance.
Int32 m_rank
MPI rank in the global communicator of this process.
Int32 m_nb_local_sub_domain
Number of local sub-domains.
Int32 commSize() const override
Returns the total number of processes used.
Integer nbLocalSubDomain() override
Number of subdomains to create locally.
IThreadMng * threadMng() const override
Thread manager.
Int32 commRank() const override
Returns the process number (between 0 and nbProcess()-1).
MP::Communicator m_machine_communicator
MPI Machine Communicator.
Integer masterIORank() const override
Rank of the instance managing input/output (for which isMasterIO() is true).
void barrier() override
Parallelism manager for all allocated resources.
MP::Communicator m_main_communicator
MPI Communicator.
MPI_Comm m_mpi_main_communicator
MPI Communicator.
Ref< IParallelMng > internalCreateWorldParallelMng(Int32 local_rank) override
Creates a parallelism manager for all allocated cores.
Int32 traceRank() const override
Rank of this instance for traces.
bool isMasterIO() const override
Returns true if the instance is a master I/O manager.
IApplication * application() const override
Returns the main manager.
Int32 m_nb_rank
Number of MPI processes in the global communicator.
void tryAbort() override
Attempts to abort.
void build() override
Constructs the instance members.
void * getMPICommunicator() override
Address of the MPI communicator associated with this manager.
Parallel::Communicator communicator() const override
MPI communicator associated with this manager.
IApplication * m_application
Main manager.
void build() override
Build-level construction of the service.
Thread manager in single-threaded mode.
Reference to an instance.
void build() override
Build-level construction of the service.
IApplication * application() const override
Returns the main manager.
Structure containing the information to create a service.
#define ARCANE_REGISTER_SERVICE(aclass, a_service_property,...)
Macro for registering a service.
IStat * createDefaultStat()
Creates a default instance.
Real getRealTime()
Real time used in seconds.
-- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature --
Ref< TrueType > createRef(Args &&... args)
Creates an instance of type TrueType with arguments Args and returns a reference to it.
ARCANE_MPI_EXPORT void arcaneInitializeMPI(int *argc, char ***argv, int wanted_thread_level)
Definition ArcaneMpi.cc:226
ARCANE_MPI_EXPORT bool arcaneIsAcceleratorAwareMPI()
Indicates if the current MPI runtime supports accelerators.
Definition ArcaneMpi.cc:85
Int32 Integer
Type representing an integer.
@ ST_Application
The service is used at the application level.
double Real
Type representing a real number.
ARCANE_MPI_EXPORT void arcaneFinalizeMPI()
Definition ArcaneMpi.cc:232
std::int32_t Int32
Signed integer type of 32 bits.
Info to construct an MpiParallelMng.