Arcane  v4.1.1.0
Documentation utilisateur
Chargement...
Recherche...
Aucune correspondance
RunCommandLoop.h
Aller à la documentation de ce fichier.
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* RunCommandLoop.h (C) 2000-2025 */
9/* */
10/* Macros pour exécuter une boucle sur une commande. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCANE_ACCELERATOR_RUNCOMMANDLOOP_H
13#define ARCANE_ACCELERATOR_RUNCOMMANDLOOP_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16
17// Pour compatibilité avec l'existant
19
20#include "arcane/accelerator/core/RunCommand.h"
21#include "arcane/accelerator/KernelLauncher.h"
22
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26namespace Arcane
27{
28
29template <int N, typename IndexType_>
30constexpr ARCCORE_HOST_DEVICE SimpleForLoopRanges<N, IndexType_>::LoopIndexType
31arcaneGetLoopIndexCudaHip(const SimpleForLoopRanges<N, IndexType_>& bounds, Int32 i)
32{
33 return bounds.getIndices(i);
34}
35
36template <int N, typename IndexType_>
37constexpr ARCCORE_HOST_DEVICE ComplexForLoopRanges<N, IndexType_>::LoopIndexType
38arcaneGetLoopIndexCudaHip(const ComplexForLoopRanges<N, IndexType_>& bounds, Int32 i)
39{
40 return bounds.getIndices(i);
41}
42
43#if defined(ARCANE_COMPILING_SYCL)
44
45template <int N, typename IndexType_>
46SimpleForLoopRanges<N, IndexType_>::LoopIndexType
47arcaneGetLoopIndexSycl(const SimpleForLoopRanges<N, IndexType_>& bounds, sycl::nd_item<1> x)
48{
49 return bounds.getIndices(static_cast<Int32>(x.get_global_id(0)));
50}
51
52template <int N, typename IndexType_>
53ComplexForLoopRanges<N, IndexType_>::LoopIndexType
54arcaneGetLoopIndexSycl(const ComplexForLoopRanges<N, IndexType_>& bounds, sycl::nd_item<1> x)
55{
56 return bounds.getIndices(static_cast<Int32>(x.get_global_id(0)));
57}
58
59template <int N, typename IndexType_>
60SimpleForLoopRanges<N, IndexType_>::LoopIndexType
61arcaneGetLoopIndexSycl(const SimpleForLoopRanges<N, IndexType_>& bounds, sycl::id<1> x)
62{
63 return bounds.getIndices(static_cast<Int32>(x));
64}
65
66template <int N, typename IndexType_>
67ComplexForLoopRanges<N, IndexType_>::LoopIndexType
68arcaneGetLoopIndexSycl(const ComplexForLoopRanges<N, IndexType_>& bounds, sycl::id<1> x)
69{
70 return bounds.getIndices(static_cast<Int32>(x));
71}
72
73#endif
74
75/*---------------------------------------------------------------------------*/
76/*---------------------------------------------------------------------------*/
77
78} // namespace Arcane
79
80/*---------------------------------------------------------------------------*/
81/*---------------------------------------------------------------------------*/
82
83namespace Arcane::Accelerator::Impl
84{
85
86/*---------------------------------------------------------------------------*/
87/*---------------------------------------------------------------------------*/
88
89// On utilise 'Argument dependent lookup' pour trouver 'arcaneGetLoopIndexCudaHip'
90#if defined(ARCANE_COMPILING_CUDA_OR_HIP)
91
92template <typename LoopBoundType, typename Lambda, typename... RemainingArgs> __global__ void
93doDirectGPULambdaArrayBounds2(LoopBoundType bounds, Lambda func, RemainingArgs... remaining_args)
94{
95 // TODO: a supprimer quand il n'y aura plus les anciennes réductions
96 auto privatizer = privatize(func);
97 auto& body = privatizer.privateCopy();
98
99 Int32 i = blockDim.x * blockIdx.x + threadIdx.x;
100
101 using namespace Arcane::Accelerator::Impl;
102
104 if (i < bounds.nbElement()) {
105 body(arcaneGetLoopIndexCudaHip(bounds, i), remaining_args...);
106 }
108}
109
110#endif
111
112/*---------------------------------------------------------------------------*/
113/*---------------------------------------------------------------------------*/
114
115#if defined(ARCANE_COMPILING_SYCL)
116
117//! Boucle N-dimension sans indirection
118template <typename LoopBoundType, typename Lambda, typename... RemainingArgs>
119class DoDirectSYCLLambdaArrayBounds
120{
121 public:
122
123 void operator()(sycl::nd_item<1> x, SmallSpan<std::byte> shared_memory,
124 LoopBoundType bounds, Lambda func,
125 RemainingArgs... remaining_args) const
126 {
127 auto privatizer = privatize(func);
128 auto& body = privatizer.privateCopy();
129 Int32 i = static_cast<Int32>(x.get_global_id(0));
130 SyclKernelRemainingArgsHelper::applyAtBegin(x, shared_memory, remaining_args...);
131 if (i < bounds.nbElement()) {
132 // Si possible, on passe \a x en argument
133 body(arcaneGetLoopIndexSycl(bounds, x), remaining_args...);
134 }
135 SyclKernelRemainingArgsHelper::applyAtEnd(x, shared_memory, remaining_args...);
136 }
137 void operator()(sycl::id<1> x, LoopBoundType bounds, Lambda func) const
138 {
139 auto privatizer = privatize(func);
140 auto& body = privatizer.privateCopy();
141
142 Int32 i = static_cast<Int32>(x);
143 if (i < bounds.nbElement()) {
144 body(arcaneGetLoopIndexSycl(bounds, i));
145 }
146 }
147};
148
149#endif
150
151/*---------------------------------------------------------------------------*/
152/*---------------------------------------------------------------------------*/
153
154} // namespace Arcane::Accelerator::Impl
155
156/*---------------------------------------------------------------------------*/
157/*---------------------------------------------------------------------------*/
158
159namespace Arcane::Accelerator::impl
160{
161
162/*---------------------------------------------------------------------------*/
163/*---------------------------------------------------------------------------*/
164/*!
165 * \brief Applique la lambda \a func sur une boucle \a bounds.
166 *
167 * La lambda \a func est appliqué à la commande \a command.
168 * \a bound est le type de la boucle. Les types supportés sont:
169 *
170 * - SimpleForLoopRanges
171 * - ComplexForLoopRanges
172 *
173 * Les arguments supplémentaires \a other_args sont utilisés pour supporter
174 * des fonctionnalités telles que les réductions (ReducerSum2, ReducerMax2, ...)
175 * ou la gestion de la mémoire locale (via LocalMemory).
176 */
177template <typename LoopBoundType, typename Lambda, typename... RemainingArgs> void
178_applyGenericLoop(RunCommand& command, LoopBoundType bounds,
179 const Lambda& func, const RemainingArgs&... other_args)
180{
181 Int64 vsize = bounds.nbElement();
182 if (vsize == 0)
183 return;
184 Impl::RunCommandLaunchInfo launch_info(command, vsize);
185 const eExecutionPolicy exec_policy = launch_info.executionPolicy();
186 launch_info.beginExecute();
187 switch (exec_policy) {
188 case eExecutionPolicy::CUDA:
189 _applyKernelCUDA(launch_info, ARCANE_KERNEL_CUDA_FUNC(Impl::doDirectGPULambdaArrayBounds2) < LoopBoundType, Lambda, RemainingArgs... >, func, bounds, other_args...);
190 break;
191 case eExecutionPolicy::HIP:
192 _applyKernelHIP(launch_info, ARCANE_KERNEL_HIP_FUNC(Impl::doDirectGPULambdaArrayBounds2) < LoopBoundType, Lambda, RemainingArgs... >, func, bounds, other_args...);
193 break;
194 case eExecutionPolicy::SYCL:
195 _applyKernelSYCL(launch_info, ARCANE_KERNEL_SYCL_FUNC(Impl::DoDirectSYCLLambdaArrayBounds) < LoopBoundType, Lambda, RemainingArgs... > {}, func, bounds, other_args...);
196 break;
197 case eExecutionPolicy::Sequential:
198 arcaneSequentialFor(bounds, func, other_args...);
199 break;
200 case eExecutionPolicy::Thread:
201 arccoreParallelFor(bounds, launch_info.loopRunInfo(), func, other_args...);
202 break;
203 default:
204 ARCANE_FATAL("Invalid execution policy '{0}'", exec_policy);
205 }
206 launch_info.endExecute();
207}
208
209/*---------------------------------------------------------------------------*/
210/*---------------------------------------------------------------------------*/
211/*!
212 * \brief Classe pour conserver les arguments d'une RunCommand.
213 *
214 * `LoopBoundType` est le type de la boucle. Par exemple, ce peut être
215 * une SimpleForLoopRanges ou une ComplexForLoopRanges.
216 */
217template <typename LoopBoundType, typename... RemainingArgs>
218class ArrayBoundRunCommand
219{
220 public:
221
222 ArrayBoundRunCommand(RunCommand& command, const LoopBoundType& bounds)
223 : m_command(command)
224 , m_bounds(bounds)
225 {
226 }
227 ArrayBoundRunCommand(RunCommand& command, const LoopBoundType& bounds, const std::tuple<RemainingArgs...>& args)
228 : m_command(command)
229 , m_bounds(bounds)
230 , m_remaining_args(args)
231 {
232 }
233 RunCommand& m_command;
234 LoopBoundType m_bounds;
235 std::tuple<RemainingArgs...> m_remaining_args;
236};
237
238/*---------------------------------------------------------------------------*/
239/*---------------------------------------------------------------------------*/
/*!
 * \brief Class handling the additional parameters of commands.
 *
 * Stores a copy of the loop bounds together with a tuple holding a copy
 * of each additional argument.
 */
template <typename LoopBoundType, typename... RemainingArgs>
class ExtendedArrayBoundLoop
{
 public:

  //! Copy of the loop bounds
  LoopBoundType m_bounds;
  //! Copy of the additional arguments
  std::tuple<RemainingArgs...> m_remaining_args;

 public:

  //! Builds an instance from \a bounds and the additional arguments \a args
  ExtendedArrayBoundLoop(const LoopBoundType& bounds, RemainingArgs... args)
  : m_bounds(bounds)
  , m_remaining_args(args...)
  {}
};
256
257template <typename LoopBoundType, typename... RemainingArgs> auto
258makeExtendedArrayBoundLoop(const LoopBoundType& bounds, RemainingArgs... args)
259-> ExtendedArrayBoundLoop<LoopBoundType, RemainingArgs...>
260{
261 return ExtendedArrayBoundLoop<LoopBoundType, RemainingArgs...>(bounds, args...);
262}
263
264template <typename LoopBoundType, typename... RemainingArgs> auto
265makeExtendedLoop(const LoopBoundType& bounds, RemainingArgs... args)
266-> ExtendedArrayBoundLoop<LoopBoundType, RemainingArgs...>
267{
268 return ExtendedArrayBoundLoop<LoopBoundType, RemainingArgs...>(bounds, args...);
269}
270
271/*---------------------------------------------------------------------------*/
272/*---------------------------------------------------------------------------*/
273/*!
274 * \brief Applique la lambda \a func sur l'intervalle d'itération donnée par \a bounds.
275 *
276 * \a other_args contient les éventuels arguments supplémentaires passés à
277 * la lambda.
278 */
279template <typename LoopBoundType, typename Lambda, typename... RemainingArgs> void
280runExtended(RunCommand& command, LoopBoundType bounds,
281 const Lambda& func, const std::tuple<RemainingArgs...>& other_args)
282{
283 std::apply([&](auto... vs) { _applyGenericLoop(command, bounds, func, vs...); }, other_args);
284}
285
286/*---------------------------------------------------------------------------*/
287/*---------------------------------------------------------------------------*/
288
289//! Applique la lambda \a func sur l'intervalle d'itération donnée par \a bounds
290template <typename LoopBoundType, typename Lambda> void
291runGeneric(RunCommand& command, const LoopBoundType& bounds, const Lambda& func)
292{
293 impl::_applyGenericLoop(command, bounds, func);
294}
295
296// Spécialisation pour ArrayBound.
297//! Applique la lambda \a func sur l'intervalle d'itération donnée par \a bounds
298template <typename ExtentType, typename Lambda> void
299runGeneric(RunCommand& command, ArrayBounds<ExtentType> bounds, const Lambda& func)
300{
301 impl::_applyGenericLoop(command, SimpleForLoopRanges<ExtentType::rank()>(bounds), func);
302}
303
304/*---------------------------------------------------------------------------*/
305/*---------------------------------------------------------------------------*/
306
307} // namespace Arcane::Accelerator::impl
308
309namespace Arcane::Accelerator
310{
311
312/*---------------------------------------------------------------------------*/
313/*---------------------------------------------------------------------------*/
314
315// TODO: a rendre obsolète et supprimer
316//! Applique la lambda \a func sur l'intervalle d'itération donnée par \a bounds
317template <typename ExtentType, typename Lambda> void
318run(RunCommand& command, ArrayBounds<ExtentType> bounds, const Lambda& func)
319{
320 impl::_applyGenericLoop(command, SimpleForLoopRanges<ExtentType::rank()>(bounds), func);
321}
322
323/*---------------------------------------------------------------------------*/
324/*---------------------------------------------------------------------------*/
325
326// TODO: a rendre obsolète et supprimer
327//! Applique la lambda \a func sur l'intervalle d'itération donnée par \a bounds
328template <int N, typename Lambda> void
329run(RunCommand& command, SimpleForLoopRanges<N, Int32> bounds, const Lambda& func)
330{
331 impl::_applyGenericLoop(command, bounds, func);
332}
333
334/*---------------------------------------------------------------------------*/
335/*---------------------------------------------------------------------------*/
336
337// TODO: a rendre obsolète et supprimer
338//! Applique la lambda \a func sur l'intervalle d'itération donnée par \a bounds
339template <int N, typename Lambda> void
340run(RunCommand& command, ComplexForLoopRanges<N, Int32> bounds, const Lambda& func)
341{
342 impl::_applyGenericLoop(command, bounds, func);
343}
344
345/*---------------------------------------------------------------------------*/
346/*---------------------------------------------------------------------------*/
347
348template <typename ExtentType> auto
349operator<<(RunCommand& command, const ArrayBounds<ExtentType>& bounds)
351{
352 return { command, bounds };
353}
354
355template <typename LoopBoundType, typename... RemainingArgs> auto
356operator<<(RunCommand& command, const impl::ExtendedArrayBoundLoop<LoopBoundType, RemainingArgs...>& ex_loop)
357 -> impl::ArrayBoundRunCommand<LoopBoundType, RemainingArgs...>
358{
359 return { command, ex_loop.m_bounds, ex_loop.m_remaining_args };
360}
361
363operator<<(RunCommand& command, const SimpleForLoopRanges<N, Int32>& bounds)
364{
365 return { command, bounds };
366}
367
369operator<<(RunCommand& command, const ComplexForLoopRanges<N, Int32>& bounds)
370{
371 return { command, bounds };
372}
373
374/*---------------------------------------------------------------------------*/
375/*---------------------------------------------------------------------------*/
376
377} // namespace Arcane::Accelerator
378
379/*---------------------------------------------------------------------------*/
380/*---------------------------------------------------------------------------*/
381
382namespace Arcane::Accelerator::impl
383{
384
385/*---------------------------------------------------------------------------*/
386/*---------------------------------------------------------------------------*/
387
388template <typename LoopBoundType, typename Lambda, typename... RemainingArgs>
389inline void operator<<(ArrayBoundRunCommand<LoopBoundType, RemainingArgs...>&& nr, const Lambda& f)
390{
391 if constexpr (sizeof...(RemainingArgs) > 0) {
392 runExtended(nr.m_command, nr.m_bounds, f, nr.m_remaining_args);
393 }
394 else {
395 runGeneric(nr.m_command, nr.m_bounds, f);
396 }
397}
398
399/*---------------------------------------------------------------------------*/
400/*---------------------------------------------------------------------------*/
401
402} // End namespace Arcane::Accelerator::impl
403
404/*---------------------------------------------------------------------------*/
405/*---------------------------------------------------------------------------*/
406
/*!
 * \brief Loop on accelerator.
 *
 * Expands to `A_FUNCINFO << makeExtendedLoop(bounds, extra args...) << [=] (...)`,
 * i.e. up to and including the parameter list of a by-copy lambda; the
 * lambda body is written by the caller right after the macro. The first
 * lambda parameter is \a iter_name, of type `decltype(bounds)::LoopIndexType`.
 * The optional additional arguments are also given to
 * ARCANE_RUNCOMMAND_REDUCER_FOR_EACH to complete the parameter list.
 */
#define RUNCOMMAND_LOOP(iter_name, bounds, ...) \
  A_FUNCINFO \
  << ::Arcane::Accelerator::impl::makeExtendedLoop(bounds __VA_OPT__(, __VA_ARGS__)) \
  << [=] ARCCORE_HOST_DEVICE(typename decltype(bounds)::LoopIndexType iter_name \
                             __VA_OPT__(ARCANE_RUNCOMMAND_REDUCER_FOR_EACH(__VA_ARGS__)))
411
/*!
 * \brief N-dimensional loop on accelerator.
 *
 * The variadic arguments are the arguments given to the ArrayBounds
 * constructor. Expands up to and including the parameter list of a
 * by-copy lambda taking an `Arcane::MDIndex<N>` named \a iter_name;
 * the lambda body is written by the caller right after the macro.
 */
#define RUNCOMMAND_LOOPN(iter_name, N, ...) \
  A_FUNCINFO << Arcane::ArrayBounds<typename Arcane::MDDimType<N>::DimType>(__VA_ARGS__) \
             << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<N> iter_name)

// NOTE: in the three macros below the extent types are qualified with
// 'Arcane::' like every other name, so that the macros can be used outside
// of the Arcane namespace without a using-directive.

//! 2D loop on accelerator over the extents (\a x1, \a x2)
#define RUNCOMMAND_LOOP2(iter_name, x1, x2) \
  A_FUNCINFO << Arcane::ArrayBounds<Arcane::MDDim2>(x1, x2) \
             << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<2> iter_name)

//! 3D loop on accelerator over the extents (\a x1, \a x2, \a x3)
#define RUNCOMMAND_LOOP3(iter_name, x1, x2, x3) \
  A_FUNCINFO << Arcane::ArrayBounds<Arcane::MDDim3>(x1, x2, x3) \
             << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<3> iter_name)

//! 4D loop on accelerator over the extents (\a x1, \a x2, \a x3, \a x4)
#define RUNCOMMAND_LOOP4(iter_name, x1, x2, x3, x4) \
  A_FUNCINFO << Arcane::ArrayBounds<Arcane::MDDim4>(x1, x2, x3, x4) \
             << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<4> iter_name)
427
/*!
 * \brief 1D loop on accelerator with additional arguments.
 *
 * This macro allows adding arguments. These arguments may be values to
 * reduce (such as the classes Arcane::Accelerator::ReducerSum2,
 * Arcane::Accelerator::ReducerMax2 or Arcane::Accelerator::ReducerMin2)
 * or data in local memory (via the class
 * Arcane::Accelerator::RunCommandLocalMemory).
 *
 * The loop range is `SimpleForLoopRanges<1>(x1)` and the lambda receives
 * an `Arcane::MDIndex<1>` named \a iter_name.
 */
#define RUNCOMMAND_LOOP1(iter_name, x1, ...) \
  A_FUNCINFO \
  << ::Arcane::Accelerator::impl::makeExtendedArrayBoundLoop(::Arcane::SimpleForLoopRanges<1>(x1) __VA_OPT__(, __VA_ARGS__)) \
  << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<1> iter_name \
                             __VA_OPT__(ARCANE_RUNCOMMAND_REDUCER_FOR_EACH(__VA_ARGS__)))
439
/*!
 * \brief Loop on accelerator for execution with a single thread.
 *
 * The loop range is `SimpleForLoopRanges<1>(1)`. The lambda receives an
 * unnamed `Arcane::MDIndex<1>`, followed by the parameters generated
 * from the optional additional arguments.
 */
#define RUNCOMMAND_SINGLE(...) \
  A_FUNCINFO \
  << ::Arcane::Accelerator::impl::makeExtendedArrayBoundLoop(::Arcane::SimpleForLoopRanges<1>(1) __VA_OPT__(, __VA_ARGS__)) \
  << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<1> \
                             __VA_OPT__(ARCANE_RUNCOMMAND_REDUCER_FOR_EACH(__VA_ARGS__)))
446
447
448/*---------------------------------------------------------------------------*/
449/*---------------------------------------------------------------------------*/
450
451/*---------------------------------------------------------------------------*/
452/*---------------------------------------------------------------------------*/
453
454#endif
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Classes, Types et macros pour gérer la concurrence.
void runGeneric(RunCommand &command, const LoopBoundType &bounds, const Lambda &func)
Applique la lambda func sur l'intervalle d'itération donnée par bounds.
void _applyGenericLoop(RunCommand &command, LoopBoundType bounds, const Lambda &func, const RemainingArgs &... other_args)
Applique la lambda func sur une boucle bounds.
void runExtended(RunCommand &command, LoopBoundType bounds, const Lambda &func, const std::tuple< RemainingArgs... > &other_args)
Applique la lambda func sur l'intervalle d'itération donnée par bounds.
static ARCCORE_DEVICE void applyAtEnd(Int32 index, RemainingArgs &... remaining_args)
Applique les fonctors des arguments additionnels en fin de kernel.
static ARCCORE_DEVICE void applyAtBegin(Int32 index, RemainingArgs &... remaining_args)
Applique les fonctors des arguments additionnels en début de kernel.
Classe pour conserver les arguments d'une RunCommand.
Classe pour gérer les paramètres supplémentaires des commandes.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
std::int64_t Int64
Type entier signé sur 64 bits.
std::ostream & operator<<(std::ostream &ostr, eItemKind item_kind)
Opérateur de sortie sur un flot.
std::int32_t Int32
Type entier signé sur 32 bits.