12#ifndef ARCANE_ACCELERATOR_RUNCOMMANDLOOP_H
13#define ARCANE_ACCELERATOR_RUNCOMMANDLOOP_H
17#include "arcane/utils/ArcaneCxx20.h"
19#include "arcane/accelerator/RunCommand.h"
20#include "arcane/accelerator/RunQueueInternal.h"
25namespace Arcane::Accelerator::impl
//! Applies the lambda `func` over the loop described by `bounds`.
//!
//! Only part of the body is visible in this listing. What can be seen:
//!  - `vsize` is read from `bounds.nbElement()`;
//!  - a `switch (exec_policy)` forwards `func`, `bounds` and `other_args...`
//!    to `_applyKernelCUDA`, `_applyKernelHIP` or `_applyKernelSYCL`, each
//!    instantiated for `LoopBoundType<N, Int32>`, `Lambda`, `RemainingArgs...`;
//!  - `ARCANE_FATAL` is invoked with the "Invalid execution policy" message.
//! NOTE(review): the function-name line, the case labels and any host-side
//! branches are missing from this listing — confirm against the full header.
37template <
int N,
template <
int T,
typename>
class LoopBoundType,
typename Lambda,
typename... RemainingArgs>
void
39 const Lambda& func,
const RemainingArgs&... other_args)
41 Int64 vsize = bounds.nbElement();
47 switch (exec_policy) {
49 _applyKernelCUDA(launch_info, ARCANE_KERNEL_CUDA_FUNC(impl::doDirectGPULambdaArrayBounds2) < LoopBoundType<N, Int32>, Lambda, RemainingArgs... >, func, bounds, other_args...);
52 _applyKernelHIP(launch_info, ARCANE_KERNEL_HIP_FUNC(impl::doDirectGPULambdaArrayBounds2) < LoopBoundType<N, Int32>, Lambda, RemainingArgs... >, func, bounds, other_args...);
55 _applyKernelSYCL(launch_info, ARCANE_KERNEL_SYCL_FUNC(impl::DoDirectSYCLLambdaArrayBounds) < LoopBoundType<N, Int32>, Lambda, RemainingArgs... > {}, func, bounds, other_args...);
64 ARCANE_FATAL(
"Invalid execution policy '{0}'", exec_policy);
//! Aggregate pairing loop bounds with a pack of additional arguments.
//!
//! Visible members: `m_bounds` (of type `LoopBoundType`) and
//! `m_remaining_args`, a `std::tuple<RemainingArgs...>` initialized from the
//! `args...` pack in a constructor initializer list.
//! NOTE(review): the class-name line is not part of this listing; presumably
//! this is `impl::ExtendedArrayBoundLoop` — confirm.
72template <
typename LoopBoundType,
typename... RemainingArgs>
79 , m_remaining_args(args...)
82 LoopBoundType m_bounds;
83 std::tuple<RemainingArgs...> m_remaining_args;
//! Function template with a deduced (`auto`) return type, parameterized on an
//! extent type and a pack of additional argument types.
//! NOTE(review): the name, parameters and body are missing from this listing;
//! presumably this is `makeExtendedArrayBoundLoop` — confirm.
86template <
typename ExtentType,
typename... RemainingArgs>
auto
//! Function template returning `void`, parameterized on an extent type and a
//! lambda type.
//! NOTE(review): the name, parameters and body are missing from this listing;
//! presumably a `run` overload taking bounds built from `ExtentType` — confirm.
105template <
typename ExtentType,
typename Lambda>
void
//! Runs `func` over an N-dimensional loop by forwarding `command`, `bounds`
//! and `func` to `impl::_applyGenericLoop` with no additional arguments.
//! NOTE(review): the signature line is missing from this listing; presumably
//! a `run` overload for one of the `*ForLoopRanges<N>` bound types — confirm.
115template <
int N,
typename Lambda>
void
118 impl::_applyGenericLoop(command, bounds, func);
//! Runs `func` over an N-dimensional loop by forwarding `command`, `bounds`
//! and `func` to `impl::_applyGenericLoop` with no additional arguments.
//! NOTE(review): the signature line is missing from this listing; presumably
//! the `run` overload for the other `*ForLoopRanges<N>` bound type — confirm.
125template <
int N,
typename Lambda>
void
128 impl::_applyGenericLoop(command, bounds, func);
//! Applies the lambda `func` over the iteration range given by `bounds`,
//! passing along the extra arguments stored in the tuple `other_args`.
//!
//! `std::apply` expands the tuple into the pack `vs...`, which is forwarded
//! to `impl::_applyGenericLoop(command, bounds, func, vs...)`.
//! The inner lambda declares its parameters as `auto... vs`, so each tuple
//! element is received by value (copied) before being forwarded.
//! NOTE(review): the function-name line is missing from this listing;
//! presumably `runExtended` — confirm.
132template <
int N,
template <
int T,
typename>
class LoopBoundType,
typename Lambda,
typename... RemainingArgs>
void
134 const Lambda& func,
const std::tuple<RemainingArgs...>& other_args)
136 std::apply([&](
auto... vs) { impl::_applyGenericLoop(command, bounds, func, vs...); }, other_args);
//! Aggregate associating loop bounds and a tuple of additional arguments,
//! parameterized on the loop dimension `N`.
//!
//! Visible members: `m_bounds` (of type `LoopBoundType`) and
//! `m_remaining_args`, a `std::tuple<RemainingArgs...>` initialized from a
//! single `args` value in a constructor initializer list.
//! NOTE(review): the class-name line and other members are not part of this
//! listing; presumably this is `ArrayBoundRunCommand` — confirm.
142template <
int N,
typename LoopBoundType,
typename... RemainingArgs>
155 , m_remaining_args(args)
159 LoopBoundType m_bounds;
160 std::tuple<RemainingArgs...> m_remaining_args;
//! Returns an object brace-initialized from `command` and `bounds`; the
//! return type is deduced (`auto`).
//! NOTE(review): the name and parameter lines are missing from this listing;
//! presumably an `operator<<(RunCommand&, ...)` overload — confirm.
163template <
typename ExtentType>
auto
167 return { command, bounds };
//! Binds an `impl::ExtendedArrayBoundLoop` to a `RunCommand`.
//!
//! Returns an `ArrayBoundRunCommand` brace-initialized from `command`,
//! `ex_loop.m_bounds` and `ex_loop.m_remaining_args`.
//! NOTE(review): the return type fixes the dimension to 1
//! (`SimpleForLoopRanges<1, Int32>`) whatever `ExtentType` is — confirm that
//! only one-dimensional extents are meant to reach this overload.
170template <
typename ExtentType,
typename... RemainingArgs>
auto
171operator<<(RunCommand& command,
const impl::ExtendedArrayBoundLoop<ExtentType, RemainingArgs...>& ex_loop)
172-> ArrayBoundRunCommand<1, SimpleForLoopRanges<1, Int32>, RemainingArgs...>
174 return { command, ex_loop.m_bounds, ex_loop.m_remaining_args };
//! Binds `SimpleForLoopRanges<N, Int32>` bounds to a `RunCommand`, returning
//! an `ArrayBoundRunCommand` brace-initialized from `command` and `bounds`.
177template <
int N> ArrayBoundRunCommand<N, SimpleForLoopRanges<N>>
178operator<<(RunCommand& command,
const SimpleForLoopRanges<N, Int32>& bounds)
180 return { command, bounds };
//! Binds `ComplexForLoopRanges<N, Int32>` bounds to a `RunCommand`, returning
//! an `ArrayBoundRunCommand` brace-initialized from `command` and `bounds`.
183template <
int N> ArrayBoundRunCommand<N, ComplexForLoopRanges<N>>
184operator<<(RunCommand& command,
const ComplexForLoopRanges<N, Int32>& bounds)
186 return { command, bounds };
//! Runs the lambda `f` on the loop held by the `ArrayBoundRunCommand` `nr`.
//!
//! When the command carries additional arguments
//! (`sizeof...(RemainingArgs) > 0`, tested at compile time), calls
//! `runExtended` with `nr.m_remaining_args`. A call to
//! `run(nr.m_command, nr.m_bounds, f)` is also visible.
//! NOTE(review): the `else` and the closing braces are missing from this
//! listing; presumably `run` is the branch taken when there are no
//! additional arguments — confirm.
189template <
int N,
template <
int,
typename>
class ForLoopBoundType,
typename Lambda,
typename... RemainingArgs>
190void operator<<(ArrayBoundRunCommand<N, ForLoopBoundType<N, Int32>, RemainingArgs...>&& nr,
const Lambda& f)
192 if constexpr (
sizeof...(RemainingArgs) > 0) {
193 runExtended(nr.m_command, nr.m_bounds, f, nr.m_remaining_args);
196 run(nr.m_command, nr.m_bounds, f);
//! Loop header macro: expands to `A_FUNCINFO << bounds << [=] ...(IndexType iter_name)`,
//! i.e. a by-copy lambda head whose single parameter `iter_name` has type
//! `decltype(bounds)::IndexType`. The lambda body must follow the macro call.
//! `bounds` appears twice in the expansion (once as an operand, once inside
//! `decltype`) and is not parenthesized.
209#define RUNCOMMAND_LOOP(iter_name, bounds) \
210 A_FUNCINFO << bounds << [=] ARCCORE_HOST_DEVICE(typename decltype(bounds)::IndexType iter_name)
//! N-dimensional loop header macro: builds
//! `Arcane::ArrayBounds<typename Arcane::MDDimType<N>::DimType>` from the
//! variadic arguments and opens a by-copy lambda whose parameter `iter_name`
//! has type `Arcane::ArrayIndex<N>`. The lambda body must follow the macro call.
213#define RUNCOMMAND_LOOPN(iter_name, N, ...) \
214 A_FUNCINFO << Arcane::ArrayBounds<typename Arcane::MDDimType<N>::DimType>(__VA_ARGS__) << [=] ARCCORE_HOST_DEVICE(Arcane::ArrayIndex<N> iter_name)
//! 2D loop header macro: builds `Arcane::ArrayBounds<MDDim2>(x1, x2)` and opens
//! a by-copy lambda whose parameter `iter_name` has type `Arcane::MDIndex<2>`.
//! The lambda body must follow the macro call.
//! NOTE(review): `MDDim2` is not namespace-qualified, unlike the other names
//! in the expansion, so it must be visible unqualified at the point of use.
217#define RUNCOMMAND_LOOP2(iter_name, x1, x2) \
218 A_FUNCINFO << Arcane::ArrayBounds<MDDim2>(x1, x2) << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<2> iter_name)
//! 3D loop header macro: builds `Arcane::ArrayBounds<MDDim3>(x1, x2, x3)` and
//! opens a by-copy lambda whose parameter `iter_name` has type
//! `Arcane::MDIndex<3>`. The lambda body must follow the macro call.
//! NOTE(review): `MDDim3` is not namespace-qualified, unlike the other names
//! in the expansion, so it must be visible unqualified at the point of use.
221#define RUNCOMMAND_LOOP3(iter_name, x1, x2, x3) \
222 A_FUNCINFO << Arcane::ArrayBounds<MDDim3>(x1, x2, x3) << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<3> iter_name)
//! 4D loop header macro: builds `Arcane::ArrayBounds<MDDim4>(x1, x2, x3, x4)`
//! and opens a by-copy lambda whose parameter `iter_name` has type
//! `Arcane::MDIndex<4>`. The lambda body must follow the macro call.
//! NOTE(review): `MDDim4` is not namespace-qualified, unlike the other names
//! in the expansion, so it must be visible unqualified at the point of use.
225#define RUNCOMMAND_LOOP4(iter_name, x1, x2, x3, x4) \
226 A_FUNCINFO << Arcane::ArrayBounds<MDDim4>(x1, x2, x3, x4) << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<4> iter_name)
//! 1D loop header macro accepting optional extra arguments.
//!
//! Wraps `Arcane::ArrayBounds<MDDim1>(x1)` and, when present, the variadic
//! arguments in `makeExtendedArrayBoundLoop(...)`, then opens a by-copy lambda
//! whose first parameter `iter_name` has type `Arcane::ArrayIndex<1>`. When
//! variadic arguments are given, `ARCANE_RUNCOMMAND_REDUCER_FOR_EACH(__VA_ARGS__)`
//! is appended inside the parameter list (presumably declaring one extra
//! lambda parameter per argument — that macro is defined elsewhere; confirm).
//! Relies on `__VA_OPT__`, so a C++20 preprocessor is required.
//! NOTE(review): `MDDim1` is not namespace-qualified, so it must be visible
//! unqualified at the point of use.
235#define RUNCOMMAND_LOOP1(iter_name, x1, ...) \
236 A_FUNCINFO << ::Arcane::Accelerator::impl::makeExtendedArrayBoundLoop(Arcane::ArrayBounds<MDDim1>(x1) __VA_OPT__(, __VA_ARGS__)) \
237 << [=] ARCCORE_HOST_DEVICE(Arcane::ArrayIndex<1> iter_name __VA_OPT__(ARCANE_RUNCOMMAND_REDUCER_FOR_EACH(__VA_ARGS__)))
//! Single-iteration command header macro accepting optional extra arguments.
//!
//! Same expansion shape as a 1D loop over `Arcane::ArrayBounds<MDDim1>(1)`,
//! except that the `Arcane::ArrayIndex<1>` lambda parameter is left unnamed.
//! When variadic arguments are given they are passed to
//! `makeExtendedArrayBoundLoop` and to `ARCANE_RUNCOMMAND_REDUCER_FOR_EACH`
//! (defined elsewhere). Relies on `__VA_OPT__`, so a C++20 preprocessor is
//! required.
//! NOTE(review): `MDDim1` is not namespace-qualified, so it must be visible
//! unqualified at the point of use.
242#define RUNCOMMAND_SINGLE(...) \
243 A_FUNCINFO << ::Arcane::Accelerator::impl::makeExtendedArrayBoundLoop(Arcane::ArrayBounds<MDDim1>(1) __VA_OPT__(, __VA_ARGS__)) \
244 << [=] ARCCORE_HOST_DEVICE(Arcane::ArrayIndex<1> __VA_OPT__(ARCANE_RUNCOMMAND_REDUCER_FOR_EACH(__VA_ARGS__)))
//! Alias of `RUNCOMMAND_LOOP1`: forwards `iter_name`, `x1` and the variadic
//! arguments unchanged.
251#define RUNCOMMAND_LOOP1_EX(iter_name, x1, ...) \
252 RUNCOMMAND_LOOP1(iter_name, x1, __VA_ARGS__)
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
void _applyGenericLoop(RunCommand &command, LoopBoundType< N, Int32 > bounds, const Lambda &func, const RemainingArgs &... other_args)
Applique la lambda func sur une boucle bounds.
Gestion d'une commande sur accélérateur.
void beginExecute()
Indique qu'on commence l'exécution de la commande.
ParallelLoopOptions computeParallelLoopOptions() const
Calcule les informations pour les boucles multi-thread.
void endExecute()
Signale la fin de l'exécution.
Intervalle d'itération simple.
void arcaneParallelFor(Integer i0, Integer size, InstanceType *itype, void(InstanceType::*lambda_function)(Integer i0, Integer size))
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération [i0,...
Espace de nom pour l'utilisation des accélérateurs.
void runExtended(RunCommand &command, LoopBoundType< N, Int32 > bounds, const Lambda &func, const std::tuple< RemainingArgs... > &other_args)
Applique la lambda func sur l'intervalle d'itération donné par bounds.
std::ostream & operator<<(std::ostream &o, eExecutionPolicy exec_policy)
Affiche le nom de la politique d'exécution.
eExecutionPolicy
Politique d'exécution pour un Runner.
@ SYCL
Politique d'exécution utilisant l'environnement SYCL.
@ HIP
Politique d'exécution utilisant l'environnement HIP.
@ CUDA
Politique d'exécution utilisant l'environnement CUDA.
@ Sequential
Politique d'exécution séquentielle.
@ Thread
Politique d'exécution multi-thread.
void arcaneSequentialFor(LoopBoundType< 1, IndexType > bounds, const Lambda &func, ReducerArgs... reducer_args)
Applique le foncteur func sur une boucle 1D.
std::int64_t Int64
Type entier signé sur 64 bits.