12#ifndef ARCANE_ACCELERATOR_RUNCOMMANDLOOP_H
13#define ARCANE_ACCELERATOR_RUNCOMMANDLOOP_H
20#include "arcane/accelerator/RunCommand.h"
21#include "arcane/accelerator/KernelLauncher.h"
26namespace Arcane::Accelerator::impl
38template <
int N,
template <
int T,
typename>
class LoopBoundType,
typename Lambda,
typename... RemainingArgs>
void
40 const Lambda& func,
const RemainingArgs&... other_args)
42 Int64 vsize = bounds.nbElement();
48 switch (exec_policy) {
50 _applyKernelCUDA(launch_info, ARCANE_KERNEL_CUDA_FUNC(impl::doDirectGPULambdaArrayBounds2) < LoopBoundType<N, Int32>, Lambda, RemainingArgs... >, func, bounds, other_args...);
53 _applyKernelHIP(launch_info, ARCANE_KERNEL_HIP_FUNC(impl::doDirectGPULambdaArrayBounds2) < LoopBoundType<N, Int32>, Lambda, RemainingArgs... >, func, bounds, other_args...);
56 _applyKernelSYCL(launch_info, ARCANE_KERNEL_SYCL_FUNC(impl::DoDirectSYCLLambdaArrayBounds) < LoopBoundType<N, Int32>, Lambda, RemainingArgs... > {}, func, bounds, other_args...);
65 ARCANE_FATAL(
"Invalid execution policy '{0}'", exec_policy);
73template <
typename LoopBoundType,
typename... RemainingArgs>
74class ExtendedArrayBoundLoop
78 ExtendedArrayBoundLoop(
const LoopBoundType& bounds, RemainingArgs... args)
80 , m_remaining_args(args...)
83 LoopBoundType m_bounds;
84 std::tuple<RemainingArgs...> m_remaining_args;
87template <
typename ExtentType,
typename... RemainingArgs>
auto
106template <
typename ExtentType,
typename Lambda>
void
116template <
int N,
typename Lambda>
void
126template <
int N,
typename Lambda>
void
133template <
int N,
template <
int T,
typename>
class LoopBoundType,
typename Lambda,
typename... RemainingArgs>
void
135 const Lambda& func,
const std::tuple<RemainingArgs...>& other_args)
143template <
int N,
typename LoopBoundType,
typename... RemainingArgs>
144class ArrayBoundRunCommand
148 ArrayBoundRunCommand(
RunCommand& command,
const LoopBoundType& bounds)
153 ArrayBoundRunCommand(
RunCommand& command,
const LoopBoundType& bounds,
const std::tuple<RemainingArgs...>& args)
156 , m_remaining_args(args)
160 LoopBoundType m_bounds;
161 std::tuple<RemainingArgs...> m_remaining_args;
164template <
typename ExtentType>
auto
168 return { command, bounds };
171template <
typename ExtentType,
typename... RemainingArgs>
auto
172operator<<(RunCommand& command,
const impl::ExtendedArrayBoundLoop<ExtentType, RemainingArgs...>& ex_loop)
173-> ArrayBoundRunCommand<1, SimpleForLoopRanges<1, Int32>, RemainingArgs...>
175 return { command, ex_loop.m_bounds, ex_loop.m_remaining_args };
181 return { command, bounds };
187 return { command, bounds };
190template <
int N,
template <
int,
typename>
class ForLoopBoundType,
typename Lambda,
typename... RemainingArgs>
193 if constexpr (
sizeof...(RemainingArgs) > 0) {
194 runExtended(nr.m_command, nr.m_bounds, f, nr.m_remaining_args);
197 run(nr.m_command, nr.m_bounds, f);
//! Generic loop macro: expands to `A_FUNCINFO << bounds << <lambda head>`.
//
// The expansion ends with the parameter list of a capture-by-copy (`[=]`)
// lambda, so the caller writes the lambda body right after the macro.
//
//   iter_name  Name given to the lambda's single parameter; its type is
//              `decltype(bounds)::IndexType`.
//   bounds     Loop-bounds expression. It appears twice in the expansion:
//              once as a stream operand (evaluated) and once inside
//              `decltype` (unevaluated). It is not parenthesised, so an
//              argument containing operators of lower precedence than `<<`
//              would not group as intended.
//
// A_FUNCINFO and ARCCORE_HOST_DEVICE are defined elsewhere; presumably
// call-site trace information and a host/device function annotation
// respectively -- TODO confirm.
210#define RUNCOMMAND_LOOP(iter_name, bounds) \
211 A_FUNCINFO << bounds << [=] ARCCORE_HOST_DEVICE(typename decltype(bounds)::IndexType iter_name)
//! N-dimensional loop macro taking the extents as variadic arguments.
//
// Builds an `Arcane::ArrayBounds<typename Arcane::MDDimType<N>::DimType>`
// from `__VA_ARGS__`, streams it after A_FUNCINFO, and ends with the head
// of a capture-by-copy lambda whose single parameter is
// `Arcane::ArrayIndex<N> iter_name`. The caller supplies the lambda body.
//
//   iter_name  Name of the lambda's index parameter.
//   N          Dimension; used both for MDDimType<N> and ArrayIndex<N>.
//   ...        Arguments forwarded to the ArrayBounds constructor
//              (presumably one extent per dimension -- confirm).
214#define RUNCOMMAND_LOOPN(iter_name, N, ...) \
215 A_FUNCINFO << Arcane::ArrayBounds<typename Arcane::MDDimType<N>::DimType>(__VA_ARGS__) << [=] ARCCORE_HOST_DEVICE(Arcane::ArrayIndex<N> iter_name)
//! 2D loop macro over `Arcane::ArrayBounds<MDDim2>(x1, x2)`.
//
// Ends with the head of a capture-by-copy lambda whose single parameter is
// `Arcane::MDIndex<2> iter_name`; the caller supplies the lambda body.
//
// NOTE(review): `MDDim2` is written without the `Arcane::` qualifier, so
// the macro relies on that name being visible at the expansion site.
218#define RUNCOMMAND_LOOP2(iter_name, x1, x2) \
219 A_FUNCINFO << Arcane::ArrayBounds<MDDim2>(x1, x2) << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<2> iter_name)
//! 3D loop macro over `Arcane::ArrayBounds<MDDim3>(x1, x2, x3)`.
//
// Ends with the head of a capture-by-copy lambda whose single parameter is
// `Arcane::MDIndex<3> iter_name`; the caller supplies the lambda body.
//
// NOTE(review): `MDDim3` is written without the `Arcane::` qualifier, so
// the macro relies on that name being visible at the expansion site.
222#define RUNCOMMAND_LOOP3(iter_name, x1, x2, x3) \
223 A_FUNCINFO << Arcane::ArrayBounds<MDDim3>(x1, x2, x3) << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<3> iter_name)
//! 4D loop macro over `Arcane::ArrayBounds<MDDim4>(x1, x2, x3, x4)`.
//
// Ends with the head of a capture-by-copy lambda whose single parameter is
// `Arcane::MDIndex<4> iter_name`; the caller supplies the lambda body.
//
// NOTE(review): `MDDim4` is written without the `Arcane::` qualifier, so
// the macro relies on that name being visible at the expansion site.
226#define RUNCOMMAND_LOOP4(iter_name, x1, x2, x3, x4) \
227 A_FUNCINFO << Arcane::ArrayBounds<MDDim4>(x1, x2, x3, x4) << [=] ARCCORE_HOST_DEVICE(Arcane::MDIndex<4> iter_name)
//! 1D loop macro with optional extra arguments.
//
// Wraps `Arcane::ArrayBounds<MDDim1>(x1)` and any extra arguments with
// `::Arcane::Accelerator::impl::makeExtendedArrayBoundLoop(...)`, streams
// the result after A_FUNCINFO, and ends with the head of a capture-by-copy
// lambda. The caller supplies the lambda body.
//
//   iter_name  Name of the lambda's first parameter, typed
//              `Arcane::ArrayIndex<1>`.
//   x1         Argument passed to the ArrayBounds<MDDim1> constructor.
//   ...        Optional extra arguments. They are passed on to
//              makeExtendedArrayBoundLoop and also handed to
//              ARCANE_RUNCOMMAND_REDUCER_FOR_EACH inside the lambda
//              parameter list (defined elsewhere; presumably it emits one
//              additional lambda parameter per argument -- TODO confirm).
//
// `__VA_OPT__` is a C++20 preprocessor feature: the comma and the
// REDUCER_FOR_EACH expansion are emitted only when extra arguments exist.
236#define RUNCOMMAND_LOOP1(iter_name, x1, ...) \
237 A_FUNCINFO << ::Arcane::Accelerator::impl::makeExtendedArrayBoundLoop(Arcane::ArrayBounds<MDDim1>(x1) __VA_OPT__(, __VA_ARGS__)) \
238 << [=] ARCCORE_HOST_DEVICE(Arcane::ArrayIndex<1> iter_name __VA_OPT__(ARCANE_RUNCOMMAND_REDUCER_FOR_EACH(__VA_ARGS__)))
//! Same expansion as the 1D extended loop, with the bounds fixed to
//! `Arcane::ArrayBounds<MDDim1>(1)`.
//
// The lambda's `Arcane::ArrayIndex<1>` parameter is left unnamed, so the
// body written by the caller cannot refer to the iteration index.
//
//   ...  Optional extra arguments. They are passed on to
//        makeExtendedArrayBoundLoop and to
//        ARCANE_RUNCOMMAND_REDUCER_FOR_EACH (defined elsewhere; presumably
//        it emits one additional lambda parameter per argument -- TODO
//        confirm).
//
// `__VA_OPT__` is a C++20 preprocessor feature: the comma and the
// REDUCER_FOR_EACH expansion are emitted only when extra arguments exist.
243#define RUNCOMMAND_SINGLE(...) \
244 A_FUNCINFO << ::Arcane::Accelerator::impl::makeExtendedArrayBoundLoop(Arcane::ArrayBounds<MDDim1>(1) __VA_OPT__(, __VA_ARGS__)) \
245 << [=] ARCCORE_HOST_DEVICE(Arcane::ArrayIndex<1> __VA_OPT__(ARCANE_RUNCOMMAND_REDUCER_FOR_EACH(__VA_ARGS__)))
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Classes, Types et macros pour gérer la concurrence.
void _applyGenericLoop(RunCommand &command, LoopBoundType< N, Int32 > bounds, const Lambda &func, const RemainingArgs &... other_args)
Applique la lambda func sur une boucle bounds.
Gestion d'une commande sur accélérateur.
Objet temporaire pour conserver les informations d'exécution d'une commande et regrouper les tests.
void beginExecute()
Indique qu'on commence l'exécution de la commande.
void endExecute()
Signale la fin de l'exécution.
const ForLoopRunInfo & loopRunInfo() const
Informations d'exécution de la boucle.
Intervalle d'itération complexe.
Intervalle d'itération simple.
Espace de noms pour l'utilisation des accélérateurs.
void runExtended(RunCommand &command, LoopBoundType< N, Int32 > bounds, const Lambda &func, const std::tuple< RemainingArgs... > &other_args)
Applique la lambda func sur l'intervalle d'itération donné par bounds.
std::ostream & operator<<(std::ostream &o, eExecutionPolicy exec_policy)
Affiche le nom de la politique d'exécution.
eExecutionPolicy
Politique d'exécution pour un Runner.
@ SYCL
Politique d'exécution utilisant l'environnement SYCL.
@ HIP
Politique d'exécution utilisant l'environnement HIP.
@ CUDA
Politique d'exécution utilisant l'environnement CUDA.
@ Sequential
Politique d'exécution séquentielle.
@ Thread
Politique d'exécution multi-thread.
std::int64_t Int64
Type entier signé sur 64 bits.
void arcaneSequentialFor(LoopBoundType< 1, IndexType > bounds, const Lambda &func, ReducerArgs... reducer_args)
Applique le foncteur func sur une boucle 1D.
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loop_ranges.
std::int32_t Int32
Type entier signé sur 32 bits.