12#ifndef ARCANE_ACCELERATOR_RUNCOMMANDENUMERATE_H
13#define ARCANE_ACCELERATOR_RUNCOMMANDENUMERATE_H
17#include "arccore/common/accelerator/RunCommand.h"
18#include "arcane/accelerator/KernelLauncher.h"
21#include "arcane/core/ItemGroup.h"
24#include "arccore/common/HostKernelRemainingArgsHelper.h"
28#if defined(ARCCORE_EXPERIMENTAL_GRID_STRIDE)
29#include "arccore/common/StridedLoopRanges.h"
35namespace Arcane::Accelerator::Impl
44template <
typename TraitsType_>
45class ItemLocalIdsLoopRanges
49 using TraitsType = TraitsType_;
50 using BuilderType = TraitsType::BuilderType;
58 constexpr Int64 nbElement()
const {
return m_ids.size(); }
68#if defined(ARCCORE_COMPILING_CUDA_OR_HIP)
70template <
typename LoopBoundsType,
typename Lambda,
typename... RemainingArgs> __global__
void
71doIndirectGPULambda2(LoopBoundsType bounds, Lambda func, RemainingArgs... remaining_args)
74 auto privatizer = privatize(func);
75 auto& body = privatizer.privateCopy();
77 Int32 i = blockDim.x * blockIdx.x + threadIdx.x;
81 if constexpr (
requires { bounds.nbStride(); }) {
84 using BuilderType = LoopBoundsType::LoopBoundType::BuilderType;
86 Int32 nb_grid_stride = bounds.nbStride();
87 Int32 offset = blockDim.x * gridDim.x;
88 Int64 nb_item = bounds.nbOriginalElement();
91 for (
Int32 k = 0; k < nb_grid_stride; ++k) {
92 Int32 true_i = i + (offset * k);
93 if (true_i < nb_item) {
95 body(BuilderType::create(true_i, lid), remaining_args...);
100 using BuilderType = LoopBoundsType::BuilderType;
103 SmallSpan<const Int32> ids = bounds.ids();
104 if (i < ids.size()) {
106 body(BuilderType::create(i, lid), remaining_args...);
121#if defined(ARCCORE_COMPILING_SYCL)
124template <
typename TraitsType,
typename Lambda,
typename... RemainingArgs>
125class DoIndirectSYCLLambda
129 void operator()(sycl::nd_item<1> x, SmallSpan<std::byte> shared_memory,
130 SmallSpan<const Int32> ids, Lambda func,
131 RemainingArgs... remaining_args)
const
133 using BuilderType = TraitsType::BuilderType;
135 auto privatizer = privatize(func);
136 auto& body = privatizer.privateCopy();
138 Int32 i =
static_cast<Int32
>(x.get_global_id(0));
139 SyclKernelRemainingArgsHelper::applyAtBegin(x, shared_memory, remaining_args...);
140 if (i < ids.size()) {
142 body(BuilderType::create(i, lid), remaining_args...);
144 SyclKernelRemainingArgsHelper::applyAtEnd(x, shared_memory, remaining_args...);
146 void operator()(sycl::id<1> x, SmallSpan<const Int32> ids, Lambda func)
const
148 using BuilderType = TraitsType::BuilderType;
150 auto privatizer = privatize(func);
151 auto& body = privatizer.privateCopy();
153 Int32 i =
static_cast<Int32
>(x);
154 if (i < ids.size()) {
156 body(BuilderType::create(i, lid));
185class IteratorWithIndex
190 constexpr ARCCORE_HOST_DEVICE IteratorWithIndex(
Int32 i, T v)
197 constexpr ARCCORE_HOST_DEVICE
Int32 index()
const {
return m_index; }
198 constexpr ARCCORE_HOST_DEVICE T value()
const {
return m_value; }
211namespace Arcane::impl
238 constexpr ARCCORE_HOST_DEVICE
static T create(
Int32, T value)
249namespace Arcane::Accelerator::impl
281 using ItemType =
typename ItemLocalIdT<T>::ItemType;
292 using ItemType =
typename T::ItemType;
310template <
typename ItemType>
311class RunCommandItemContainer
316 : m_item_group(group)
320 : m_item_vector_view(item_vector_view)
321 , m_unpadded_vector_view(item_vector_view)
327 Int32 size()
const {
return m_unpadded_vector_view.size(); }
331 if (!m_item_group.null())
332 return m_item_group._paddedView();
333 return m_item_vector_view;
355template <RunCommandEnumerateIteratorConcept IteratorValueType_>
356class RunCommandItemEnumeratorTraitsT
361 using ItemType =
typename SubTraitsType::ItemType;
363 using ValueType =
typename SubTraitsType::ValueType;
365 using BuilderType =
typename SubTraitsType::BuilderType;
370 : m_item_container(group)
373 : m_item_container(vector_view)
385template <
typename TraitsType,
typename ContainerType,
typename Lambda,
typename... RemainingArgs>
386void _doItemsLambda(
Int32 base_index,
ContainerType sub_items,
const Lambda& func, RemainingArgs... remaining_args)
388 using ItemType = TraitsType::ItemType;
389 using BuilderType = TraitsType::BuilderType;
391 auto privatizer = Impl::privatize(func);
392 auto& body = privatizer.privateCopy();
397 body(BuilderType::create(iitem.index() + base_index,
LocalIdType(iitem.itemLocalId())), remaining_args...);
408template <
typename TraitsType,
typename Lambda,
typename... RemainingArgs>
void
410 const Lambda& func,
const RemainingArgs&... remaining_args)
416 using ItemType =
typename TraitsType::ItemType;
419 [[maybe_unused]] LoopBoundType bounds(ids);
421#if defined(ARCCORE_EXPERIMENTAL_GRID_STRIDE) && defined(ARCCORE_COMPILING_CUDA_OR_HIP)
423 TrueLoopBoundType bounds2(command.
nbStride(), bounds);
426 using TrueLoopBoundType = LoopBoundType;
427 [[maybe_unused]]
const TrueLoopBoundType& bounds2 = bounds;
434 switch (exec_policy) {
436 ARCCORE_KERNEL_CUDA_FUNC((Impl::doIndirectGPULambda2<TrueLoopBoundType, Lambda, RemainingArgs...>),
437 launch_info, func, bounds2, remaining_args...);
440 ARCCORE_KERNEL_HIP_FUNC((Impl::doIndirectGPULambda2<TrueLoopBoundType, Lambda, RemainingArgs...>),
441 launch_info, func, bounds2, remaining_args...);
444 ARCCORE_KERNEL_SYCL_FUNC((Impl::DoIndirectSYCLLambda<TraitsType, Lambda, RemainingArgs...>{}),
445 launch_info, func, ids, remaining_args...);
448 impl::_doItemsLambda<TraitsType>(0, items.paddedView(), func, remaining_args...);
453 impl::_doItemsLambda<TraitsType>(base_index, sub_items, func, remaining_args...);
457 ARCCORE_FATAL(
"Invalid execution policy '{0}'", exec_policy);
465template <
typename TraitsType,
typename... RemainingArgs>
466class ItemRunCommandArgs
470 ItemRunCommandArgs(
const TraitsType& traits,
const RemainingArgs&... remaining_args)
472 , m_remaining_args(remaining_args...)
479 std::tuple<RemainingArgs...> m_remaining_args;
493template <
typename TraitsType,
typename Lambda>
void
494run(
RunCommand& command,
const TraitsType& traits,
const Lambda& func)
502template <
typename TraitsType,
typename... RemainingArgs>
507 ItemRunCommand(
RunCommand& command,
const TraitsType& traits)
513 ItemRunCommand(
RunCommand& command,
const TraitsType& traits,
const std::tuple<RemainingArgs...>& remaining_args)
516 , m_remaining_args(remaining_args)
524 std::tuple<RemainingArgs...> m_remaining_args;
530template <
typename ItemType>
auto
542template <
typename ItemType>
auto
545 using TraitsType = impl::RunCommandItemEnumeratorTraitsT<ItemType>;
551template <
typename ItemType>
auto
552operator<<(RunCommand& command,
const ItemGroupT<ItemType>& items)
554 using TraitsType = impl::RunCommandItemEnumeratorTraitsT<ItemType>;
555 return ItemRunCommand<TraitsType>(command, TraitsType(items));
558template <
typename TraitsType,
typename Lambda>
561 run(nr.m_command, nr.m_traits, f);
564template <
typename TraitsType,
typename... RemainingArgs>
auto
567 return ItemRunCommand<TraitsType, RemainingArgs...>(command, args.m_traits, args.m_remaining_args);
570template <
typename TraitsType,
typename Lambda,
typename... RemainingArgs>
573 if constexpr (
sizeof...(RemainingArgs) > 0) {
577 run(nr.m_command, nr.m_traits, f);
585namespace Arcane::Accelerator::impl
591template <
typename ItemTypeName,
typename ItemContainerType,
typename... RemainingArgs>
auto
592makeExtendedItemEnumeratorLoop(
const ItemContainerType& container_type,
593 const RemainingArgs&... remaining_args)
596 return ItemRunCommandArgs<TraitsType, RemainingArgs...>(TraitsType(container_type), remaining_args...);
626#define RUNCOMMAND_ENUMERATE(ItemTypeName, iter_name, item_group, ...) \
627 A_FUNCINFO << ::Arcane::Accelerator::impl::makeExtendedItemEnumeratorLoop<ItemTypeName>(item_group __VA_OPT__(, __VA_ARGS__)) \
628 << [=] ARCCORE_HOST_DEVICE(::Arcane::Accelerator::impl::RunCommandItemEnumeratorTraitsT<ItemTypeName>::ValueType iter_name \
629 __VA_OPT__(ARCCORE_RUNCOMMAND_REMAINING_FOR_EACH(__VA_ARGS__)))
#define ARCCORE_FATAL(...)
Macro throwing a FatalErrorException.
Classes, Types, and macros for managing concurrency.
Declarations of types on entities.
void _applyItems(RunCommand &command, typename TraitsType::ContainerType items, const Lambda &func, const RemainingArgs &... remaining_args)
Applies the enumeration func on the entity list items.
static ARCCORE_DEVICE void applyAtEnd(Int32 index, RemainingArgs &... remaining_args)
Applies the functors of additional arguments at the end of the kernel.
static ARCCORE_DEVICE void applyAtBegin(Int32 index, RemainingArgs &... remaining_args)
Applies the functors of additional arguments at the beginning of the kernel.
Information for the accelerator loop over entities.
Temporary object to store the execution information of a command and group tests.
void beginExecute()
Indicates that command execution is starting.
void endExecute()
Signals the end of execution.
Class to manage the decomposition of a loop into multiple parts.
Management of an accelerator command.
Int32 nbStride() const
Number of loop decomposition strides.
Container for RunCommandEnumerate.
Template to know the entity type associated with T.
Characteristics of an enumerator for a command on entities.
static void applyAtEnd(RemainingArgs &... remaining_args)
Applies the functors of additional arguments at the end of the iteration.
static void applyAtBegin(RemainingArgs &... remaining_args)
Applies the functors of additional arguments at the beginning of the iteration.
Reference to a group of a given kind.
ItemVectorView _unpaddedView() const
View of the group entities without padding for vectorization.
Index of an ItemType entity in a variable.
View on a typed array of entities.
View on a vector of entities.
Base class for an iterator that preserves the iteration index.
View of an array of elements of type T.
Concept to constrain values in RUNCOMMAND_ENUMERATE.
void arcaneParallelForeach(const ItemVectorView &items_view, const ForLoopRunInfo &run_info, InstanceType *instance, void(InstanceType::*function)(ItemVectorViewT< ItemType > items))
Applies the method function of the instance instance concurrently on the view items_view with the opt...
Namespace for accelerator usage.
eExecutionPolicy
Execution policy for a Runner.
@ SYCL
Execution policy using the SYCL environment.
@ HIP
Execution policy using the HIP environment.
@ CUDA
Execution policy using the CUDA environment.
@ Sequential
Sequential execution policy.
@ Thread
Multi-threaded execution policy.
-- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature --
std::int64_t Int64
Signed integer type of 64 bits.
Int32 Integer
Type representing an integer.
Int32 LocalIdType
Type of integers used to store local identifiers of entities.
std::int32_t Int32
Signed integer type of 32 bits.