12#ifndef ARCCORE_ACCELERATOR_COOPERATIVEWORKGROUPLOOPRANGE_H
13#define ARCCORE_ACCELERATOR_COOPERATIVEWORKGROUPLOOPRANGE_H
17#include "arccore/accelerator/WorkGroupLoopRange.h"
37 template <
typename T>
friend class CooperativeWorkGroupLoopContext;
43 : m_nb_block(nb_block)
68#if defined(ARCCORE_COMPILING_CUDA_OR_HIP)
77class CooperativeDeviceWorkItemGrid
89 __device__ CooperativeDeviceWorkItemGrid()
90 : m_grid_group(cooperative_groups::this_grid())
96 __device__ Int32 nbBlock()
const {
return m_grid_group.group_dim().x; }
99 __device__
void barrier() { m_grid_group.sync(); }
103 cooperative_groups::grid_group m_grid_group;
121template <
typename IndexType_>
133 using IndexType = IndexType_;
141 : BaseClass(loop_index, group_index, group_size, nb_active_item, total_size)
142 , m_nb_block(nb_block)
151 : BaseClass(total_size)
156#if defined(ARCCORE_DEVICE_CODE) && !defined(ARCCORE_COMPILING_SYCL)
158 __device__ CooperativeDeviceWorkItemGrid
grid()
const {
return CooperativeDeviceWorkItemGrid{}; }
166 Int32 m_nb_block = 0;
176#if defined(ARCCORE_COMPILING_SYCL)
181class SyclCooperativeDeviceWorkItemGrid
187 explicit SyclCooperativeDeviceWorkItemGrid(sycl::nd_item<1> n)
195 Int32 nbBlock()
const {
return static_cast<Int32
>(m_nd_item.get_group_range(0)); }
202 sycl::nd_item<1> m_nd_item;
213template <
typename IndexType_>
217 friend CooperativeWorkGroupLoopRange<IndexType_>;
218 friend Impl::WorkGroupLoopContextBuilder;
222 using IndexType = IndexType_;
227 explicit SyclCooperativeWorkGroupLoopContext(sycl::nd_item<1> nd_item, IndexType total_size)
228 : SyclWorkGroupLoopContextBase<IndexType_>(nd_item, total_size)
235 SyclCooperativeDeviceWorkItemGrid grid()
const
237 return SyclCooperativeDeviceWorkItemGrid(this->m_nd_item);
254template <
typename IndexType_>
255class CooperativeWorkGroupLoopRange
256:
public WorkGroupLoopRangeBase<true, IndexType_>
261 using IndexType = IndexType_;
265 CooperativeWorkGroupLoopRange() =
default;
266 explicit CooperativeWorkGroupLoopRange(IndexType total_nb_element)
267 : WorkGroupLoopRangeBase<true, IndexType_>(total_nb_element)
Manages a WorkItem grid in a CooperativeWorkGroupLoopRange for the host.
CooperativeHostWorkItemGrid(Int32 nb_block, Impl::ThreadGridSynchronizer *syncer)
Constructor for the host.
Int32 nbBlock() const
Number of blocks in the grid.
void barrier()
Blocks until all WorkItems in the grid have arrived here.
Execution context for a command on a set of blocks.
CooperativeHostWorkItemGrid grid() const
Current grid.
constexpr CooperativeWorkGroupLoopContext(IndexType loop_index, Int32 group_index, Int32 group_size, Int32 nb_active_item, IndexType total_size, Int32 nb_block, Impl::ThreadGridSynchronizer *syncer)
This constructor is used in the host implementation.
Iteration range of a loop using cooperative hierarchical parallelism.
Class to manage grid synchronization in multi-threaded mode.
Class to execute a portion of the loop sequentially on the host.
constexpr WorkGroupLoopContextBase(IndexType loop_index, Int32 group_index, Int32 group_size, Int32 nb_active_item, IndexType total_size)
This constructor is used in the host implementation.
Namespace for accelerator usage.
std::int32_t Int32
Signed integer type of 32 bits.