Arcane  4.1.12.0
User documentation
Loading...
Searching...
No Matches
CooperativeWorkGroupLoopRange.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* CooperativeWorkGroupLoopRange.h (C) 2000-2026 */
9/* */
10/* Loop for cooperative hierarchical parallelism. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ACCELERATOR_COOPERATIVEWORKGROUPLOOPRANGE_H
13#define ARCCORE_ACCELERATOR_COOPERATIVEWORKGROUPLOOPRANGE_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16
17#include "arccore/accelerator/WorkGroupLoopRange.h"
18
19/*---------------------------------------------------------------------------*/
20/*---------------------------------------------------------------------------*/
21
22namespace Arcane::Accelerator
23{
24
25/*---------------------------------------------------------------------------*/
26/*---------------------------------------------------------------------------*/
27
28/*!
29 * \brief Manages a WorkItem grid in a
30 * CooperativeWorkGroupLoopRange for the host.
31 *
32 * This class only has a barrier() method which performs
33 * a barrier on all participating threads in multi-threaded mode.
34 */
35class CooperativeHostWorkItemGrid
36{
37 template <typename T> friend class CooperativeWorkGroupLoopContext;
38
39 private:
40
41 //! Constructor for the host
42 explicit CooperativeHostWorkItemGrid(Int32 nb_block, Impl::ThreadGridSynchronizer* syncer)
43 : m_nb_block(nb_block)
44 , m_syncer(syncer)
45 {}
46
47 public:
48
49 //! Number of blocks in the grid
50 Int32 nbBlock() const { return m_nb_block; }
51
52 //! Blocks until all \a WorkItems in the grid have arrived here.
53 void barrier()
54 {
55 if (m_syncer)
56 m_syncer->sync();
57 }
58
59 private:
60
61 Int32 m_nb_block = 0;
62 Impl::ThreadGridSynchronizer* m_syncer = nullptr;
63};
64
65/*---------------------------------------------------------------------------*/
66/*---------------------------------------------------------------------------*/
67
68#if defined(ARCCORE_COMPILING_CUDA_OR_HIP)
69
70/*---------------------------------------------------------------------------*/
71/*---------------------------------------------------------------------------*/
72
73/*!
74 * \brief Manages the WorkItem grid in a
75 * CooperativeWorkGroupLoopRange for a CUDA or HIP device.
76 */
77class CooperativeDeviceWorkItemGrid
78{
79 template <typename T> friend class CooperativeWorkGroupLoopContext;
80
81 private:
82
83 /*!
84 * \brief Constructor for the device.
85 *
86 * This constructor does not need specific information because everything is
87 * retrieved via cooperative_groups::this_grid()
88 */
89 __device__ CooperativeDeviceWorkItemGrid()
90 : m_grid_group(cooperative_groups::this_grid())
91 {}
92
93 public:
94
95 //! Number of blocks in the grid
96 __device__ Int32 nbBlock() const { return m_grid_group.group_dim().x; }
97
98 //! Blocks until all \a WorkItems in the grid have arrived here.
99 __device__ void barrier() { m_grid_group.sync(); }
100
101 private:
102
103 cooperative_groups::grid_group m_grid_group;
104};
105
106/*---------------------------------------------------------------------------*/
107/*---------------------------------------------------------------------------*/
108
109#endif
110
111/*---------------------------------------------------------------------------*/
112/*---------------------------------------------------------------------------*/
113
114/*!
115 * \brief Execution context for a command on a set of blocks.
116 *
117 * This class is used for the host (sequential and multi-threaded) and
118 * for CUDA and ROCM/HIP. The group() method is different on accelerator and on the host, which
119 * allows for specialized command processing.
120 */
121template <typename IndexType_>
122class CooperativeWorkGroupLoopContext
123: public WorkGroupLoopContextBase<IndexType_>
124{
125 // For accessing constructors
126 friend class CooperativeWorkGroupLoopRange<IndexType_>;
129 using BaseClass = WorkGroupLoopContextBase<IndexType_>;
130
131 public:
132
133 using IndexType = IndexType_;
134
135 private:
136
137 //! This constructor is used in the host implementation.
138 constexpr CooperativeWorkGroupLoopContext(IndexType loop_index, Int32 group_index,
139 Int32 group_size, Int32 nb_active_item,
140 IndexType total_size, Int32 nb_block, Impl::ThreadGridSynchronizer* syncer)
141 : BaseClass(loop_index, group_index, group_size, nb_active_item, total_size)
142 , m_nb_block(nb_block)
143 , m_syncer(syncer)
144 {
145 }
146
147 // This constructor is only used on the device
148 // It does nothing because useful values are retrieved via
149 // cooperative_groups::this_thread_block()
150 explicit constexpr ARCCORE_DEVICE CooperativeWorkGroupLoopContext(IndexType total_size)
151 : BaseClass(total_size)
152 {}
153
154 public:
155
156#if defined(ARCCORE_DEVICE_CODE) && !defined(ARCCORE_COMPILING_SYCL)
157 //! Current group. For CUDA/ROCM, this is a thread block.
158 __device__ CooperativeDeviceWorkItemGrid grid() const { return CooperativeDeviceWorkItemGrid{}; }
159#else
160 //! Current group
161 CooperativeHostWorkItemGrid grid() const { return CooperativeHostWorkItemGrid(m_nb_block, m_syncer); }
162#endif
163
164 private:
165
166 Int32 m_nb_block = 0;
167 Impl::ThreadGridSynchronizer* m_syncer = nullptr;
168};
169
170/*---------------------------------------------------------------------------*/
171/*---------------------------------------------------------------------------*/
172
173/*
174 * Implementation for SYCL.
175 */
176#if defined(ARCCORE_COMPILING_SYCL)
177
178/*!
179 * \brief Manages the WorkItem grid in a CooperativeWorkGroupLoopRange for a Sycl device.
180 */
181class SyclCooperativeDeviceWorkItemGrid
182{
183 template <typename T> friend class SyclCooperativeWorkGroupLoopContext;
184
185 private:
186
187 explicit SyclCooperativeDeviceWorkItemGrid(sycl::nd_item<1> n)
188 : m_nd_item(n)
189 {
190 }
191
192 public:
193
194 //! Number of blocks in the grid
195 Int32 nbBlock() const { return static_cast<Int32>(m_nd_item.get_group_range(0)); }
196
197 //! Blocks until all \a CooperativeWorkItems in the grid have arrived here.
198 void barrier() { /* Not Yet Implemented */ }
199
200 private:
201
202 sycl::nd_item<1> m_nd_item;
203};
204
205/*---------------------------------------------------------------------------*/
206/*---------------------------------------------------------------------------*/
207
208/*!
209 * \brief Execution context of a CooperativeWorkGroupLoopRange for Sycl.
210 *
211 * This class is used only for the eAcceleratorPolicy::SYCL execution policy.
212 */
213template <typename IndexType_>
215: public SyclWorkGroupLoopContextBase<IndexType_>
216{
217 friend CooperativeWorkGroupLoopRange<IndexType_>;
218 friend Impl::WorkGroupLoopContextBuilder;
219
220 public:
221
222 using IndexType = IndexType_;
223
224 private:
225
226 // This constructor is only used on the device
227 explicit SyclCooperativeWorkGroupLoopContext(sycl::nd_item<1> nd_item, IndexType total_size)
228 : SyclWorkGroupLoopContextBase<IndexType_>(nd_item, total_size)
229 {
230 }
231
232 public:
233
234 //! Current grid
235 SyclCooperativeDeviceWorkItemGrid grid() const
236 {
237 return SyclCooperativeDeviceWorkItemGrid(this->m_nd_item);
238 }
239};
240
241/*---------------------------------------------------------------------------*/
242/*---------------------------------------------------------------------------*/
243
244#endif // ARCCORE_COMPILING_SYCL
245
246/*---------------------------------------------------------------------------*/
247/*---------------------------------------------------------------------------*/
248
249/*!
250 * \brief Iteration range of a loop using cooperative hierarchical parallelism.
251 *
252 * \sa WorkGroupLoopRangeBase
253 */
254template <typename IndexType_>
255class CooperativeWorkGroupLoopRange
256: public WorkGroupLoopRangeBase<true, IndexType_>
257{
258 public:
259
261 using IndexType = IndexType_;
262
263 public:
264
265 CooperativeWorkGroupLoopRange() = default;
266 explicit CooperativeWorkGroupLoopRange(IndexType total_nb_element)
267 : WorkGroupLoopRangeBase<true, IndexType_>(total_nb_element)
268 {}
269
270 public:
271};
272
273/*---------------------------------------------------------------------------*/
274/*---------------------------------------------------------------------------*/
275
276} // namespace Arcane::Accelerator
277
278/*---------------------------------------------------------------------------*/
279/*---------------------------------------------------------------------------*/
280
281#endif
282
283/*---------------------------------------------------------------------------*/
284/*---------------------------------------------------------------------------*/
Manages a WorkItem grid in a CooperativeWorkGroupLoopRange for the host.
Int32 nbBlock() const
Number of blocks in the grid.
void barrier()
Blocks until all WorkItems in the grid have arrived here.
Execution context for a command on a set of blocks.
CooperativeHostWorkItemGrid grid() const
Current group.
Iteration range of a loop using cooperative hierarchical parallelism.
constexpr WorkGroupLoopContextBase(IndexType loop_index, Int32 group_index, Int32 group_size, Int32 nb_active_item, IndexType total_size)
This constructor is used in the host implementation.
Namespace for accelerator usage.
std::int32_t Int32
Signed integer type of 32 bits.