Arcane  4.1.12.0
Developer documentation
Loading...
Searching...
No Matches
CooperativeWorkGroupLoopRange.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* CooperativeWorkGroupLoopRange.h (C) 2000-2026 */
9/* */
10/* Loop for cooperative hierarchical parallelism. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ACCELERATOR_COOPERATIVEWORKGROUPLOOPRANGE_H
13#define ARCCORE_ACCELERATOR_COOPERATIVEWORKGROUPLOOPRANGE_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16
17#include "arccore/accelerator/WorkGroupLoopRange.h"
18
19/*---------------------------------------------------------------------------*/
20/*---------------------------------------------------------------------------*/
21
22namespace Arcane::Accelerator
23{
24
25/*---------------------------------------------------------------------------*/
26/*---------------------------------------------------------------------------*/
27
36{
37 template <typename T> friend class CooperativeWorkGroupLoopContext;
38
39 private:
40
43 : m_nb_block(nb_block)
44 , m_syncer(syncer)
45 {}
46
47 public:
48
50 Int32 nbBlock() const { return m_nb_block; }
51
53 void barrier()
54 {
55 if (m_syncer)
56 m_syncer->sync();
57 }
58
59 private:
60
61 Int32 m_nb_block = 0;
62 Impl::ThreadGridSynchronizer* m_syncer = nullptr;
63};
64
65/*---------------------------------------------------------------------------*/
66/*---------------------------------------------------------------------------*/
67
68#if defined(ARCCORE_COMPILING_CUDA_OR_HIP)
69
70/*---------------------------------------------------------------------------*/
71/*---------------------------------------------------------------------------*/
72
77class CooperativeDeviceWorkItemGrid
78{
79 template <typename T> friend class CooperativeWorkGroupLoopContext;
80
81 private:
82
89 __device__ CooperativeDeviceWorkItemGrid()
90 : m_grid_group(cooperative_groups::this_grid())
91 {}
92
93 public:
94
96 __device__ Int32 nbBlock() const { return m_grid_group.group_dim().x; }
97
99 __device__ void barrier() { m_grid_group.sync(); }
100
101 private:
102
103 cooperative_groups::grid_group m_grid_group;
104};
105
106/*---------------------------------------------------------------------------*/
107/*---------------------------------------------------------------------------*/
108
109#endif
110
111/*---------------------------------------------------------------------------*/
112/*---------------------------------------------------------------------------*/
113
121template <typename IndexType_>
123: public WorkGroupLoopContextBase<IndexType_>
124{
125 // For accessing constructors
126 friend class CooperativeWorkGroupLoopRange<IndexType_>;
129 using BaseClass = WorkGroupLoopContextBase<IndexType_>;
130
131 public:
132
133 using IndexType = IndexType_;
134
135 private:
136
138 constexpr CooperativeWorkGroupLoopContext(IndexType loop_index, Int32 group_index,
139 Int32 group_size, Int32 nb_active_item,
140 IndexType total_size, Int32 nb_block, Impl::ThreadGridSynchronizer* syncer)
141 : BaseClass(loop_index, group_index, group_size, nb_active_item, total_size)
142 , m_nb_block(nb_block)
143 , m_syncer(syncer)
144 {
145 }
146
147 // This constructor is only used on the device
148 // It does nothing because useful values are retrieved via
149 // cooperative_groups::this_thread_block()
150 explicit constexpr ARCCORE_DEVICE CooperativeWorkGroupLoopContext(IndexType total_size)
151 : BaseClass(total_size)
152 {}
153
154 public:
155
156#if defined(ARCCORE_DEVICE_CODE) && !defined(ARCCORE_COMPILING_SYCL)
158 __device__ CooperativeDeviceWorkItemGrid grid() const { return CooperativeDeviceWorkItemGrid{}; }
159#else
161 CooperativeHostWorkItemGrid grid() const { return CooperativeHostWorkItemGrid(m_nb_block, m_syncer); }
162#endif
163
164 private:
165
166 Int32 m_nb_block = 0;
167 Impl::ThreadGridSynchronizer* m_syncer = nullptr;
168};
169
170/*---------------------------------------------------------------------------*/
171/*---------------------------------------------------------------------------*/
172
173/*
174 * Implementation for SYCL.
175 */
176#if defined(ARCCORE_COMPILING_SYCL)
177
181class SyclCooperativeDeviceWorkItemGrid
182{
183 template <typename T> friend class SyclCooperativeWorkGroupLoopContext;
184
185 private:
186
187 explicit SyclCooperativeDeviceWorkItemGrid(sycl::nd_item<1> n)
188 : m_nd_item(n)
189 {
190 }
191
192 public:
193
195 Int32 nbBlock() const { return static_cast<Int32>(m_nd_item.get_group_range(0)); }
196
198 void barrier() { /* Not Yet Implemented */ }
199
200 private:
201
202 sycl::nd_item<1> m_nd_item;
203};
204
205/*---------------------------------------------------------------------------*/
206/*---------------------------------------------------------------------------*/
207
213template <typename IndexType_>
215: public SyclWorkGroupLoopContextBase<IndexType_>
216{
217 friend CooperativeWorkGroupLoopRange<IndexType_>;
218 friend Impl::WorkGroupLoopContextBuilder;
219
220 public:
221
222 using IndexType = IndexType_;
223
224 private:
225
226 // This constructor is only used on the device
227 explicit SyclCooperativeWorkGroupLoopContext(sycl::nd_item<1> nd_item, IndexType total_size)
228 : SyclWorkGroupLoopContextBase<IndexType_>(nd_item, total_size)
229 {
230 }
231
232 public:
233
235 SyclCooperativeDeviceWorkItemGrid grid() const
236 {
237 return SyclCooperativeDeviceWorkItemGrid(this->m_nd_item);
238 }
239};
240
241/*---------------------------------------------------------------------------*/
242/*---------------------------------------------------------------------------*/
243
244#endif // ARCCORE_COMPILING_SYCL
245
246/*---------------------------------------------------------------------------*/
247/*---------------------------------------------------------------------------*/
248
254template <typename IndexType_>
255class CooperativeWorkGroupLoopRange
256: public WorkGroupLoopRangeBase<true, IndexType_>
257{
258 public:
259
261 using IndexType = IndexType_;
262
263 public:
264
265 CooperativeWorkGroupLoopRange() = default;
266 explicit CooperativeWorkGroupLoopRange(IndexType total_nb_element)
267 : WorkGroupLoopRangeBase<true, IndexType_>(total_nb_element)
268 {}
269
270 public:
271};
272
273/*---------------------------------------------------------------------------*/
274/*---------------------------------------------------------------------------*/
275
276} // namespace Arcane::Accelerator
277
278/*---------------------------------------------------------------------------*/
279/*---------------------------------------------------------------------------*/
280
281#endif
282
283/*---------------------------------------------------------------------------*/
284/*---------------------------------------------------------------------------*/
Manages a WorkItem grid in a CooperativeWorkGroupLoopRange for the host.
CooperativeHostWorkItemGrid(Int32 nb_block, Impl::ThreadGridSynchronizer *syncer)
Constructor for the host.
Int32 nbBlock() const
Number of blocks in the grid.
void barrier()
Blocks until all WorkItems in the grid have arrived here.
Execution context for a command on a set of blocks.
CooperativeHostWorkItemGrid grid() const
Current group.
constexpr CooperativeWorkGroupLoopContext(IndexType loop_index, Int32 group_index, Int32 group_size, Int32 nb_active_item, IndexType total_size, Int32 nb_block, Impl::ThreadGridSynchronizer *syncer)
This constructor is used in the host implementation.
Iteration range of a loop using cooperative hierarchical parallelism.
Class to execute a portion of the loop sequentially on the host.
constexpr WorkGroupLoopContextBase(IndexType loop_index, Int32 group_index, Int32 group_size, Int32 nb_active_item, IndexType total_size)
This constructor is used in the host implementation.
std::int32_t Int32
Signed integer type of 32 bits.