Arcane  v4.1.4.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
CooperativeWorkGroupLoopRange.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* CooperativeWorkGroupLoopRange.h (C) 2000-2026 */
9/* */
10/* Boucle pour le parallélisme hiérarchique coopératif. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ACCELERATOR_COOPERATIVEWORKGROUPLOOPRANGE_H
13#define ARCCORE_ACCELERATOR_COOPERATIVEWORKGROUPLOOPRANGE_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16
17#include "arccore/accelerator/WorkGroupLoopRange.h"
18
19/*---------------------------------------------------------------------------*/
20/*---------------------------------------------------------------------------*/
21
22namespace Arcane::Accelerator
23{
24
25/*---------------------------------------------------------------------------*/
26/*---------------------------------------------------------------------------*/
// Host-side grid of work items for a CooperativeWorkGroupLoopRange
// (CooperativeHostWorkItemGrid). The class declaration line and the
// constructor signature line are not present in this listing.
35{
// Friend so that CooperativeWorkGroupLoopContext can reach the private constructor.
36 template<typename T> friend class CooperativeWorkGroupLoopContext;
37
38 private:
39
// Initializer list of the host constructor
// CooperativeHostWorkItemGrid(Int32 nb_block, Impl::ThreadGridSynchronizer* syncer):
// stores the block count and the (possibly null) synchronizer pointer.
42 : m_nb_block(nb_block)
43 , m_syncer(syncer)
44 {}
45
46 public:
47
// Number of blocks in the grid, as given at construction.
49 Int32 nbBlock() const { return m_nb_block; }
50
// Calls sync() on the synchronizer when one was provided; does nothing
// when the synchronizer pointer is null.
52 void barrier()
53 {
54 if (m_syncer)
55 m_syncer->sync();
56 }
57
58 private:
59
// Block count returned by nbBlock(); defaults to 0.
60 Int32 m_nb_block = 0;
// Non-owning pointer used by barrier(); nullptr disables synchronization.
// NOTE(review): lifetime of the pointed-to object is managed elsewhere — confirm with the caller.
61 Impl::ThreadGridSynchronizer* m_syncer = nullptr;
62};
63
64/*---------------------------------------------------------------------------*/
65/*---------------------------------------------------------------------------*/
66
67#if defined(ARCCORE_COMPILING_CUDA_OR_HIP)
68
69/*---------------------------------------------------------------------------*/
70/*---------------------------------------------------------------------------*/
// Device-side grid handle, compiled only when ARCCORE_COMPILING_CUDA_OR_HIP
// is defined. Wraps a cooperative_groups::grid_group and exposes
// nbBlock() and barrier(), like the host-side grid class.
75class CooperativeDeviceWorkItemGrid
76{
// Friend so that CooperativeWorkGroupLoopContext can call the private constructor.
77 template <typename T> friend class CooperativeWorkGroupLoopContext;
78
79 private:
80
// Device-only constructor: captures the grid group returned by
// cooperative_groups::this_grid() for the calling thread.
87 __device__ CooperativeDeviceWorkItemGrid()
88 : m_grid_group(cooperative_groups::this_grid())
89 {}
90
91 public:
92
// Returns the x component of the grid group's dimensions as the block count.
// NOTE(review): the value is converted to Int32 implicitly here, whereas
// SyclCooperativeDeviceWorkItemGrid::nbBlock() uses an explicit static_cast —
// consider aligning the two.
94 __device__ Int32 nbBlock() const { return m_grid_group.group_dim().x; }
95
// Synchronizes by calling sync() on the grid group.
97 __device__ void barrier() { m_grid_group.sync(); }
98
99 private:
100
// Grid group captured at construction.
101 cooperative_groups::grid_group m_grid_group;
102};
103
104/*---------------------------------------------------------------------------*/
105/*---------------------------------------------------------------------------*/
106
107#endif
108
109/*---------------------------------------------------------------------------*/
110/*---------------------------------------------------------------------------*/
// Execution context of a command over a set of blocks
// (CooperativeWorkGroupLoopContext). Extends WorkGroupLoopContextBase with
// access to a grid object through grid(). The class-name line is not
// present in this listing.
119template <typename IndexType_>
121: public WorkGroupLoopContextBase<IndexType_>
122{
123 // Gives CooperativeWorkGroupLoopRange access to the private constructors
124 friend class CooperativeWorkGroupLoopRange<IndexType_>;
127 using BaseClass = WorkGroupLoopContextBase<IndexType_>;
128
129 public:
130
// Index type used for the loop index and the total size.
131 using IndexType = IndexType_;
132
133 private:
134
// Host constructor: forwards the loop/group parameters to the base class and
// stores the block count and the synchronizer that grid() passes on to
// CooperativeHostWorkItemGrid.
136 constexpr CooperativeWorkGroupLoopContext(IndexType loop_index, Int32 group_index,
137 Int32 group_size, Int32 nb_active_item,
138 IndexType total_size, Int32 nb_block, Impl::ThreadGridSynchronizer* syncer)
139 : BaseClass(loop_index, group_index, group_size, nb_active_item, total_size)
140 , m_nb_block(nb_block)
141 , m_syncer(syncer)
142 {
143 }
144
145 // This constructor is only used on the device.
146 // It does nothing because the useful values are retrieved through cooperative_groups::this_thread_block()
// m_nb_block and m_syncer keep their default values (0 and nullptr) here.
147 explicit constexpr ARCCORE_DEVICE CooperativeWorkGroupLoopContext(IndexType total_size)
148 : BaseClass(total_size)
149 {}
150
151 public:
152
// grid(): in device code that is not SYCL, returns a freshly constructed
// CooperativeDeviceWorkItemGrid; otherwise returns a host grid built from
// the stored block count and synchronizer.
153#if defined(ARCCORE_DEVICE_CODE) && !defined(ARCCORE_COMPILING_SYCL)
155 __device__ CooperativeDeviceWorkItemGrid grid() const { return CooperativeDeviceWorkItemGrid{}; }
156#else
158 CooperativeHostWorkItemGrid grid() const { return CooperativeHostWorkItemGrid(m_nb_block, m_syncer); }
159#endif
160
161 private:
162
// Block count handed to the host grid; only set by the host constructor.
163 Int32 m_nb_block = 0;
// Non-owning synchronizer pointer handed to the host grid; may be nullptr.
164 Impl::ThreadGridSynchronizer* m_syncer = nullptr;
165};
166
167/*---------------------------------------------------------------------------*/
168/*---------------------------------------------------------------------------*/
169/*
170 * Implémentation pour SYCL.
171 */
172#if defined(ARCCORE_COMPILING_SYCL)
173
// SYCL device-side grid handle, compiled only when ARCCORE_COMPILING_SYCL is
// defined. Wraps a sycl::nd_item<1> and exposes nbBlock() and barrier().
177class SyclCooperativeDeviceWorkItemGrid
178{
// Friend so that SyclCooperativeWorkGroupLoopContext can call the private constructor.
179 template <typename T> friend class SyclCooperativeWorkGroupLoopContext;
180
181 private:
182
// Stores a copy of the nd_item describing the current work item.
183 explicit SyclCooperativeDeviceWorkItemGrid(sycl::nd_item<1> n)
184 : m_nd_item(n)
185 {
186 }
187
188 public:
189
// Returns the group range along dimension 0, converted to Int32.
191 Int32 nbBlock() const { return static_cast<Int32>(m_nd_item.get_group_range(0)); }
192
// NOTE(review): currently a no-op — callers get no grid-wide
// synchronization on SYCL until this is implemented.
194 void barrier() { /* Not Yet Implemented */ }
195
196 private:
197
// nd_item captured at construction.
198 sycl::nd_item<1> m_nd_item;
199};
200
201/*---------------------------------------------------------------------------*/
202/*---------------------------------------------------------------------------*/
// SYCL execution context (SyclCooperativeWorkGroupLoopContext): extends
// SyclWorkGroupLoopContextBase with access to a grid object through grid().
// The class-name line is not present in this listing.
209template <typename IndexType_>
211: public SyclWorkGroupLoopContextBase<IndexType_>
212{
// Friends that are allowed to call the private constructor.
213 friend CooperativeWorkGroupLoopRange<IndexType_>;
214 friend Impl::WorkGroupLoopContextBuilder;
215
216 public:
217
// Index type used for the total size.
218 using IndexType = IndexType_;
219
220 private:
221
222 // This constructor is only used on the device
// It forwards the nd_item and the total size to the base class.
223 explicit SyclCooperativeWorkGroupLoopContext(sycl::nd_item<1> nd_item, IndexType total_size)
224 : SyclWorkGroupLoopContextBase<IndexType_>(nd_item, total_size)
225 {
226 }
227
228 public:
229
// Builds a grid handle from m_nd_item.
// NOTE(review): m_nd_item is presumably a member of the base class — confirm.
231 SyclCooperativeDeviceWorkItemGrid grid() const
232 {
233 return SyclCooperativeDeviceWorkItemGrid(this->m_nd_item);
234 }
235};
236
237/*---------------------------------------------------------------------------*/
238/*---------------------------------------------------------------------------*/
239
240#endif // ARCCORE_COMPILING_SYCL
241
242/*---------------------------------------------------------------------------*/
243/*---------------------------------------------------------------------------*/
// Iteration range of a loop using cooperative hierarchical parallelism.
// Thin wrapper over WorkGroupLoopRangeBase instantiated with `true` as its
// first template argument.
// NOTE(review): `true` presumably selects the cooperative variant — confirm
// against WorkGroupLoopRangeBase.
250template <typename IndexType_>
251class CooperativeWorkGroupLoopRange
252: public WorkGroupLoopRangeBase<true, IndexType_>
253{
254 public:
255
// Index type used for the total number of elements.
257 using IndexType = IndexType_;
258
259 public:
260
// Default construction leaves everything to the base class defaults.
261 CooperativeWorkGroupLoopRange() = default;
// Builds a range by forwarding total_nb_element to the base class.
262 explicit CooperativeWorkGroupLoopRange(IndexType total_nb_element)
263 : WorkGroupLoopRangeBase<true, IndexType_>(total_nb_element)
264 {}
265
266 public:
267};
268
269/*---------------------------------------------------------------------------*/
270/*---------------------------------------------------------------------------*/
271
272} // namespace Arcane::Accelerator
273
274/*---------------------------------------------------------------------------*/
275/*---------------------------------------------------------------------------*/
276
277#endif
278
279/*---------------------------------------------------------------------------*/
280/*---------------------------------------------------------------------------*/
Gère une grille de WorkItem dans un CooperativeWorkGroupLoopRange pour l'hôte.
CooperativeHostWorkItemGrid(Int32 nb_block, Impl::ThreadGridSynchronizer *syncer)
Constructeur pour l'hôte.
Int32 nbBlock() const
Nombre de blocs dans la grille.
void barrier()
Bloque tant que tous les WorkItem de la grille ne sont pas arrivés ici.
Contexte d'exécution d'une commande sur un ensemble de blocs.
CooperativeHostWorkItemGrid grid() const
Grille courante.
constexpr CooperativeWorkGroupLoopContext(IndexType loop_index, Int32 group_index, Int32 group_size, Int32 nb_active_item, IndexType total_size, Int32 nb_block, Impl::ThreadGridSynchronizer *syncer)
Ce constructeur est utilisé dans l'implémentation hôte.
Intervalle d'itération d'une boucle utilisant le parallélisme hiérarchique coopératif.
Classe pour exécuter en séquentiel sur l'hôte une partie de la boucle.
constexpr WorkGroupLoopContextBase(IndexType loop_index, Int32 group_index, Int32 group_size, Int32 nb_active_item, IndexType total_size)
Ce constructeur est utilisé dans l'implémentation hôte.
Espace de nom pour l'utilisation des accélérateurs.
std::int32_t Int32
Type entier signé sur 32 bits.