Arcane  v4.1.1.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
MultiThreadAlgo.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* MultiThreadAlgo.h (C) 2000-2025 */
9/* */
10/* Implémentation des algorithmes accélérateurs en mode multi-thread. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ACCELERATOR_MULTITHREADALGO_H
13#define ARCCORE_ACCELERATOR_MULTITHREADALGO_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16
17#include "arccore/common/SmallArray.h"
18
19#include "arccore/base/ForLoopRunInfo.h"
20#include "arccore/concurrency/ParallelFor.h"
21
22#include "arccore/accelerator/AcceleratorGlobal.h"
23
24/*---------------------------------------------------------------------------*/
25/*---------------------------------------------------------------------------*/
26
27namespace Arcane::Accelerator::impl
28{
29
30/*---------------------------------------------------------------------------*/
31/*---------------------------------------------------------------------------*/
38{
39 public:
40
59 template <bool IsExclusive, typename DataType, typename Operator,
60 typename InputIterator, typename OutputIterator>
61 void doScan(ForLoopRunInfo run_info, Int32 nb_value,
62 InputIterator input, OutputIterator output,
63 DataType init_value, Operator op)
64 {
65 //std::cout << "DO_SCAN MULTI_THREAD nb_value=" << nb_value << " init_value=" << init_value << "\n";
66 auto multiple_getter_func = [=](Int32 input_index, Int32 nb_value) -> DataType {
67 DataType partial_value = Operator::defaultValue();
68 for (Int32 x = 0; x < nb_value; ++x)
69 partial_value = op(input[x + input_index], partial_value);
70 return partial_value;
71 };
72
73 auto multiple_setter_func = [=](DataType previous_sum, Int32 input_index, Int32 nb_value) {
74 for (Int32 x = 0; x < nb_value; ++x) {
75 if constexpr (IsExclusive) {
76 output[x + input_index] = previous_sum;
77 previous_sum = op(input[x + input_index], previous_sum);
78 }
79 else {
80 previous_sum = op(input[x + input_index], previous_sum);
81 output[x + input_index] = previous_sum;
82 }
83 }
84 };
85 // TODO: calculer automatiquement cette valeur.
86 const Int32 nb_block = 10;
87
88 // Tableau pour conserver les valeurs partielles des blocs.
89 // TODO: Utiliser un padding pour éviter des conflits de cache entre les threads.
90 SmallArray<DataType> partial_values(nb_block);
91 Span<DataType> out_partial_values = partial_values;
92
93 auto partial_value_func = [=](Int32 a, Int32 n) {
94 for (Int32 i = 0; i < n; ++i) {
95 Int32 interval_index = i + a;
96
97 Int32 input_index = 0;
98 Int32 nb_value_in_interval = 0;
99 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
100
101 DataType partial_value = multiple_getter_func(input_index, nb_value_in_interval);
102
103 out_partial_values[interval_index] = partial_value;
104 }
105 };
106
107 ParallelLoopOptions loop_options(run_info.options().value_or(ParallelLoopOptions{}));
108 loop_options.setGrainSize(1);
109 run_info.addOptions(loop_options);
110
111 // Calcule les sommes partielles pour nb_block
112 Arcane::arccoreParallelFor(0, nb_block, run_info, partial_value_func);
113
114 auto final_sum_func = [=](Int32 a, Int32 n) {
115 for (Int32 i = 0; i < n; ++i) {
116 Int32 interval_index = i + a;
117
118 DataType previous_sum = init_value;
119 for (Int32 z = 0; z < interval_index; ++z)
120 previous_sum = op(out_partial_values[z], previous_sum);
121
122 Int32 input_index = 0;
123 Int32 nb_value_in_interval = 0;
124 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
125
126 multiple_setter_func(previous_sum, input_index, nb_value_in_interval);
127 }
128 };
129
130 // Calcule les valeurs finales
131 Arcane::arccoreParallelFor(0, nb_block, run_info, final_sum_func);
132 }
133
134 template <bool InPlace, typename InputIterator, typename OutputIterator, typename SelectLambda>
135 Int32 doFilter(ForLoopRunInfo run_info, Int32 nb_value,
136 InputIterator input, OutputIterator output,
137 SelectLambda select_lambda)
138 {
139 // Type de l'index
140 using IndexType = Int32;
141
142 UniqueArray<bool> select_flags(nb_value);
143 Span<bool> select_flags_view = select_flags;
144 //std::cout << "DO_FILTER MULTI_THREAD nb_value=" << nb_value << "\n";
145 auto multiple_getter_func = [=](Int32 input_index, Int32 nb_value) -> IndexType {
146 IndexType partial_value = 0;
147 for (Int32 x = 0; x < nb_value; ++x) {
148 const Int32 index = x + input_index;
149 bool is_select = select_lambda(input[index]);
150 select_flags_view[index] = is_select;
151 if (is_select)
152 ++partial_value;
153 }
154 return partial_value;
155 };
156
157 auto multiple_setter_func = [=](IndexType partial_value, Int32 input_index, Int32 nb_value) {
158 for (Int32 x = 0; x < nb_value; ++x) {
159 const Int32 index = x + input_index;
160 if (select_flags_view[index]) {
161 output[partial_value] = input[index];
162 ++partial_value;
163 }
164 }
165 };
166
167 // TODO: calculer automatiquement cette valeur.
168 const Int32 nb_block = 10;
169
170 // Tableau pour conserver les valeurs partielles des blocs.
171 // TODO: Utiliser un padding pour éviter des conflits de cache entre les threads.
172 SmallArray<Int32> partial_values(nb_block, 0);
173 Span<Int32> out_partial_values = partial_values;
174
175 auto partial_value_func = [=](Int32 a, Int32 n) {
176 for (Int32 i = 0; i < n; ++i) {
177 Int32 interval_index = i + a;
178
179 Int32 input_index = 0;
180 Int32 nb_value_in_interval = 0;
181 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
182
183 out_partial_values[interval_index] = multiple_getter_func(input_index, nb_value_in_interval);
184 }
185 };
186
187 ParallelLoopOptions loop_options(run_info.options().value_or(ParallelLoopOptions{}));
188 loop_options.setGrainSize(1);
189 run_info.addOptions(loop_options);
190
191 // Calcule les sommes partielles pour nb_block
192 Arcane::arccoreParallelFor(0, nb_block, run_info, partial_value_func);
193
194 // Calcule le nombre de valeurs filtrées
195 // Calcule aussi la valeur accumulée de partial_values
196 Int32 nb_filter = 0;
197 for (Int32 i = 0; i < nb_block; ++i) {
198 Int32 x = partial_values[i];
199 nb_filter += x;
200 partial_values[i] = nb_filter;
201 }
202
203 auto filter_func = [=](Int32 a, Int32 n) {
204 for (Int32 i = 0; i < n; ++i) {
205 Int32 interval_index = i + a;
206
207 IndexType partial_value = 0;
208 if (interval_index > 0)
209 partial_value = out_partial_values[interval_index - 1];
210
211 Int32 input_index = 0;
212 Int32 nb_value_in_interval = 0;
213 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
214
215 multiple_setter_func(partial_value, input_index, nb_value_in_interval);
216 }
217 };
218
219 // Si l'entrée et la sortie sont les mêmes, on fait le remplissage en séquentiel.
220 // TODO: faire en parallèle.
221 if (InPlace)
222 filter_func(0, nb_block);
223 else
224 Arcane::arccoreParallelFor(0, nb_block, run_info, filter_func);
225
226 return nb_filter;
227 }
228
229 private:
230
/*!
 * \brief Computes the bounds of one sub-interval of the range [0, size).
 *
 * The range is cut into \a nb_interval pieces of (size / nb_interval)
 * elements each; the last piece additionally receives the remaining
 * elements.
 *
 * \param size total number of elements.
 * \param interval_index index of the requested sub-interval.
 * \param nb_interval number of sub-intervals.
 * \param out_begin_index receives the index of the first element.
 * \param out_interval_size receives the number of elements.
 *
 * Both outputs are set to 0 if \a nb_interval is not strictly positive or
 * if \a interval_index is outside [0, nb_interval).
 */
template <typename SizeType>
static void _subInterval(SizeType size, SizeType interval_index, SizeType nb_interval,
                         SizeType* out_begin_index, SizeType* out_interval_size)
{
  const bool is_valid_request =
  (nb_interval > 0) && (interval_index >= 0) && (interval_index < nb_interval);
  if (!is_valid_request) {
    *out_begin_index = 0;
    *out_interval_size = 0;
    return;
  }

  const SizeType regular_size = size / nb_interval;
  const SizeType first_index = interval_index * regular_size;
  const bool is_last_interval = ((interval_index + 1) == nb_interval);

  *out_begin_index = first_index;
  // The last interval takes all the remaining elements.
  *out_interval_size = is_last_interval ? (size - first_index) : regular_size;
}
249};
250
251/*---------------------------------------------------------------------------*/
252/*---------------------------------------------------------------------------*/
253
254} // namespace Arcane::Accelerator::impl
255
256/*---------------------------------------------------------------------------*/
257/*---------------------------------------------------------------------------*/
258
259#endif
260
261/*---------------------------------------------------------------------------*/
262/*---------------------------------------------------------------------------*/
Algorithmes avancés en mode multi-thread.
void doScan(ForLoopRunInfo run_info, Int32 nb_value, InputIterator input, OutputIterator output, DataType init_value, Operator op)
Algorithme de scan multi-thread.
Informations d'exécution d'une boucle.
Options d'exécution d'une boucle parallèle en multi-thread.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
Tableau 1D de données avec buffer pré-alloué sur la pile.
Vue d'un tableau d'éléments de type T.
Definition Span.h:633
Vecteur 1D de données avec sémantique par valeur (style STL).
void arccoreParallelFor(WorkGroupLoopRange bounds, ForLoopRunInfo run_info, const Lambda &func, const RemainingArgs &... remaining_args)
Applique le fonctor func sur une boucle parallèle.
std::int32_t Int32
Type entier signé sur 32 bits.
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition rapidjson.h:416