Arcane  v3.15.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
MultiThreadAlgo.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* MultiThreadAlgo.h (C) 2000-2024 */
9/* */
10/* Implémentation des algorithmes accélérateurs en mode multi-thread. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCANE_ACCELERATOR_MULTITHREADALGO_H
13#define ARCANE_ACCELERATOR_MULTITHREADALGO_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16
17#include "arcane/utils/SmallArray.h"
18
20
21#include "arcane/accelerator/AcceleratorGlobal.h"
22
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26namespace Arcane::Accelerator::impl
27{
28
29/*---------------------------------------------------------------------------*/
30/*---------------------------------------------------------------------------*/
37{
38 public:
39
// Multi-threaded scan.
// The work is organised in two passes over nb_block blocks: a first pass
// computes one partial value per block, a second pass writes the final values
// to 'output'. IsExclusive selects whether an output element is stored before
// or after the neighbouring statement of its branch (presumably the usual
// exclusive/inclusive scan distinction -- confirm against the full source).
58 template <bool IsExclusive, typename DataType, typename Operator,
59 typename InputIterator, typename OutputIterator>
62 DataType init_value, Operator op)
63 {
64 //std::cout << "DO_SCAN MULTI_THREAD nb_value=" << nb_value << " init_value=" << init_value << "\n";
// Builds the partial value of the nb_value elements starting at input_index,
// starting from Operator::defaultValue().
65 auto multiple_getter_func = [=](Int32 input_index, Int32 nb_value) -> DataType {
66 DataType partial_value = Operator::defaultValue();
67 for (Int32 x = 0; x < nb_value; ++x)
69 return partial_value;
70 };
71
// Stores the running value 'previous_sum' in output for each element of the range.
73 for (Int32 x = 0; x < nb_value; ++x) {
74 if constexpr (IsExclusive) {
// Exclusive variant: the running value is stored first.
75 output[x + input_index] = previous_sum;
77 }
78 else {
// Other variant: the running value is stored last.
80 output[x + input_index] = previous_sum;
81 }
82 }
83 };
84 // TODO: compute this value automatically.
85 const Int32 nb_block = 10;
86
87 // Array holding the partial values of the blocks.
88 // TODO: Use padding to avoid cache conflicts between threads.
91
// First pass body: handles the block indices [a, a+n).
92 auto partial_value_func = [=](Int32 a, Int32 n) {
93 for (Int32 i = 0; i < n; ++i) {
94 Int32 interval_index = i + a;
95
99
101
103 }
104 };
105
// Set the grain size of the loop options to 1 and attach them to run_info.
107 loop_options.setGrainSize(1);
108 run_info.addOptions(loop_options);
109
110 // Compute the partial sums for nb_block
112
// Second pass body: handles the block indices [a, a+n).
113 auto final_sum_func = [=](Int32 a, Int32 n) {
114 for (Int32 i = 0; i < n; ++i) {
115 Int32 interval_index = i + a;
116
// Start from init_value and visit every block that precedes this one.
117 DataType previous_sum = init_value;
118 for (Int32 z = 0; z < interval_index; ++z)
120
121 Int32 input_index = 0;
124
126 }
127 };
128
129 // Compute the final values
131 }
132
// Multi-threaded filter.
// The nb_value input elements are split into nb_block consecutive blocks.
// A first parallel pass evaluates select_lambda on every input element and
// produces one partial value per block (presumably the number of selected
// elements of the block -- confirm against the full source). These values are
// then accumulated sequentially, which gives both the total 'nb_filter' and
// the output offset of each block. A second pass copies the flagged input
// elements to 'output'. Returns nb_filter.
133 template <bool InPlace, typename InputIterator, typename OutputIterator, typename SelectLambda>
137 {
138 // Index type
139 using IndexType = Int32;
140
143 //std::cout << "DO_FILTER MULTI_THREAD nb_value=" << nb_value << "\n";
// Applies select_lambda to the nb_value elements starting at input_index and
// returns the partial value of that range.
144 auto multiple_getter_func = [=](Int32 input_index, Int32 nb_value) -> IndexType {
145 IndexType partial_value = 0;
146 for (Int32 x = 0; x < nb_value; ++x) {
147 const Int32 index = x + input_index;
148 bool is_select = select_lambda(input[index]);
150 if (is_select)
152 }
153 return partial_value;
154 };
155
// Copies every element of the range whose flag is set in select_flags_view
// to output[partial_value].
157 for (Int32 x = 0; x < nb_value; ++x) {
158 const Int32 index = x + input_index;
159 if (select_flags_view[index]) {
160 output[partial_value] = input[index];
162 }
163 }
164 };
165
166 // TODO: compute this value automatically.
167 const Int32 nb_block = 10;
168
169 // Array holding the partial values of the blocks.
170 // TODO: Use padding to avoid cache conflicts between threads.
171 SmallArray<Int32> partial_values(nb_block, 0);
// The lambdas below capture this span by value; it is presumably a view on
// the storage of partial_values, since filter_func reads through it the
// values accumulated in partial_values further down.
172 Span<Int32> out_partial_values = partial_values;
173
// First pass body: for each block index in [a, a+n), computes the bounds of
// the block with _subInterval and stores the block's partial value.
174 auto partial_value_func = [=](Int32 a, Int32 n) {
175 for (Int32 i = 0; i < n; ++i) {
176 Int32 interval_index = i + a;
177
178 Int32 input_index = 0;
179 Int32 nb_value_in_interval = 0;
180 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
181
182 out_partial_values[interval_index] = multiple_getter_func(input_index, nb_value_in_interval);
183 }
184 };
185
// Copy the options already set on run_info (or default ones), set the grain
// size to 1 and attach them to run_info.
186 ParallelLoopOptions loop_options(run_info.options().value_or(ParallelLoopOptions{}));
187 loop_options.setGrainSize(1);
188 run_info.addOptions(loop_options);
189
190 // Compute the partial sums for nb_block
191 Arcane::arcaneParallelFor(0, nb_block, run_info, partial_value_func);
192
193 // Compute the number of filtered values.
194 // Also replace each entry of partial_values by the running total up to and
195 Int32 nb_filter = 0; // including that block.
196 for (Int32 i = 0; i < nb_block; ++i) {
197 Int32 x = partial_values[i];
198 nb_filter += x;
199 partial_values[i] = nb_filter;
200 }
201
// Second pass body: handles the block indices [a, a+n).
202 auto filter_func = [=](Int32 a, Int32 n) {
203 for (Int32 i = 0; i < n; ++i) {
204 Int32 interval_index = i + a;
205
// The starting output offset of a block is the running total of the
// previous block (0 for the first block).
206 IndexType partial_value = 0;
207 if (interval_index > 0)
208 partial_value = out_partial_values[interval_index - 1];
209
210 Int32 input_index = 0;
211 Int32 nb_value_in_interval = 0;
212 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
213
214 multiple_setter_func(partial_value, input_index, nb_value_in_interval);
215 }
216 };
217
218 // If input and output are the same, the fill is done sequentially.
219 // TODO: do it in parallel.
220 if (InPlace)
221 filter_func(0, nb_block);
222 else
223 Arcane::arcaneParallelFor(0, nb_block, run_info, filter_func);
224
225 return nb_filter;
226 }
227
228 private:
229
/*!
 * \brief Computes the bounds of one piece of the range [0, size) cut into
 * \a nb_interval consecutive pieces.
 *
 * Every piece holds (size / nb_interval) elements, except the last one which
 * also takes the remaining elements.
 *
 * \param size total number of elements.
 * \param interval_index index of the requested piece.
 * \param nb_interval number of pieces.
 * \param out_begin_index receives the index of the first element of the piece.
 * \param out_interval_size receives the number of elements of the piece.
 *
 * Both outputs are set to 0 when \a nb_interval is not strictly positive or
 * when \a interval_index is outside [0, nb_interval).
 */
template <typename SizeType>
static void _subInterval(SizeType size, SizeType interval_index, SizeType nb_interval,
                         SizeType* out_begin_index, SizeType* out_interval_size)
{
  // The outputs describe an empty interval until proven otherwise.
  *out_begin_index = 0;
  *out_interval_size = 0;

  const bool is_valid_request = (nb_interval > 0) && (interval_index >= 0) && (interval_index < nb_interval);
  if (!is_valid_request)
    return;

  const SizeType chunk_size = size / nb_interval;
  const SizeType first_index = interval_index * chunk_size;
  const bool is_last_interval = ((interval_index + 1) == nb_interval);

  *out_begin_index = first_index;
  // The last interval takes all the remaining elements.
  *out_interval_size = is_last_interval ? (size - first_index) : chunk_size;
}
248};
249
250/*---------------------------------------------------------------------------*/
251/*---------------------------------------------------------------------------*/
252
253} // namespace Arcane::Accelerator::impl
254
255/*---------------------------------------------------------------------------*/
256/*---------------------------------------------------------------------------*/
257
258#endif
259
260/*---------------------------------------------------------------------------*/
261/*---------------------------------------------------------------------------*/
Classes, Types et macros pour gérer la concurrence.
Algorithmes avancés en mode multi-thread.
void doScan(ForLoopRunInfo run_info, Int32 nb_value, InputIterator input, OutputIterator output, DataType init_value, Operator op)
Algorithme de scan multi-thread.
Informations d'exécution d'une boucle.
Lecteur des fichiers de maillage via la bibliothèque LIMA.
Definition Lima.cc:149
Options d'exécution d'une boucle parallèle en multi-thread.
void arcaneParallelFor(Integer i0, Integer size, InstanceType *itype, void(InstanceType::*lambda_function)(Integer i0, Integer size))
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération [i0,...
std::int32_t Int32
Type entier signé sur 32 bits.
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition rapidjson.h:385