// Two-pass blocked scan (running reduction) of `input` into `output` using `op`.
// IsExclusive selects, at compile time, whether each output element is written
// before or after the matching input element is combined into the running value.
// NOTE(review): several lines of this definition (function name, closing braces,
// declarations of `out_partial_values` and `loop_options`, the parallel-loop
// launches) are not visible in this view; comments below describe only what is shown.
60 template <
bool IsExclusive,
typename DataType,
typename Operator,
61 typename InputIterator,
typename OutputIterator>
63 InputIterator input, OutputIterator output,
64 DataType init_value, Operator op)
// Reduces `nb_value` consecutive inputs starting at `input_index` with `op`,
// starting from Operator::defaultValue(). Note that `nb_value` here is the
// lambda's own parameter (the chunk length), not the enclosing one.
// Presumably returns `partial_value` (the return statement is not visible).
67 auto multiple_getter_func = [=](
Int32 input_index,
Int32 nb_value) -> DataType {
68 DataType partial_value = Operator::defaultValue();
69 for (
Int32 x = 0; x < nb_value; ++x)
70 partial_value = op(input[x + input_index], partial_value);
// Writes the scan results for one chunk, continuing from `previous_sum`
// (received by value, so updating it stays local to the call).
74 auto multiple_setter_func = [=](DataType previous_sum,
Int32 input_index,
Int32 nb_value) {
75 for (
Int32 x = 0; x < nb_value; ++x) {
76 if constexpr (IsExclusive) {
// Exclusive form: store the running value first, then fold in the current input.
77 output[x + input_index] = previous_sum;
78 previous_sum = op(input[x + input_index], previous_sum);
// Presumably the `else` branch (the keyword is not visible here):
// fold in the current input first, then store the running value.
81 previous_sum = op(input[x + input_index], previous_sum);
82 output[x + input_index] = previous_sum;
// Number of intervals passed to _subInterval when splitting the input range.
87 const Int32 nb_block = 10;
// First pass: for each interval index in [a, a+n), compute the interval bounds
// and store that interval's partial reduction in out_partial_values.
94 auto partial_value_func = [=](
Int32 a,
Int32 n) {
95 for (
Int32 i = 0; i < n; ++i) {
96 Int32 interval_index = i + a;
98 Int32 input_index = 0;
99 Int32 nb_value_in_interval = 0;
// `nb_value` here is captured from the enclosing scope (total element count).
100 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
102 DataType partial_value = multiple_getter_func(input_index, nb_value_in_interval);
104 out_partial_values[interval_index] = partial_value;
110 run_info.addOptions(loop_options);
// Second pass: for each interval, rebuild the starting value from `init_value`
// and the partial values of all preceding intervals, then write the outputs.
// (`a` and `n` come from an enclosing lambda header that is not visible here.)
116 for (
Int32 i = 0; i < n; ++i) {
117 Int32 interval_index = i + a;
119 DataType previous_sum = init_value;
// Each interval recomputes its own prefix over intervals [0, interval_index),
// so it reads no state written by other intervals in this pass.
120 for (
Int32 z = 0; z < interval_index; ++z)
121 previous_sum = op(out_partial_values[z], previous_sum);
123 Int32 input_index = 0;
124 Int32 nb_value_in_interval = 0;
125 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
127 multiple_setter_func(previous_sum, input_index, nb_value_in_interval);
// Two-pass blocked filter: copies into `output` the elements of `input` for which
// `select_lambda` is true.
// NOTE(review): several lines of this definition (function name, declarations of
// `select_flags_view` and `nb_filter`, counter updates, closing braces, the use of
// the InPlace parameter, the return value) are not visible in this view; comments
// below describe only what is shown and mark assumptions as such.
135 template <
bool InPlace,
typename InputIterator,
typename OutputIterator,
typename SelectLambda>
137 InputIterator input, OutputIterator output,
138 SelectLambda select_lambda)
// Type used for counts of selected elements and for positions in `output`.
141 using IndexType =
Int32;
// Evaluates `select_lambda` on each element of one chunk, records the result in
// select_flags_view[index] and returns `partial_value`.
// Presumably `partial_value` counts the selected elements (the statement that
// updates it is not visible) - TODO confirm.
146 auto multiple_getter_func = [=](
Int32 input_index,
Int32 nb_value) -> IndexType {
147 IndexType partial_value = 0;
148 for (
Int32 x = 0; x < nb_value; ++x) {
149 const Int32 index = x + input_index;
150 bool is_select = select_lambda(input[index]);
// Keep the flag so the second pass does not have to call select_lambda again.
151 select_flags_view[index] = is_select;
155 return partial_value;
// Copies the flagged elements of one chunk to `output`, writing at position
// `partial_value`. Presumably `partial_value` is advanced after each copy
// (that statement is not visible) - TODO confirm.
158 auto multiple_setter_func = [=](IndexType partial_value,
Int32 input_index,
Int32 nb_value) {
159 for (
Int32 x = 0; x < nb_value; ++x) {
160 const Int32 index = x + input_index;
161 if (select_flags_view[index]) {
162 output[partial_value] = input[index];
// Number of intervals passed to _subInterval when splitting the input range.
169 const Int32 nb_block = 10;
// One entry per interval (constructed from `nb_block` and 0); `out_partial_values`
// is a Span built from this array, so the lambdas below (which capture by copy)
// access it through the span.
173 SmallArray<Int32> partial_values(nb_block, 0);
174 Span<Int32> out_partial_values = partial_values;
// First pass: for each interval index in [a, a+n), compute the interval bounds
// and store the value returned by multiple_getter_func for that interval.
176 auto partial_value_func = [=](
Int32 a,
Int32 n) {
177 for (
Int32 i = 0; i < n; ++i) {
178 Int32 interval_index = i + a;
180 Int32 input_index = 0;
181 Int32 nb_value_in_interval = 0;
// `nb_value` here is captured from the enclosing scope (total element count).
182 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
184 out_partial_values[interval_index] = multiple_getter_func(input_index, nb_value_in_interval);
// Start from the options already attached to run_info (or defaults when none are
// set), force a grain size of 1 and attach the result back to run_info.
// NOTE(review): presumably so that each interval can be scheduled separately - confirm.
188 ParallelLoopOptions loop_options(run_info.options().value_or(ParallelLoopOptions{}));
189 loop_options.setGrainSize(1);
190 run_info.addOptions(loop_options);
// Sequential pass over the per-interval values: each entry is replaced by
// `nb_filter`. Presumably `nb_filter` is increased by `x` in between (that
// statement is not visible), making each entry a running total - TODO confirm.
198 for (
Int32 i = 0; i < nb_block; ++i) {
199 Int32 x = partial_values[i];
201 partial_values[i] = nb_filter;
// Second pass: each interval starts writing at the value stored for the
// previous interval (0 for the first interval).
// (`a` and `n` come from an enclosing lambda header that is not visible here.)
205 for (
Int32 i = 0; i < n; ++i) {
206 Int32 interval_index = i + a;
208 IndexType partial_value = 0;
209 if (interval_index > 0)
210 partial_value = out_partial_values[interval_index - 1];
212 Int32 input_index = 0;
213 Int32 nb_value_in_interval = 0;
214 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
216 multiple_setter_func(partial_value, input_index, nb_value_in_interval);
// Direct call covering every interval in one invocation.
// NOTE(review): the condition under which this path is taken is not visible here.
223 filter_func(0, nb_block);
// Computes the start index and length of interval number `interval_index` when a
// range of `size` elements is split into `nb_interval` intervals; the results are
// written through `out_begin_index` and `out_interval_size`.
// NOTE(review): the function signature and the statements guarded by the two `if`
// checks below are not visible in this view.
232 template <
typename SizeType>
// Default both outputs to 0.
236 *out_begin_index = 0;
237 *out_interval_size = 0;
// Guards for a non-positive interval count and for an out-of-range interval index
// (presumably early returns, leaving both outputs at 0 - TODO confirm).
238 if (nb_interval <= 0)
240 if (interval_index < 0 || interval_index >= nb_interval)
// Base interval length; every interval starts at a multiple of it.
242 SizeType isize = size / nb_interval;
243 SizeType ibegin = interval_index * isize;
// The last interval extends to the end of the range, so it also takes any
// elements left over by the division above.
245 if ((interval_index + 1) == nb_interval)
246 isize = size - ibegin;
247 *out_begin_index = ibegin;
248 *out_interval_size = isize;
void doScan(ForLoopRunInfo run_info, Int32 nb_value, InputIterator input, OutputIterator output, DataType init_value, Operator op)
Multi-threaded scan algorithm.
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applies the lambda function lambda_function concurrently over the iteration interval given by loop_ranges.