// NOTE(review): this listing is fragmentary. Each line starts with an original line number, and
// several original lines (function name, braces, some statements) are not visible here.
//
// Block-wise scan: values read from `input` are combined with `op` and the running result is
// written to `output`. `IsExclusive` selects at compile time which write order is used
// (see multiple_setter_func below). `init_value` seeds the running value of every interval.
59 template <
bool IsExclusive,
typename DataType,
typename Operator,
60 typename InputIterator,
typename OutputIterator>
62 InputIterator input, OutputIterator output,
63 DataType init_value, Operator op)
// Folds `nb_value` consecutive inputs starting at `input_index` with `op`, starting from
// Operator::defaultValue(). Presumably the folded value is returned (the return line is not visible).
66 auto multiple_getter_func = [=](
Int32 input_index,
Int32 nb_value) -> DataType {
67 DataType partial_value = Operator::defaultValue();
68 for (
Int32 x = 0; x < nb_value; ++x)
69 partial_value = op(input[x + input_index], partial_value);
// Writes the scanned values for one interval, starting from `previous_sum`.
73 auto multiple_setter_func = [=](DataType previous_sum,
Int32 input_index,
Int32 nb_value) {
74 for (
Int32 x = 0; x < nb_value; ++x) {
// Exclusive form: store the running value first, then combine the current input into it.
75 if constexpr (IsExclusive) {
76 output[x + input_index] = previous_sum;
77 previous_sum = op(input[x + input_index], previous_sum);
// Other form (presumably the `else` branch, whose line is not visible): combine first, then store.
80 previous_sum = op(input[x + input_index], previous_sum);
81 output[x + input_index] = previous_sum;
// Number of intervals passed to _subInterval when splitting the input.
86 const Int32 nb_block = 10;
// First pass: for each interval index in [a, a+n), compute the folded value of that interval
// and store it in out_partial_values (its declaration is not visible in this listing).
93 auto partial_value_func = [=](
Int32 a,
Int32 n) {
94 for (
Int32 i = 0; i < n; ++i) {
95 Int32 interval_index = i + a;
97 Int32 input_index = 0;
98 Int32 nb_value_in_interval = 0;
99 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
101 DataType partial_value = multiple_getter_func(input_index, nb_value_in_interval);
103 out_partial_values[interval_index] = partial_value;
// NOTE(review): `loop_options` is built on lines not visible here; confirm its settings.
109 run_info.addOptions(loop_options);
// Second pass: for each interval index in [a, a+n), rebuild the starting value and write outputs.
115 for (
Int32 i = 0; i < n; ++i) {
116 Int32 interval_index = i + a;
// Starting value = init_value combined with the partial values of all preceding intervals.
118 DataType previous_sum = init_value;
119 for (
Int32 z = 0; z < interval_index; ++z)
120 previous_sum = op(out_partial_values[z], previous_sum);
122 Int32 input_index = 0;
123 Int32 nb_value_in_interval = 0;
124 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
126 multiple_setter_func(previous_sum, input_index, nb_value_in_interval);
// NOTE(review): this listing is fragmentary. Each line starts with an original line number, and
// several original lines (function name, braces, some statements) are not visible here.
//
// Block-wise filter: elements of `input` for which `select_lambda` is true are copied to `output`.
// NOTE(review): no use of `InPlace` is visible in this listing; confirm its role in the full source.
134 template <
bool InPlace,
typename InputIterator,
typename OutputIterator,
typename SelectLambda>
136 InputIterator input, OutputIterator output,
137 SelectLambda select_lambda)
140 using IndexType =
Int32;
// Evaluates `select_lambda` on each element of one interval and records the result in
// select_flags_view (declared on a line not visible here). Returns `partial_value`, presumably
// the number of selected elements (the increment is not visible; confirm).
145 auto multiple_getter_func = [=](
Int32 input_index,
Int32 nb_value) -> IndexType {
146 IndexType partial_value = 0;
147 for (
Int32 x = 0; x < nb_value; ++x) {
148 const Int32 index = x + input_index;
149 bool is_select = select_lambda(input[index]);
150 select_flags_view[index] = is_select;
154 return partial_value;
// Copies the flagged elements of one interval to `output`, using `partial_value` as the
// destination index. Presumably `partial_value` is advanced after each copy (line not visible).
157 auto multiple_setter_func = [=](IndexType partial_value,
Int32 input_index,
Int32 nb_value) {
158 for (
Int32 x = 0; x < nb_value; ++x) {
159 const Int32 index = x + input_index;
160 if (select_flags_view[index]) {
161 output[partial_value] = input[index];
// Number of intervals passed to _subInterval when splitting the input.
168 const Int32 nb_block = 10;
// One slot per interval, initialised to 0; out_partial_values is a Span over the same array.
172 SmallArray<Int32> partial_values(nb_block, 0);
173 Span<Int32> out_partial_values = partial_values;
// First pass: for each interval index in [a, a+n), store the getter's result for that interval.
175 auto partial_value_func = [=](
Int32 a,
Int32 n) {
176 for (
Int32 i = 0; i < n; ++i) {
177 Int32 interval_index = i + a;
179 Int32 input_index = 0;
180 Int32 nb_value_in_interval = 0;
181 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
183 out_partial_values[interval_index] = multiple_getter_func(input_index, nb_value_in_interval);
// Start from the options already held by run_info (or defaults), set the grain size to 1,
// and add the result back to run_info.
187 ParallelLoopOptions loop_options(run_info.options().value_or(ParallelLoopOptions{}));
188 loop_options.setGrainSize(1);
189 run_info.addOptions(loop_options);
// Replaces each slot with `nb_filter`. Presumably `x` is added to nb_filter in between
// (line not visible), which would make each slot a running total; confirm.
197 for (
Int32 i = 0; i < nb_block; ++i) {
198 Int32 x = partial_values[i];
200 partial_values[i] = nb_filter;
// Second pass: for each interval index in [a, a+n), pick the first destination index and copy.
204 for (
Int32 i = 0; i < n; ++i) {
205 Int32 interval_index = i + a;
// Interval 0 starts writing at 0; any other interval starts at the value stored for the previous one.
207 IndexType partial_value = 0;
208 if (interval_index > 0)
209 partial_value = out_partial_values[interval_index - 1];
211 Int32 input_index = 0;
212 Int32 nb_value_in_interval = 0;
213 _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
215 multiple_setter_func(partial_value, input_index, nb_value_in_interval);
// Direct call covering all nb_block intervals in one go.
222 filter_func(0, nb_block);
// NOTE(review): this listing is fragmentary. Each line starts with an original line number, and
// braces plus the statements guarded by the two `if`s below are not visible here.
//
// Computes the begin index and size of interval `interval_index` when `size` elements are split
// into `nb_interval` intervals. Results are written through `out_begin_index` and
// `out_interval_size`.
231 template <
typename SizeType>
232 static void _subInterval(SizeType size, SizeType interval_index, SizeType nb_interval,
233 SizeType* out_begin_index, SizeType* out_interval_size)
// Both outputs are zeroed before any check.
235 *out_begin_index = 0;
236 *out_interval_size = 0;
// Guards for an invalid interval count or an out-of-range index. Presumably each one returns
// early, leaving the outputs at 0 (the guarded statements are not visible; confirm).
237 if (nb_interval <= 0)
239 if (interval_index < 0 || interval_index >= nb_interval)
// Base size per interval (division of `size` by `nb_interval`) and the matching begin index.
241 SizeType isize = size / nb_interval;
242 SizeType ibegin = interval_index * isize;
// The last interval takes every element from its begin index to the end.
244 if ((interval_index + 1) == nb_interval)
245 isize = size - ibegin;
246 *out_begin_index = ibegin;
247 *out_interval_size = isize;