// NOTE(review): lossy extraction — original source line numbers ("121", "123",
// ...) are fused into the text and interior lines (case labels, braces,
// `#endif`s, parts of the signature) were dropped. Code is kept byte-identical;
// only comments are added.
//
// Generic reduction driver: reduces `nb_item` values read through `input_iter`
// with `reduce_op` (seeded by `init_value`), dispatching on the queue's
// execution policy to a backend-specific implementation.
121 template <
typename InputIterator,
typename ReduceOperator>
123 InputIterator input_iter, ReduceOperator reduce_op,
const TraceInfo& trace_info)
// Attach the caller's trace info to the run command (profiling/diagnostics).
127 command << trace_info;
// Backend dispatch (CUDA / HIP / SYCL / host fallback below).
131 switch (exec_policy) {
132#if defined(ARCANE_COMPILING_CUDA)
// --- CUDA path: cub::DeviceReduce::Reduce, standard two-phase CUB call. ---
134 size_t temp_storage_size = 0;
135 cudaStream_t stream = impl::CudaUtils::toNativeStream(queue);
136 DataType* reduced_value_ptr =
nullptr;
// Phase 1: a null storage pointer makes CUB only write the required
// temporary-storage byte count into `temp_storage_size`; no reduction runs.
138 ARCANE_CHECK_CUDA(::cub::DeviceReduce::Reduce(
nullptr, temp_storage_size, input_iter, reduced_value_ptr,
139 nb_item, reduce_op, init_value, stream));
141 s.m_algo_storage.allocate(temp_storage_size);
142 reduced_value_ptr = s.m_device_reduce_storage.allocate();
// Phase 2: the actual device-side reduction into `reduced_value_ptr`.
143 ARCANE_CHECK_CUDA(::cub::DeviceReduce::Reduce(s.m_algo_storage.address(), temp_storage_size,
144 input_iter, reduced_value_ptr, nb_item,
145 reduce_op, init_value, stream));
// Asynchronously stage the device result into host storage on `queue`;
// callers must synchronize before reading it.
146 s.m_device_reduce_storage.copyToAsync(s.m_host_reduce_storage, queue);
149#if defined(ARCANE_COMPILING_HIP)
// --- HIP path: rocprim::reduce, same size-query-then-run protocol. Note the
// argument order differs from CUB: `init_value` precedes `nb_item`.
151 size_t temp_storage_size = 0;
152 hipStream_t stream = impl::HipUtils::toNativeStream(queue);
153 DataType* reduced_value_ptr =
nullptr;
// Size query (null temporary storage pointer).
155 ARCANE_CHECK_HIP(rocprim::reduce(
nullptr, temp_storage_size, input_iter, reduced_value_ptr, init_value,
156 nb_item, reduce_op, stream));
158 s.m_algo_storage.allocate(temp_storage_size);
159 reduced_value_ptr = s.m_device_reduce_storage.allocate();
// Actual reduction on the HIP stream.
161 ARCANE_CHECK_HIP(rocprim::reduce(s.m_algo_storage.address(), temp_storage_size, input_iter, reduced_value_ptr, init_value,
162 nb_item, reduce_op, stream));
163 s.m_device_reduce_storage.copyToAsync(s.m_host_reduce_storage, queue);
166#if defined(ARCANE_COMPILING_SYCL)
// --- SYCL path: command-based reducer object; items are combined one by one
// (the enclosing loop/kernel launch falls on dropped lines).
171 ReducerType reducer(command2);
175 reducer.combine(input_iter[i]);
// Publish the SYCL result directly into host storage.
178 s.m_host_reduce_storage[0] = reducer.reducedValue();
// --- Host (sequential) fallback: fold `reduce_op` over all items. ---
186 DataType reduced_value = init_value;
187 for (
Int32 i = 0; i < nb_item; ++i) {
// NOTE(review): `input_iter` is dereferenced but not visibly advanced in this
// span — the `++input_iter;` almost certainly sits on the dropped original
// lines 189-190 (compare the SYCL path, which indexes `input_iter[i]`).
// Confirm against the full source before assuming a bug.
188 reduced_value = reduce_op(reduced_value, *input_iter);
191 s.m_host_reduce_storage[0] = reduced_value;
// NOTE(review): three template headers parameterized by a `SelectLambda`
// (original lines 280/287/294); the method signatures and bodies they
// introduce were dropped by the extraction.
280 template <
typename SelectLambda>
287 template <
typename SelectLambda>
294 template <
typename SelectLambda>
// Fragment of the result accessor: clears the re-entry guard so a new
// apply() is permitted, then returns the cached reduced value.
303 m_is_already_called =
false;
304 return this->_reducedValue();
// Guard flag: set while an apply() result is pending and not yet consumed
// (see the fatal check in the _setCalled() fragment further below).
309 bool m_is_already_called =
false;
// Runs the reduction over `nb_value` items read through `input_iter`, using
// `reduce_op`'s identity element (defaultValue()) as the seed, and forwards
// to the generic backend dispatcher via gf.apply().
// NOTE(review): the declarations of `base_ptr` and `gf` fall on lines dropped
// by the extraction (original 315-318) — presumably mirroring those visible
// in _applyWithIndex (GenericReducerBase<DataType>* / GenericReducerIf<DataType>).
313 template <
typename InputIterator,
typename ReduceOperator>
314 void _apply(Int32 nb_value, InputIterator input_iter, ReduceOperator reduce_op,
const TraceInfo& trace_info)
// Seed the fold with the operator's identity (e.g. 0 for sum).
319 DataType init_value = reduce_op.defaultValue();
320 gf.apply(*base_ptr, nb_value, init_value, input_iter, reduce_op, trace_info);
// Index-based variant: values are produced on demand by `getter_lambda`
// instead of read from a materialized range. An adapter iterator wraps the
// lambda so the same generic reduction path can consume it.
// NOTE(review): original lines 326-327 (likely the opening brace and a
// re-entry-guard call) were dropped by the extraction.
323 template <
typename GetterLambda,
typename ReduceOperator>
324 void _applyWithIndex(Int32 nb_value,
const GetterLambda& getter_lambda,
325 ReduceOperator reduce_op,
const TraceInfo& trace_info)
328 impl::GenericReducerBase<DataType>* base_ptr =
this;
329 impl::GenericReducerIf<DataType> gf;
// Adapter: makes getter_lambda(i) look like an input iterator.
330 impl::GetterLambdaIterator<DataType, GetterLambda> input_iter(getter_lambda);
// Seed the fold with the operator's identity element.
331 DataType init_value = reduce_op.defaultValue();
332 gf.apply(*base_ptr, nb_value, init_value, input_iter, reduce_op, trace_info);
// Re-entry guard fragment: aborts fatally if the instance is reused before
// the previous result was consumed, then marks it as in use. The flag is
// cleared again when the reduced value is read back.
// NOTE(review): the enclosing function signature (presumably _setCalled())
// sits on a line dropped by the extraction.
337 if (m_is_already_called)
338 ARCANE_FATAL(
"apply() has already been called for this instance");
339 m_is_already_called =
true;