  static ARCCORE_DEVICE DataType
  applyDevice(/* ... */ dev_info) // delegates to the device-side implementation
  {
    _applyDevice(dev_info);
    // ...
  }
  //! Host-side reduction: atomically replaces *ptr by max(*ptr, v).
  static DataType apply(DataType* ptr, DataType v)
  {
    std::atomic_ref<DataType> aref(*ptr);
    DataType prev_value = aref.load();
    while (prev_value < v && !aref.compare_exchange_weak(prev_value, v)) {
    }
    return aref.load();
  }
#if defined(ARCANE_COMPILING_SYCL)
  static sycl::maximum<DataType> syclFunctor() { return {}; }
#endif
  static ARCCORE_DEVICE DataType
  applyDevice(/* ... */ dev_info) // delegates to the device-side implementation
  {
    _applyDevice(dev_info);
    // ...
  }
  //! Host-side reduction: atomically replaces *vptr by min(*vptr, v).
  static DataType apply(DataType* vptr, DataType v)
  {
    std::atomic_ref<DataType> aref(*vptr);
    DataType prev_value = aref.load();
    while (prev_value > v && !aref.compare_exchange_weak(prev_value, v)) {
    }
    return aref.load();
  }
#if defined(ARCANE_COMPILING_SYCL)
  static sycl::minimum<DataType> syclFunctor() { return {}; }
#endif
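// ---------------------------------------------------------------------------
// Illustration (not part of the original header): the host-side apply()
// methods above implement an atomic max/min with a C++20 std::atomic_ref
// compare-and-swap loop. Below is a minimal standalone sketch of the same
// pattern; the name atomic_store_max is hypothetical.

#include <atomic>

template <typename T>
inline T atomic_store_max(T* location, T candidate)
{
  std::atomic_ref<T> ref(*location);
  T previous = ref.load();
  // compare_exchange_weak reloads 'previous' on failure, so the loop retries
  // until either 'candidate' is no longer greater or the exchange succeeds.
  while (previous < candidate && !ref.compare_exchange_weak(previous, candidate)) {
  }
  return ref.load(); // stored value after the update (>= candidate)
}
// ---------------------------------------------------------------------------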
template <typename DataType, typename ReduceFunctor>
class HostDeviceReducerBase
{
 public:

  HostDeviceReducerBase(RunCommand& command)
  : m_command(&command)
  {
    m_is_master_instance = true;
    m_identity = ReduceFunctor::identity();
    m_local_value = m_identity;
    m_atomic_value = m_identity;
    m_atomic_parent_value = &m_atomic_value;
    m_memory_impl = impl::internalGetOrCreateReduceMemoryImpl(&command);
    m_grid_memory_info = m_memory_impl->gridMemoryInfo();
#if defined(__INTEL_LLVM_COMPILER) && defined(__SYCL_DEVICE_ONLY__)
  HostDeviceReducerBase(const HostDeviceReducerBase& rhs) = default;
#else
  ARCCORE_HOST_DEVICE HostDeviceReducerBase(const HostDeviceReducerBase& rhs)
  : m_local_value(rhs.m_local_value)
  , m_identity(rhs.m_identity)
  {
#ifdef ARCCORE_DEVICE_CODE
    m_grid_memory_info = rhs.m_grid_memory_info;
#else
    m_memory_impl = rhs.m_memory_impl;
    m_grid_memory_info = m_memory_impl->gridMemoryInfo();
    // ...
    m_atomic_parent_value = rhs.m_atomic_parent_value;
    m_local_value = rhs.m_identity;
    m_atomic_value = m_identity;
#endif
  }
#endif
  ARCCORE_HOST_DEVICE HostDeviceReducerBase(HostDeviceReducerBase&& rhs) = delete;
  HostDeviceReducerBase& operator=(const HostDeviceReducerBase& rhs) = delete;
  ARCCORE_HOST_DEVICE void setValue(DataType v) { m_local_value = v; }
  ARCCORE_HOST_DEVICE DataType localValue() const
  {
    return m_local_value;
  }
 protected:

  impl::IReduceMemoryImpl* m_memory_impl = nullptr;
  impl::IReduceMemoryImpl::GridMemoryInfo m_grid_memory_info;
  RunCommand* m_command = nullptr;
  mutable DataType m_local_value;
  DataType* m_atomic_parent_value = nullptr;
  mutable DataType m_atomic_value;
  bool m_is_master_instance = false;
    if (!m_is_master_instance)
      ARCANE_FATAL("Final reduce operation is only valid on master instance");
    // ...
    m_memory_impl->copyReduceValueFromDevice();
    final_ptr = reinterpret_cast<DataType*>(m_grid_memory_info.m_host_memory_for_reduced_value);
    m_memory_impl->release();
    m_memory_impl = nullptr;
    // ...
    if (m_atomic_parent_value) {
      // ...
      ReduceFunctor::apply(m_atomic_parent_value, *final_ptr);
      *final_ptr = *m_atomic_parent_value;
  ARCCORE_HOST_DEVICE void
  // ... (rest of the method signature elided in this excerpt)
#ifdef ARCCORE_DEVICE_CODE
    // ...
    auto buf_span = m_grid_memory_info.m_grid_memory_values.bytes();
    DataType* buf = reinterpret_cast<DataType*>(buf_span.data());
    // ...
    ReduceFunctor::applyDevice(dvi);
#else
    // On the host, a non-master copy merges its local value into the master
    // instance's atomic value.
    if (!m_is_master_instance)
      ReduceFunctor::apply(m_atomic_parent_value, m_local_value);
template <typename DataType, typename ReduceFunctor>
class HostDeviceReducer
: public HostDeviceReducerBase<DataType, ReduceFunctor>
{
 public:

  using BaseClass = HostDeviceReducerBase<DataType, ReduceFunctor>;

  explicit HostDeviceReducer(RunCommand& command)
  : BaseClass(command)
  {}
  HostDeviceReducer(const HostDeviceReducer& rhs) = default;
  ARCCORE_HOST_DEVICE ~HostDeviceReducer()
  // ...

  //! Returns the final reduced value (valid only on the master instance).
  DataType reducedValue()
  // ...
template <typename DataType, typename ReduceFunctor>
class HostDeviceReducer2
: public HostDeviceReducerBase<DataType, ReduceFunctor>
{
 public:

  using BaseClass = HostDeviceReducerBase<DataType, ReduceFunctor>;
  using BaseClass::m_grid_memory_info;
  using BaseClass::m_local_value;

  explicit HostDeviceReducer2(RunCommand& command)
  : BaseClass(command)
  {}

  DataType reducedValue()
  // ...
#if defined(ARCANE_COMPILING_SYCL)
  //! Executed at the end of each work-item to combine the per-group partial values.
  void _internalExecWorkItemAtEnd(sycl::nd_item<1> id)
  {
    unsigned int* atomic_counter_ptr = m_grid_memory_info.m_grid_device_count;
    const Int32 local_id = static_cast<Int32>(id.get_local_id(0));
    const Int32 group_id = static_cast<Int32>(id.get_group_linear_id());
    const Int32 nb_block = static_cast<Int32>(id.get_group_range(0));

    auto buf_span = m_grid_memory_info.m_grid_memory_values.bytes();
    DataType* buf = reinterpret_cast<DataType*>(buf_span.data());
    // ... ('grid_buffer' below is a view over 'buf'; its construction is elided in this excerpt)

    // Reduce the values of the work-group and store the per-group result.
    DataType v = m_local_value;
    bool is_last = false;
    auto sycl_functor = ReduceFunctor::syclFunctor();
    DataType local_sum = sycl::reduce_over_group(id.get_group(), v, sycl_functor);
    grid_buffer[group_id] = local_sum;

    // Count the groups that have finished; the last one performs the final merge.
#if defined(__ADAPTIVECPP__)
    int* atomic_counter_ptr_as_int = reinterpret_cast<int*>(atomic_counter_ptr);
    sycl::atomic_ref<int, sycl::memory_order::relaxed, sycl::memory_scope::device> a(*atomic_counter_ptr_as_int);
#else
    sycl::atomic_ref<unsigned int, sycl::memory_order::relaxed, sycl::memory_scope::device> a(*atomic_counter_ptr);
#endif
    Int32 cx = a.fetch_add(1);
    if (cx == (nb_block - 1))
      is_last = true;

    // The last group merges the per-group partial results into grid_buffer[0]
    // and resets the counter for the next launch.
    if (is_last) {
      DataType my_total = grid_buffer[0];
      for (int x = 1; x < nb_block; ++x)
        my_total = sycl_functor(my_total, grid_buffer[x]);
      grid_buffer[0] = my_total;
      // ...
      *atomic_counter_ptr = 0;
    }
  }
#endif
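// ---------------------------------------------------------------------------
// Illustration (not part of the original header): the SYCL code above uses an
// atomic counter to elect the last work-group to finish, which then merges the
// per-group partial results. A minimal host-side analogue of the same pattern
// with standard C++ atomics; the name finalize_if_last_worker is hypothetical.

#include <atomic>
#include <vector>

// Each worker publishes its partial result, then increments a shared counter.
// The worker that performs the last increment merges every partial into
// partials[0] and resets the counter for the next round.
inline void finalize_if_last_worker(std::vector<double>& partials,
                                    std::atomic<unsigned int>& counter,
                                    int worker_id, double my_partial)
{
  const unsigned int nb_worker = static_cast<unsigned int>(partials.size());
  partials[worker_id] = my_partial;
  // acq_rel ordering: the last increment observes (and is ordered after) the
  // partials written before every other worker's increment.
  unsigned int cx = counter.fetch_add(1, std::memory_order_acq_rel);
  if (cx == nb_worker - 1) {
    double total = partials[0];
    for (unsigned int x = 1; x < nb_worker; ++x)
      total += partials[x];
    partials[0] = total;
    counter.store(0, std::memory_order_relaxed);
  }
}
// ---------------------------------------------------------------------------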
template <typename DataType>
class ReducerSum
: public Reducer<DataType, impl::ReduceFunctorSum<DataType>>
{
 public:

  using BaseClass = Reducer<DataType, impl::ReduceFunctorSum<DataType>>;
  using BaseClass::m_local_value;

  explicit ReducerSum(RunCommand& command)
  : BaseClass(command)
  {}

  ARCCORE_HOST_DEVICE DataType combine(DataType v) const
  {
    m_local_value += v;
    return m_local_value;
  }
  ARCCORE_HOST_DEVICE DataType add(DataType v) const
  // ...
template <typename DataType>
class ReducerMax
: public Reducer<DataType, impl::ReduceFunctorMax<DataType>>
{
 public:

  using BaseClass = Reducer<DataType, impl::ReduceFunctorMax<DataType>>;
  using BaseClass::m_local_value;

  explicit ReducerMax(RunCommand& command)
  : BaseClass(command)
  {}

  ARCCORE_HOST_DEVICE DataType combine(DataType v) const
  {
    m_local_value = v > m_local_value ? v : m_local_value;
    return m_local_value;
  }
  ARCCORE_HOST_DEVICE DataType max(DataType v) const
  // ...
template <typename DataType>
class ReducerMin
: public Reducer<DataType, impl::ReduceFunctorMin<DataType>>
{
 public:

  using BaseClass = Reducer<DataType, impl::ReduceFunctorMin<DataType>>;
  using BaseClass::m_local_value;

  explicit ReducerMin(RunCommand& command)
  : BaseClass(command)
  {}

  ARCCORE_HOST_DEVICE DataType combine(DataType v) const
  {
    m_local_value = v < m_local_value ? v : m_local_value;
    return m_local_value;
  }
  ARCCORE_HOST_DEVICE DataType min(DataType v) const
  // ...
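// ---------------------------------------------------------------------------
// Usage sketch (illustrative, not from the original header). Only ReducerMax,
// combine() and reducedValue() come from this file; makeCommand() and
// RUNCOMMAND_LOOP1 are assumptions about the surrounding Arcane accelerator
// API, and the exact launch syntax may differ:
//
//   auto command = makeCommand(queue);
//   Arcane::Accelerator::ReducerMax<double> max_reducer(command);
//   command << RUNCOMMAND_LOOP1(iter, nb_value)
//   {
//     auto [i] = iter();
//     max_reducer.combine(values[i]); // per-thread local combination
//   };
//   double max_value = max_reducer.reducedValue(); // final reduced value
// ---------------------------------------------------------------------------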
template <typename DataType>
class ReducerSum2
: public HostDeviceReducer2<DataType, impl::ReduceFunctorSum<DataType>>
{
 public:
  using BaseClass = HostDeviceReducer2<DataType, impl::ReduceFunctorSum<DataType>>;

  explicit ReducerSum2(RunCommand& command)
  : BaseClass(command)
  {}

  ARCCORE_HOST_DEVICE void combine(DataType v)
  {
    this->m_local_value += v;
  }
template <typename DataType>
class ReducerMax2
: public HostDeviceReducer2<DataType, impl::ReduceFunctorMax<DataType>>
{
 public:
  using BaseClass = HostDeviceReducer2<DataType, impl::ReduceFunctorMax<DataType>>;

  explicit ReducerMax2(RunCommand& command)
  : BaseClass(command)
  {}

  ARCCORE_HOST_DEVICE void combine(DataType v)
  {
    DataType& lv = this->m_local_value;
    lv = v > lv ? v : lv;
  }
template <typename DataType>
class ReducerMin2
: public HostDeviceReducer2<DataType, impl::ReduceFunctorMin<DataType>>
{
 public:
  using BaseClass = HostDeviceReducer2<DataType, impl::ReduceFunctorMin<DataType>>;

  explicit ReducerMin2(RunCommand& command)
  : BaseClass(command)
  {}

  ARCCORE_HOST_DEVICE void combine(DataType v)
  {
    DataType& lv = this->m_local_value;
    lv = v < lv ? v : lv;
  }
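// ---------------------------------------------------------------------------
// Usage sketch (illustrative, not from the original header). The *2 reducers
// expose the same combine()/reducedValue() pair; the assumption here, to be
// checked against the Arcane documentation, is that a HostDeviceReducer2
// instance is also passed to the kernel-launch macro:
//
//   Arcane::Accelerator::ReducerSum2<double> sum_reducer(command);
//   command << RUNCOMMAND_LOOP1(iter, nb_value, sum_reducer)
//   {
//     auto [i] = iter();
//     sum_reducer.combine(values[i]);
//   };
//   double total = sum_reducer.reducedValue();
// ---------------------------------------------------------------------------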
// Helper declarations for finalizing reducer arguments at the end of kernels
// (bodies elided in this excerpt).
template <typename DataType, typename ReduceFunctor>
// ...
template <typename DataType, typename ReduceFunctor>
// ...
template <typename DataType, typename ReduceFunctor>
static ARCCORE_DEVICE void
// ...
template <typename DataType, typename ReduceFunctor>
static ARCCORE_DEVICE void
// ...

#if defined(ARCANE_COMPILING_SYCL)
template <typename DataType, typename ReduceFunctor>
// ...
template <typename DataType, typename ReduceFunctor>
// ...
  reducer._internalExecWorkItemAtEnd(id);
//! Class to handle arguments of type HostDeviceReducer2 at the beginning and at the end of kernel execution.