85 template <
typename CompareLambda,
typename InputIterator,
typename OutputIterator>
87 OutputIterator output_iter,
const CompareLambda& compare_lambda)
89 RunQueue queue = s.m_queue;
90 eExecutionPolicy exec_policy = queue.executionPolicy();
91 switch (exec_policy) {
92#if defined(ARCANE_COMPILING_CUDA)
93 case eExecutionPolicy::CUDA: {
94 size_t temp_storage_size = 0;
95 cudaStream_t stream = Impl::CudaUtils::toNativeStream(&queue);
97 ARCANE_CHECK_CUDA(::cub::DeviceMergeSort::SortKeysCopy(
nullptr, temp_storage_size,
98 input_iter, output_iter, nb_item,
99 compare_lambda, stream));
101 s.m_algo_storage.allocate(temp_storage_size);
102 ARCANE_CHECK_CUDA(::cub::DeviceMergeSort::SortKeysCopy(s.m_algo_storage.address(), temp_storage_size,
103 input_iter, output_iter, nb_item,
104 compare_lambda, stream));
107#if defined(ARCANE_COMPILING_HIP)
108 case eExecutionPolicy::HIP: {
109 size_t temp_storage_size = 0;
110 hipStream_t stream = Impl::HipUtils::toNativeStream(&queue);
112 ARCANE_CHECK_HIP(rocprim::merge_sort(
nullptr, temp_storage_size, input_iter, output_iter,
113 nb_item, compare_lambda, stream));
115 s.m_algo_storage.allocate(temp_storage_size);
117 ARCANE_CHECK_HIP(rocprim::merge_sort(s.m_algo_storage.address(), temp_storage_size, input_iter, output_iter,
118 nb_item, compare_lambda, stream));
121#if defined(ARCANE_COMPILING_SYCL)
122 case eExecutionPolicy::SYCL: {
125 auto command = makeCommand(queue);
129 *(output_iter + i) = *(input_iter + i);
132#if defined(ARCANE_HAS_ONEDPL)
133 sycl::queue true_queue = AcceleratorUtils::toSyclNativeStream(queue);
134 auto policy = oneapi::dpl::execution::make_device_policy(true_queue);
135 oneapi::dpl::sort(policy, output_iter, output_iter + nb_item, compare_lambda);
136#elif defined(__ADAPTIVECPP__)
137 sycl::queue true_queue = AcceleratorUtils::toSyclNativeStream(queue);
138 sycl::event e = acpp::algorithms::sort(true_queue, output_iter, output_iter + nb_item, compare_lambda);
145 case eExecutionPolicy::Thread:
148 case eExecutionPolicy::Sequential: {
150 auto output_iter_begin = output_iter;
151 for (
Int32 i = 0; i < nb_item; ++i) {
152 *output_iter = *input_iter;
156 std::sort(output_iter_begin, output_iter, compare_lambda);