86    command << trace_info;
 
   89    switch (exec_policy) {
 
   90#if defined(ARCANE_COMPILING_CUDA) 
   92      size_t temp_storage_size = 0;
 
   93      cudaStream_t stream = impl::CudaUtils::toNativeStream(&queue);
 
   95      int* nb_list1_ptr = 
nullptr;
 
   96      ARCANE_CHECK_CUDA(::cub::DevicePartition::If(
nullptr, temp_storage_size,
 
   97                                                   input_iter, output_iter, nb_list1_ptr, nb_item,
 
   98                                                   select_lambda, stream));
 
  100      s.m_algo_storage.allocate(temp_storage_size);
 
  101      nb_list1_ptr = s.m_device_nb_list1_storage.allocate();
 
  102      ARCANE_CHECK_CUDA(::cub::DevicePartition::If(s.m_algo_storage.address(), temp_storage_size,
 
  103                                                   input_iter, output_iter, nb_list1_ptr, nb_item,
 
  104                                                   select_lambda, stream));
 
  105      s.m_device_nb_list1_storage.
copyToAsync(s.m_host_nb_list1_storage, queue);
 
  108#if defined(ARCANE_COMPILING_HIP) 
  110      size_t temp_storage_size = 0;
 
  112      hipStream_t stream = impl::HipUtils::toNativeStream(&queue);
 
  113      int* nb_list1_ptr = 
nullptr;
 
  114      ARCANE_CHECK_HIP(rocprim::partition(
nullptr, temp_storage_size, input_iter, output_iter,
 
  115                                          nb_list1_ptr, nb_item, select_lambda, stream));
 
  117      s.m_algo_storage.allocate(temp_storage_size);
 
  118      nb_list1_ptr = s.m_device_nb_list1_storage.allocate();
 
  120      ARCANE_CHECK_HIP(rocprim::partition(s.m_algo_storage.address(), temp_storage_size, input_iter, output_iter,
 
  121                                          nb_list1_ptr, nb_item, select_lambda, stream));
 
  122      s.m_device_nb_list1_storage.
copyToAsync(s.m_host_nb_list1_storage, queue);
 
  125#if defined(ARCANE_COMPILING_SYCL) && defined(__INTEL_LLVM_COMPILER) 
  137      using InputDataType = 
typename InputIterator::value_type;
 
  138      using DataType = 
typename OutputIterator::value_type;
 
  148          tmp_output[i] = input_iter[i];
 
  151      auto tmp_select_lambda = [=](
Int32 i) { 
return tmp_select[i]; };
 
  152      sycl::queue sycl_queue = impl::SyclUtils::toNativeStream(queue);
 
  153      auto policy = oneapi::dpl::execution::make_device_policy(sycl_queue);
 
  154      auto output_after = oneapi::dpl::stable_partition(policy, tmp_output.begin(), tmp_output.end(), select_lambda);
 
  156      Int32 nb_list1 = (output_after - tmp_output.begin());
 
  157      Int32 nb_list2 = nb_item - nb_list1;
 
  158      s.m_host_nb_list1_storage[0] = nb_list1;
 
  167        Int32 nb_iter2 = (nb_list2 / 2) + (nb_list2 % 2);
 
  174            Int32 j = i - nb_list1;
 
  175            Int32 reverse_i = (nb_item - (j + 1));
 
  176            auto x1 = tmp_output[i];
 
  177            auto x2 = tmp_output[reverse_i];
 
  178            output_iter[i] = tmp_output[reverse_i];
 
  179            output_iter[reverse_i] = tmp_output[i];
 
  182            output_iter[i] = tmp_output[i];
 
  192      auto saved_output_iter = output_iter;
 
  193      auto output2_iter = output_iter + nb_item;
 
  194      for (
Int32 i = 0; i < nb_item; ++i) {
 
  195        auto v = *input_iter;
 
  196        if (select_lambda(v)) {
 
  206      Int32 nb_list1 = 
static_cast<Int32>(output_iter - saved_output_iter);
 
  207      s.m_host_nb_list1_storage[0] = nb_list1;
 
 
  222              InputIterator input_iter,
 
  223              FirstOutputIterator first_output_iter,
 
  224              SecondOutputIterator second_output_iter,
 
  225              UnselectedIterator unselected_iter,
 
  226              const Select1Lambda& select1_lambda,
 
  227              const Select2Lambda& select2_lambda,
 
  233    command << trace_info;
 
  236    switch (exec_policy) {
 
  237#if defined(ARCANE_COMPILING_CUDA) 
  239      size_t temp_storage_size = 0;
 
  240      cudaStream_t stream = impl::CudaUtils::toNativeStream(&queue);
 
  242      int* nb_list1_ptr = 
nullptr;
 
  243      ARCANE_CHECK_CUDA(::cub::DevicePartition::If(
nullptr, temp_storage_size,
 
  244                                                   input_iter, first_output_iter, second_output_iter,
 
  245                                                   unselected_iter, nb_list1_ptr, nb_item,
 
  246                                                   select1_lambda, select2_lambda, stream));
 
  248      s.m_algo_storage.allocate(temp_storage_size);
 
  249      nb_list1_ptr = s.m_device_nb_list1_storage.allocate();
 
  250      ARCANE_CHECK_CUDA(::cub::DevicePartition::If(s.m_algo_storage.address(), temp_storage_size,
 
  251                                                   input_iter, first_output_iter, second_output_iter,
 
  252                                                   unselected_iter, nb_list1_ptr, nb_item,
 
  253                                                   select1_lambda, select2_lambda, stream));
 
  254      s.m_device_nb_list1_storage.
copyToAsync(s.m_host_nb_list1_storage, queue);
 
  257#if defined(ARCANE_COMPILING_HIP) 
  259      size_t temp_storage_size = 0;
 
  261      hipStream_t stream = impl::HipUtils::toNativeStream(&queue);
 
  262      int* nb_list1_ptr = 
nullptr;
 
  263      using namespace rocprim;
 
  264      ARCANE_CHECK_HIP(::rocprim::partition_three_way(
nullptr, temp_storage_size, input_iter, first_output_iter,
 
  265                                                      second_output_iter, unselected_iter,
 
  266                                                      nb_list1_ptr, nb_item, select1_lambda, select2_lambda, stream));
 
  268      s.m_algo_storage.allocate(temp_storage_size);
 
  269      nb_list1_ptr = s.m_device_nb_list1_storage.allocate();
 
  271      ARCANE_CHECK_HIP(partition_three_way(s.m_algo_storage.address(), temp_storage_size, input_iter, first_output_iter,
 
  272                                           second_output_iter, unselected_iter, nb_list1_ptr, nb_item,
 
  273                                           select1_lambda, select2_lambda, stream));
 
  274      s.m_device_nb_list1_storage.
copyToAsync(s.m_host_nb_list1_storage, queue);
 
  283      for (
Int32 i = 0; i < nb_item; ++i) {
 
  284        auto v = *input_iter;
 
  285        bool is_1 = select1_lambda(v);
 
  286        bool is_2 = select2_lambda(v);
 
  288          *first_output_iter = v;
 
  294            *second_output_iter = v;
 
  295            ++second_output_iter;
 
  299            *unselected_iter = v;
 
  306      s.m_host_nb_list1_storage[0] = nb_first;
 
  307      s.m_host_nb_list1_storage[1] = nb_second;