8#include <gtest/gtest.h>
10#include "arccore/base/PlatformUtils.h"
12#include "arccore/common/accelerator/Runner.h"
13#include "arccore/common/accelerator/RunQueue.h"
14#include "arccore/common/NumArray.h"
19#include "arccore/accelerator/internal/Initializer.h"
32 Int32 nb_value, Int32 nb_part, Int32 nb_loop,
bool is_async)
36 if ((nb_value % nb_part) != 0)
37 ARCCORE_FATAL(
"{0} is not a multiple of {1}", nb_value, nb_part);
38 Int32 nb_true_value = nb_value / nb_part;
39 Int32 offset = nb_true_value;
43 for (
int j = 0; j < nb_loop; ++j) {
46 command.addNbThreadPerBlock(nb_thread);
50 for (Int32 k = 0; k < nb_part; ++k)
51 reducer.combine(c_view[i + (offset * k)]);
53 Int64 tx = reducer.reducedValue();
58 Int64 nb_byte = c.
size() *
sizeof(Int64) * nb_loop;
60 Real nb_giga_byte_second = (
static_cast<Real
>(nb_byte) / 1.0e9) / diff;
61 std::cout <<
"** TotalReduceDirect=" << total_x <<
" async?=" << is_async
62 <<
" nb_part=" << nb_part <<
" nb_value=" << nb_value
63 <<
" nb_thread=" << nb_thread
64 <<
" GB/s=" << nb_giga_byte_second <<
" time=" << diff <<
"\n";
82 reducer.combine(iter);
84 Int64 tx = reducer.reducedValue();
87 std::cout <<
"** TotalReduceEmpty=" << total_x <<
"\n";
#define ARCCORE_FATAL(...)
Macro throwing a FatalErrorException.
Types and functions for managing synchronizations on accelerators.
Types and macros for managing loops on accelerators.
#define RUNCOMMAND_LOOP1(iter_name, x1,...)
1D loop on accelerator with additional arguments.
Class to perform a 'sum' reduction.
Execution queue for an accelerator.
void setAsync(bool v)
Sets the instance's asynchronous state.
View of an array of elements of type T.
constexpr __host__ __device__ SizeType size() const noexcept
Returns the size of the array.
Namespace for accelerator usage.
RunCommand makeCommand(const RunQueue &run_queue)
Creates a command associated with the queue run_queue.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-