8#include <gtest/gtest.h>
10#include "arccore/base/PlatformUtils.h"
11#include "arccore/base/ForLoopRanges.h"
13#include "arccore/common/accelerator/Runner.h"
14#include "arccore/common/accelerator/RunQueue.h"
15#include "arccore/common/NumArray.h"
17#define ARCCORE_EXPERIMENTAL_GRID_STRIDE
27#include "arccore/accelerator/internal/Initializer.h"
40 Int32 nb_value, Int32 nb_part, Int32 nb_loop,
bool is_async)
44 if ((nb_value % nb_part) != 0)
45 ARCCORE_FATAL(
"{0} is not a multiple of {1}", nb_value, nb_part);
51 for (
int j = 0; j < nb_loop; ++j) {
54 command.addNbThreadPerBlock(nb_thread);
55 command.addNbStride(nb_part);
58 reducer.combine(c_view[iter]);
60 Int64 tx = reducer.reducedValue();
65 Int64 nb_byte = c.
size() *
sizeof(Int64) * nb_loop;
67 Real nb_giga_byte_second = (
static_cast<Real
>(nb_byte) / 1.0e9) / diff;
68 std::cout <<
"** TotalReduceGridStride=" << total_x <<
" async?=" << is_async
69 <<
" nb_part=" << nb_part <<
" nb_value=" << nb_value
70 <<
" nb_thread=" << nb_thread
71 <<
" GB/s=" << nb_giga_byte_second <<
" time=" << diff <<
"\n";
#define ARCCORE_FATAL(...)
Macro throwing a FatalErrorException.
Types and functions for managing synchronizations on accelerators.
Types and macros for managing loops on accelerators.
#define RUNCOMMAND_LOOP1(iter_name, x1,...)
1D loop on accelerator with additional arguments.
Class to perform a 'sum' reduction.
Execution queue for an accelerator.
void setAsync(bool v)
Sets the instance's asynchronous state.
bool isAcceleratorPolicy() const
Indicates if the instance is associated with an accelerator.
View of an array of elements of type T.
constexpr __host__ __device__ SizeType size() const noexcept
Returns the size of the array.
Namespace for accelerator usage.
RunCommand makeCommand(const RunQueue &run_queue)
Creates a command associated with the queue run_queue.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-