14#include "arccore/common/accelerator/RunCommandLaunchInfo.h"
16#include "arccore/base/FatalErrorException.h"
17#include "arccore/base/CheckedConvert.h"
18#include "arccore/base/ConcurrencyBase.h"
20#include "arccore/common/accelerator/KernelLaunchArgs.h"
21#include "arccore/common/accelerator/RunCommand.h"
22#include "arccore/common/accelerator/NativeStream.h"
23#include "arccore/common/accelerator/internal/RunQueueImpl.h"
24#include "arccore/common/accelerator/internal/IRunnerRuntime.h"
25#include "arccore/common/accelerator/internal/RunCommandImpl.h"
30namespace Arcane::Accelerator::Impl
// Member function of RunCommandLaunchInfo whose name line is not visible in
// this listing. The visible statements cache the queue implementation and its
// execution policy, request reduction memory, and record the reduction state.
36void RunCommandLaunchInfo::
39 m_queue_impl = m_command._internalQueueImpl();
40 m_exec_policy = m_queue_impl->executionPolicy();
// Passes the number of blocks per grid of the current launch arguments to the
// command's reduce-memory allocation.
44 m_command._allocateReduceMemory(m_kernel_launch_args.nbBlockPerGrid());
// Remember whether the command has an active reduction; this flag is one of
// the conditions tested (with m_is_need_barrier) when the kernel launch ends.
46 m_is_forced_need_barrier = m_command.m_p->hasActiveReduction();
// Constructor taking the command and the total loop size.
// Stores total_loop_size in m_total_loop_size; the other initializers and the
// constructor body are not visible in this listing.
54RunCommandLaunchInfo(RunCommand& command,
Int64 total_loop_size)
56, m_total_loop_size(total_loop_size)
// Constructor taking the command, the total loop size and a cooperative-launch
// flag. Stores is_cooperative in m_is_cooperative_launch and total_loop_size in
// m_total_loop_size; the other initializers and the body are not visible here.
65RunCommandLaunchInfo(RunCommand& command, Int64 total_loop_size,
bool is_cooperative)
67, m_is_cooperative_launch(is_cooperative)
68, m_total_loop_size(total_loop_size)
// Destructor, explicitly declared noexcept(false) so that it is allowed to
// propagate an exception. Its body is not visible in this listing.
77~RunCommandLaunchInfo() noexcept(false)
// Start-of-execution step (name line not visible; presumably beginExecute() --
// confirm). Marks execution as begun, then notifies the command that the
// kernel launch is starting. m_has_exec_begun is the flag checked before the
// end-of-execution step, which raises a fatal error when it is not set.
87void RunCommandLaunchInfo::
92 m_has_exec_begun =
true;
93 m_command._internalNotifyBeginLaunchKernel();
// End-of-execution step (name line not visible; presumably endExecute(), as
// the error message suggests -- confirm).
// Raises a fatal error if execution was never marked as begun, otherwise
// delegates to _doEndKernelLaunch().
107void RunCommandLaunchInfo::
110 if (!m_has_exec_begun)
111 ARCCORE_FATAL(
"beginExecute() has to be called before endExecute()");
112 _doEndKernelLaunch();
// End-of-kernel-launch handling (name line not visible; presumably
// _doEndKernelLaunch() -- confirm).
// The statement guarded by the first test is not visible in this listing;
// presumably an early return so the end notification is sent only once.
118void RunCommandLaunchInfo::
121 if (m_is_notify_end_kernel_done)
123 m_is_notify_end_kernel_done =
true;
124 m_command._internalNotifyEndLaunchKernel();
// Condition holds when the queue is not asynchronous, or when either barrier
// flag is set. The guarded statement is not visible here (presumably a
// barrier on the queue -- confirm).
127 if (!q->isAsync() || m_is_need_barrier || m_is_forced_need_barrier)
// Returns the native stream of the associated command by forwarding to
// RunCommand::_internalNativeStream().
134NativeStream RunCommandLaunchInfo::
135_internalNativeStream()
137 return m_command._internalNativeStream();
// Computes the initial kernel launch arguments (blocks per grid, threads per
// block, shared memory size, cooperative flag) from m_total_loop_size and the
// settings of the associated command, and stores them in m_kernel_launch_args.
147void RunCommandLaunchInfo::
148_computeInitialKernelLaunchArgs()
150 int threads_per_block = m_command.nbThreadPerBlock();
// Fall back to 256 threads per block when the command gives no positive value.
151 if (threads_per_block <= 0)
152 threads_per_block = 256;
// Ceiling division: number of blocks needed to cover the whole loop.
153 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
// Narrow the 64-bit block count to Int32 (presumably with an overflow check,
// given the CheckedConvert helper -- confirm).
154 int blocks_per_grid = CheckedConvert::toInt32(big_b);
155 m_kernel_launch_args =
KernelLaunchArgs(blocks_per_grid, threads_per_block);
156 m_kernel_launch_args.setSharedMemorySize(m_command._sharedMemory());
157 m_kernel_launch_args.setIsCooperative(m_is_cooperative_launch);
// Computes loop options for multi-threaded execution. Only part of the body
// is visible in this listing (the origin of `opt` and `nb_thread` and the
// returned value are not shown).
164computeParallelLoopOptions()
const
// Dynamic grain-size computation is unconditionally enabled here.
167 const bool use_dynamic_compute =
true;
// Only compute a grain size when `opt` reports a grain size of 0.
171 if (use_dynamic_compute && opt.
grainSize() == 0) {
// Grain size = total loop size / (2 * nb_thread), truncated to Int32,
// i.e. roughly two iteration intervals per thread.
177 Int32 grain_size =
static_cast<Int32>((double)m_total_loop_size / (nb_thread * 2.0));
// Member function whose name line is not visible (presumably
// _computeLoopRunInfo() -- confirm). The visible statement attaches the
// command's execution statistics object to m_loop_run_info.
191void RunCommandLaunchInfo::
196 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
// Returns the launch arguments for the kernel function `func` by delegating
// to `r` (its definition is not visible in this listing; presumably the
// runner runtime -- confirm), starting from the initial m_kernel_launch_args.
// The remaining call arguments are cut off in this listing.
212_computeKernelLaunchArgs(
const void* func)
const
216 return r->computeKernalLaunchArgs(m_kernel_launch_args, func,
// Forwards an opaque SYCL event pointer to the command through
// RunCommand::_internalNotifyBeginLaunchKernelSyclEvent().
223void RunCommandLaunchInfo::
224_addSyclEvent(
void* sycl_event_ptr)
226 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
// Returns the value of m_is_cooperative_launch, i.e. whether a cooperative
// launch was requested for this instance.
232bool RunCommandLaunchInfo::
233_isUseCooperativeLaunch()
const
236 return m_is_cooperative_launch;
// Const boolean query; its body is not visible in this listing, so the
// returned condition cannot be documented from here.
241bool RunCommandLaunchInfo::
242_isUseCudaLaunchKernel()
const
// Sets m_is_need_barrier to `v`. That flag is one of the conditions tested
// when the kernel launch ends.
251void RunCommandLaunchInfo::
252_setIsNeedBarrier(
bool v)
254 m_is_need_barrier = v;
#define ARCCORE_FATAL(...)
Macro throwing a FatalErrorException.
Interface of the runtime associated with an accelerator.
Arguments for launching a kernel.
void _computeInitialKernelLaunchArgs()
Computes the initial kernel block/thread/grid values from the total loop size.
Int64 totalLoopSize() const
Total loop size.
void _computeLoopRunInfo()
Calculates the value of m_loop_run_info.
ParallelLoopOptions computeParallelLoopOptions() const
Calculates and returns the information for multi-thread loops.
Execution queue for accelerator.
void _internalBarrier()
Blocks until all commands are finished.
static Int32 maxAllowedThread()
Maximum number of allowed threads for multi-threading.
Loop execution information.
Trace information for a 'for' loop.
Execution options for a parallel loop in multi-threading.
Integer grainSize() const
Size of an iteration interval.
Int32 maxThread() const
Maximum number of allowed threads.
void setGrainSize(Integer v)
Sets the approximate size of an iteration interval.
eExecutionPolicy::Thread
Multi-threaded execution policy.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indicates if exec_policy corresponds to an accelerator.
std::int64_t Int64
64-bit signed integer type.
std::int32_t Int32
32-bit signed integer type.