14#include "arccore/common/accelerator/RunCommandLaunchInfo.h"
16#include "arccore/base/FatalErrorException.h"
17#include "arccore/base/CheckedConvert.h"
18#include "arccore/base/ConcurrencyBase.h"
20#include "arccore/common/accelerator/KernelLaunchArgs.h"
21#include "arccore/common/accelerator/RunCommand.h"
22#include "arccore/common/accelerator/NativeStream.h"
23#include "arccore/common/accelerator/internal/RunQueueImpl.h"
24#include "arccore/common/accelerator/internal/IRunnerRuntime.h"
29namespace Arcane::Accelerator::Impl
// Fragment of a void member function of RunCommandLaunchInfo.
// NOTE(review): the line carrying the member name is absent from this listing
// (the embedded numbering jumps from 35 to 38); presumably an initialisation
// helper shared by the constructors - confirm against the real file.
35void RunCommandLaunchInfo::
// Cache the queue implementation exposed by the command.
38 m_queue_impl = m_command._internalQueueImpl();
// Record the execution policy reported by that queue.
39 m_exec_policy = m_queue_impl->executionPolicy();
// NOTE(review): embedded lines 40-42 are missing, so whether the two calls
// below are guarded by a condition cannot be determined from this listing.
43 _computeInitialKernelLaunchArgs();
// Pass the blocks-per-grid count of the freshly computed launch arguments to
// the command's _allocateReduceMemory().
44 m_command._allocateReduceMemory(m_kernel_launch_args.nbBlockPerGrid());
// Constructor taking the command to launch and the total loop size.
// The only initializer visible here stores total_loop_size in m_total_loop_size.
// NOTE(review): the other initializers and the constructor body are not part
// of this listing (embedded lines 53 and 55+ are missing).
52RunCommandLaunchInfo(RunCommand& command,
Int64 total_loop_size)
54, m_total_loop_size(total_loop_size)
// Constructor taking the command, the total loop size and a flag requesting a
// cooperative launch.
// Visible initializers: m_total_loop_size <- total_loop_size and
// m_is_cooperative_launch <- is_cooperative.
// NOTE(review): the first initializer and the constructor body are not part of
// this listing (embedded lines 64 and 67+ are missing).
63RunCommandLaunchInfo(RunCommand& command, Int64 total_loop_size,
bool is_cooperative)
65, m_total_loop_size(total_loop_size)
66, m_is_cooperative_launch(is_cooperative)
// Destructor.
// NOTE(review): its body is not present in this listing, so nothing can be
// stated here about what it does.
75~RunCommandLaunchInfo()
// Fragment of a void member function of RunCommandLaunchInfo.
// NOTE(review): the member-name line is missing from this listing; presumably
// beginExecute(), since it is the code that sets m_has_exec_begun - confirm.
85void RunCommandLaunchInfo::
// Mark that execution has started.
90 m_has_exec_begun =
true;
// Tell the command that the kernel launch is beginning.
91 m_command._internalNotifyBeginLaunchKernel();
// Build the loop run information stored in this object.
93 _computeLoopRunInfo();
// Fragment of a void member function of RunCommandLaunchInfo.
// NOTE(review): the member-name line is missing from this listing; presumably
// endExecute(), given the wording of the error message below - confirm.
104void RunCommandLaunchInfo::
// Refuse to finish an execution that was never started: ARCCORE_FATAL raises
// a FatalErrorException when m_has_exec_begun is false.
107 if (!m_has_exec_begun)
108 ARCCORE_FATAL(
"beginExecute() has to be called before endExecute()");
// Perform the end-of-launch work.
109 _doEndKernelLaunch();
// Fragment of a void member function of RunCommandLaunchInfo.
// NOTE(review): the member-name line is missing from this listing; presumably
// _doEndKernelLaunch() - confirm.
115void RunCommandLaunchInfo::
118 if (m_is_notify_end_kernel_done)
// NOTE(review): embedded line 119 is missing here; presumably an early return
// so that the end-of-launch notification is sent at most once - confirm.
120 m_is_notify_end_kernel_done =
true;
// Tell the command that the kernel launch has ended.
121 m_command._internalNotifyEndLaunchKernel();
123 Impl::RunQueueImpl* q = m_queue_impl;
// Call the queue's _internalBarrier() when the queue is not asynchronous or
// when a barrier was requested through m_is_need_barrier.
124 if (!q->isAsync() || m_is_need_barrier)
125 q->_internalBarrier();
// Returns the NativeStream obtained from the command's _internalNativeStream().
131NativeStream RunCommandLaunchInfo::
132_internalNativeStream()
134 return m_command._internalNativeStream();
// Computes the initial kernel launch arguments (blocks per grid, threads per
// block, shared memory size, cooperative flag) and stores them in
// m_kernel_launch_args.
143void RunCommandLaunchInfo::
144_computeInitialKernelLaunchArgs()
// Start from the thread count requested on the command; a non-positive value
// falls back to 256 threads per block.
146 int threads_per_block = m_command.nbThreadPerBlock();
147 if (threads_per_block<=0)
148 threads_per_block = 256;
// Ceiling division: number of blocks needed to cover m_total_loop_size
// iterations with threads_per_block threads each.
149 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
// The 64-bit block count is narrowed to 32 bits through CheckedConvert::toInt32.
150 int blocks_per_grid = CheckedConvert::toInt32(big_b);
151 m_kernel_launch_args = KernelLaunchArgs(blocks_per_grid, threads_per_block);
// Propagate the command's shared memory size and this object's cooperative
// launch flag to the launch arguments.
152 m_kernel_launch_args.setSharedMemorySize(m_command._sharedMemory());
153 m_kernel_launch_args.setIsCooperative(m_is_cooperative_launch);
// Fragment of the const member computeParallelLoopOptions().
// NOTE(review): the return-type line and the declarations of `opt` and
// `nb_thread` are missing from this listing, as is everything after the
// grain-size computation; only the visible statements are described.
160computeParallelLoopOptions()
const
// Dynamic computation of the grain size is always enabled here.
163 const bool use_dynamic_compute =
true;
// Only compute a grain size when the options do not already specify one
// (grainSize() == 0).
167 if (use_dynamic_compute && opt.
grainSize() == 0) {
// Grain size = total loop size divided by twice nb_thread, computed in
// floating point and then truncated to Int32 by the cast.
173 Int32 grain_size =
static_cast<Int32>((double)m_total_loop_size / (nb_thread * 2.0));
// Fragment of a void member function of RunCommandLaunchInfo.
// NOTE(review): the member-name line and the declaration of `lti` are missing
// from this listing; presumably _computeLoopRunInfo() - confirm.
186void RunCommandLaunchInfo::
// Build the loop run information from the computed parallel loop options and
// `lti`, and store it in m_loop_run_info.
190 m_loop_run_info =
ForLoopRunInfo(computeParallelLoopOptions(), lti);
// Attach the command's execution statistics object to the loop run information.
191 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
// Returns the kernel launch arguments computed by the runtime of the queue
// for the kernel function `func`, starting from m_kernel_launch_args.
// NOTE(review): the argument list of the call below is cut off in this
// listing; the remaining arguments are not visible.
205KernelLaunchArgs RunCommandLaunchInfo::
206_computeKernelLaunchArgs(
const void* func)
const
// Runtime associated with the queue this command runs on.
208 Impl::IRunnerRuntime* r = m_queue_impl->_internalRuntime();
// `computeKernalLaunchArgs` is the identifier as written in this file; it is
// declared outside this listing, so its spelling is kept unchanged.
210 return r->computeKernalLaunchArgs(m_kernel_launch_args, func,
// Forwards the opaque pointer `sycl_event_ptr` to the command's
// _internalNotifyBeginLaunchKernelSyclEvent().
// NOTE(review): presumably points to a SYCL event object - the pointee type is
// not visible here.
217void RunCommandLaunchInfo::
218_addSyclEvent(
void* sycl_event_ptr)
220 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
// Returns the value of m_is_cooperative_launch.
// NOTE(review): embedded lines 228-229 are missing from this listing, so
// whether anything precedes the return statement cannot be told from here.
226bool RunCommandLaunchInfo::
227_isUseCooperativeLaunch()
const
230 return m_is_cooperative_launch;
// Const member returning bool.
// NOTE(review): its body is not present in this listing, so the returned
// value cannot be documented from here.
235bool RunCommandLaunchInfo::
236_isUseCudaLaunchKernel()
const
// Stores `v` in m_is_need_barrier. Elsewhere in this file that flag is one of
// the two conditions under which the queue's _internalBarrier() is called
// after the kernel launch ends.
245void RunCommandLaunchInfo::
246_setIsNeedBarrier(
bool v)
248 m_is_need_barrier = v;
#define ARCCORE_FATAL(...)
Macro envoyant une exception FatalErrorException.
static Int32 maxAllowedThread()
Nombre maximum de threads autorisés pour le multi-threading.
Informations d'exécution d'une boucle.
Informations de trace pour une boucle 'for'.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
@ Thread
Politique d'exécution multi-thread.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indique si exec_policy correspond à un accélérateur.
std::int64_t Int64
Type entier signé sur 64 bits.
std::int32_t Int32
Type entier signé sur 32 bits.