14#include "arcane/accelerator/core/RunCommandLaunchInfo.h"
16#include "arccore/base/FatalErrorException.h"
17#include "arccore/base/CheckedConvert.h"
18#include "arccore/base/ConcurrencyBase.h"
20#include "arcane/accelerator/core/RunCommand.h"
21#include "arcane/accelerator/core/internal/RunQueueImpl.h"
22#include "arcane/accelerator/core/NativeStream.h"
23#include "arcane/accelerator/core/internal/IRunnerRuntime.h"
28namespace Arcane::Accelerator::impl
// Constructor: builds the launch information for `command` and records
// `total_loop_size` (the total size of the loop) in m_total_loop_size.
// NOTE(review): part of the member-initializer list and the enclosing braces
// are on lines absent from this listing; m_command is presumably initialized
// from `command` there — confirm against the full file.
35RunCommandLaunchInfo(RunCommand& command,
Int64 total_loop_size)
37, m_total_loop_size(total_loop_size)
// Cache the queue implementation behind the command, and the execution
// policy reported by that queue.
39 m_queue_impl = m_command._internalQueueImpl();
40 m_exec_policy = m_queue_impl->executionPolicy();
// Compute the kernel launch arguments, then ask the command to allocate its
// reduce memory, passing the resulting number of blocks per grid.
// NOTE(review): the lines between the policy lookup and these two statements
// are not shown; they may guard this work (e.g. accelerator-only) — confirm.
44 m_kernel_launch_args = _computeKernelLaunchArgs();
45 m_command._allocateReduceMemory(m_kernel_launch_args.nbBlockPerGrid());
53~RunCommandLaunchInfo()
// Marks the start of execution: sets m_has_exec_begun and notifies the
// command that the kernel launch begins.
// NOTE(review): the line carrying the method name is missing from this
// listing; presumably this is beginExecute(), the method that the
// "beginExecute() has to be called before endExecute()" check refers to —
// confirm. Any statements preceding the flag update are likewise not visible.
63void RunCommandLaunchInfo::
68 m_has_exec_begun =
true;
69 m_command._internalNotifyBeginLaunchKernel();
// Guard at the start of the end-of-execution path: if execution was never
// marked as begun (m_has_exec_begun is false), raise a fatal error through
// ARCANE_FATAL (a macro that throws a FatalErrorException).
// NOTE(review): the method-name line and the rest of the body are missing
// from this listing; presumably this is endExecute() — confirm.
82void RunCommandLaunchInfo::
85 if (!m_has_exec_begun)
86 ARCANE_FATAL(
"beginExecute() has to be called before endExecute()");
// Notifies the command that the kernel launch has ended and records that the
// notification was sent by setting m_is_notify_end_kernel_done.
// NOTE(review): the method-name line is missing from this listing, and so is
// the statement controlled by the `if` below; it is presumably an early
// return so that the notification is sent only once — confirm.
93void RunCommandLaunchInfo::
96 if (m_is_notify_end_kernel_done)
98 m_is_notify_end_kernel_done =
true;
99 m_command._internalNotifyEndLaunchKernel();
// Returns the NativeStream obtained from the associated command; this is a
// plain forward to RunCommand::_internalNativeStream().
109NativeStream RunCommandLaunchInfo::
110_internalNativeStream()
112 return m_command._internalNativeStream();
// Computes the (blocks per grid, threads per block) pair used to launch a
// kernel covering m_total_loop_size iterations.
// NOTE(review): the return-type line and the braces are missing from this
// listing; the result is built from the two values in the final `return`.
121_computeKernelLaunchArgs()
const
// Use the thread-per-block count requested on the command; fall back to 256
// when the command reports a non-positive value.
123 int threads_per_block = m_command.nbThreadPerBlock();
124 if (threads_per_block<=0)
125 threads_per_block = 256;
// Ceiling division: smallest number of blocks whose combined threads cover
// the whole loop. A total loop size of 0 yields 0 blocks.
126 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
// Narrow the 64-bit block count to 32 bits through CheckedConvert.
// NOTE(review): presumably toInt32() reports an error when the value does not
// fit in an Int32 — confirm against CheckedConvert.
127 int blocks_per_grid = CheckedConvert::toInt32(big_b);
128 return { blocks_per_grid, threads_per_block };
// Computes the options used for multi-threaded loops.
// NOTE(review): the declarations of `opt` and `nb_thread`, the use made of
// `grain_size` and the return statement are on lines missing from this
// listing; only the visible statements are described here.
135computeParallelLoopOptions()
const
// Dynamic grain-size computation is hard-wired on.
138 const bool use_dynamic_compute =
true;
// Only derive a grain size when none was set on the options (grainSize() == 0).
142 if (use_dynamic_compute && opt.
grainSize() == 0) {
// Grain size = total loop size / (2 * nb_thread), computed in double and
// truncated to Int32.
// NOTE(review): presumably this aims at about two intervals per thread and
// the value is then stored via setGrainSize() — confirm.
148 Int32 grain_size =
static_cast<Int32>((double)m_total_loop_size / (nb_thread * 2.0));
// Attaches the command's execution-statistics object to m_loop_run_info.
// NOTE(review): the method-name line and the other statements of the body are
// missing from this listing; presumably this is _computeLoopRunInfo() — confirm.
161void RunCommandLaunchInfo::
166 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
// Asks the queue's runtime to compute kernel launch arguments for a specific
// kernel.
//   func               - opaque pointer forwarded to the runtime
//                        (presumably the kernel entry point — confirm).
//   shared_memory_size - value forwarded to the runtime unchanged.
// The runtime also receives the arguments already stored in
// m_kernel_launch_args and the total loop size. `computeKernalLaunchArgs`
// (sic) is the runtime method's name as spelled at this call site.
// NOTE(review): the return-type line is missing from this listing.
181_threadBlockInfo(
const void* func,
Int32 shared_memory_size)
const
183 return m_queue_impl->_internalRuntime()->computeKernalLaunchArgs(m_kernel_launch_args, func,
totalLoopSize(), shared_memory_size);
// Forwards an opaque SYCL event pointer to the command through
// _internalNotifyBeginLaunchKernelSyclEvent().
//   sycl_event_ptr - type-erased pointer, passed through unchanged.
189void RunCommandLaunchInfo::
190_addSyclEvent(
void* sycl_event_ptr)
192 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Arguments pour lancer un kernel.
Int64 totalLoopSize() const
Taille totale de la boucle.
ParallelLoopOptions computeParallelLoopOptions() const
Calcule et retourne les informations pour les boucles multi-thread.
void _computeLoopRunInfo()
Calcule la valeur de m_loop_run_info.
File d'exécution pour accélérateur.
void _internalBarrier()
Bloque jusqu'à ce que toutes les commandes soient terminées.
static Int32 maxAllowedThread()
Nombre maximum de threads autorisés pour le multi-threading.
Informations d'exécution d'une boucle.
Informations de trace pour une boucle 'for'.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
@ Thread
Politique d'exécution multi-thread.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indique si exec_policy correspond à un accélérateur.
std::int64_t Int64
Type entier signé sur 64 bits.
std::int32_t Int32
Type entier signé sur 32 bits.