14#include "arcane/accelerator/RunCommandLaunchInfo.h"
16#include "arcane/utils/CheckedConvert.h"
17#include "arcane/utils/PlatformUtils.h"
19#include "arcane/accelerator/core/RunQueue.h"
20#include "arcane/accelerator/core/internal/IRunQueueStream.h"
25namespace Arcane::Accelerator::impl
// Constructor: stores the total number of loop iterations in
// m_total_loop_size and derives the thread-block layout from it by calling
// _computeThreadBlockInfo().
// NOTE(review): the remaining member initializers and the rest of the body
// are not visible in this listing.
32RunCommandLaunchInfo(RunCommand& command, Int64 total_loop_size)
34, m_total_loop_size(total_loop_size)
36 m_thread_block_info = _computeThreadBlockInfo();
44~RunCommandLaunchInfo()
// Caches what the launch needs from the command's queue: the queue's
// internal stream (m_queue_stream) and internal runtime (m_runtime).
// Then calls m_command._allocateReduceMemory() with the number of blocks
// per grid taken from m_thread_block_info.
// NOTE(review): the method-name line is missing from this listing.
54void RunCommandLaunchInfo::
57 const RunQueue& queue = m_command._internalQueue();
59 m_queue_stream = queue._internalStream();
60 m_runtime = queue._internalRuntime();
61 m_command._allocateReduceMemory(m_thread_block_info.nb_block_per_grid);
// Marks execution as started by setting m_has_exec_begun, then tells the
// command that the kernel launch is beginning.
// NOTE(review): the method-name line and any preceding checks are missing
// from this listing; presumably this is beginExecute() — confirm.
67void RunCommandLaunchInfo::
72 m_has_exec_begun =
true;
73 m_command._internalNotifyBeginLaunchKernel();
// Raises a fatal error (ARCANE_FATAL) when called while m_has_exec_begun is
// still false, i.e. before execution has been marked as begun.
// NOTE(review): the method-name line and the statements following the guard
// are missing from this listing; the message suggests this is endExecute().
84void RunCommandLaunchInfo::
87 if (!m_has_exec_begun)
88 ARCANE_FATAL(
"beginExecute() has to be called before endExecute()");
// End-of-launch notification for the command.
// m_is_notify_end_kernel_done is tested first and then set to true,
// presumably so that _internalNotifyEndLaunchKernel() is sent only once
// (the statement guarded by the test is not visible here — TODO confirm).
// The command's queue is then fetched; what is done with it is not visible
// in this listing.
95void RunCommandLaunchInfo::
98 if (m_is_notify_end_kernel_done)
100 m_is_notify_end_kernel_done =
true;
101 m_command._internalNotifyEndLaunchKernel();
103 const RunQueue& q = m_command._internalQueue();
// Returns, as an opaque void*, whatever m_queue_stream->_internalImpl()
// yields.
// NOTE(review): the method-name line is missing from this listing, and
// m_queue_stream is dereferenced without a visible null check.
111void* RunCommandLaunchInfo::
114 return m_queue_stream->_internalImpl();
// Computes the launch layout for m_total_loop_size iterations.
// Returns a ThreadBlockInfo initialised with { blocks_per_grid,
// threads_per_block }.
121auto RunCommandLaunchInfo::
122_computeThreadBlockInfo() const -> ThreadBlockInfo
// Use the command's requested threads-per-block; fall back to 256 when the
// command reports a non-positive value.
124 int threads_per_block = m_command.nbThreadPerBlock();
125 if (threads_per_block<=0)
126 threads_per_block = 256;
// Ceiling division: enough blocks to cover every iteration.
127 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
// Narrow the 64-bit block count to 32 bits (presumably range-checked by
// CheckedConvert — confirm against its implementation).
128 int blocks_per_grid = CheckedConvert::toInt32(big_b);
129 return { blocks_per_grid, threads_per_block };
// Computes the options used for a multi-threaded parallel loop.
// NOTE(review): the return type, several intermediate statements and the
// return statement are missing from this listing.
136computeParallelLoopOptions()
const
// Dynamic grain-size computation is hard-wired off: with this constant at
// false, the condition below can never be true.
139 const bool use_dynamic_compute =
false;
142 if (use_dynamic_compute && opt.
grainSize() == 0) {
// The statements below appear to belong to the disabled branch (confirm
// against the full source): take the allowed thread count from TaskFactory.
145 nb_thread = TaskFactory::nbAllowedThread();
// Grain size = total iterations / (10 * nb_thread), truncated to Int32.
148 Int32 grain_size =
static_cast<Int32>((double)m_total_loop_size / (nb_thread * 10.0));
// Builds m_loop_run_info from computeParallelLoopOptions() and `lti`, then
// attaches the command's execution-statistics object to it via
// setExecStat().
// Raises a fatal error (ARCANE_FATAL) if execution has already begun.
// NOTE(review): the method-name line and the declaration of `lti` are
// missing from this listing; the message suggests this is
// computeLoopRunInfo().
157void RunCommandLaunchInfo::
160 if (m_has_exec_begun)
161 ARCANE_FATAL(
"computeLoopRunInfo() has to be called before beginExecute()");
163 m_loop_run_info =
ForLoopRunInfo(computeParallelLoopOptions(), lti);
164 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
// Forwards an opaque SYCL event pointer to the command by calling
// _internalNotifyBeginLaunchKernelSyclEvent().
// sycl_event_ptr: type-erased pointer, passed through unchanged.
170void RunCommandLaunchInfo::
171_addSyclEvent(
void* sycl_event_ptr)
173 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
File d'exécution pour un accélérateur.
bool isAsync() const
Indique si la file d'exécution est asynchrone.
void barrier() const
Bloque tant que toutes les commandes associées à la file ne sont pas terminées.
eExecutionPolicy executionPolicy() const
Politique d'exécution de la file.
Informations d'exécution d'une boucle.
Informations de trace pour une boucle 'for'.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
std::int32_t Int32
Type entier signé sur 32 bits.