14#include "arcane/accelerator/core/RunCommandLaunchInfo.h"
16#include "arcane/utils/CheckedConvert.h"
17#include "arcane/utils/PlatformUtils.h"
19#include "arcane/accelerator/core/RunQueue.h"
20#include "arcane/accelerator/core/internal/IRunQueueStream.h"
21#include "arcane/accelerator/core/internal/RunQueueImpl.h"
22#include "arcane/accelerator/core/NativeStream.h"
27namespace Arcane::Accelerator::impl
// Constructor: stores the total loop size, caches the queue implementation of
// the command and that queue's execution policy, then computes the kernel
// launch arguments and asks the command to allocate its reduce memory, passing
// the number of blocks per grid.
// NOTE(review): some original lines (e.g. between 39 and 43) are not visible in
// this listing, so the last two statements may sit behind a condition — confirm.
34RunCommandLaunchInfo(RunCommand& command, Int64 total_loop_size)
36, m_total_loop_size(total_loop_size)
38 m_queue_impl = m_command._internalQueueImpl();
39 m_exec_policy = m_queue_impl->executionPolicy();
43 m_kernel_launch_args = _computeKernelLaunchArgs();
44 m_command._allocateReduceMemory(m_kernel_launch_args.nbBlockPerGrid());
// Destructor.
// NOTE(review): its body is not visible in this listing — document once confirmed.
52~RunCommandLaunchInfo()
// Marks the execution as started (m_has_exec_begun), notifies the command that
// a kernel launch begins and, when the execution policy is
// eExecutionPolicy::Thread, computes the loop run information.
// NOTE(review): the line carrying the function name is missing from this
// listing; the ARCANE_FATAL message that reads m_has_exec_begun suggests this is
// beginExecute() — confirm.
62void RunCommandLaunchInfo::
67 m_has_exec_begun =
true;
68 m_command._internalNotifyBeginLaunchKernel();
69 if (m_exec_policy == eExecutionPolicy::Thread)
70 _computeLoopRunInfo();
// Raises a fatal error through ARCANE_FATAL when the execution has not been
// started beforehand (m_has_exec_begun is false).
// NOTE(review): the function-name line and the remainder of the body are not
// visible in this listing; the error message suggests this is endExecute() —
// confirm.
81void RunCommandLaunchInfo::
84 if (!m_has_exec_begun)
85 ARCANE_FATAL(
"beginExecute() has to be called before endExecute()");
// Records that the end-of-kernel notification has been issued
// (m_is_notify_end_kernel_done), forwards that notification to the command and
// calls _internalBarrier() on the queue implementation.
// NOTE(review): the function-name line and the statements at original lines 96
// and 101 are missing from this listing. Presumably the first test returns
// early when the notification was already done, and the barrier may be
// conditional — confirm against the full source.
92void RunCommandLaunchInfo::
95 if (m_is_notify_end_kernel_done)
97 m_is_notify_end_kernel_done =
true;
98 m_command._internalNotifyEndLaunchKernel();
100 impl::RunQueueImpl* q = m_queue_impl;
102 q->_internalBarrier();
// Returns the native stream of the associated command; this is a plain
// forward to m_command._internalNativeStream().
108NativeStream RunCommandLaunchInfo::
109_internalNativeStream()
111 return m_command._internalNativeStream();
// Computes the kernel launch arguments for m_total_loop_size iterations and
// returns them as { blocks per grid, threads per block }.
119KernelLaunchArgs RunCommandLaunchInfo::
120_computeKernelLaunchArgs()
const
// Take the threads-per-block value from the command; fall back to 256 when
// the command reports a non-positive value.
122 int threads_per_block = m_command.nbThreadPerBlock();
123 if (threads_per_block<=0)
124 threads_per_block = 256;
// Ceiling division so that the blocks cover every iteration of the loop.
125 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
// Narrow the 64-bit block count to 32 bits through CheckedConvert.
126 int blocks_per_grid = CheckedConvert::toInt32(big_b);
127 return { blocks_per_grid, threads_per_block };
// Computes the parallel loop options used to run the loop, deriving a grain
// size from the total loop size when none is set on the options.
// NOTE(review): several original lines are missing from this listing (return
// type, declarations of `opt` and `nb_thread`, the closing of the `if` and the
// return statement) — confirm the details below against the full source.
134computeParallelLoopOptions()
const
// The dynamic grain-size computation is unconditionally enabled.
137 const bool use_dynamic_compute =
true;
// Only computed when the options carry no grain size (grainSize() == 0).
141 if (use_dynamic_compute && opt.
grainSize() == 0) {
// NOTE(review): presumably only applied when `nb_thread` has no usable
// value yet; the guarding line is not visible — confirm.
144 nb_thread = TaskFactory::nbAllowedThread();
// Grain size = total loop size / (2 * nb_thread), truncated to Int32.
147 Int32 grain_size =
static_cast<Int32>((double)m_total_loop_size / (nb_thread * 2.0));
// Builds m_loop_run_info from the computed parallel loop options and the
// trace information `lti`, then attaches the command's execution statistics.
// NOTE(review): the function-name line and the definition of `lti` are missing
// from this listing; presumably this is _computeLoopRunInfo(), which is called
// when the execution policy is eExecutionPolicy::Thread — confirm.
160void RunCommandLaunchInfo::
164 m_loop_run_info =
ForLoopRunInfo(computeParallelLoopOptions(), lti);
165 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
// Returns the kernel launch arguments stored in m_kernel_launch_args.
// Both parameters (`func` and `shared_memory_size`) are marked
// [[maybe_unused]] and do not influence the returned value.
171KernelLaunchArgs RunCommandLaunchInfo::
172_threadBlockInfo([[maybe_unused]]
const void* func,[[maybe_unused]] Int64 shared_memory_size)
const
174 return m_kernel_launch_args;
// Forwards the given SYCL event pointer to the command through
// _internalNotifyBeginLaunchKernelSyclEvent().
// `sycl_event_ptr` is passed as an untyped pointer and is not inspected here.
180void RunCommandLaunchInfo::
181_addSyclEvent(
void* sycl_event_ptr)
183 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Informations d'exécution d'une boucle.
Informations de trace pour une boucle 'for'.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indique si exec_policy correspond à un accélérateur.
std::int32_t Int32
Type entier signé sur 32 bits.