14#include "arcane/accelerator/core/RunCommandLaunchInfo.h"
16#include "arcane/utils/CheckedConvert.h"
17#include "arcane/utils/PlatformUtils.h"
19#include "arcane/accelerator/core/RunQueue.h"
20#include "arcane/accelerator/core/internal/IRunQueueStream.h"
21#include "arcane/accelerator/core/internal/RunQueueImpl.h"
22#include "arcane/accelerator/core/NativeStream.h"
27namespace Arcane::Accelerator::impl
// Constructor fragment. This listing omits several original lines (part of
// the member-initializer list and the braces), so only the visible
// statements are described here.
// Records the total loop size, caches the queue implementation obtained from
// the command together with that queue's execution policy, computes the
// kernel launch arguments, and calls the command's _allocateReduceMemory()
// with the resulting number of blocks per grid.
34RunCommandLaunchInfo(RunCommand& command,
Int64 total_loop_size)
36, m_total_loop_size(total_loop_size)
38 m_queue_impl = m_command._internalQueueImpl();
39 m_exec_policy = m_queue_impl->executionPolicy();
// The launch arguments must be computed first: the next call takes
// nbBlockPerGrid() from them as its argument.
43 m_kernel_launch_args = _computeKernelLaunchArgs();
44 m_command._allocateReduceMemory(m_kernel_launch_args.nbBlockPerGrid());
// Destructor of RunCommandLaunchInfo. Its body is not part of this listing.
52~RunCommandLaunchInfo()
// Member function whose name line is missing from this listing; presumably
// beginExecute(), judging by the flag it sets and the error message that
// mentions beginExecute() elsewhere in this file - TODO confirm.
// Sets m_has_exec_begun, calls m_command._internalNotifyBeginLaunchKernel(),
// then calls _computeLoopRunInfo().
62void RunCommandLaunchInfo::
67 m_has_exec_begun =
true;
68 m_command._internalNotifyBeginLaunchKernel();
70 _computeLoopRunInfo();
// Member function whose name line is missing from this listing; presumably
// endExecute(), judging by the error message below - TODO confirm.
// Raises a fatal error through ARCANE_FATAL when m_has_exec_begun is false.
// Any statements following the check are not shown in this listing.
81void RunCommandLaunchInfo::
84 if (!m_has_exec_begun)
85 ARCANE_FATAL(
"beginExecute() has to be called before endExecute()");
// Member function whose name line is missing from this listing.
// Tests m_is_notify_end_kernel_done; the statement guarded by that test is on
// an omitted line (presumably an early return so the notification happens
// only once - TODO confirm). It then sets the flag and calls
// m_command._internalNotifyEndLaunchKernel().
92void RunCommandLaunchInfo::
95 if (m_is_notify_end_kernel_done)
97 m_is_notify_end_kernel_done =
true;
98 m_command._internalNotifyEndLaunchKernel();
100 impl::RunQueueImpl* q = m_queue_impl;
// NOTE(review): original line 101 is missing from this listing, so the
// barrier call below may be conditional - confirm against the real source.
102 q->_internalBarrier();
// Returns the NativeStream obtained by forwarding to
// m_command._internalNativeStream().
108NativeStream RunCommandLaunchInfo::
109_internalNativeStream()
111 return m_command._internalNativeStream();
// Builds the kernel launch arguments (blocks per grid, threads per block)
// from the total loop size and the command's requested threads per block.
119KernelLaunchArgs RunCommandLaunchInfo::
120_computeKernelLaunchArgs()
const
122 int threads_per_block = m_command.nbThreadPerBlock();
// A non-positive value from the command falls back to 256 threads per block.
123 if (threads_per_block<=0)
124 threads_per_block = 256;
// Ceiling division: number of blocks needed to cover m_total_loop_size.
125 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
// NOTE(review): blocks_per_grid is defined on original line 126, which is
// missing from this listing; presumably big_b converted to a 32-bit integer
// - TODO confirm.
127 return { blocks_per_grid, threads_per_block };
// Fragment of computeParallelLoopOptions() const. The return-type line and
// several body lines (including the definitions of opt and nb_thread and the
// return statement) are missing from this listing.
134computeParallelLoopOptions()
const
// Dynamic grain-size computation is always enabled in the visible code.
137 const bool use_dynamic_compute =
true;
// Only computes a grain size when opt.grainSize() is 0.
141 if (use_dynamic_compute && opt.
grainSize() == 0) {
// Grain size = total loop size / (2 * nb_thread), computed in floating point
// and then truncated to Int32 by the cast.
147 Int32 grain_size =
static_cast<Int32>((double)m_total_loop_size / (nb_thread * 2.0));
// Member function whose name line is missing from this listing; presumably
// _computeLoopRunInfo(), since it fills m_loop_run_info - TODO confirm.
// Builds m_loop_run_info from computeParallelLoopOptions() and lti (lti is
// defined on a line not shown here), then passes the command's
// _internalCommandExecStat() result to setExecStat().
160void RunCommandLaunchInfo::
164 m_loop_run_info =
ForLoopRunInfo(computeParallelLoopOptions(), lti);
165 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
// Returns the stored m_kernel_launch_args unchanged. Both parameters (func
// and shared_memory_size) are marked [[maybe_unused]] and are not read by the
// visible code.
171KernelLaunchArgs RunCommandLaunchInfo::
172_threadBlockInfo([[maybe_unused]]
const void* func,[[maybe_unused]]
Int64 shared_memory_size)
const
174 return m_kernel_launch_args;
// Forwards the opaque sycl_event_ptr to
// m_command._internalNotifyBeginLaunchKernelSyclEvent().
// NOTE(review): presumably points to a SYCL event object - confirm against
// the caller.
180void RunCommandLaunchInfo::
181_addSyclEvent(
void* sycl_event_ptr)
183 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Informations d'exécution d'une boucle.
Informations de trace pour une boucle 'for'.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
static Int32 nbAllowedThread()
Nombre de threads utilisés au maximum pour gérer les tâches.
@ Thread
Politique d'exécution multi-thread.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indique si exec_policy correspond à un accélérateur.
Int32 toInt32(Int64 v)
Convertit un Int64 en un Int32.
std::int64_t Int64
Type entier signé sur 64 bits.
std::int32_t Int32
Type entier signé sur 32 bits.