14#include "arccore/common/accelerator/internal/RunCommandImpl.h"
15#include "arccore/common/accelerator/internal/AcceleratorCoreGlobalInternal.h"
17#include "arccore/base/FatalErrorException.h"
18#include "arccore/base/ForLoopTraceInfo.h"
19#include "arccore/base/PlatformUtils.h"
20#include "arccore/base/Convert.h"
21#include "arccore/base/ConcurrencyBase.h"
23#include "arccore/common/accelerator/Runner.h"
24#include "arccore/common/accelerator/internal/IRunQueueEventImpl.h"
25#include "arccore/common/accelerator/internal/IRunQueueStream.h"
26#include "arccore/common/accelerator/internal/IRunnerRuntime.h"
27#include "arccore/common/accelerator/internal/RunQueueImpl.h"
28#include "arccore/common/accelerator/internal/ReduceMemoryImpl.h"
29#include "arccore/common/accelerator/internal/RunnerImpl.h"
34namespace Arcane::Accelerator::Impl
// Constructor taking a RunQueueImpl pointer. Only part of the member
// initializer list is visible in this listing.
41RunCommandImpl(RunQueueImpl* queue)
// The execution policy is read from the runner returned by queue->runner().
43, m_execution_policy(queue->runner()->executionPolicy())
// Empties m_reduce_memory_pool: the top entry is deleted and then popped,
// repeatedly, until the pool reports empty.
// NOTE(review): the enclosing function header is not visible in this listing.
66 while (!m_reduce_memory_pool.empty()) {
67 delete m_reduce_memory_pool.top();
68 m_reduce_memory_pool.pop();
// Creates and returns an event implementation with timer support.
// NOTE(review): the line carrying the member function's name is not visible in
// this listing; presumably this is the `_createEvent()` helper called elsewhere
// in the file (to be confirmed).
75IRunQueueEventImpl* RunCommandImpl::
// When m_use_sequential_timer_event is set, the event is created by the
// sequential run-queue runtime; otherwise it is created through runner().
78 if (m_use_sequential_timer_event)
79 return getSequentialRunQueueRuntime()->createEventImplWithTimer();
80 return runner()->_createEventWithTimer();
// Initialization fragment (the enclosing function header is not visible in
// this listing).
// When running on an accelerator and profiling is not enabled, switch to the
// sequential timer events.
97 if (m_use_accelerator && !ProfilingRegistry::hasProfiling())
98 m_use_sequential_timer_event =
true;
// Events recorded on the stream by the launch notifications and compared with
// elapsedTime() once the kernel has finished.
100 m_start_event = _createEvent();
101 m_stop_event = _createEvent();
// ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND: when the variable parses as an Int32,
// any non-zero value allows a command to be launched more than once.
103 if (
auto v = Convert::Type<Int32>::tryParseFromEnvironment(
"ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND",
true))
104 m_is_allow_reuse_command = (v.value() != 0);
// ARCANE_ACCELERATOR_NB_GRID_STRIDE: parsed as an Int32 and used to set the
// default stride count. The lines that derive `n` from the parsed value are
// not visible in this listing.
106 if (
auto v = Convert::Type<Int32>::tryParseFromEnvironment(
"ARCANE_ACCELERATOR_NB_GRID_STRIDE",
true)) {
110 m_default_nb_stride = n;
// Factory for the queue `r`: the command instance is obtained from the queue
// through _internalCreateOrGetRunCommandImpl().
// NOTE(review): the remainder of the body (including the return statement) is
// not visible in this listing.
118RunCommandImpl* RunCommandImpl::
119create(RunQueueImpl* r)
121 RunCommandImpl* c = r->_internalCreateOrGetRunCommandImpl();
// Notification hook for the beginning of a kernel launch.
// - Raises a fatal error if the command was already launched and re-use is not
//   allowed (m_is_allow_reuse_command).
// - Notifies the queue's stream, then marks the command as launched.
// - When profiling is active, records the start event and the begin timestamp.
132notifyBeginLaunchKernel()
134 if (m_has_been_launched) {
135 if (!m_is_allow_reuse_command)
136 ARCCORE_FATAL(
"Command has already been launched. You can not re-use the same command.\n"
137 " You can temporarily allow it if you set environment variable\n"
138 " ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND to 1\n");
// Forward the notification to the stream returned by internalStream().
140 IRunQueueStream* stream = internalStream();
141 stream->notifyBeginLaunchKernel(*
this);
143 m_has_been_launched =
true;
144 if (m_use_profiling) {
// Record the start event on the stream and keep the timestamp returned by
// platform::getRealTimeNS() as the begin time.
145 m_start_event->recordQueue(stream);
146 m_begin_time = platform::getRealTimeNS();
// Make the per-execution statistics pointer refer to the member instance and
// store the begin time in it.
147 m_loop_one_exec_stat_ptr = &m_loop_one_exec_stat;
148 m_loop_one_exec_stat.setBeginTime(m_begin_time);
// Notification hook for the end of a kernel launch: records the stop event on
// the stream, notifies the stream, then registers this command with the queue
// through _addRunningCommand().
160notifyEndLaunchKernel()
162 IRunQueueStream* stream = internalStream();
// NOTE(review): listing lines just before this one are missing; any condition
// guarding this call is not visible here.
165 m_stop_event->recordQueue(stream);
166 stream->notifyEndLaunchKernel(*
this);
167 m_queue->_addRunningCommand(
this);
// SYCL-specific launch notification: passes the opaque event pointer to the
// stream via _setSyclLastCommandEvent(), then records the start event on that
// stream.
// NOTE(review): the lines between these two calls are missing from this
// listing, so any condition on the recordQueue() call is not visible.
180notifyLaunchKernelSyclEvent(
void* sycl_event_ptr)
182 IRunQueueStream* stream = internalStream();
183 stream->_setSyclLastCommandEvent(sycl_event_ptr);
187 m_start_event->recordQueue(stream);
// Notification hook for the end of kernel execution: computes the elapsed time
// when profiling is active and merges the execution statistics into the
// profiling registry.
200notifyEndExecuteKernel()
// Guard for commands that were never launched (the guarded statement is not
// visible in this listing).
203 if (!m_has_been_launched)
206 Int64 diff_time_ns = 0;
207 if (m_use_profiling){
// Elapsed time between the start and stop events; divided by 1e9 before being
// added to the runner's accumulated time.
208 diff_time_ns = m_stop_event->elapsedTime(m_start_event);
209 runner()->addTime((
double)diff_time_ns / 1.0e9);
212 ForLoopOneExecStat* exec_info = m_loop_one_exec_stat_ptr;
// End time is the begin timestamp plus the measured elapsed time.
// NOTE(review): the preceding listing line is missing; a check on exec_info
// may exist there.
214 exec_info->setEndTime(m_begin_time + diff_time_ns);
// Merge this execution's statistics into the instance returned by
// ProfilingRegistry::_threadLocalForLoopInstance(), keyed by the command's
// trace info and kernel name.
216 ForLoopTraceInfo flti(traceInfo(), kernelName());
217 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*exec_info, flti);
// State-reset fragment (the enclosing function header is not visible in this
// listing). Restores the per-command fields to their initial values.
// Kernel name and trace info are cleared; thread-per-block count goes to 0.
227 m_kernel_name = String();
228 m_trace_info = TraceInfo();
229 m_nb_thread_per_block = 0;
// Profiling flag and parallel-loop options are re-read from their global
// defaults.
230 m_use_profiling = ProfilingRegistry::hasProfiling();
231 m_parallel_loop_options = ConcurrencyBase::defaultParallelLoopOptions();
// Per-execution statistics are reset and detached.
233 m_loop_one_exec_stat.reset();
234 m_loop_one_exec_stat_ptr =
nullptr;
// Launch and lifetime flags are cleared.
235 m_has_been_launched =
false;
236 m_has_living_run_command =
false;
237 m_may_be_put_in_pool =
false;
238 m_shared_memory_size = 0;
// The stride count goes back to the configured default.
239 m_nb_stride = m_default_nb_stride;
// Obtains a ReduceMemoryImpl through _getOrCreateReduceMemoryImpl() and
// registers it in m_active_reduce_memory_list.
// NOTE(review): the lines around the insert (possibly a null check) and the
// return statement are not visible in this listing.
245IReduceMemoryImpl* RunCommandImpl::
246getOrCreateReduceMemoryImpl()
248 ReduceMemoryImpl* p = _getOrCreateReduceMemoryImpl();
250 m_active_reduce_memory_list.insert(p);
// Releases `p`: it is removed from m_active_reduce_memory_list and pushed onto
// m_reduce_memory_pool.
259releaseReduceMemoryImpl(ReduceMemoryImpl* p)
261 auto x = m_active_reduce_memory_list.find(p);
// Case where `p` is not in the active list (the statement executed for this
// case is not visible in this listing).
262 if (x == m_active_reduce_memory_list.end())
264 m_active_reduce_memory_list.erase(x);
265 m_reduce_memory_pool.push(p);
// Returns the stream obtained from the associated queue
// (m_queue->_internalStream()).
271IRunQueueStream* RunCommandImpl::
272internalStream()
const
274 return m_queue->_internalStream();
// Returns the RunnerImpl obtained from the associated queue (m_queue->runner()).
// NOTE(review): the line carrying the member function's name is not visible in
// this listing; presumably this is `runner()` — to be confirmed.
280RunnerImpl* RunCommandImpl::
283 return m_queue->runner();
// Returns a ReduceMemoryImpl: either one taken from the top of
// m_reduce_memory_pool or a newly allocated one bound to this command.
// NOTE(review): several lines are missing from this listing, including the
// statement guarded by the accelerator check and the condition that selects
// the pooled entry over a new allocation.
289ReduceMemoryImpl* RunCommandImpl::
290_getOrCreateReduceMemoryImpl()
// Special case when the command does not use an accelerator (guarded statement
// not visible).
294 if (!m_use_accelerator)
297 auto& pool = m_reduce_memory_pool;
300 ReduceMemoryImpl* p = pool.top();
304 return new ReduceMemoryImpl(
this);
// Notification that the owning run command is being destroyed. The instance is
// handed back to the queue's command pool when it was never launched, or when
// m_may_be_put_in_pool is set.
313_notifyDestroyRunCommand()
317 if (!m_has_been_launched || m_may_be_put_in_pool)
318 m_queue->_putInCommandPool(
this);
#define ARCCORE_FATAL(...)
Macro envoyant une exception FatalErrorException.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indique si exec_policy correspond à un accélérateur.