Arcane  4.1.12.0
Developer documentation
Loading...
Searching...
No Matches
RunCommandImpl.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* RunCommandImpl.cc (C) 2000-2026 */
9/* */
10/* Implementation of command management on accelerator. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/common/accelerator/internal/RunCommandImpl.h"
15#include "arccore/common/accelerator/internal/AcceleratorCoreGlobalInternal.h"
16
17#include "arccore/base/FatalErrorException.h"
18#include "arccore/base/ForLoopTraceInfo.h"
19#include "arccore/base/PlatformUtils.h"
20#include "arccore/base/Convert.h"
21#include "arccore/base/ConcurrencyBase.h"
22
23#include "arccore/common/accelerator/Runner.h"
24#include "arccore/common/accelerator/internal/IRunQueueEventImpl.h"
25#include "arccore/common/accelerator/internal/IRunQueueStream.h"
26#include "arccore/common/accelerator/internal/IRunnerRuntime.h"
27#include "arccore/common/accelerator/internal/RunQueueImpl.h"
28#include "arccore/common/accelerator/internal/ReduceMemoryImpl.h"
29#include "arccore/common/accelerator/internal/RunnerImpl.h"
30
31/*---------------------------------------------------------------------------*/
32/*---------------------------------------------------------------------------*/
33
34namespace Arcane::Accelerator::Impl
35{
36
37/*---------------------------------------------------------------------------*/
38/*---------------------------------------------------------------------------*/
39
40RunCommandImpl::
41RunCommandImpl(RunQueueImpl* queue)
42: m_queue(queue)
43, m_execution_policy(queue->runner()->executionPolicy())
44, m_use_accelerator(isAcceleratorPolicy(m_execution_policy))
45{
46 _init();
47}
48
49/*---------------------------------------------------------------------------*/
50/*---------------------------------------------------------------------------*/
51
52RunCommandImpl::
53~RunCommandImpl()
54{
55 _freePools();
56 delete m_start_event;
57 delete m_stop_event;
58}
59
60/*---------------------------------------------------------------------------*/
61/*---------------------------------------------------------------------------*/
62
63void RunCommandImpl::
64_freePools()
65{
66 while (!m_reduce_memory_pool.empty()) {
67 delete m_reduce_memory_pool.top();
68 m_reduce_memory_pool.pop();
69 }
70}
71
72/*---------------------------------------------------------------------------*/
73/*---------------------------------------------------------------------------*/
74
75IRunQueueEventImpl* RunCommandImpl::
76_createEvent()
77{
78 if (m_use_sequential_timer_event)
79 return getSequentialRunQueueRuntime()->createEventImplWithTimer();
80 return runner()->_createEventWithTimer();
81}
82
83/*---------------------------------------------------------------------------*/
84/*---------------------------------------------------------------------------*/
85
86void RunCommandImpl::
87_init()
88{
89 // Only uses accelerator timers if profiling is enabled.
90 // This is done to avoid calling accelerator events since we
91 // do not yet know their influence on performance. If it is
92 // negligible, we can enable it by default.
93
94 // TODO: we should possibly have a sequential instance and
95 // one associated with runner() to handle the case where ProfilingRegistry::hasProfiling()
96 // changes during execution.
97 if (m_use_accelerator && !ProfilingRegistry::hasProfiling())
98 m_use_sequential_timer_event = true;
99
100 m_start_event = _createEvent();
101 m_stop_event = _createEvent();
102
103 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND", true))
104 m_is_allow_reuse_command = (v.value() != 0);
105
106 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_NB_GRID_STRIDE", true)) {
107 Int32 n = v.value();
108 if (n <= 0)
109 n = 1;
110 m_default_nb_stride = n;
111 }
112}
113
114/*---------------------------------------------------------------------------*/
115/*---------------------------------------------------------------------------*/
116
117RunCommandImpl* RunCommandImpl::
118create(RunQueueImpl* r)
119{
120 RunCommandImpl* c = r->_internalCreateOrGetRunCommandImpl();
121 c->_reset();
122 return c;
123}
124
125/*---------------------------------------------------------------------------*/
126/*---------------------------------------------------------------------------*/
127
131void RunCommandImpl::
132notifyBeginLaunchKernel()
133{
136 ARCCORE_FATAL("Command has already been launched. You can not re-use the same command.\n"
137 " You can temporarily allow it if you set environment variable\n"
138 " ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND to 1\n");
139 }
140 IRunQueueStream* stream = internalStream();
141 stream->notifyBeginLaunchKernel(*this);
142 // TODO: use the correct stream in sequential mode
143 m_has_been_launched = true;
144 if (m_use_profiling) {
145 m_start_event->recordQueue(stream);
147 m_loop_one_exec_stat_ptr = &m_loop_one_exec_stat;
148 m_loop_one_exec_stat.setBeginTime(m_begin_time);
149 }
150}
151
152/*---------------------------------------------------------------------------*/
153/*---------------------------------------------------------------------------*/
154
160void RunCommandImpl::
161notifyEndLaunchKernel()
162{
163 IRunQueueStream* stream = internalStream();
164 // TODO: use the correct stream in sequential mode
165 if (m_use_profiling)
166 m_stop_event->recordQueue(stream);
167 stream->notifyEndLaunchKernel(*this);
168 m_queue->_addRunningCommand(this);
169}
170
171/*---------------------------------------------------------------------------*/
172/*---------------------------------------------------------------------------*/
173
181void RunCommandImpl::
182notifyLaunchKernelSyclEvent(void* sycl_event_ptr)
183{
184 IRunQueueStream* stream = internalStream();
185 stream->_setSyclLastCommandEvent(sycl_event_ptr);
186 // We must register the queue associated with the event again
187 // because when notifyBeginLaunchKernel() was called,
188 // the event associated with this queue was not yet available.
189 m_start_event->recordQueue(stream);
190}
191
192/*---------------------------------------------------------------------------*/
193/*---------------------------------------------------------------------------*/
194
202void RunCommandImpl::
203notifyEndExecuteKernel()
204{
205 // Do nothing if the command has not been launched.
207 return;
208
209 Int64 diff_time_ns = 0;
210 if (m_use_profiling) {
211 diff_time_ns = m_stop_event->elapsedTime(m_start_event);
212 runner()->addTime((double)diff_time_ns / 1.0e9);
213 }
214
215 ForLoopOneExecStat* exec_info = m_loop_one_exec_stat_ptr;
216 if (exec_info) {
217 exec_info->setEndTime(m_begin_time + diff_time_ns);
218 //std::cout << "END_EXEC exec_info=" << m_loop_run_info.traceInfo().traceInfo() << "\n";
219 ForLoopTraceInfo flti(traceInfo(), kernelName());
220 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*exec_info, flti);
221 }
222}
223
224/*---------------------------------------------------------------------------*/
225/*---------------------------------------------------------------------------*/
226
227void RunCommandImpl::
228_reset()
229{
230 m_kernel_name = String();
231 m_trace_info = TraceInfo();
232 m_nb_thread_per_block = 0;
233 m_use_profiling = ProfilingRegistry::hasProfiling();
234 m_parallel_loop_options = ConcurrencyBase::defaultParallelLoopOptions();
235 m_begin_time = 0;
236 m_loop_one_exec_stat.reset();
237 m_loop_one_exec_stat_ptr = nullptr;
238 m_has_been_launched = false;
239 m_has_living_run_command = false;
240 m_may_be_put_in_pool = false;
241 m_shared_memory_size = 0;
242 m_nb_stride = m_default_nb_stride;
243}
244
245/*---------------------------------------------------------------------------*/
246/*---------------------------------------------------------------------------*/
247
248IReduceMemoryImpl* RunCommandImpl::
249getOrCreateReduceMemoryImpl()
250{
251 ReduceMemoryImpl* p = _getOrCreateReduceMemoryImpl();
252 if (p) {
253 m_active_reduce_memory_list.insert(p);
254 }
255 return p;
256}
257
258/*---------------------------------------------------------------------------*/
259/*---------------------------------------------------------------------------*/
260
261void RunCommandImpl::
262releaseReduceMemoryImpl(ReduceMemoryImpl* p)
263{
264 auto x = m_active_reduce_memory_list.find(p);
265 if (x == m_active_reduce_memory_list.end())
266 ARCCORE_FATAL("ReduceMemoryImpl in not in active list");
267 m_active_reduce_memory_list.erase(x);
268 m_reduce_memory_pool.push(p);
269}
270
271/*---------------------------------------------------------------------------*/
272/*---------------------------------------------------------------------------*/
273
274IRunQueueStream* RunCommandImpl::
275internalStream() const
276{
277 return m_queue->_internalStream();
278}
279
280/*---------------------------------------------------------------------------*/
281/*---------------------------------------------------------------------------*/
282
283RunnerImpl* RunCommandImpl::
284runner() const
285{
286 return m_queue->runner();
287}
288
289/*---------------------------------------------------------------------------*/
290/*---------------------------------------------------------------------------*/
291
292ReduceMemoryImpl* RunCommandImpl::
293_getOrCreateReduceMemoryImpl()
294{
295 // No need to allocate specific memory if we are not
296 // on an accelerator
297 if (!m_use_accelerator)
298 return nullptr;
299
300 auto& pool = m_reduce_memory_pool;
301
302 if (!pool.empty()) {
303 ReduceMemoryImpl* p = pool.top();
304 pool.pop();
305 return p;
306 }
307 return new ReduceMemoryImpl(this);
308}
309
310/*---------------------------------------------------------------------------*/
311/*---------------------------------------------------------------------------*/
312
316void RunCommandImpl::
317_notifyDestroyRunCommand()
318{
319 // If the command has not been launched, it must be put back into the
320 // queue's command pool (otherwise there will be a memory leak)
322 m_queue->_putInCommandPool(this);
323}
324
325/*---------------------------------------------------------------------------*/
326/*---------------------------------------------------------------------------*/
327
328} // namespace Arcane::Accelerator::Impl
329
330/*---------------------------------------------------------------------------*/
331/*---------------------------------------------------------------------------*/
#define ARCCORE_FATAL(...)
Macro throwing a FatalErrorException.
Interface of an execution stream for a RunQueue.
virtual void notifyEndLaunchKernel(RunCommandImpl &command)=0
Notification of command launch completion.
virtual void _setSyclLastCommandEvent(void *sycl_event_ptr)
For SYCL, sets the event associated with the last executed command.
virtual void notifyBeginLaunchKernel(RunCommandImpl &command)=0
Notification before command launch.
bool m_may_be_put_in_pool
Indicates if the command can be returned to the pool associated with the RunQueue.
bool m_is_allow_reuse_command
Indicates if we allow the same command to be used multiple times.
static const ParallelLoopOptions & defaultParallelLoopOptions()
Default execution values for a parallel loop.
Class to manage the profiling of a single loop execution.
void setEndTime(Int64 v)
Sets the loop end time in nanoseconds.
static Impl::ForLoopStatInfoList * _threadLocalForLoopInstance()
Definition Profiling.cc:273
static bool hasProfiling()
Indicates if profiling is active.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indicates if exec_policy corresponds to an accelerator.
Int64 getRealTimeNS()
Clock time in nanoseconds.
std::int64_t Int64
Signed integer type of 64 bits.