Arcane  v4.1.5.0
Documentation utilisateur
Chargement...
Recherche...
Aucune correspondance
RunCommandImpl.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* RunCommandImpl.cc (C) 2000-2026 */
9/* */
10/* Implémentation de la gestion d'une commande sur accélérateur. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/common/accelerator/internal/RunCommandImpl.h"
15#include "arccore/common/accelerator/internal/AcceleratorCoreGlobalInternal.h"
16
17#include "arccore/base/FatalErrorException.h"
18#include "arccore/base/ForLoopTraceInfo.h"
19#include "arccore/base/PlatformUtils.h"
20#include "arccore/base/Convert.h"
21#include "arccore/base/ConcurrencyBase.h"
22
23#include "arccore/common/accelerator/Runner.h"
24#include "arccore/common/accelerator/internal/IRunQueueEventImpl.h"
25#include "arccore/common/accelerator/internal/IRunQueueStream.h"
26#include "arccore/common/accelerator/internal/IRunnerRuntime.h"
27#include "arccore/common/accelerator/internal/RunQueueImpl.h"
28#include "arccore/common/accelerator/internal/ReduceMemoryImpl.h"
29#include "arccore/common/accelerator/internal/RunnerImpl.h"
30
31/*---------------------------------------------------------------------------*/
32/*---------------------------------------------------------------------------*/
33
34namespace Arcane::Accelerator::Impl
35{
36
37/*---------------------------------------------------------------------------*/
38/*---------------------------------------------------------------------------*/
39
40RunCommandImpl::
41RunCommandImpl(RunQueueImpl* queue)
42: m_queue(queue)
43, m_execution_policy(queue->runner()->executionPolicy())
44, m_use_accelerator(isAcceleratorPolicy(m_execution_policy))
45{
46 _init();
47}
48
49/*---------------------------------------------------------------------------*/
50/*---------------------------------------------------------------------------*/
51
52RunCommandImpl::
53~RunCommandImpl()
54{
55 _freePools();
56 delete m_start_event;
57 delete m_stop_event;
58}
59
60/*---------------------------------------------------------------------------*/
61/*---------------------------------------------------------------------------*/
62
63void RunCommandImpl::
64_freePools()
65{
66 while (!m_reduce_memory_pool.empty()) {
67 delete m_reduce_memory_pool.top();
68 m_reduce_memory_pool.pop();
69 }
70}
71
72/*---------------------------------------------------------------------------*/
73/*---------------------------------------------------------------------------*/
74
75IRunQueueEventImpl* RunCommandImpl::
76_createEvent()
77{
78 if (m_use_sequential_timer_event)
79 return getSequentialRunQueueRuntime()->createEventImplWithTimer();
80 return runner()->_createEventWithTimer();
81}
82
83/*---------------------------------------------------------------------------*/
84/*---------------------------------------------------------------------------*/
85
86void RunCommandImpl::
87_init()
88{
89 // N'utilise les timers accélérateur que si le profiling est activé.
90 // On fait cela pour éviter d'appeler les évènements accélérateurs car on
91 // ne connait pas encore leur influence sur les performances. Si elle est
92 // négligeable alors on pourra l'activer par défaut.
93
94 // TODO: il faudrait éventuellement avoir une instance séquentielle et
95 // une associée à runner() pour gérer le cas ou ProfilingRegistry::hasProfiling()
96 // change en cours d'exécution.
97 if (m_use_accelerator && !ProfilingRegistry::hasProfiling())
98 m_use_sequential_timer_event = true;
99
100 m_start_event = _createEvent();
101 m_stop_event = _createEvent();
102
103 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND", true))
104 m_is_allow_reuse_command = (v.value() != 0);
105
106 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_NB_GRID_STRIDE", true)) {
107 Int32 n = v.value();
108 if (n <= 0)
109 n = 1;
110 m_default_nb_stride = n;
111 }
112
113}
114
115/*---------------------------------------------------------------------------*/
116/*---------------------------------------------------------------------------*/
117
118RunCommandImpl* RunCommandImpl::
119create(RunQueueImpl* r)
120{
121 RunCommandImpl* c = r->_internalCreateOrGetRunCommandImpl();
122 c->_reset();
123 return c;
124}
125
126/*---------------------------------------------------------------------------*/
127/*---------------------------------------------------------------------------*/
128/*!
129 * \brief Notification du début d'exécution de la commande.
130 */
131void RunCommandImpl::
132notifyBeginLaunchKernel()
133{
134 if (m_has_been_launched) {
135 if (!m_is_allow_reuse_command)
136 ARCCORE_FATAL("Command has already been launched. You can not re-use the same command.\n"
137 " You can temporarily allow it if you set environment variable\n"
138 " ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND to 1\n");
139 }
140 IRunQueueStream* stream = internalStream();
141 stream->notifyBeginLaunchKernel(*this);
142 // TODO: utiliser la bonne stream en séquentiel
143 m_has_been_launched = true;
144 if (m_use_profiling) {
145 m_start_event->recordQueue(stream);
146 m_begin_time = platform::getRealTimeNS();
147 m_loop_one_exec_stat_ptr = &m_loop_one_exec_stat;
148 m_loop_one_exec_stat.setBeginTime(m_begin_time);
149 }
150}
151
152/*---------------------------------------------------------------------------*/
153/*---------------------------------------------------------------------------*/
154/*!
155 * \brief Notification de la fin de lancement de la commande.
156 *
157 * La commande continue à s'exécuter en tâche de fond.
158 */
159void RunCommandImpl::
160notifyEndLaunchKernel()
161{
162 IRunQueueStream* stream = internalStream();
163 // TODO: utiliser la bonne stream en séquentiel
164 if (m_use_profiling)
165 m_stop_event->recordQueue(stream);
166 stream->notifyEndLaunchKernel(*this);
167 m_queue->_addRunningCommand(this);
168}
169
170/*---------------------------------------------------------------------------*/
171/*---------------------------------------------------------------------------*/
172/*!
173 * \brief Notification du lancement d'un kernel SYCL.
174 *
175 * \a sycl_event_ptr est de type sycl::event* et contient
176 * l'évènement associé à la commande qui est retourné lors
177 * des appels à sycl::queue::submit().
178 */
179void RunCommandImpl::
180notifyLaunchKernelSyclEvent(void* sycl_event_ptr)
181{
182 IRunQueueStream* stream = internalStream();
183 stream->_setSyclLastCommandEvent(sycl_event_ptr);
184 // Il faut enregistrer à nouveau la file associée à l'évènement
185 // car lors de l'appel à notifyBeginLaunchKernel() il n'y avait pas
186 // encore l'évènement associé à cette file.
187 m_start_event->recordQueue(stream);
188}
189
190/*---------------------------------------------------------------------------*/
191/*---------------------------------------------------------------------------*/
192/*!
193 * \brief Notification de la fin d'exécution du noyau.
194 *
195 * Après cet appel, on est sur que la commande a fini de s'exécuter et on
196 * peut la recycler. En asynchrone, cette méthode est appelée lors de la
197 * synchronisation d'une file.
198 */
199void RunCommandImpl::
200notifyEndExecuteKernel()
201{
202 // Ne fait rien si la commande n'a pas été lancée.
203 if (!m_has_been_launched)
204 return;
205
206 Int64 diff_time_ns = 0;
207 if (m_use_profiling){
208 diff_time_ns = m_stop_event->elapsedTime(m_start_event);
209 runner()->addTime((double)diff_time_ns / 1.0e9);
210 }
211
212 ForLoopOneExecStat* exec_info = m_loop_one_exec_stat_ptr;
213 if (exec_info) {
214 exec_info->setEndTime(m_begin_time + diff_time_ns);
215 //std::cout << "END_EXEC exec_info=" << m_loop_run_info.traceInfo().traceInfo() << "\n";
216 ForLoopTraceInfo flti(traceInfo(), kernelName());
217 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*exec_info, flti);
218 }
219}
220
221/*---------------------------------------------------------------------------*/
222/*---------------------------------------------------------------------------*/
223
224void RunCommandImpl::
225_reset()
226{
227 m_kernel_name = String();
228 m_trace_info = TraceInfo();
229 m_nb_thread_per_block = 0;
230 m_use_profiling = ProfilingRegistry::hasProfiling();
231 m_parallel_loop_options = ConcurrencyBase::defaultParallelLoopOptions();
232 m_begin_time = 0;
233 m_loop_one_exec_stat.reset();
234 m_loop_one_exec_stat_ptr = nullptr;
235 m_has_been_launched = false;
236 m_has_living_run_command = false;
237 m_may_be_put_in_pool = false;
238 m_shared_memory_size = 0;
239 m_nb_stride = m_default_nb_stride;
240}
241
242/*---------------------------------------------------------------------------*/
243/*---------------------------------------------------------------------------*/
244
245IReduceMemoryImpl* RunCommandImpl::
246getOrCreateReduceMemoryImpl()
247{
248 ReduceMemoryImpl* p = _getOrCreateReduceMemoryImpl();
249 if (p) {
250 m_active_reduce_memory_list.insert(p);
251 }
252 return p;
253}
254
255/*---------------------------------------------------------------------------*/
256/*---------------------------------------------------------------------------*/
257
258void RunCommandImpl::
259releaseReduceMemoryImpl(ReduceMemoryImpl* p)
260{
261 auto x = m_active_reduce_memory_list.find(p);
262 if (x == m_active_reduce_memory_list.end())
263 ARCCORE_FATAL("ReduceMemoryImpl in not in active list");
264 m_active_reduce_memory_list.erase(x);
265 m_reduce_memory_pool.push(p);
266}
267
268/*---------------------------------------------------------------------------*/
269/*---------------------------------------------------------------------------*/
270
271IRunQueueStream* RunCommandImpl::
272internalStream() const
273{
274 return m_queue->_internalStream();
275}
276
277/*---------------------------------------------------------------------------*/
278/*---------------------------------------------------------------------------*/
279
280RunnerImpl* RunCommandImpl::
281runner() const
282{
283 return m_queue->runner();
284}
285
286/*---------------------------------------------------------------------------*/
287/*---------------------------------------------------------------------------*/
288
289ReduceMemoryImpl* RunCommandImpl::
290_getOrCreateReduceMemoryImpl()
291{
292 // Pas besoin d'allouer de la mémoire spécifique si on n'est pas
293 // sur un accélérateur
294 if (!m_use_accelerator)
295 return nullptr;
296
297 auto& pool = m_reduce_memory_pool;
298
299 if (!pool.empty()) {
300 ReduceMemoryImpl* p = pool.top();
301 pool.pop();
302 return p;
303 }
304 return new ReduceMemoryImpl(this);
305}
306
307/*---------------------------------------------------------------------------*/
308/*---------------------------------------------------------------------------*/
309/*!
310 * \brief Méthode appelée quand l'instance RunCommand associée est détruite.
311 */
312void RunCommandImpl::
313_notifyDestroyRunCommand()
314{
315 // Si la commande n'a pas été lancé, il faut la remettre dans le pool
316 // des commandes de la file (sinon on aura une fuite mémoire)
317 if (!m_has_been_launched || m_may_be_put_in_pool)
318 m_queue->_putInCommandPool(this);
319}
320
321/*---------------------------------------------------------------------------*/
322/*---------------------------------------------------------------------------*/
323
324} // namespace Arcane::Accelerator::Impl
325
326/*---------------------------------------------------------------------------*/
327/*---------------------------------------------------------------------------*/
#define ARCCORE_FATAL(...)
Macro envoyant une exception FatalErrorException.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indique si exec_policy correspond à un accélérateur.