Arcane  v4.1.3.0
Documentation utilisateur
Chargement...
Recherche...
Aucune correspondance
RunCommandImpl.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* RunCommandImpl.cc (C) 2000-2026 */
9/* */
10/* Implémentation de la gestion d'une commande sur accélérateur. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/common/accelerator/internal/RunCommandImpl.h"
15#include "arccore/common/accelerator/internal/AcceleratorCoreGlobalInternal.h"
16
17#include "arccore/base/FatalErrorException.h"
18#include "arccore/base/ForLoopTraceInfo.h"
19#include "arccore/base/PlatformUtils.h"
20#include "arccore/base/Convert.h"
21#include "arccore/base/ConcurrencyBase.h"
22
23#include "arccore/common/accelerator/Runner.h"
24#include "arccore/common/accelerator/internal/IRunQueueEventImpl.h"
25#include "arccore/common/accelerator/internal/IRunQueueStream.h"
26#include "arccore/common/accelerator/internal/IRunnerRuntime.h"
27#include "arccore/common/accelerator/internal/RunQueueImpl.h"
28#include "arccore/common/accelerator/internal/ReduceMemoryImpl.h"
29#include "arccore/common/accelerator/internal/RunnerImpl.h"
30
31/*---------------------------------------------------------------------------*/
32/*---------------------------------------------------------------------------*/
33
34namespace Arcane::Accelerator::Impl
35{
36
37/*---------------------------------------------------------------------------*/
38/*---------------------------------------------------------------------------*/
39
40RunCommandImpl::
41RunCommandImpl(RunQueueImpl* queue)
42: m_queue(queue)
43, m_execution_policy(queue->runner()->executionPolicy())
44, m_use_accelerator(isAcceleratorPolicy(m_execution_policy))
45{
46 _init();
47}
48
49/*---------------------------------------------------------------------------*/
50/*---------------------------------------------------------------------------*/
51
52RunCommandImpl::
53~RunCommandImpl()
54{
55 _freePools();
56 delete m_start_event;
57 delete m_stop_event;
58}
59
60/*---------------------------------------------------------------------------*/
61/*---------------------------------------------------------------------------*/
62
63void RunCommandImpl::
64_freePools()
65{
66 while (!m_reduce_memory_pool.empty()) {
67 delete m_reduce_memory_pool.top();
68 m_reduce_memory_pool.pop();
69 }
70}
71
72/*---------------------------------------------------------------------------*/
73/*---------------------------------------------------------------------------*/
74
75IRunQueueEventImpl* RunCommandImpl::
76_createEvent()
77{
78 if (m_use_sequential_timer_event)
79 return getSequentialRunQueueRuntime()->createEventImplWithTimer();
80 return runner()->_createEventWithTimer();
81}
82
83/*---------------------------------------------------------------------------*/
84/*---------------------------------------------------------------------------*/
85
86void RunCommandImpl::
87_init()
88{
89 // N'utilise les timers accélérateur que si le profiling est activé.
90 // On fait cela pour éviter d'appeler les évènements accélérateurs car on
91 // ne connait pas encore leur influence sur les performances. Si elle est
92 // négligeable alors on pourra l'activer par défaut.
93
94 // TODO: il faudrait éventuellement avoir une instance séquentielle et
95 // une associée à runner() pour gérer le cas ou ProfilingRegistry::hasProfiling()
96 // change en cours d'exécution.
97 if (m_use_accelerator && !ProfilingRegistry::hasProfiling())
98 m_use_sequential_timer_event = true;
99
100 m_start_event = _createEvent();
101 m_stop_event = _createEvent();
102
103 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND", true))
104 m_is_allow_reuse_command = (v.value() != 0);
105}
106
107/*---------------------------------------------------------------------------*/
108/*---------------------------------------------------------------------------*/
109
110RunCommandImpl* RunCommandImpl::
111create(RunQueueImpl* r)
112{
113 RunCommandImpl* c = r->_internalCreateOrGetRunCommandImpl();
114 c->_reset();
115 return c;
116}
117
118/*---------------------------------------------------------------------------*/
119/*---------------------------------------------------------------------------*/
120/*!
121 * \brief Notification du début d'exécution de la commande.
122 */
123void RunCommandImpl::
124notifyBeginLaunchKernel()
125{
126 if (m_has_been_launched) {
127 if (!m_is_allow_reuse_command)
128 ARCCORE_FATAL("Command has already been launched. You can not re-use the same command.\n"
129 " You can temporarily allow it if you set environment variable\n"
130 " ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND to 1\n");
131 }
132 IRunQueueStream* stream = internalStream();
133 stream->notifyBeginLaunchKernel(*this);
134 // TODO: utiliser la bonne stream en séquentiel
135 m_has_been_launched = true;
136 if (m_use_profiling) {
137 m_start_event->recordQueue(stream);
138 m_begin_time = platform::getRealTimeNS();
139 m_loop_one_exec_stat_ptr = &m_loop_one_exec_stat;
140 m_loop_one_exec_stat.setBeginTime(m_begin_time);
141 }
142}
143
144/*---------------------------------------------------------------------------*/
145/*---------------------------------------------------------------------------*/
146/*!
147 * \brief Notification de la fin de lancement de la commande.
148 *
149 * La commande continue à s'exécuter en tâche de fond.
150 */
151void RunCommandImpl::
152notifyEndLaunchKernel()
153{
154 IRunQueueStream* stream = internalStream();
155 // TODO: utiliser la bonne stream en séquentiel
156 if (m_use_profiling)
157 m_stop_event->recordQueue(stream);
158 stream->notifyEndLaunchKernel(*this);
159 m_queue->_addRunningCommand(this);
160}
161
162/*---------------------------------------------------------------------------*/
163/*---------------------------------------------------------------------------*/
164/*!
165 * \brief Notification du lancement d'un kernel SYCL.
166 *
167 * \a sycl_event_ptr est de type sycl::event* et contient
168 * l'évènement associé à la commande qui est retourné lors
169 * des appels à sycl::queue::submit().
170 */
171void RunCommandImpl::
172notifyLaunchKernelSyclEvent(void* sycl_event_ptr)
173{
174 IRunQueueStream* stream = internalStream();
175 stream->_setSyclLastCommandEvent(sycl_event_ptr);
176 // Il faut enregistrer à nouveau la file associée à l'évènement
177 // car lors de l'appel à notifyBeginLaunchKernel() il n'y avait pas
178 // encore l'évènement associé à cette file.
179 m_start_event->recordQueue(stream);
180}
181
182/*---------------------------------------------------------------------------*/
183/*---------------------------------------------------------------------------*/
184/*!
185 * \brief Notification de la fin d'exécution du noyau.
186 *
187 * Après cet appel, on est sur que la commande a fini de s'exécuter et on
188 * peut la recycler. En asynchrone, cette méthode est appelée lors de la
189 * synchronisation d'une file.
190 */
191void RunCommandImpl::
192notifyEndExecuteKernel()
193{
194 // Ne fait rien si la commande n'a pas été lancée.
195 if (!m_has_been_launched)
196 return;
197
198 Int64 diff_time_ns = 0;
199 if (m_use_profiling){
200 diff_time_ns = m_stop_event->elapsedTime(m_start_event);
201 runner()->addTime((double)diff_time_ns / 1.0e9);
202 }
203
204 ForLoopOneExecStat* exec_info = m_loop_one_exec_stat_ptr;
205 if (exec_info) {
206 exec_info->setEndTime(m_begin_time + diff_time_ns);
207 //std::cout << "END_EXEC exec_info=" << m_loop_run_info.traceInfo().traceInfo() << "\n";
208 ForLoopTraceInfo flti(traceInfo(), kernelName());
209 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*exec_info, flti);
210 }
211}
212
213/*---------------------------------------------------------------------------*/
214/*---------------------------------------------------------------------------*/
215
216void RunCommandImpl::
217_reset()
218{
219 m_kernel_name = String();
220 m_trace_info = TraceInfo();
221 m_nb_thread_per_block = 0;
222 m_use_profiling = ProfilingRegistry::hasProfiling();
223 m_parallel_loop_options = ConcurrencyBase::defaultParallelLoopOptions();
224 m_begin_time = 0;
225 m_loop_one_exec_stat.reset();
226 m_loop_one_exec_stat_ptr = nullptr;
227 m_has_been_launched = false;
228 m_has_living_run_command = false;
229 m_may_be_put_in_pool = false;
230 m_shared_memory_size = 0;
231 m_nb_stride = 1;
232}
233
234/*---------------------------------------------------------------------------*/
235/*---------------------------------------------------------------------------*/
236
237IReduceMemoryImpl* RunCommandImpl::
238getOrCreateReduceMemoryImpl()
239{
240 ReduceMemoryImpl* p = _getOrCreateReduceMemoryImpl();
241 if (p) {
242 m_active_reduce_memory_list.insert(p);
243 }
244 return p;
245}
246
247/*---------------------------------------------------------------------------*/
248/*---------------------------------------------------------------------------*/
249
250void RunCommandImpl::
251releaseReduceMemoryImpl(ReduceMemoryImpl* p)
252{
253 auto x = m_active_reduce_memory_list.find(p);
254 if (x == m_active_reduce_memory_list.end())
255 ARCCORE_FATAL("ReduceMemoryImpl in not in active list");
256 m_active_reduce_memory_list.erase(x);
257 m_reduce_memory_pool.push(p);
258}
259
260/*---------------------------------------------------------------------------*/
261/*---------------------------------------------------------------------------*/
262
263IRunQueueStream* RunCommandImpl::
264internalStream() const
265{
266 return m_queue->_internalStream();
267}
268
269/*---------------------------------------------------------------------------*/
270/*---------------------------------------------------------------------------*/
271
272RunnerImpl* RunCommandImpl::
273runner() const
274{
275 return m_queue->runner();
276}
277
278/*---------------------------------------------------------------------------*/
279/*---------------------------------------------------------------------------*/
280
281ReduceMemoryImpl* RunCommandImpl::
282_getOrCreateReduceMemoryImpl()
283{
284 // Pas besoin d'allouer de la mémoire spécifique si on n'est pas
285 // sur un accélérateur
286 if (!m_use_accelerator)
287 return nullptr;
288
289 auto& pool = m_reduce_memory_pool;
290
291 if (!pool.empty()) {
292 ReduceMemoryImpl* p = pool.top();
293 pool.pop();
294 return p;
295 }
296 return new ReduceMemoryImpl(this);
297}
298
299/*---------------------------------------------------------------------------*/
300/*---------------------------------------------------------------------------*/
301/*!
302 * \brief Méthode appelée quand l'instance RunCommand associée est détruite.
303 */
304void RunCommandImpl::
305_notifyDestroyRunCommand()
306{
307 // Si la commande n'a pas été lancé, il faut la remettre dans le pool
308 // des commandes de la file (sinon on aura une fuite mémoire)
309 if (!m_has_been_launched || m_may_be_put_in_pool)
310 m_queue->_putInCommandPool(this);
311}
312
313/*---------------------------------------------------------------------------*/
314/*---------------------------------------------------------------------------*/
315
316} // namespace Arcane::Accelerator::Impl
317
318/*---------------------------------------------------------------------------*/
319/*---------------------------------------------------------------------------*/
#define ARCCORE_FATAL(...)
Macro envoyant une exception FatalErrorException.