Arcane  v4.1.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
RunCommandLaunchInfo.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* RunCommandLaunchInfo.cc (C) 2000-2025 */
9/* */
10/* Informations pour l'exécution d'une 'RunCommand'. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/accelerator/core/RunCommandLaunchInfo.h"
15
16#include "arccore/base/FatalErrorException.h"
17#include "arccore/base/CheckedConvert.h"
18#include "arccore/base/ConcurrencyBase.h"
19
20#include "arcane/accelerator/core/RunCommand.h"
21#include "arcane/accelerator/core/internal/RunQueueImpl.h"
22#include "arcane/accelerator/core/NativeStream.h"
23#include "arcane/accelerator/core/internal/IRunnerRuntime.h"
24
25/*---------------------------------------------------------------------------*/
26/*---------------------------------------------------------------------------*/
27
28namespace Arcane::Accelerator::impl
29{
30
31/*---------------------------------------------------------------------------*/
32/*---------------------------------------------------------------------------*/
33
34RunCommandLaunchInfo::
35RunCommandLaunchInfo(RunCommand& command, Int64 total_loop_size)
36: m_command(command)
37, m_total_loop_size(total_loop_size)
38{
39 m_queue_impl = m_command._internalQueueImpl();
40 m_exec_policy = m_queue_impl->executionPolicy();
41
42 // Le calcul des informations de kernel n'est utile que sur accélérateur
43 if (isAcceleratorPolicy(m_exec_policy)) {
44 m_kernel_launch_args = _computeKernelLaunchArgs();
45 m_command._allocateReduceMemory(m_kernel_launch_args.nbBlockPerGrid());
46 }
47}
48
49/*---------------------------------------------------------------------------*/
50/*---------------------------------------------------------------------------*/
51
52RunCommandLaunchInfo::
53~RunCommandLaunchInfo()
54{
55 // Notifie de la fin de lancement du noyau. Normalement, cela est déjà fait
56 // sauf s'il y a eu une exception pendant le lancement du noyau de calcul.
57 _doEndKernelLaunch();
58}
59
60/*---------------------------------------------------------------------------*/
61/*---------------------------------------------------------------------------*/
62
63void RunCommandLaunchInfo::
64beginExecute()
65{
66 if (m_has_exec_begun)
67 ARCANE_FATAL("beginExecute() has already been called");
68 m_has_exec_begun = true;
69 m_command._internalNotifyBeginLaunchKernel();
70 if (m_exec_policy == eExecutionPolicy::Thread)
72}
73
74/*---------------------------------------------------------------------------*/
75/*---------------------------------------------------------------------------*/
82void RunCommandLaunchInfo::
83endExecute()
84{
85 if (!m_has_exec_begun)
86 ARCANE_FATAL("beginExecute() has to be called before endExecute()");
87 _doEndKernelLaunch();
88}
89
90/*---------------------------------------------------------------------------*/
91/*---------------------------------------------------------------------------*/
92
93void RunCommandLaunchInfo::
94_doEndKernelLaunch()
95{
96 if (m_is_notify_end_kernel_done)
97 return;
98 m_is_notify_end_kernel_done = true;
99 m_command._internalNotifyEndLaunchKernel();
100
101 impl::RunQueueImpl* q = m_queue_impl;
102 if (!q->isAsync())
103 q->_internalBarrier();
104}
105
106/*---------------------------------------------------------------------------*/
107/*---------------------------------------------------------------------------*/
108
109NativeStream RunCommandLaunchInfo::
110_internalNativeStream()
111{
112 return m_command._internalNativeStream();
113}
114
115/*---------------------------------------------------------------------------*/
116/*---------------------------------------------------------------------------*/
120KernelLaunchArgs RunCommandLaunchInfo::
121_computeKernelLaunchArgs() const
122{
123 int threads_per_block = m_command.nbThreadPerBlock();
124 if (threads_per_block<=0)
125 threads_per_block = 256;
126 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
127 int blocks_per_grid = CheckedConvert::toInt32(big_b);
128 return { blocks_per_grid, threads_per_block };
129}
130
131/*---------------------------------------------------------------------------*/
132/*---------------------------------------------------------------------------*/
133
134ParallelLoopOptions RunCommandLaunchInfo::
135computeParallelLoopOptions() const
136{
137 ParallelLoopOptions opt = m_command.parallelLoopOptions();
138 const bool use_dynamic_compute = true;
139 // Calcule une taille de grain par défaut si cela n'est pas renseigné dans
140 // les options. Par défaut on fait en sorte de faire un nombre d'itérations
141 // égale à 2 fois le nombre de threads utilisés.
142 if (use_dynamic_compute && opt.grainSize() == 0) {
143 Int32 nb_thread = opt.maxThread();
144 if (nb_thread <= 0)
146 if (nb_thread <= 0)
147 nb_thread = 1;
148 Int32 grain_size = static_cast<Int32>((double)m_total_loop_size / (nb_thread * 2.0));
149 opt.setGrainSize(grain_size);
150 }
151 return opt;
152}
153
154/*---------------------------------------------------------------------------*/
155/*---------------------------------------------------------------------------*/
161void RunCommandLaunchInfo::
162_computeLoopRunInfo()
163{
164 ForLoopTraceInfo lti(m_command.traceInfo(), m_command.kernelName());
165 m_loop_run_info = ForLoopRunInfo(computeParallelLoopOptions(), lti);
166 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
167}
168
169/*---------------------------------------------------------------------------*/
170/*---------------------------------------------------------------------------*/
180KernelLaunchArgs RunCommandLaunchInfo::
181_threadBlockInfo(const void* func, Int32 shared_memory_size) const
182{
183 return m_queue_impl->_internalRuntime()->computeKernalLaunchArgs(m_kernel_launch_args, func, totalLoopSize(), shared_memory_size);
184}
185
186/*---------------------------------------------------------------------------*/
187/*---------------------------------------------------------------------------*/
188
189void RunCommandLaunchInfo::
190_addSyclEvent(void* sycl_event_ptr)
191{
192 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
193}
194
195/*---------------------------------------------------------------------------*/
196/*---------------------------------------------------------------------------*/
197
198} // End namespace Arcane::Accelerator::impl
199
200/*---------------------------------------------------------------------------*/
201/*---------------------------------------------------------------------------*/
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Arguments pour lancer un kernel.
Int64 totalLoopSize() const
Taille totale de la boucle.
ParallelLoopOptions computeParallelLoopOptions() const
Calcule et retourne les informations pour les boucles multi-thread.
void _computeLoopRunInfo()
Calcule la valeur de m_loop_run_info.
File d'exécution pour accélérateur.
void _internalBarrier()
Bloque jusqu'à ce que toutes les commandes soient terminées.
static Int32 maxAllowedThread()
Nombre maximum de threads autorisés pour le multi-threading.
Informations d'exécution d'une boucle.
Informations de trace pour une boucle 'for'.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
@ Thread
Politique d'exécution multi-thread.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indique si exec_policy correspond à un accélérateur.
std::int64_t Int64
Type entier signé sur 64 bits.
std::int32_t Int32
Type entier signé sur 32 bits.