Arcane  v3.15.0.0
Documentation utilisateur
Chargement...
Recherche...
Aucune correspondance
RunCommandLaunchInfo.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* RunCommandLaunchInfo.cc (C) 2000-2024 */
9/* */
10/* Informations pour l'exécution d'une 'RunCommand'. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/accelerator/core/RunCommandLaunchInfo.h"
15
16#include "arcane/utils/CheckedConvert.h"
17#include "arcane/utils/PlatformUtils.h"
18
19#include "arcane/accelerator/core/RunQueue.h"
20#include "arcane/accelerator/core/internal/IRunQueueStream.h"
21#include "arcane/accelerator/core/internal/RunQueueImpl.h"
22#include "arcane/accelerator/core/NativeStream.h"
23
24/*---------------------------------------------------------------------------*/
25/*---------------------------------------------------------------------------*/
26
27namespace Arcane::Accelerator::impl
28{
29
30/*---------------------------------------------------------------------------*/
31/*---------------------------------------------------------------------------*/
32
33RunCommandLaunchInfo::
34RunCommandLaunchInfo(RunCommand& command, Int64 total_loop_size)
35: m_command(command)
36, m_total_loop_size(total_loop_size)
37{
38 m_queue_impl = m_command._internalQueueImpl();
39 m_exec_policy = m_queue_impl->executionPolicy();
40
41 // Le calcul des informations de kernel n'est utile que sur accélérateur
42 if (isAcceleratorPolicy(m_exec_policy)) {
43 m_kernel_launch_args = _computeKernelLaunchArgs();
44 m_command._allocateReduceMemory(m_kernel_launch_args.nbBlockPerGrid());
45 }
46}
47
48/*---------------------------------------------------------------------------*/
49/*---------------------------------------------------------------------------*/
50
51RunCommandLaunchInfo::
52~RunCommandLaunchInfo()
53{
54 // Notifie de la fin de lancement du noyau. Normalement, cela est déjà fait
55 // sauf s'il y a eu une exception pendant le lancement du noyau de calcul.
56 _doEndKernelLaunch();
57}
58
59/*---------------------------------------------------------------------------*/
60/*---------------------------------------------------------------------------*/
61
62void RunCommandLaunchInfo::
63beginExecute()
64{
65 if (m_has_exec_begun)
66 ARCANE_FATAL("beginExecute() has already been called");
67 m_has_exec_begun = true;
68 m_command._internalNotifyBeginLaunchKernel();
69 if (m_exec_policy == eExecutionPolicy::Thread)
70 _computeLoopRunInfo();
71}
72
73/*---------------------------------------------------------------------------*/
74/*---------------------------------------------------------------------------*/
75/*!
76 * \brief Notifie de la fin de lancement de la commande.
77 *
78 * A noter que si la commande est asynchrone, son exécution peut continuer
79 * après l'appel à cette méthode.
80 */
81void RunCommandLaunchInfo::
82endExecute()
83{
84 if (!m_has_exec_begun)
85 ARCANE_FATAL("beginExecute() has to be called before endExecute()");
86 _doEndKernelLaunch();
87}
88
89/*---------------------------------------------------------------------------*/
90/*---------------------------------------------------------------------------*/
91
92void RunCommandLaunchInfo::
93_doEndKernelLaunch()
94{
95 if (m_is_notify_end_kernel_done)
96 return;
97 m_is_notify_end_kernel_done = true;
98 m_command._internalNotifyEndLaunchKernel();
99
100 impl::RunQueueImpl* q = m_queue_impl;
101 if (!q->isAsync())
102 q->_internalBarrier();
103}
104
105/*---------------------------------------------------------------------------*/
106/*---------------------------------------------------------------------------*/
107
108NativeStream RunCommandLaunchInfo::
109_internalNativeStream()
110{
111 return m_command._internalNativeStream();
112}
113
114/*---------------------------------------------------------------------------*/
115/*---------------------------------------------------------------------------*/
116/*!
117 * \brief Calcule le nombre de block/thread/grille du noyau en fonction de \a full_size.
118 */
119KernelLaunchArgs RunCommandLaunchInfo::
120_computeKernelLaunchArgs() const
121{
122 int threads_per_block = m_command.nbThreadPerBlock();
123 if (threads_per_block<=0)
124 threads_per_block = 256;
125 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
126 int blocks_per_grid = CheckedConvert::toInt32(big_b);
127 return { blocks_per_grid, threads_per_block };
128}
129
130/*---------------------------------------------------------------------------*/
131/*---------------------------------------------------------------------------*/
132
133ParallelLoopOptions RunCommandLaunchInfo::
134computeParallelLoopOptions() const
135{
136 ParallelLoopOptions opt = m_command.parallelLoopOptions();
137 const bool use_dynamic_compute = true;
138 // Calcule une taille de grain par défaut si cela n'est pas renseigné dans
139 // les options. Par défaut on fait en sorte de faire un nombre d'itérations
140 // égale à 2 fois le nombre de threads utilisés.
141 if (use_dynamic_compute && opt.grainSize() == 0) {
142 Int32 nb_thread = opt.maxThread();
143 if (nb_thread <= 0)
144 nb_thread = TaskFactory::nbAllowedThread();
145 if (nb_thread <= 0)
146 nb_thread = 1;
147 Int32 grain_size = static_cast<Int32>((double)m_total_loop_size / (nb_thread * 2.0));
148 opt.setGrainSize(grain_size);
149 }
150 return opt;
151}
152
153/*---------------------------------------------------------------------------*/
154/*---------------------------------------------------------------------------*/
155/*!
156 * \brief Calcule la valeur de m_loop_run_info.
157 *
158 * Cela n'est utile qu'en mode multi-thread.
159 */
160void RunCommandLaunchInfo::
161_computeLoopRunInfo()
162{
163 ForLoopTraceInfo lti(m_command.traceInfo(), m_command.kernelName());
164 m_loop_run_info = ForLoopRunInfo(computeParallelLoopOptions(), lti);
165 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
166}
167
168/*---------------------------------------------------------------------------*/
169/*---------------------------------------------------------------------------*/
170
171KernelLaunchArgs RunCommandLaunchInfo::
172_threadBlockInfo([[maybe_unused]] const void* func,[[maybe_unused]] Int64 shared_memory_size) const
173{
174 return m_kernel_launch_args;
175}
176
177/*---------------------------------------------------------------------------*/
178/*---------------------------------------------------------------------------*/
179
180void RunCommandLaunchInfo::
181_addSyclEvent(void* sycl_event_ptr)
182{
183 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
184}
185
186/*---------------------------------------------------------------------------*/
187/*---------------------------------------------------------------------------*/
188
189} // End namespace Arcane::Accelerator
190
191/*---------------------------------------------------------------------------*/
192/*---------------------------------------------------------------------------*/
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Informations d'exécution d'une boucle.
Informations de trace pour une boucle 'for'.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indique si exec_policy correspond à un accélérateur.
std::int32_t Int32
Type entier signé sur 32 bits.