Arcane  v3.15.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
RunCommandLaunchInfo.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* RunCommandLaunchInfo.cc (C) 2000-2024 */
9/* */
10/* Informations pour l'exécution d'une 'RunCommand'. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/accelerator/core/RunCommandLaunchInfo.h"
15
16#include "arcane/utils/CheckedConvert.h"
17#include "arcane/utils/PlatformUtils.h"
18
19#include "arcane/accelerator/core/RunQueue.h"
20#include "arcane/accelerator/core/internal/IRunQueueStream.h"
21#include "arcane/accelerator/core/internal/RunQueueImpl.h"
22#include "arcane/accelerator/core/NativeStream.h"
23
24/*---------------------------------------------------------------------------*/
25/*---------------------------------------------------------------------------*/
26
27namespace Arcane::Accelerator::impl
28{
29
30/*---------------------------------------------------------------------------*/
31/*---------------------------------------------------------------------------*/
32
33RunCommandLaunchInfo::
34RunCommandLaunchInfo(RunCommand& command, Int64 total_loop_size)
35: m_command(command)
36, m_total_loop_size(total_loop_size)
37{
38 m_queue_impl = m_command._internalQueueImpl();
39 m_exec_policy = m_queue_impl->executionPolicy();
40
41 // Le calcul des informations de kernel n'est utile que sur accélérateur
42 if (isAcceleratorPolicy(m_exec_policy)) {
43 m_kernel_launch_args = _computeKernelLaunchArgs();
44 m_command._allocateReduceMemory(m_kernel_launch_args.nbBlockPerGrid());
45 }
46}
47
48/*---------------------------------------------------------------------------*/
49/*---------------------------------------------------------------------------*/
50
51RunCommandLaunchInfo::
52~RunCommandLaunchInfo()
53{
54 // Notifie de la fin de lancement du noyau. Normalement, cela est déjà fait
55 // sauf s'il y a eu une exception pendant le lancement du noyau de calcul.
56 _doEndKernelLaunch();
57}
58
59/*---------------------------------------------------------------------------*/
60/*---------------------------------------------------------------------------*/
61
62void RunCommandLaunchInfo::
63beginExecute()
64{
65 if (m_has_exec_begun)
66 ARCANE_FATAL("beginExecute() has already been called");
67 m_has_exec_begun = true;
68 m_command._internalNotifyBeginLaunchKernel();
69 if (m_exec_policy == eExecutionPolicy::Thread)
70 _computeLoopRunInfo();
71}
72
73/*---------------------------------------------------------------------------*/
74/*---------------------------------------------------------------------------*/
81void RunCommandLaunchInfo::
82endExecute()
83{
84 if (!m_has_exec_begun)
85 ARCANE_FATAL("beginExecute() has to be called before endExecute()");
86 _doEndKernelLaunch();
87}
88
89/*---------------------------------------------------------------------------*/
90/*---------------------------------------------------------------------------*/
91
92void RunCommandLaunchInfo::
93_doEndKernelLaunch()
94{
95 if (m_is_notify_end_kernel_done)
96 return;
97 m_is_notify_end_kernel_done = true;
98 m_command._internalNotifyEndLaunchKernel();
99
100 impl::RunQueueImpl* q = m_queue_impl;
101 if (!q->isAsync())
102 q->_internalBarrier();
103}
104
105/*---------------------------------------------------------------------------*/
106/*---------------------------------------------------------------------------*/
107
108NativeStream RunCommandLaunchInfo::
109_internalNativeStream()
110{
111 return m_command._internalNativeStream();
112}
113
114/*---------------------------------------------------------------------------*/
115/*---------------------------------------------------------------------------*/
119KernelLaunchArgs RunCommandLaunchInfo::
120_computeKernelLaunchArgs() const
121{
122 int threads_per_block = m_command.nbThreadPerBlock();
123 if (threads_per_block<=0)
124 threads_per_block = 256;
125 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
126 int blocks_per_grid = CheckedConvert::toInt32(big_b);
128}
129
130/*---------------------------------------------------------------------------*/
131/*---------------------------------------------------------------------------*/
132
133ParallelLoopOptions RunCommandLaunchInfo::
134computeParallelLoopOptions() const
135{
136 ParallelLoopOptions opt = m_command.parallelLoopOptions();
137 const bool use_dynamic_compute = true;
138 // Calcule une taille de grain par défaut si cela n'est pas renseigné dans
139 // les options. Par défaut on fait en sorte de faire un nombre d'itérations
140 // égale à 2 fois le nombre de threads utilisés.
141 if (use_dynamic_compute && opt.grainSize() == 0) {
142 Int32 nb_thread = opt.maxThread();
143 if (nb_thread <= 0)
144 nb_thread = TaskFactory::nbAllowedThread();
145 if (nb_thread <= 0)
146 nb_thread = 1;
147 Int32 grain_size = static_cast<Int32>((double)m_total_loop_size / (nb_thread * 2.0));
148 opt.setGrainSize(grain_size);
149 }
150 return opt;
151}
152
153/*---------------------------------------------------------------------------*/
154/*---------------------------------------------------------------------------*/
160void RunCommandLaunchInfo::
161_computeLoopRunInfo()
162{
163 ForLoopTraceInfo lti(m_command.traceInfo(), m_command.kernelName());
164 m_loop_run_info = ForLoopRunInfo(computeParallelLoopOptions(), lti);
165 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
166}
167
168/*---------------------------------------------------------------------------*/
169/*---------------------------------------------------------------------------*/
170
171KernelLaunchArgs RunCommandLaunchInfo::
172_threadBlockInfo([[maybe_unused]] const void* func,[[maybe_unused]] Int64 shared_memory_size) const
173{
174 return m_kernel_launch_args;
175}
176
177/*---------------------------------------------------------------------------*/
178/*---------------------------------------------------------------------------*/
179
180void RunCommandLaunchInfo::
181_addSyclEvent(void* sycl_event_ptr)
182{
183 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
184}
185
186/*---------------------------------------------------------------------------*/
187/*---------------------------------------------------------------------------*/
188
189} // End namespace Arcane::Accelerator::impl
190
191/*---------------------------------------------------------------------------*/
192/*---------------------------------------------------------------------------*/
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Arguments pour lancer un kernel.
File d'exécution pour accélérateur.
Informations d'exécution d'une boucle.
Informations de trace pour une boucle 'for'.
Lecteur des fichiers de maillage via la bibliothèque LIMA.
Definition Lima.cc:149
Options d'exécution d'une boucle parallèle en multi-thread.
bool isAcceleratorPolicy(eExecutionPolicy exec_policy)
Indique si exec_policy correspond à un accélérateur.
std::int64_t Int64
Type entier signé sur 64 bits.
std::int32_t Int32
Type entier signé sur 32 bits.