Arcane  v3.14.10.0
Documentation utilisateur
Chargement...
Recherche...
Aucune correspondance
RunCommandLaunchInfo.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* RunCommandLaunchInfo.cc (C) 2000-2024 */
9/* */
10/* Informations pour l'exécution d'une 'RunCommand'. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/accelerator/RunCommandLaunchInfo.h"
15
16#include "arcane/utils/CheckedConvert.h"
17#include "arcane/utils/PlatformUtils.h"
18
19#include "arcane/accelerator/core/RunQueue.h"
20#include "arcane/accelerator/core/internal/IRunQueueStream.h"
21
22/*---------------------------------------------------------------------------*/
23/*---------------------------------------------------------------------------*/
24
25namespace Arcane::Accelerator::impl
26{
27
28/*---------------------------------------------------------------------------*/
29/*---------------------------------------------------------------------------*/
30
31RunCommandLaunchInfo::
32RunCommandLaunchInfo(RunCommand& command, Int64 total_loop_size)
33: m_command(command)
34, m_total_loop_size(total_loop_size)
35{
36 m_thread_block_info = _computeThreadBlockInfo();
37 _begin();
38}
39
40/*---------------------------------------------------------------------------*/
41/*---------------------------------------------------------------------------*/
42
43RunCommandLaunchInfo::
44~RunCommandLaunchInfo()
45{
46 // Notifie de la fin de lancement du noyau. Normalement cela est déjà fait
47 // sauf s'il y a eu une exception pendant le lancement du noyau de calcul.
48 _doEndKernelLaunch();
49}
50
51/*---------------------------------------------------------------------------*/
52/*---------------------------------------------------------------------------*/
53
54void RunCommandLaunchInfo::
55_begin()
56{
57 const RunQueue& queue = m_command._internalQueue();
58 m_exec_policy = queue.executionPolicy();
59 m_queue_stream = queue._internalStream();
60 m_runtime = queue._internalRuntime();
61 m_command._allocateReduceMemory(m_thread_block_info.nb_block_per_grid);
62}
63
64/*---------------------------------------------------------------------------*/
65/*---------------------------------------------------------------------------*/
66
67void RunCommandLaunchInfo::
68beginExecute()
69{
70 if (m_has_exec_begun)
71 ARCANE_FATAL("beginExecute() has already been called");
72 m_has_exec_begun = true;
73 m_command._internalNotifyBeginLaunchKernel();
74}
75
76/*---------------------------------------------------------------------------*/
77/*---------------------------------------------------------------------------*/
78/*!
79 * \brief Notifie de la fin de lancement de la commande.
80 *
81 * A noter que si la commande est asynchrone, son exécution peut continuer
82 * après l'appel à cette méthode.
83 */
84void RunCommandLaunchInfo::
85endExecute()
86{
87 if (!m_has_exec_begun)
88 ARCANE_FATAL("beginExecute() has to be called before endExecute()");
89 _doEndKernelLaunch();
90}
91
92/*---------------------------------------------------------------------------*/
93/*---------------------------------------------------------------------------*/
94
95void RunCommandLaunchInfo::
96_doEndKernelLaunch()
97{
98 if (m_is_notify_end_kernel_done)
99 return;
100 m_is_notify_end_kernel_done = true;
101 m_command._internalNotifyEndLaunchKernel();
102
103 const RunQueue& q = m_command._internalQueue();
104 if (!q.isAsync())
105 q.barrier();
106}
107
108/*---------------------------------------------------------------------------*/
109/*---------------------------------------------------------------------------*/
110
111void* RunCommandLaunchInfo::
112_internalStreamImpl()
113{
114 return m_queue_stream->_internalImpl();
115}
116
117/*---------------------------------------------------------------------------*/
118/*---------------------------------------------------------------------------*/
119
120//! Calcule le nombre de block/thread/grille du noyau en fonction de \a full_size
121auto RunCommandLaunchInfo::
122_computeThreadBlockInfo() const -> ThreadBlockInfo
123{
124 int threads_per_block = m_command.nbThreadPerBlock();
125 if (threads_per_block<=0)
126 threads_per_block = 256;
127 Int64 big_b = (m_total_loop_size + threads_per_block - 1) / threads_per_block;
128 int blocks_per_grid = CheckedConvert::toInt32(big_b);
129 return { blocks_per_grid, threads_per_block };
130}
131
132/*---------------------------------------------------------------------------*/
133/*---------------------------------------------------------------------------*/
134
135ParallelLoopOptions RunCommandLaunchInfo::
136computeParallelLoopOptions() const
137{
138 ParallelLoopOptions opt = m_command.parallelLoopOptions();
139 const bool use_dynamic_compute = false;
140 // Calcule une taille de grain par défaut si cela n'est pas renseigné dans
141 // les options
142 if (use_dynamic_compute && opt.grainSize() == 0) {
143 Int32 nb_thread = opt.maxThread();
144 if (nb_thread <= 0)
145 nb_thread = TaskFactory::nbAllowedThread();
146 if (nb_thread <= 0)
147 nb_thread = 1;
148 Int32 grain_size = static_cast<Int32>((double)m_total_loop_size / (nb_thread * 10.0));
149 opt.setGrainSize(grain_size);
150 }
151 return opt;
152}
153
154/*---------------------------------------------------------------------------*/
155/*---------------------------------------------------------------------------*/
156
157void RunCommandLaunchInfo::
158computeLoopRunInfo()
159{
160 if (m_has_exec_begun)
161 ARCANE_FATAL("computeLoopRunInfo() has to be called before beginExecute()");
162 ForLoopTraceInfo lti(m_command.traceInfo(), m_command.kernelName());
163 m_loop_run_info = ForLoopRunInfo(computeParallelLoopOptions(), lti);
164 m_loop_run_info.setExecStat(m_command._internalCommandExecStat());
165}
166
167/*---------------------------------------------------------------------------*/
168/*---------------------------------------------------------------------------*/
169
170void RunCommandLaunchInfo::
171_addSyclEvent(void* sycl_event_ptr)
172{
173 m_command._internalNotifyBeginLaunchKernelSyclEvent(sycl_event_ptr);
174}
175
176/*---------------------------------------------------------------------------*/
177/*---------------------------------------------------------------------------*/
178
179} // End namespace Arcane::Accelerator::impl
180
181/*---------------------------------------------------------------------------*/
182/*---------------------------------------------------------------------------*/
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
File d'exécution pour un accélérateur.
bool isAsync() const
Indique si la file d'exécution est asynchrone.
Definition RunQueue.cc:313
void barrier() const
Bloque tant que toutes les commandes associées à la file ne sont pas terminées.
Definition RunQueue.cc:158
eExecutionPolicy executionPolicy() const
Politique d'exécution de la file.
Definition RunQueue.cc:168
Informations d'exécution d'une boucle.
Informations de trace pour une boucle 'for'.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
std::int32_t Int32
Type entier signé sur 32 bits.