Arcane  v4.1.7.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
Profiling.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* Profiling.cc (C) 2000-2026 */
9/* */
10/* Classes pour gérer le profilage. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/base/Profiling.h"
15
16#include "arccore/base/ForLoopTraceInfo.h"
17#include "arccore/base/PlatformUtils.h"
18#include "arccore/base/internal/ProfilingInternal.h"
19
20#include <iostream>
21#include <iomanip>
22#include <vector>
23#include <mutex>
24#include <map>
25#include <memory>
26#include <set>
27
28namespace
29{
30using namespace Arcane;
31
32void _printGlobalLoopInfos(std::ostream& o, const Impl::ForLoopCumulativeStat& cumulative_stat)
33{
34 Int64 nb_loop_parallel_for = cumulative_stat.nbLoopParallelFor();
35 if (nb_loop_parallel_for == 0)
36 return;
37 Int64 nb_chunk_parallel_for = cumulative_stat.nbChunkParallelFor();
38 Int64 total_time = cumulative_stat.totalTime();
39 double x = static_cast<double>(total_time);
40 double x1 = 0.0;
41 if (nb_loop_parallel_for > 0)
42 x1 = x / static_cast<double>(nb_loop_parallel_for);
43 double x2 = 0.0;
44 if (nb_chunk_parallel_for > 0)
45 x2 = x / static_cast<double>(nb_chunk_parallel_for);
46 o << "LoopStat: global_time (ms) = " << x / 1.0e6 << "\n";
47 o << "LoopStat: global_nb_loop = " << std::setw(10) << nb_loop_parallel_for << " time=" << x1 << "\n";
48 o << "LoopStat: global_nb_chunk = " << std::setw(10) << nb_chunk_parallel_for << " time=" << x2 << "\n";
49}
50
51/*---------------------------------------------------------------------------*/
52/*---------------------------------------------------------------------------*/
53
54void _dumpOneLoopListStat(std::ostream& o, const Impl::ForLoopStatInfoList& stat_list)
55{
56 struct SortedStatInfo
57 {
58 bool operator<(const SortedStatInfo& rhs) const
59 {
60 return m_stat.execTime() > rhs.m_stat.execTime();
61 }
62 String m_name;
63 Impl::ForLoopProfilingStat m_stat;
64 };
65
66 // Met 1 pour éviter de diviser par zéro.
67 Int64 cumulative_total = 1;
68
69 // Tri les fonctions par temps d'exécution décroissant
70 std::set<SortedStatInfo> sorted_set;
71 for (const auto& x : stat_list._internalImpl()->m_stat_map) {
72 const auto& s = x.second;
73 sorted_set.insert({ x.first, s });
74 cumulative_total += s.execTime();
75 }
76
77 o << "ProfilingStat\n";
78 o << std::setw(10) << "Ncall" << std::setw(10) << "Nchunk"
79 << std::setw(11) << " T (ms)" << std::setw(10) << "Tck (ns)"
80 << " % name\n";
81
82 char old_filler = o.fill();
83 for (const auto& x : sorted_set) {
84 const Impl::ForLoopProfilingStat& s = x.m_stat;
85 Int64 nb_loop = s.nbCall();
86 Int64 nb_chunk = s.nbChunk();
87 Int64 total_time_ns = s.execTime();
88 Int64 total_time_us = total_time_ns / 1000;
89 Int64 total_time_ms = total_time_us / 1000;
90 Int64 total_time_remaining_us = total_time_us % 1000;
91 Int64 time_per_chunk = (nb_chunk == 0) ? 0 : (total_time_ns / nb_chunk);
92 Int64 per_mil = (total_time_ns * 1000) / cumulative_total;
93 Int64 percent = per_mil / 10;
94 Int64 percent_digit = per_mil % 10;
95
96 o << std::setw(10) << nb_loop << std::setw(10) << nb_chunk
97 << std::setw(7) << total_time_ms << ".";
98 o << std::setfill('0') << std::setw(3) << total_time_remaining_us << std::setfill(old_filler);
99 o << std::setw(10) << time_per_chunk
100 << std::setw(4) << percent << "." << percent_digit << " " << x.m_name << "\n";
101 }
102 o << "TOTAL=" << cumulative_total / 1000000 << "\n";
103}
104} // namespace
105
106/*---------------------------------------------------------------------------*/
107/*---------------------------------------------------------------------------*/
108
109namespace Arcane
110{
111
112/*---------------------------------------------------------------------------*/
113/*---------------------------------------------------------------------------*/
114
115Impl::ForLoopStatInfoList::
116ForLoopStatInfoList()
117: m_p(new ForLoopStatInfoListImpl())
118{
119}
120
121/*---------------------------------------------------------------------------*/
122/*---------------------------------------------------------------------------*/
123
124Impl::ForLoopStatInfoList::
125~ForLoopStatInfoList()
126{
127 delete m_p;
128}
129
130/*---------------------------------------------------------------------------*/
131/*---------------------------------------------------------------------------*/
132
133namespace
134{
135 Impl::ForLoopCumulativeStat global_stat;
136}
137
138/*---------------------------------------------------------------------------*/
139/*---------------------------------------------------------------------------*/
140
141Impl::ScopedStatLoop::
142ScopedStatLoop(ForLoopOneExecStat* s)
143: m_stat_info(s)
144{
145 if (m_stat_info) {
146 m_begin_time = Platform::getRealTimeNS();
147 }
148}
149
150/*---------------------------------------------------------------------------*/
151/*---------------------------------------------------------------------------*/
152
153Impl::ScopedStatLoop::
154~ScopedStatLoop()
155{
156 if (m_stat_info) {
157 Int64 end_time = Platform::getRealTimeNS();
158 m_stat_info->setBeginTime(m_begin_time);
159 m_stat_info->setEndTime(end_time);
160 }
161}
162
163/*---------------------------------------------------------------------------*/
164/*---------------------------------------------------------------------------*/
165
167{
168 public:
169
170 Impl::ForLoopStatInfoList* createForLoopStatInfoList()
171 {
172 std::lock_guard<std::mutex> lk(m_mutex);
173 std::unique_ptr<Impl::ForLoopStatInfoList> x(new Impl::ForLoopStatInfoList());
174 auto* ptr = x.get();
175 m_for_loop_stat_info_list_vector.push_back(std::move(x));
176 return ptr;
177 }
178 Impl::AcceleratorStatInfoList* createAcceleratorStatInfoList()
179 {
180 std::lock_guard<std::mutex> lk(m_mutex);
181 std::unique_ptr<Impl::AcceleratorStatInfoList> x(new Impl::AcceleratorStatInfoList());
182 auto* ptr = x.get();
183 m_accelerator_stat_info_list_vector.push_back(std::move(x));
184 return ptr;
185 }
186
187 void visitForLoop(const std::function<void(const Impl::ForLoopStatInfoList&)>& f)
188 {
189 for (const auto& x : m_for_loop_stat_info_list_vector)
190 f(*x);
191 }
192
193 void visitAccelerator(const std::function<void(const Impl::AcceleratorStatInfoList&)>& f)
194 {
195 for (const auto& x : m_accelerator_stat_info_list_vector)
196 f(*x);
197 }
198
199 public:
200
201 std::mutex m_mutex;
202 std::vector<std::unique_ptr<Impl::ForLoopStatInfoList>> m_for_loop_stat_info_list_vector;
203 std::vector<std::unique_ptr<Impl::AcceleratorStatInfoList>> m_accelerator_stat_info_list_vector;
204};
205
206/*---------------------------------------------------------------------------*/
207/*---------------------------------------------------------------------------*/
208
209AllStatInfoList global_all_stat_info_list;
210
211/*---------------------------------------------------------------------------*/
212/*---------------------------------------------------------------------------*/
213
214// Permet de gérer une instance de ForLoopStatInfoList par thread pour éviter les verroux
216{
217 public:
218
219 Impl::ForLoopStatInfoList* forLoopStatInfoList()
220 {
221 return _createOrGetForLoopStatInfoList();
222 }
223 Impl::AcceleratorStatInfoList* acceleratorStatInfoList()
224 {
225 return _createOrGetAcceleratorStatInfoList();
226 }
227 void merge(const ForLoopOneExecStat& stat_info, const ForLoopTraceInfo& trace_info)
228 {
229 Impl::ForLoopStatInfoList* stat_list = _createOrGetForLoopStatInfoList();
230 stat_list->merge(stat_info, trace_info);
231 }
232
233 private:
234
235 Impl::ForLoopStatInfoList* _createOrGetForLoopStatInfoList()
236 {
237 if (!m_for_loop_stat_info_list)
238 m_for_loop_stat_info_list = global_all_stat_info_list.createForLoopStatInfoList();
239 return m_for_loop_stat_info_list;
240 }
241 Impl::AcceleratorStatInfoList* _createOrGetAcceleratorStatInfoList()
242 {
243 if (!m_accelerator_stat_info_list)
244 m_accelerator_stat_info_list = global_all_stat_info_list.createAcceleratorStatInfoList();
245 return m_accelerator_stat_info_list;
246 }
247
248 private:
249
250 Impl::ForLoopStatInfoList* m_for_loop_stat_info_list = nullptr;
251 Impl::AcceleratorStatInfoList* m_accelerator_stat_info_list = nullptr;
252};
253thread_local ThreadLocalStatInfo thread_local_stat_info;
254
255/*---------------------------------------------------------------------------*/
256/*---------------------------------------------------------------------------*/
257
258Int32 ProfilingRegistry::m_profiling_level = 0;
259
260/*---------------------------------------------------------------------------*/
261/*---------------------------------------------------------------------------*/
262
265{
266 return thread_local_stat_info.forLoopStatInfoList();
267}
268
269/*---------------------------------------------------------------------------*/
270/*---------------------------------------------------------------------------*/
271
274{
275 return thread_local_stat_info.forLoopStatInfoList();
276}
277
278/*---------------------------------------------------------------------------*/
279/*---------------------------------------------------------------------------*/
280
283{
284 return thread_local_stat_info.acceleratorStatInfoList();
285}
286
287/*---------------------------------------------------------------------------*/
288/*---------------------------------------------------------------------------*/
289
291visitLoopStat(const std::function<void(const Impl::ForLoopStatInfoList&)>& f)
292{
293 global_all_stat_info_list.visitForLoop(f);
294}
295
296/*---------------------------------------------------------------------------*/
297/*---------------------------------------------------------------------------*/
298
300visitAcceleratorStat(const std::function<void(const Impl::AcceleratorStatInfoList&)>& f)
301{
302 global_all_stat_info_list.visitAccelerator(f);
303}
304
305/*---------------------------------------------------------------------------*/
306/*---------------------------------------------------------------------------*/
307
310{
311 m_profiling_level = level;
312}
313
314/*---------------------------------------------------------------------------*/
315/*---------------------------------------------------------------------------*/
316
317const Impl::ForLoopCumulativeStat& ProfilingRegistry::
318globalLoopStat()
319{
320 return global_stat;
321}
322
323/*---------------------------------------------------------------------------*/
324/*---------------------------------------------------------------------------*/
325
326/*---------------------------------------------------------------------------*/
327/*---------------------------------------------------------------------------*/
328
330add(const ForLoopOneExecStat& s)
331{
332 ++m_nb_call;
333 m_nb_chunk += s.nbChunk();
334 m_exec_time += s.execTime();
335}
336
337/*---------------------------------------------------------------------------*/
338/*---------------------------------------------------------------------------*/
339
340/*---------------------------------------------------------------------------*/
341/*---------------------------------------------------------------------------*/
342
343void Impl::ForLoopStatInfoList::
344merge(const ForLoopOneExecStat& loop_stat_info, const ForLoopTraceInfo& loop_trace_info)
345{
346 global_stat.merge(loop_stat_info);
347 String loop_name = "Unknown";
348 if (loop_trace_info.isValid()) {
349 loop_name = loop_trace_info.loopName();
350 if (loop_name.empty())
351 loop_name = loop_trace_info.traceInfo().name();
352 }
353 m_p->m_stat_map[loop_name].add(loop_stat_info);
354}
355
356/*---------------------------------------------------------------------------*/
357/*---------------------------------------------------------------------------*/
358
359void Impl::AcceleratorStatInfoList::
360print(std::ostream& o) const
361{
362 const auto& htod = memoryTransfer(eMemoryTransferType::HostToDevice);
363 const auto& dtoh = memoryTransfer(eMemoryTransferType::DeviceToHost);
364 o << "MemoryTransferSTATS: HTOD = " << htod.m_nb_byte << " (" << htod.m_nb_call << ")"
365 << " DTOH = " << dtoh.m_nb_byte << " (" << dtoh.m_nb_call << ")";
366 const auto& cpu_fault = memoryPageFault(eMemoryPageFaultType::Cpu);
367 const auto& gpu_fault = memoryPageFault(eMemoryPageFaultType::Gpu);
368 o << " PageFaultCPU = " << cpu_fault.m_nb_fault << " (" << cpu_fault.m_nb_call << ")"
369 << " PageFaultGPU = " << gpu_fault.m_nb_fault << " (" << gpu_fault.m_nb_call << ")";
370}
371
372/*---------------------------------------------------------------------------*/
373/*---------------------------------------------------------------------------*/
374
375void Impl::
376dumpProfilingStatistics(std::ostream& o)
377{
378 // Affiche les informations de profiling sur \a o
379 _printGlobalLoopInfos(o, ProfilingRegistry::globalLoopStat());
380 {
381 auto f = [&](const Impl::ForLoopStatInfoList& stat_list) {
382 _dumpOneLoopListStat(o, stat_list);
383 };
385 }
386 // Avant d'afficher le profiling accélérateur, il faudrait être certain
387 // qu'il est désactivé. Normalement, c'est le cas si on utilise ArcaneMainBatch.
388 {
389 auto f = [&](const Impl::AcceleratorStatInfoList& stat_list) {
390 stat_list.print(o);
391 };
393 }
394}
395
396/*---------------------------------------------------------------------------*/
397/*---------------------------------------------------------------------------*/
398
399} // End namespace Arcane
400
401/*---------------------------------------------------------------------------*/
402/*---------------------------------------------------------------------------*/
Classe pour gérer le profiling d'une seule exécution d'une boucle.
Int64 execTime() const
Temps d'exécution (en nanoseconde).
Informations de trace pour une boucle 'for'.
Statistiques pour les accélérateurs.
Statistiques cumulées sur le nombre de boucles exécutées.
ForLoopStatInfoListImpl * _internalImpl() const
Type opaque pour l'implémentation interne.
static void setProfilingLevel(Int32 level)
Positionne le niveau de profilage.
Definition Profiling.cc:309
static Impl::AcceleratorStatInfoList * _threadLocalAcceleratorInstance()
Definition Profiling.cc:282
static void visitAcceleratorStat(const std::function< void(const Impl::AcceleratorStatInfoList &)> &f)
Visite la liste des statistiques sur accélérateur.
Definition Profiling.cc:300
static Impl::ForLoopStatInfoList * threadLocalInstance()
Definition Profiling.cc:264
static Impl::ForLoopStatInfoList * _threadLocalForLoopInstance()
Definition Profiling.cc:273
static void visitLoopStat(const std::function< void(const Impl::ForLoopStatInfoList &)> &f)
Visite la liste des statistiques des boucles.
Definition Profiling.cc:291
Chaîne de caractères unicode.
bool empty() const
Vrai si la chaîne est vide (nulle ou "")
Definition String.cc:316
ARCCORE_BASE_EXPORT Int64 getRealTimeNS()
Temps horloge en nano-secondes.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
std::int64_t Int64
Type entier signé sur 64 bits.
bool operator<(const Item &item1, const Item &item2)
Compare deux entités.
Definition Item.h:551
std::int32_t Int32
Type entier signé sur 32 bits.
Statistiques d'exécution d'une boucle.
void add(const ForLoopOneExecStat &s)
Ajoute les infos de l'exécution s.
Definition Profiling.cc:330