Arcane  v4.1.1.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
Profiling.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* Profiling.cc (C) 2000-2025 */
9/* */
10/* Classes pour gérer le profilage. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/base/Profiling.h"
15
16#include "arccore/base/ForLoopTraceInfo.h"
17#include "arccore/base/PlatformUtils.h"
18#include "arccore/base/internal/ProfilingInternal.h"
19
20#include <iostream>
21#include <iomanip>
22#include <vector>
23#include <mutex>
24#include <map>
25#include <memory>
26
27/*---------------------------------------------------------------------------*/
28/*---------------------------------------------------------------------------*/
29
30namespace Arcane
31{
32
33/*---------------------------------------------------------------------------*/
34/*---------------------------------------------------------------------------*/
35
36Impl::ForLoopStatInfoList::
37ForLoopStatInfoList()
38: m_p(new ForLoopStatInfoListImpl())
39{
40}
41
42/*---------------------------------------------------------------------------*/
43/*---------------------------------------------------------------------------*/
44
45Impl::ForLoopStatInfoList::
46~ForLoopStatInfoList()
47{
48 delete m_p;
49}
50
51/*---------------------------------------------------------------------------*/
52/*---------------------------------------------------------------------------*/
53
54namespace
55{
56 Impl::ForLoopCumulativeStat global_stat;
57}
58
59/*---------------------------------------------------------------------------*/
60/*---------------------------------------------------------------------------*/
61
62Impl::ScopedStatLoop::
63ScopedStatLoop(ForLoopOneExecStat* s)
64: m_stat_info(s)
65{
66 if (m_stat_info) {
67 m_begin_time = Platform::getRealTimeNS();
68 }
69}
70
71/*---------------------------------------------------------------------------*/
72/*---------------------------------------------------------------------------*/
73
74Impl::ScopedStatLoop::
75~ScopedStatLoop()
76{
77 if (m_stat_info) {
78 Int64 end_time = Platform::getRealTimeNS();
79 m_stat_info->setBeginTime(m_begin_time);
80 m_stat_info->setEndTime(end_time);
81 }
82}
83
84/*---------------------------------------------------------------------------*/
85/*---------------------------------------------------------------------------*/
86
88{
89 public:
90
91 Impl::ForLoopStatInfoList* createForLoopStatInfoList()
92 {
93 std::lock_guard<std::mutex> lk(m_mutex);
94 std::unique_ptr<Impl::ForLoopStatInfoList> x(new Impl::ForLoopStatInfoList());
95 auto* ptr = x.get();
96 m_for_loop_stat_info_list_vector.push_back(std::move(x));
97 return ptr;
98 }
99 Impl::AcceleratorStatInfoList* createAcceleratorStatInfoList()
100 {
101 std::lock_guard<std::mutex> lk(m_mutex);
102 std::unique_ptr<Impl::AcceleratorStatInfoList> x(new Impl::AcceleratorStatInfoList());
103 auto* ptr = x.get();
104 m_accelerator_stat_info_list_vector.push_back(std::move(x));
105 return ptr;
106 }
107
108 void visitForLoop(const std::function<void(const Impl::ForLoopStatInfoList&)>& f)
109 {
110 for (const auto& x : m_for_loop_stat_info_list_vector)
111 f(*x);
112 }
113
114 void visitAccelerator(const std::function<void(const Impl::AcceleratorStatInfoList&)>& f)
115 {
116 for (const auto& x : m_accelerator_stat_info_list_vector)
117 f(*x);
118 }
119
120 public:
121
122 std::mutex m_mutex;
123 std::vector<std::unique_ptr<Impl::ForLoopStatInfoList>> m_for_loop_stat_info_list_vector;
124 std::vector<std::unique_ptr<Impl::AcceleratorStatInfoList>> m_accelerator_stat_info_list_vector;
125};
126
127/*---------------------------------------------------------------------------*/
128/*---------------------------------------------------------------------------*/
129
130AllStatInfoList global_all_stat_info_list;
131
132/*---------------------------------------------------------------------------*/
133/*---------------------------------------------------------------------------*/
134
135// Permet de gérer une instance de ForLoopStatInfoList par thread pour éviter les verroux
137{
138 public:
139
140 Impl::ForLoopStatInfoList* forLoopStatInfoList()
141 {
142 return _createOrGetForLoopStatInfoList();
143 }
144 Impl::AcceleratorStatInfoList* acceleratorStatInfoList()
145 {
146 return _createOrGetAcceleratorStatInfoList();
147 }
148 void merge(const ForLoopOneExecStat& stat_info, const ForLoopTraceInfo& trace_info)
149 {
150 Impl::ForLoopStatInfoList* stat_list = _createOrGetForLoopStatInfoList();
151 stat_list->merge(stat_info, trace_info);
152 }
153
154 private:
155
156 Impl::ForLoopStatInfoList* _createOrGetForLoopStatInfoList()
157 {
158 if (!m_for_loop_stat_info_list)
159 m_for_loop_stat_info_list = global_all_stat_info_list.createForLoopStatInfoList();
160 return m_for_loop_stat_info_list;
161 }
162 Impl::AcceleratorStatInfoList* _createOrGetAcceleratorStatInfoList()
163 {
164 if (!m_accelerator_stat_info_list)
165 m_accelerator_stat_info_list = global_all_stat_info_list.createAcceleratorStatInfoList();
166 return m_accelerator_stat_info_list;
167 }
168
169 private:
170
171 Impl::ForLoopStatInfoList* m_for_loop_stat_info_list = nullptr;
172 Impl::AcceleratorStatInfoList* m_accelerator_stat_info_list = nullptr;
173};
174thread_local ThreadLocalStatInfo thread_local_stat_info;
175
176/*---------------------------------------------------------------------------*/
177/*---------------------------------------------------------------------------*/
178
179Int32 ProfilingRegistry::m_profiling_level = 0;
180
181/*---------------------------------------------------------------------------*/
182/*---------------------------------------------------------------------------*/
183
184Impl::ForLoopStatInfoList* ProfilingRegistry::
185threadLocalInstance()
186{
187 return thread_local_stat_info.forLoopStatInfoList();
188}
189
190/*---------------------------------------------------------------------------*/
191/*---------------------------------------------------------------------------*/
192
193Impl::ForLoopStatInfoList* ProfilingRegistry::
194_threadLocalForLoopInstance()
195{
196 return thread_local_stat_info.forLoopStatInfoList();
197}
198
199/*---------------------------------------------------------------------------*/
200/*---------------------------------------------------------------------------*/
201
203_threadLocalAcceleratorInstance()
204{
205 return thread_local_stat_info.acceleratorStatInfoList();
206}
207
208/*---------------------------------------------------------------------------*/
209/*---------------------------------------------------------------------------*/
210
211void ProfilingRegistry::
212visitLoopStat(const std::function<void(const Impl::ForLoopStatInfoList&)>& f)
213{
214 global_all_stat_info_list.visitForLoop(f);
215}
216
217/*---------------------------------------------------------------------------*/
218/*---------------------------------------------------------------------------*/
219
220void ProfilingRegistry::
221visitAcceleratorStat(const std::function<void(const Impl::AcceleratorStatInfoList&)>& f)
222{
223 global_all_stat_info_list.visitAccelerator(f);
224}
225
226/*---------------------------------------------------------------------------*/
227/*---------------------------------------------------------------------------*/
228
229void ProfilingRegistry::
230setProfilingLevel(Int32 level)
231{
232 m_profiling_level = level;
233}
234
235/*---------------------------------------------------------------------------*/
236/*---------------------------------------------------------------------------*/
237
238const Impl::ForLoopCumulativeStat& ProfilingRegistry::
239globalLoopStat()
240{
241 return global_stat;
242}
243
244/*---------------------------------------------------------------------------*/
245/*---------------------------------------------------------------------------*/
246
247/*---------------------------------------------------------------------------*/
248/*---------------------------------------------------------------------------*/
249
250void Impl::ForLoopProfilingStat::
251add(const ForLoopOneExecStat& s)
252{
253 ++m_nb_call;
254 m_nb_chunk += s.nbChunk();
255 m_exec_time += s.execTime();
256}
257
258/*---------------------------------------------------------------------------*/
259/*---------------------------------------------------------------------------*/
260
261/*---------------------------------------------------------------------------*/
262/*---------------------------------------------------------------------------*/
263
264void Impl::ForLoopStatInfoList::
265merge(const ForLoopOneExecStat& loop_stat_info, const ForLoopTraceInfo& loop_trace_info)
266{
267 global_stat.merge(loop_stat_info);
268 String loop_name = "Unknown";
269 if (loop_trace_info.isValid()) {
270 loop_name = loop_trace_info.loopName();
271 if (loop_name.empty())
272 loop_name = loop_trace_info.traceInfo().name();
273 }
274 m_p->m_stat_map[loop_name].add(loop_stat_info);
275}
276
277/*---------------------------------------------------------------------------*/
278/*---------------------------------------------------------------------------*/
279
280void Impl::AcceleratorStatInfoList::
281print(std::ostream& o) const
282{
283 const auto& htod = memoryTransfer(eMemoryTransferType::HostToDevice);
284 const auto& dtoh = memoryTransfer(eMemoryTransferType::DeviceToHost);
285 o << "MemoryTransferSTATS: HTOD = " << htod.m_nb_byte << " (" << htod.m_nb_call << ")"
286 << " DTOH = " << dtoh.m_nb_byte << " (" << dtoh.m_nb_call << ")";
287 const auto& cpu_fault = memoryPageFault(eMemoryPageFaultType::Cpu);
288 const auto& gpu_fault = memoryPageFault(eMemoryPageFaultType::Gpu);
289 o << " PageFaultCPU = " << cpu_fault.m_nb_fault << " (" << cpu_fault.m_nb_call << ")"
290 << " PageFaultGPU = " << gpu_fault.m_nb_fault << " (" << gpu_fault.m_nb_call << ")";
291}
292
293/*---------------------------------------------------------------------------*/
294/*---------------------------------------------------------------------------*/
295
296} // End namespace Arcane
297
298/*---------------------------------------------------------------------------*/
299/*---------------------------------------------------------------------------*/
Classe pour gérer le profiling d'une seule exécution d'une boucle.
Int64 execTime() const
Temps d'exécution (en nanosecondes).
Informations de trace pour une boucle 'for'.
Statistiques pour les accélérateurs.
Statistiques cumulées sur le nombre de boucles exécutées.
Chaîne de caractères unicode.
bool empty() const
Vrai si la chaîne est vide (nulle ou "")
Definition String.cc:316
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
std::int32_t Int32
Type entier signé sur 32 bits.