Arcane  v3.14.10.0
Documentation utilisateur
Chargement...
Recherche...
Aucune correspondance
Profiling.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* Profiling.cc (C) 2000-2024 */
9/* */
10/* Classes pour gérer le profilage. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/utils/Profiling.h"
15
16#include "arcane/utils/ForLoopTraceInfo.h"
17#include "arcane/utils/PlatformUtils.h"
18
19#include "arcane/utils/internal/ProfilingInternal.h"
20
21#include <iostream>
22#include <iomanip>
23#include <vector>
24#include <mutex>
25#include <map>
26
27/*---------------------------------------------------------------------------*/
28/*---------------------------------------------------------------------------*/
29
30namespace Arcane
31{
32
33/*---------------------------------------------------------------------------*/
34/*---------------------------------------------------------------------------*/
35
36impl::ForLoopStatInfoList::
37ForLoopStatInfoList()
38: m_p(new ForLoopStatInfoListImpl())
39{
40}
41
42/*---------------------------------------------------------------------------*/
43/*---------------------------------------------------------------------------*/
44
45impl::ForLoopStatInfoList::
46~ForLoopStatInfoList()
47{
48 delete m_p;
49}
50
51/*---------------------------------------------------------------------------*/
52/*---------------------------------------------------------------------------*/
53
54namespace
55{
56 impl::ForLoopCumulativeStat global_stat;
57}
58
59/*---------------------------------------------------------------------------*/
60/*---------------------------------------------------------------------------*/
61
62impl::ScopedStatLoop::
63ScopedStatLoop(ForLoopOneExecStat* s)
64: m_stat_info(s)
65{
66 if (m_stat_info) {
67 m_begin_time = platform::getRealTimeNS();
68 }
69}
70
71/*---------------------------------------------------------------------------*/
72/*---------------------------------------------------------------------------*/
73
74impl::ScopedStatLoop::
75~ScopedStatLoop()
76{
77 if (m_stat_info) {
78 Int64 end_time = platform::getRealTimeNS();
79 m_stat_info->setBeginTime(m_begin_time);
80 m_stat_info->setEndTime(end_time);
81 }
82}
83
84/*---------------------------------------------------------------------------*/
85/*---------------------------------------------------------------------------*/
86
88{
89 public:
90
91 impl::ForLoopStatInfoList* createForLoopStatInfoList()
92 {
93 std::lock_guard<std::mutex> lk(m_mutex);
94 std::unique_ptr<impl::ForLoopStatInfoList> x(new impl::ForLoopStatInfoList());
95 auto* ptr = x.get();
96 m_for_loop_stat_info_list_vector.push_back(std::move(x));
97 return ptr;
98 }
99 impl::AcceleratorStatInfoList* createAcceleratorStatInfoList()
100 {
101 std::lock_guard<std::mutex> lk(m_mutex);
102 std::unique_ptr<impl::AcceleratorStatInfoList> x(new impl::AcceleratorStatInfoList());
103 auto* ptr = x.get();
104 m_accelerator_stat_info_list_vector.push_back(std::move(x));
105 return ptr;
106 }
107
108 void visitForLoop(const std::function<void(const impl::ForLoopStatInfoList&)>& f)
109 {
110 for (const auto& x : m_for_loop_stat_info_list_vector)
111 f(*x);
112 }
113
114 void visitAccelerator(const std::function<void(const impl::AcceleratorStatInfoList&)>& f)
115 {
116 for (const auto& x : m_accelerator_stat_info_list_vector)
117 f(*x);
118 }
119
120 public:
121
122 std::mutex m_mutex;
123 std::vector<std::unique_ptr<impl::ForLoopStatInfoList>> m_for_loop_stat_info_list_vector;
124 std::vector<std::unique_ptr<impl::AcceleratorStatInfoList>> m_accelerator_stat_info_list_vector;
125};
126
127/*---------------------------------------------------------------------------*/
128/*---------------------------------------------------------------------------*/
129
130AllStatInfoList global_all_stat_info_list;
131
132/*---------------------------------------------------------------------------*/
133/*---------------------------------------------------------------------------*/
134
135// Permet de gérer une instance de ForLoopStatInfoList par thread pour éviter les verroux
137{
138 public:
139
140 impl::ForLoopStatInfoList* forLoopStatInfoList()
141 {
142 return _createOrGetForLoopStatInfoList();
143 }
144 impl::AcceleratorStatInfoList* acceleratorStatInfoList()
145 {
146 return _createOrGetAcceleratorStatInfoList();
147 }
148 void merge(const ForLoopOneExecStat& stat_info, const ForLoopTraceInfo& trace_info)
149 {
150 impl::ForLoopStatInfoList* stat_list = _createOrGetForLoopStatInfoList();
151 stat_list->merge(stat_info, trace_info);
152 }
153
154 private:
155
156 impl::ForLoopStatInfoList* _createOrGetForLoopStatInfoList()
157 {
158 if (!m_for_loop_stat_info_list)
159 m_for_loop_stat_info_list = global_all_stat_info_list.createForLoopStatInfoList();
160 return m_for_loop_stat_info_list;
161 }
162 impl::AcceleratorStatInfoList* _createOrGetAcceleratorStatInfoList()
163 {
164 if (!m_accelerator_stat_info_list)
165 m_accelerator_stat_info_list = global_all_stat_info_list.createAcceleratorStatInfoList();
166 return m_accelerator_stat_info_list;
167 }
168
169 private:
170
171 impl::ForLoopStatInfoList* m_for_loop_stat_info_list = nullptr;
172 impl::AcceleratorStatInfoList* m_accelerator_stat_info_list = nullptr;
173};
174thread_local ThreadLocalStatInfo thread_local_stat_info;
175
176/*---------------------------------------------------------------------------*/
177/*---------------------------------------------------------------------------*/
178
179Int32 ProfilingRegistry::m_profiling_level = 0;
180
181/*---------------------------------------------------------------------------*/
182/*---------------------------------------------------------------------------*/
183
184impl::ForLoopStatInfoList* ProfilingRegistry::
185threadLocalInstance()
186{
187 return thread_local_stat_info.forLoopStatInfoList();
188}
189
190/*---------------------------------------------------------------------------*/
191/*---------------------------------------------------------------------------*/
192
193impl::ForLoopStatInfoList* ProfilingRegistry::
194_threadLocalForLoopInstance()
195{
196 return thread_local_stat_info.forLoopStatInfoList();
197}
198
199/*---------------------------------------------------------------------------*/
200/*---------------------------------------------------------------------------*/
201
202impl::AcceleratorStatInfoList* ProfilingRegistry::
203_threadLocalAcceleratorInstance()
204{
205 return thread_local_stat_info.acceleratorStatInfoList();
206}
207
208/*---------------------------------------------------------------------------*/
209/*---------------------------------------------------------------------------*/
210
211void ProfilingRegistry::
212visitLoopStat(const std::function<void(const impl::ForLoopStatInfoList&)>& f)
213{
214 global_all_stat_info_list.visitForLoop(f);
215}
216
217/*---------------------------------------------------------------------------*/
218/*---------------------------------------------------------------------------*/
219
220void ProfilingRegistry::
221visitAcceleratorStat(const std::function<void(const impl::AcceleratorStatInfoList&)>& f)
222{
223 global_all_stat_info_list.visitAccelerator(f);
224}
225
226/*---------------------------------------------------------------------------*/
227/*---------------------------------------------------------------------------*/
228
229void ProfilingRegistry::
230setProfilingLevel(Int32 level)
231{
232 m_profiling_level = level;
233}
234
235/*---------------------------------------------------------------------------*/
236/*---------------------------------------------------------------------------*/
237
238const impl::ForLoopCumulativeStat& ProfilingRegistry::
239globalLoopStat()
240{
241 return global_stat;
242}
243
244/*---------------------------------------------------------------------------*/
245/*---------------------------------------------------------------------------*/
246
247/*---------------------------------------------------------------------------*/
248/*---------------------------------------------------------------------------*/
249
250void impl::ForLoopProfilingStat::
251add(const ForLoopOneExecStat& s)
252{
253 ++m_nb_call;
254 m_nb_chunk += s.nbChunk();
255 m_exec_time += s.execTime();
256}
257
258/*---------------------------------------------------------------------------*/
259/*---------------------------------------------------------------------------*/
260
261/*---------------------------------------------------------------------------*/
262/*---------------------------------------------------------------------------*/
263
264void impl::ForLoopStatInfoList::
265merge(const ForLoopOneExecStat& loop_stat_info, const ForLoopTraceInfo& loop_trace_info)
266{
267 global_stat.merge(loop_stat_info);
268 String loop_name = "Unknown";
269 if (loop_trace_info.isValid()) {
270 loop_name = loop_trace_info.loopName();
271 if (loop_name.empty())
272 loop_name = loop_trace_info.traceInfo().name();
273 }
274 m_p->m_stat_map[loop_name].add(loop_stat_info);
275}
276
277/*---------------------------------------------------------------------------*/
278/*---------------------------------------------------------------------------*/
279
280void impl::AcceleratorStatInfoList::
281print(std::ostream& o) const
282{
283 const auto& htod = memoryTransfer(eMemoryTransferType::HostToDevice);
284 const auto& dtoh = memoryTransfer(eMemoryTransferType::DeviceToHost);
285 o << "MemoryTransferSTATS: HTOD = " << htod.m_nb_byte << " (" << htod.m_nb_call << ")"
286 << " DTOH = " << dtoh.m_nb_byte << " (" << dtoh.m_nb_call << ")";
287 const auto& cpu_fault = memoryPageFault(eMemoryPageFaultType::Cpu);
288 const auto& gpu_fault = memoryPageFault(eMemoryPageFaultType::Gpu);
289 o << " PageFaultCPU = " << cpu_fault.m_nb_fault << " (" << cpu_fault.m_nb_call << ")"
290 << " PageFaultGPU = " << gpu_fault.m_nb_fault << " (" << gpu_fault.m_nb_call << ")";
291}
292
293/*---------------------------------------------------------------------------*/
294/*---------------------------------------------------------------------------*/
295
296} // End namespace Arcane
297
298/*---------------------------------------------------------------------------*/
299/*---------------------------------------------------------------------------*/
Informations de trace pour une boucle 'for'.
Statistiques d'exécution des boucles.
Definition Profiling.h:56
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-