14#include "arcane/utils/Profiling.h"
16#include "arcane/utils/ForLoopTraceInfo.h"
17#include "arcane/utils/PlatformUtils.h"
19#include "arcane/utils/internal/ProfilingInternal.h"
36impl::ForLoopStatInfoList::
45impl::ForLoopStatInfoList::
56 impl::ForLoopCumulativeStat global_stat;
63ScopedStatLoop(ForLoopOneExecStat* s)
67 m_begin_time = platform::getRealTimeNS();
78 Int64 end_time = platform::getRealTimeNS();
79 m_stat_info->setBeginTime(m_begin_time);
80 m_stat_info->setEndTime(end_time);
93 std::lock_guard<std::mutex> lk(m_mutex);
96 m_for_loop_stat_info_list_vector.push_back(std::move(x));
101 std::lock_guard<std::mutex> lk(m_mutex);
104 m_accelerator_stat_info_list_vector.push_back(std::move(x));
110 for (
const auto& x : m_for_loop_stat_info_list_vector)
116 for (
const auto& x : m_accelerator_stat_info_list_vector)
123 std::vector<std::unique_ptr<impl::ForLoopStatInfoList>> m_for_loop_stat_info_list_vector;
124 std::vector<std::unique_ptr<impl::AcceleratorStatInfoList>> m_accelerator_stat_info_list_vector;
142 return _createOrGetForLoopStatInfoList();
146 return _createOrGetAcceleratorStatInfoList();
151 stat_list->merge(stat_info, trace_info);
158 if (!m_for_loop_stat_info_list)
159 m_for_loop_stat_info_list = global_all_stat_info_list.createForLoopStatInfoList();
160 return m_for_loop_stat_info_list;
164 if (!m_accelerator_stat_info_list)
165 m_accelerator_stat_info_list = global_all_stat_info_list.createAcceleratorStatInfoList();
166 return m_accelerator_stat_info_list;
179Int32 ProfilingRegistry::m_profiling_level = 0;
187 return thread_local_stat_info.forLoopStatInfoList();
194_threadLocalForLoopInstance()
196 return thread_local_stat_info.forLoopStatInfoList();
203_threadLocalAcceleratorInstance()
205 return thread_local_stat_info.acceleratorStatInfoList();
211void ProfilingRegistry::
214 global_all_stat_info_list.visitForLoop(f);
220void ProfilingRegistry::
223 global_all_stat_info_list.visitAccelerator(f);
229void ProfilingRegistry::
230setProfilingLevel(
Int32 level)
232 m_profiling_level = level;
250void impl::ForLoopProfilingStat::
264void impl::ForLoopStatInfoList::
267 global_stat.merge(loop_stat_info);
268 String loop_name =
"Unknown";
269 if (loop_trace_info.isValid()) {
270 loop_name = loop_trace_info.loopName();
271 if (loop_name.
empty())
272 loop_name = loop_trace_info.traceInfo().name();
274 m_p->m_stat_map[loop_name].add(loop_stat_info);
280void impl::AcceleratorStatInfoList::
281print(std::ostream& o)
const
283 const auto& htod = memoryTransfer(eMemoryTransferType::HostToDevice);
284 const auto& dtoh = memoryTransfer(eMemoryTransferType::DeviceToHost);
285 o <<
"MemoryTransferSTATS: HTOD = " << htod.m_nb_byte <<
" (" << htod.m_nb_call <<
")"
286 <<
" DTOH = " << dtoh.m_nb_byte <<
" (" << dtoh.m_nb_call <<
")";
287 const auto& cpu_fault = memoryPageFault(eMemoryPageFaultType::Cpu);
288 const auto& gpu_fault = memoryPageFault(eMemoryPageFaultType::Gpu);
289 o <<
" PageFaultCPU = " << cpu_fault.m_nb_fault <<
" (" << cpu_fault.m_nb_call <<
")"
290 <<
" PageFaultGPU = " << gpu_fault.m_nb_fault <<
" (" << gpu_fault.m_nb_call <<
")";
Classe pour gérer le profiling d'une seule exécution d'une boucle.
Int64 nbChunk() const
Nombre de chunks.
Int64 execTime() const
Temps d'exécution (en nanosecondes).
Informations de trace pour une boucle 'for'.
Chaîne de caractères unicode.
bool empty() const
Vrai si la chaîne est vide (nulle ou "").
Statistiques pour les accélérateurs.
Statistiques cumulées sur le nombre de boucles exécutées.
Statistiques d'exécution des boucles.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
std::int32_t Int32
Type entier signé sur 32 bits.