14#include "arcane/utils/Profiling.h"
16#include "arcane/utils/ForLoopTraceInfo.h"
17#include "arcane/utils/PlatformUtils.h"
19#include "arcane/utils/internal/ProfilingInternal.h"
36impl::ForLoopStatInfoList::
38: m_p(new ForLoopStatInfoListImpl())
45impl::ForLoopStatInfoList::
56 impl::ForLoopCumulativeStat global_stat;
63ScopedStatLoop(ForLoopOneExecStat* s)
67 m_begin_time = platform::getRealTimeNS();
78 Int64 end_time = platform::getRealTimeNS();
79 m_stat_info->setBeginTime(m_begin_time);
80 m_stat_info->setEndTime(end_time);
93 std::lock_guard<std::mutex> lk(m_mutex);
96 m_for_loop_stat_info_list_vector.push_back(std::move(x));
// Allocates a new impl::AcceleratorStatInfoList and hands its ownership to
// m_accelerator_stat_info_list_vector.
99 impl::AcceleratorStatInfoList* createAcceleratorStatInfoList()
// m_mutex is held through a lock_guard while the list is created and registered.
101 std::lock_guard<std::mutex> lk(m_mutex);
102 std::unique_ptr<impl::AcceleratorStatInfoList> x(
new impl::AcceleratorStatInfoList());
// The vector becomes the owner of the newly created list.
104 m_accelerator_stat_info_list_vector.push_back(std::move(x));
110 for (
const auto& x : m_for_loop_stat_info_list_vector)
// Walks every entry of m_accelerator_stat_info_list_vector.
// \param f callback accepting a const impl::AcceleratorStatInfoList&.
// NOTE(review): presumably f is invoked once per registered list -- confirm
// against the loop body.
114 void visitAccelerator(
const std::function<
void(
const impl::AcceleratorStatInfoList&)>& f)
116 for (
const auto& x : m_accelerator_stat_info_list_vector)
123 std::vector<std::unique_ptr<impl::ForLoopStatInfoList>> m_for_loop_stat_info_list_vector;
124 std::vector<std::unique_ptr<impl::AcceleratorStatInfoList>> m_accelerator_stat_info_list_vector;
142 return _createOrGetForLoopStatInfoList();
// Returns the accelerator statistics list of this object by delegating to
// _createOrGetAcceleratorStatInfoList().
144 impl::AcceleratorStatInfoList* acceleratorStatInfoList()
146 return _createOrGetAcceleratorStatInfoList();
// Forwards one loop execution record to the merge() method of stat_list.
// \param stat_info  statistics of a single loop execution.
// \param trace_info trace information associated with that loop.
// NOTE(review): stat_list is presumably the ForLoopStatInfoList of this
// object -- confirm where it is obtained.
148 void merge(
const ForLoopOneExecStat& stat_info,
const ForLoopTraceInfo& trace_info)
151 stat_list->merge(stat_info, trace_info);
158 if (!m_for_loop_stat_info_list)
159 m_for_loop_stat_info_list = global_all_stat_info_list.createForLoopStatInfoList();
160 return m_for_loop_stat_info_list;
// Returns m_accelerator_stat_info_list. When the pointer is still null it is
// first obtained from global_all_stat_info_list.createAcceleratorStatInfoList().
162 impl::AcceleratorStatInfoList* _createOrGetAcceleratorStatInfoList()
164 if (!m_accelerator_stat_info_list)
165 m_accelerator_stat_info_list = global_all_stat_info_list.createAcceleratorStatInfoList();
166 return m_accelerator_stat_info_list;
172 impl::AcceleratorStatInfoList* m_accelerator_stat_info_list =
nullptr;
179Int32 ProfilingRegistry::m_profiling_level = 0;
187 return thread_local_stat_info.forLoopStatInfoList();
// Returns the 'for' loop statistics list held by thread_local_stat_info.
// NOTE(review): thread_local_stat_info is presumably declared thread_local,
// giving one list per thread -- confirm at its declaration.
193impl::ForLoopStatInfoList* ProfilingRegistry::
194_threadLocalForLoopInstance()
196 return thread_local_stat_info.forLoopStatInfoList();
// Returns the accelerator statistics list held by thread_local_stat_info.
// NOTE(review): thread_local_stat_info is presumably declared thread_local,
// giving one list per thread -- confirm at its declaration.
202impl::AcceleratorStatInfoList* ProfilingRegistry::
203_threadLocalAcceleratorInstance()
205 return thread_local_stat_info.acceleratorStatInfoList();
// Passes the callback f to global_all_stat_info_list.visitForLoop().
// \param f callback accepting a const impl::ForLoopStatInfoList&.
211void ProfilingRegistry::
212visitLoopStat(
const std::function<
void(
const impl::ForLoopStatInfoList&)>& f)
214 global_all_stat_info_list.visitForLoop(f);
// Passes the callback f to global_all_stat_info_list.visitAccelerator().
// \param f callback accepting a const impl::AcceleratorStatInfoList&.
220void ProfilingRegistry::
221visitAcceleratorStat(
const std::function<
void(
const impl::AcceleratorStatInfoList&)>& f)
223 global_all_stat_info_list.visitAccelerator(f);
// Stores level in the class-wide member m_profiling_level.
// \param level new profiling level.
229void ProfilingRegistry::
230setProfilingLevel(Int32 level)
232 m_profiling_level = level;
238const impl::ForLoopCumulativeStat& ProfilingRegistry::
// Accumulates the figures of one loop execution into this object's totals.
// \param s statistics of a single loop execution.
250void impl::ForLoopProfilingStat::
251add(
const ForLoopOneExecStat& s)
// Add the chunk count and the execution time of s to the running sums.
254 m_nb_chunk += s.nbChunk();
255 m_exec_time += s.execTime();
// Records one loop execution, both in global_stat and in the per-loop-name
// entry of m_p->m_stat_map.
// \param loop_stat_info  statistics of a single loop execution.
// \param loop_trace_info trace information used to choose the loop name.
264void impl::ForLoopStatInfoList::
265merge(
const ForLoopOneExecStat& loop_stat_info,
const ForLoopTraceInfo& loop_trace_info)
// Every execution is merged into global_stat, whatever its name.
267 global_stat.merge(loop_stat_info);
// Name used when the trace information is not valid.
268 String loop_name =
"Unknown";
269 if (loop_trace_info.isValid()) {
270 loop_name = loop_trace_info.loopName();
// Fall back to the name carried by traceInfo() when the loop has no name.
271 if (loop_name.empty())
272 loop_name = loop_trace_info.traceInfo().name();
// operator[] on the map is used, so the entry for loop_name is looked up by key.
274 m_p->m_stat_map[loop_name].add(loop_stat_info);
// Writes the memory transfer and page fault counters to the stream o.
// For each category the output is the counter value followed by the number
// of calls in parentheses. No line terminator is written by the visible
// statements.
// \param o output stream receiving the text.
280void impl::AcceleratorStatInfoList::
281print(std::ostream& o)
const
// Transfer counters for the host-to-device and device-to-host directions.
283 const auto& htod = memoryTransfer(eMemoryTransferType::HostToDevice);
284 const auto& dtoh = memoryTransfer(eMemoryTransferType::DeviceToHost);
285 o <<
"MemoryTransferSTATS: HTOD = " << htod.m_nb_byte <<
" (" << htod.m_nb_call <<
")"
286 <<
" DTOH = " << dtoh.m_nb_byte <<
" (" << dtoh.m_nb_call <<
")";
// Page fault counters for the Cpu and Gpu fault types.
287 const auto& cpu_fault = memoryPageFault(eMemoryPageFaultType::Cpu);
288 const auto& gpu_fault = memoryPageFault(eMemoryPageFaultType::Gpu);
289 o <<
" PageFaultCPU = " << cpu_fault.m_nb_fault <<
" (" << cpu_fault.m_nb_call <<
")"
290 <<
" PageFaultGPU = " << gpu_fault.m_nb_fault <<
" (" << gpu_fault.m_nb_call <<
")";
Informations de trace pour une boucle 'for'.
Statistiques d'exécution des boucles.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-