14#include "arcane/impl/ExecutionStatsDumper.h"
16#include "arcane/utils/PlatformUtils.h"
17#include "arcane/utils/OStringStream.h"
18#include "arcane/utils/IMemoryInfo.h"
19#include "arcane/utils/Profiling.h"
20#include "arcane/utils/ITraceMng.h"
21#include "arcane/utils/JSONWriter.h"
22#include "arcane/utils/FloatingPointExceptionSentry.h"
24#include "arcane/utils/internal/ProfilingInternal.h"
26#include "arcane/core/ISubDomain.h"
27#include "arcane/core/IVariableMng.h"
28#include "arcane/core/IVariableSynchronizerMng.h"
29#include "arcane/core/IPropertyMng.h"
30#include "arcane/core/ITimeLoopMng.h"
31#include "arcane/core/IMesh.h"
32#include "arcane/core/ITimeStats.h"
33#include "arcane/core/Directory.h"
34#include "arcane/core/IParallelMng.h"
35#include "arcane/core/ServiceBuilder.h"
36#include "arcane/core/ISimpleTableOutput.h"
51void ExecutionStatsDumper::
52_dumpProfilingJSON(JSONWriter& json_writer)
54 json_writer.write(
"Version",
"1");
55 auto f = [&](
const impl::ForLoopStatInfoList& stat_list) {
56 JSONWriter::Object jo(json_writer);
57 json_writer.writeKey(
"Loops");
58 json_writer.beginArray();
59 for (
const auto& x : stat_list._internalImpl()->m_stat_map) {
60 JSONWriter::Object jo2(json_writer);
61 const auto& s = x.second;
62 json_writer.write(
"Name", x.first);
63 json_writer.write(
"TotalTime", s.execTime());
64 json_writer.write(
"NbLoop", s.nbCall());
65 json_writer.write(
"NbChunk", s.nbChunk());
67 json_writer.endArray();
70 json_writer.writeKey(
"AllLoops");
71 json_writer.beginArray();
73 json_writer.endArray();
79void ExecutionStatsDumper::
80_dumpProfilingJSON(
const String& filename)
82 JSONWriter json_writer;
84 JSONWriter::Object jo(json_writer);
85 _dumpProfilingJSON(json_writer);
88 ofstream ofile(filename.localstr());
89 ofile << json_writer.getBuffer();
96void ExecutionStatsDumper::
97_dumpProfilingTable(ISimpleTableOutput* table)
99 table->init(
"Profiling");
101 table->addColumn(
"TotalTime");
102 table->addColumn(
"NbLoop");
103 table->addColumn(
"NbChunk");
106 auto f = [&](
const impl::ForLoopStatInfoList& stat_list) {
107 for (
const auto& x : stat_list._internalImpl()->m_stat_map) {
108 const auto& s = x.second;
109 String row_name = x.first;
111 row_name = String::format(
"{0}_{1}", x.first, list_index);
112 Integer row = table->addRow(row_name);
113 table->addElementInRow(row,
static_cast<Real>(s.execTime()));
114 table->addElementInRow(row,
static_cast<Real>(s.nbCall()));
115 table->addElementInRow(row,
static_cast<Real>(s.nbChunk()));
126void ExecutionStatsDumper::
127_dumpProfiling(std::ostream& o)
130 _printGlobalLoopInfos(o, ProfilingRegistry::globalLoopStat());
132 auto f = [&](
const impl::ForLoopStatInfoList& stat_list) {
133 _dumpOneLoopListStat(o, stat_list);
140 auto f = [&](
const impl::AcceleratorStatInfoList& stat_list) {
150void ExecutionStatsDumper::
151_dumpOneLoopListStat(std::ostream& o,
const impl::ForLoopStatInfoList& stat_list)
153 struct SortedStatInfo
155 bool operator<(
const SortedStatInfo& rhs)
const
157 return m_stat.execTime() > rhs.m_stat.execTime();
160 impl::ForLoopProfilingStat m_stat;
164 Int64 cumulative_total = 1;
167 std::set<SortedStatInfo> sorted_set;
168 for (
const auto& x : stat_list._internalImpl()->m_stat_map) {
169 const auto& s = x.second;
170 sorted_set.insert({ x.first, s });
171 cumulative_total += s.execTime();
174 o <<
"ProfilingStat\n";
175 o << std::setw(10) <<
"Ncall" << std::setw(10) <<
"Nchunk"
176 << std::setw(11) <<
" T (ms)" << std::setw(10) <<
"Tck (ns)"
179 char old_filler = o.fill();
180 for (
const auto& x : sorted_set) {
181 const impl::ForLoopProfilingStat& s = x.m_stat;
182 Int64 nb_loop = s.nbCall();
183 Int64 nb_chunk = s.nbChunk();
184 Int64 total_time_ns = s.execTime();
185 Int64 total_time_us = total_time_ns / 1000;
186 Int64 total_time_ms = total_time_us / 1000;
187 Int64 total_time_remaining_us = total_time_us % 1000;
188 Int64 time_per_chunk = (nb_chunk == 0) ? 0 : (total_time_ns / nb_chunk);
189 Int64 per_mil = (total_time_ns * 1000) / cumulative_total;
190 Int64 percent = per_mil / 10;
191 Int64 percent_digit = per_mil % 10;
193 o << std::setw(10) << nb_loop << std::setw(10) << nb_chunk
194 << std::setw(7) << total_time_ms <<
".";
195 o << std::setfill(
'0') << std::setw(3) << total_time_remaining_us << std::setfill(old_filler);
196 o << std::setw(10) << time_per_chunk
197 << std::setw(4) << percent <<
"." << percent_digit <<
" " << x.m_name <<
"\n";
199 o <<
"TOTAL=" << cumulative_total / 1000000 <<
"\n";
205void ExecutionStatsDumper::
206_printGlobalLoopInfos(std::ostream& o,
const impl::ForLoopCumulativeStat& cumulative_stat)
208 Int64 nb_loop_parallel_for = cumulative_stat.nbLoopParallelFor();
209 if (nb_loop_parallel_for == 0)
211 Int64 nb_chunk_parallel_for = cumulative_stat.nbChunkParallelFor();
212 Int64 total_time = cumulative_stat.totalTime();
213 double x =
static_cast<double>(total_time);
215 if (nb_loop_parallel_for > 0)
216 x1 = x /
static_cast<double>(nb_loop_parallel_for);
218 if (nb_chunk_parallel_for > 0)
219 x2 = x /
static_cast<double>(nb_chunk_parallel_for);
220 o <<
"LoopStat: global_time (ms) = " << x / 1.0e6 <<
"\n";
221 o <<
"LoopStat: global_nb_loop = " << std::setw(10) << nb_loop_parallel_for <<
" time=" << x1 <<
"\n";
222 o <<
"LoopStat: global_nb_chunk = " << std::setw(10) << nb_chunk_parallel_for <<
" time=" << x2 <<
"\n";
228void ExecutionStatsDumper::
229dumpStats(ISubDomain* sd, ITimeStats* time_stat)
231 FloatingPointExceptionSentry fp_sentry(
false);
234 double mem = platform::getMemoryUsed();
235 info() <<
"Memory consumption (Mo): " << mem / 1.e6;
241 postr() <<
"Properties:\n";
242 sd->propertyMng()->print(postr());
243 plog() << postr.str();
245 IVariableMng* vm = sd->variableMng();
247 IVariableSynchronizerMng* vsm = vm->synchronizerMng();
248 OStringStream ostr_vsm;
249 vsm->dumpStats(ostr_vsm());
250 info() << ostr_vsm.str();
252 OStringStream ostr_full;
253 vm->dumpStats(ostr_full(),
true);
255 vm->dumpStats(ostr(),
false);
256 plog() << ostr_full.str();
257 info() << ostr.str();
261 IMemoryInfo* mem_info = arcaneGlobalMemoryInfo();
263 mem_info->printInfos(ostr());
264 info() << ostr.str();
270 nb_loop = sd->timeLoopMng()->nbLoop();
271 if (sd->defaultMesh())
272 nb_cell = sd->defaultMesh()->nbCell();
275 info() <<
"NB_CELL=" << nb_cell <<
" nb_loop=" << nb_loop;
280 _dumpProfiling(ostr());
281 String str = ostr.str();
283 info() <<
"LoopStatistics:\n"
286 Directory dir = sd->listingDirectory();
287 Int32 rank = sd->parallelMng()->commRank();
289 String filename = String::format(
"loop_profiling.{0}.json", rank);
290 String full_filename(dir.file(filename));
291 _dumpProfilingJSON(full_filename);
294 ServiceBuilder<ISimpleTableOutput> sb(sd);
295 Ref<ISimpleTableOutput> table(sb.createReference(
"SimpleCsvOutput"));
296 _dumpProfilingTable(table.get());
301 _dumpProfilingJSON(
"loop_profiling.json");
305 bool use_elapsed_time =
true;
306 if (!platform::getEnvironmentVariable(
"ARCANE_USE_REAL_TIMER").null())
307 use_elapsed_time =
true;
308 if (!platform::getEnvironmentVariable(
"ARCANE_USE_VIRTUAL_TIMER").null())
309 use_elapsed_time =
false;
310 OStringStream ostr_full;
311 time_stat->dumpStats(ostr_full(),
true, n,
"cell", use_elapsed_time);
313 time_stat->dumpStats(ostr(),
false, n,
"cell", use_elapsed_time);
314 info() << ostr.str();
315 plog() << ostr_full.str();
Lecteur des fichiers de maillage via la bibliothèque LIMA.
static void visitAcceleratorStat(const std::function< void(const impl::AcceleratorStatInfoList &)> &f)
Visite la liste des statistiques sur accélérateur.
static void visitLoopStat(const std::function< void(const impl::ForLoopStatInfoList &)> &f)
Visite la liste des statistiques des boucles.
virtual void flush()=0
Flush tous les flots.
ITraceMng * traceMng() const
Gestionnaire de trace.
TraceMessage plog() const
Flot pour un message de log.
TraceMessage info() const
Flot pour un message d'information.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
bool operator<(const Item &item1, const Item &item2)
Compare deux entités.
Int32 Integer
Type représentant un entier.