Arcane  v3.15.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
TBBTaskImplementation.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* TBBTaskImplementation.cc (C) 2000-2025 */
9/* */
10/* Implémentation des tâches utilisant TBB (Intel Threads Building Blocks). */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/utils/IThreadImplementation.h"
15#include "arcane/utils/NotImplementedException.h"
16#include "arcane/utils/IFunctor.h"
17#include "arcane/utils/CheckedConvert.h"
18#include "arcane/utils/ForLoopRanges.h"
20#include "arcane/utils/IObservable.h"
21#include "arcane/utils/PlatformUtils.h"
22#include "arcane/utils/Profiling.h"
23#include "arcane/utils/MemoryAllocator.h"
24#include "arcane/utils/FixedArray.h"
25#include "arcane/utils/internal/TaskFactoryInternal.h"
26#include "arcane/utils/internal/DependencyInjection.h"
27
28#include "arcane/core/FactoryService.h"
29
30#include <new>
31#include <stack>
32
33// Il faut définir cette macro pour que la classe 'blocked_rangeNd' soit disponible
34
35#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
36
37// la macro 'ARCANE_USE_ONETBB' est définie dans le CMakeLists.txt
38// si on compile avec la version OneTBB version 2021+
39// (https://github.com/oneapi-src/oneTBB.git)
40// A terme ce sera la seule version supportée par Arcane.
41
42#ifdef ARCANE_USE_ONETBB
43
44// Nécessaire pour avoir accès à task_scheduler_handle
45#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
46#include <tbb/tbb.h>
47#include <oneapi/tbb/concurrent_set.h>
48#include <oneapi/tbb/global_control.h>
49
50#else // ARCANE_USE_ONETBB
51
52// NOTE GG: depuis mars 2019, la version 2018+ des TBB est obligatoire.
53#include <tbb/tbb.h>
54#include <tbb/blocked_rangeNd.h>
55/*
56 * Maintenant vérifie que la version est au moins 2018.
57 */
58#if (TBB_VERSION_MAJOR < 4) || (TBB_VERSION_MAJOR==4 && TBB_VERSION_MINOR<2)
59#define ARCANE_OLD_TBB
60#endif
61
62#if (TBB_VERSION_MAJOR<2018)
63#define ARCANE_OLD_TBB
64#endif
65
66#ifdef ARCANE_OLD_TBB
67#error "Your version of TBB is tool old. TBB 2018+ is required. Please disable TBB in configuration"
68#endif
69
70#include <thread>
71#include <mutex>
72
73#endif // ARCANE_USE_ONETBB
74
75/*---------------------------------------------------------------------------*/
76/*---------------------------------------------------------------------------*/
77
78namespace Arcane
79{
80
81class TBBTaskImplementation;
82
83// TODO: utiliser un pool mémoire spécifique pour gérer les
84// OneTBBTask pour optimiser les new/delete des instances de cette classe.
85// Auparavant avec les anciennes versions de TBB cela était géré avec
86// la méthode 'tbb::task::allocate_child()'.
87
88/*---------------------------------------------------------------------------*/
89/*---------------------------------------------------------------------------*/
90
91namespace
92{
93
94// Positif si on récupère les statistiques d'exécution
95bool isStatActive()
96{
98}
99
104class ScopedExecInfo
105{
106 public:
107
108 explicit ScopedExecInfo(const ForLoopRunInfo& run_info)
109 : m_run_info(run_info)
110 {
111 // Si run_info.execInfo() n'est pas nul, on l'utilise.
112 // Cela signifie que c'est l'appelant de qui va gérer les statistiques
113 // d'exécution. Sinon, on utilise \a m_stat_info si les statistiques
114 // d'exécution sont demandées.
115 ForLoopOneExecStat* ptr = run_info.execStat();
116 if (ptr){
117 m_stat_info_ptr = ptr;
118 m_use_own_run_info = false;
119 }
120 else
121 m_stat_info_ptr = isStatActive() ? &m_stat_info : nullptr;
122 }
123 ~ScopedExecInfo()
124 {
125#ifdef PRINT_STAT_INFO
126 if (m_stat_info_ptr){
127 bool is_valid = m_run_info.traceInfo().isValid();
128 if (!is_valid)
129 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
130 << " stack=" << platform::getStackTrace()
131 << "\n";
132 else
133 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
134 << " trace_name=" << m_run_info.traceInfo().traceInfo().name() << "\n";
135 }
136#endif
137 if (m_stat_info_ptr && m_use_own_run_info){
138 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*m_stat_info_ptr,m_run_info.traceInfo());
139 }
140 }
141
142 public:
143
144 ForLoopOneExecStat* statInfo() const { return m_stat_info_ptr; }
145 bool isOwn() const { return m_use_own_run_info; }
146 private:
147
148 ForLoopOneExecStat m_stat_info;
149 ForLoopOneExecStat* m_stat_info_ptr = nullptr;
150 ForLoopRunInfo m_run_info;
152 bool m_use_own_run_info = true;
153};
154
155/*---------------------------------------------------------------------------*/
156/*---------------------------------------------------------------------------*/
157
158inline int _currentTaskTreadIndex()
159{
160 // NOTE: Avec OneTBB 2021, la valeur n'est plus '0' si on appelle cette méthode
161 // depuis un thread en dehors d'un task_arena. Avec la version 2021,
162 // la valeur est 65535.
163 // NOTE: Il semble que cela soit un bug de la 2021.3.
164 return tbb::this_task_arena::current_thread_index();
165}
166
167inline tbb::blocked_rangeNd<Int32,1>
168_toTBBRange(const ComplexForLoopRanges<1>& r)
169{
170 return {{r.lowerBound<0>(), r.upperBound<0>()}};
171}
172
173inline tbb::blocked_rangeNd<Int32,2>
174_toTBBRange(const ComplexForLoopRanges<2>& r)
175{
176 return {{r.lowerBound<0>(), r.upperBound<0>()},
177 {r.lowerBound<1>(), r.upperBound<1>()}};
178
179}
180
181inline tbb::blocked_rangeNd<Int32,3>
182_toTBBRange(const ComplexForLoopRanges<3>& r)
183{
184 return {{r.lowerBound<0>(), r.upperBound<0>()},
185 {r.lowerBound<1>(), r.upperBound<1>()},
186 {r.lowerBound<2>(), r.upperBound<2>()}};
187}
188
189inline tbb::blocked_rangeNd<Int32,4>
190_toTBBRange(const ComplexForLoopRanges<4>& r)
191{
192 return {{r.lowerBound<0>(), r.upperBound<0>()},
193 {r.lowerBound<1>(), r.upperBound<1>()},
194 {r.lowerBound<2>(), r.upperBound<2>()},
195 {r.lowerBound<3>(), r.upperBound<3>()}};
196}
197
198/*---------------------------------------------------------------------------*/
199/*---------------------------------------------------------------------------*/
200
201inline tbb::blocked_rangeNd<Int32,2>
202_toTBBRangeWithGrain(const tbb::blocked_rangeNd<Int32,2>& r,FixedArray<size_t,2> grain_sizes)
203{
204 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
205 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]}};
206}
207
208inline tbb::blocked_rangeNd<Int32,3>
209_toTBBRangeWithGrain(const tbb::blocked_rangeNd<Int32,3>& r,FixedArray<size_t,3> grain_sizes)
210{
211 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
212 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]},
213 {r.dim(2).begin(), r.dim(2).end(), grain_sizes[2]}};
214}
215
216inline tbb::blocked_rangeNd<Int32,4>
217_toTBBRangeWithGrain(const tbb::blocked_rangeNd<Int32,4>& r,FixedArray<size_t,4> grain_sizes)
218{
219 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
220 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]},
221 {r.dim(2).begin(), r.dim(2).end(), grain_sizes[2]},
222 {r.dim(3).begin(), r.dim(3).end(), grain_sizes[3]}};
223}
224
225/*---------------------------------------------------------------------------*/
226/*---------------------------------------------------------------------------*/
227
228inline ComplexForLoopRanges<2>
229_fromTBBRange(const tbb::blocked_rangeNd<Int32,2>& r)
230{
231 using BoundsType = ArrayBounds<MDDim2>;
232 using ArrayExtentType = typename BoundsType::ArrayExtentType;
233
234 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin()));
235 auto s0 = static_cast<Int32>(r.dim(0).size());
236 auto s1 = static_cast<Int32>(r.dim(1).size());
237 BoundsType sizes(ArrayExtentType(s0,s1));
238 return { lower_bounds, sizes };
239}
240
241inline ComplexForLoopRanges<3>
242_fromTBBRange(const tbb::blocked_rangeNd<Int32,3> & r)
243{
244 using BoundsType = ArrayBounds<MDDim3>;
245 using ArrayExtentType = typename BoundsType::ArrayExtentType;
246
247 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin(),r.dim(2).begin()));
248 auto s0 = static_cast<Int32>(r.dim(0).size());
249 auto s1 = static_cast<Int32>(r.dim(1).size());
250 auto s2 = static_cast<Int32>(r.dim(2).size());
251 BoundsType sizes(ArrayExtentType(s0,s1,s2));
252 return { lower_bounds, sizes };
253}
254
255inline ComplexForLoopRanges<4>
256_fromTBBRange(const tbb::blocked_rangeNd<Int32,4>& r)
257{
258 using BoundsType = ArrayBounds<MDDim4>;
259 using ArrayExtentType = typename BoundsType::ArrayExtentType;
260
261 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin(),r.dim(2).begin(),r.dim(3).begin()));
262 auto s0 = static_cast<Int32>(r.dim(0).size());
263 auto s1 = static_cast<Int32>(r.dim(1).size());
264 auto s2 = static_cast<Int32>(r.dim(2).size());
265 auto s3 = static_cast<Int32>(r.dim(3).size());
266 BoundsType sizes(ArrayExtentType(s0,s1,s2,s3));
267 return { lower_bounds, sizes };
268}
269
270}
271
272/*---------------------------------------------------------------------------*/
273/*---------------------------------------------------------------------------*/
274
275#ifdef ARCANE_USE_ONETBB
276
277/*---------------------------------------------------------------------------*/
278/*---------------------------------------------------------------------------*/
279
280class OneTBBTaskFunctor
281{
282 public:
283 OneTBBTaskFunctor(ITaskFunctor* functor,ITask* task)
284 : m_functor(functor), m_task(task) {}
285 public:
286 void operator()() const
287 {
288 if (m_functor){
289 ITaskFunctor* tf = m_functor;
290 m_functor = 0;
291 TaskContext task_context(m_task);
292 //cerr << "FUNC=" << typeid(*tf).name();
293 tf->executeFunctor(task_context);
294 }
295 }
296 public:
297 mutable ITaskFunctor* m_functor;
298 ITask* m_task;
299};
300
301/*---------------------------------------------------------------------------*/
302/*---------------------------------------------------------------------------*/
303
304class OneTBBTask
305: public ITask
306{
307 public:
308 static const int FUNCTOR_CLASS_SIZE = 32;
309 public:
310 OneTBBTask(ITaskFunctor* f)
311 : m_functor(f)
312 {
313 m_functor = f->clone(functor_buf,FUNCTOR_CLASS_SIZE);
314 }
315 public:
316 OneTBBTaskFunctor taskFunctor() { return OneTBBTaskFunctor(m_functor,this); }
317 void launchAndWait() override;
318 void launchAndWait(ConstArrayView<ITask*> tasks) override;
319 protected:
320 virtual ITask* _createChildTask(ITaskFunctor* functor) override;
321 public:
322 ITaskFunctor* m_functor;
323 char functor_buf[FUNCTOR_CLASS_SIZE];
324};
325using TBBTask = OneTBBTask;
326
327/*---------------------------------------------------------------------------*/
328/*---------------------------------------------------------------------------*/
329
330#else // ARCANE_USE_ONETBB
331
332/*---------------------------------------------------------------------------*/
333/*---------------------------------------------------------------------------*/
334
336: public tbb::task
337, public ITask
338{
339 public:
340 static const int FUNCTOR_CLASS_SIZE = 32;
341 public:
343 : m_functor(f)
344 {
345 m_functor = f->clone(functor_buf,FUNCTOR_CLASS_SIZE);
346 }
347 public:
348 tbb::task* execute() override
349 {
350 if (m_functor){
351 ITaskFunctor* tf = m_functor;
352 m_functor = 0;
354 //cerr << "FUNC=" << typeid(*tf).name();
355 tf->executeFunctor(task_context);
356 }
357 return nullptr;
358 }
359 void launchAndWait() override;
361 protected:
362 ITask* _createChildTask(ITaskFunctor* functor) final;
363 public:
364 ITaskFunctor* m_functor;
365 char functor_buf[FUNCTOR_CLASS_SIZE];
366};
367using TBBTask = LegacyTBBTask;
368
369/*---------------------------------------------------------------------------*/
370/*---------------------------------------------------------------------------*/
371
372#endif // ARCANE_USE_ONETBB
373
374/*---------------------------------------------------------------------------*/
375/*---------------------------------------------------------------------------*/
376/*
377 * Ne pas utiliser l'observer locale au task_arena.
378 * Utiliser l'observer global au scheduler.
379 * Pour l'id, utiliser tbb::this_task_arena::current_thread_index().
380 */
382: public ITaskImplementation
383{
384 class Impl;
385 class ParallelForExecute;
386 template<int RankValue>
388
389 public:
390 // Pour des raisons de performance, s'aligne sur une ligne de cache
391 // et utilise un padding.
393 {
394 public:
395 TaskThreadInfo() : m_task_index(-1){}
396 public:
397 void setTaskIndex(Integer v) { m_task_index = v; }
398 Integer taskIndex() const { return m_task_index; }
399 private:
400 Integer m_task_index;
401 };
410 {
411 public:
413 : m_tti(tti), m_old_task_index(-1)
414 {
415 if (tti){
416 m_old_task_index = tti->taskIndex();
417 tti->setTaskIndex(task_index);
418 }
419 }
421 {
422 if (m_tti)
423 m_tti->setTaskIndex(m_old_task_index);
424 }
425 private:
426 TaskThreadInfo* m_tti;
427 Integer m_old_task_index;
428 };
429
430 public:
432 {
433 }
434 TBBTaskImplementation() = default;
435 ~TBBTaskImplementation() override;
436 public:
437 void build() {}
438 void initialize(Int32 nb_thread) override;
439 void terminate() override;
440
442 {
443#ifdef ARCANE_USE_ONETBB
444 OneTBBTask* t = new OneTBBTask(f);
445#else
446 LegacyTBBTask* t = new(tbb::task::allocate_root()) LegacyTBBTask(f);
447#endif
448 return t;
449 }
450
451 void executeParallelFor(Int32 begin,Int32 size,const ParallelLoopOptions& options,IRangeFunctor* f) final;
452 void executeParallelFor(Int32 begin,Int32 size,Integer grain_size,IRangeFunctor* f) final;
453 void executeParallelFor(Int32 begin,Int32 size,IRangeFunctor* f) final
454 {
455 executeParallelFor(begin,size,TaskFactory::defaultParallelLoopOptions(),f);
456 }
457 void executeParallelFor(const ParallelFor1DLoopInfo& loop_info) override;
458
483
484 bool isActive() const final
485 {
486 return m_is_active;
487 }
488
489 Int32 nbAllowedThread() const final;
490
492 {
493 return (nbAllowedThread() <= 1 ) ? 0 : _currentTaskTreadIndex();
494 }
495
496 Int32 currentTaskIndex() const final;
497
498 void printInfos(std::ostream& o) const final;
499
500 public:
501
508 TaskThreadInfo* currentTaskThreadInfo() const;
509
510 private:
511
512 bool m_is_active = false;
513 Impl* m_p = nullptr;
514
515 private:
516
517 template<int RankValue> void
519 IMDRangeFunctor<RankValue>* functor,
520 const ForLoopRunInfo& run_info);
521 void _executeParallelFor(const ParallelFor1DLoopInfo& loop_info);
522};
523
524/*---------------------------------------------------------------------------*/
525/*---------------------------------------------------------------------------*/
526
528{
530 : public tbb::task_scheduler_observer
531 {
532 public:
534 :
535#ifdef ARCANE_USE_ONETBB
536 tbb::task_scheduler_observer(p->m_main_arena),
537#endif
538 m_p(p)
539 {
540 }
541 void on_scheduler_entry(bool is_worker) override
542 {
543 m_p->notifyThreadCreated(is_worker);
544 }
545 void on_scheduler_exit(bool is_worker) override
546 {
547 m_p->notifyThreadDestroyed(is_worker);
548 }
550 };
551
552 public:
553 Impl() :
554 m_task_observer(this),
555 m_thread_task_infos(AlignedMemoryAllocator::CacheLine())
556 {
557#ifdef ARCANE_USE_ONETBB
558 m_nb_allowed_thread = tbb::info::default_concurrency();
559#else
560 m_nb_allowed_thread = tbb::task_scheduler_init::default_num_threads();
561#endif
562 _init();
563 }
564 Impl(Int32 nb_thread)
565 :
567 m_scheduler_init(nb_thread),
568#endif
569 m_main_arena(nb_thread),
570 m_task_observer(this),
571 m_thread_task_infos(AlignedMemoryAllocator::CacheLine())
572 {
573 m_nb_allowed_thread = nb_thread;
574 _init();
575 }
576 public:
577 Int32 nbAllowedThread() const { return m_nb_allowed_thread; }
578 TaskThreadInfo* threadTaskInfo(Integer index) { return &m_thread_task_infos[index]; }
579 private:
580 Int32 m_nb_allowed_thread;
581
582 public:
583 void terminate()
584 {
585 for( auto x : m_sub_arena_list ){
586 if (x)
587 x->terminate();
588 delete x;
589 }
590 m_sub_arena_list.clear();
591 m_main_arena.terminate();
592#ifdef ARCANE_USE_ONETBB
593 m_task_observer.observe(false);
594 oneapi::tbb::finalize(m_task_scheduler_handle);
595#else
596 m_scheduler_init.terminate();
597 m_task_observer.observe(false);
598#endif
599 }
600 public:
601 void notifyThreadCreated(bool is_worker)
602 {
603 std::thread::id my_thread_id = std::this_thread::get_id();
604
605#ifdef ARCANE_USE_ONETBB
606 // Avec OneTBB, cette méthode est appelée à chaque fois qu'on rentre
607 // dans notre 'task_arena'. Comme il ne faut appeler qu'une seule
608 // fois la méthode de notification on utilise un ensemble pour
609 // conserver la liste des threads déjà créés.
610 // NOTE: On ne peut pas utiliser cette méthode avec la version TBB historique
611 // (2018) car cette méthode 'contains' n'existe pas
612 if (m_constructed_thread_map.contains(my_thread_id))
613 return;
614 m_constructed_thread_map.insert(my_thread_id);
615#endif
616
617 {
619 std::ostringstream ostr;
620 ostr << "TBB: CREATE THREAD"
621 << " nb_allowed=" << m_nb_allowed_thread
622#ifdef ARCANE_USE_ONETBB
623 << " tbb_default_allowed=" << tbb::info::default_concurrency()
624#else
625 << " tbb_default_allowed=" << tbb::task_scheduler_init::default_num_threads()
626#endif
627 << " id=" << my_thread_id
628 << " arena_id=" << _currentTaskTreadIndex()
629 << " is_worker=" << is_worker
630 << "\n";
631 std::cout << ostr.str();
632 }
634 }
635 }
636
637 void notifyThreadDestroyed([[maybe_unused]] bool is_worker)
638 {
639#ifdef ARCANE_USE_ONETBB
640 // Avec OneTBB, cette méthode est appelée à chaque fois qu'on sort
641 // de l'arène principale. Du coup elle ne correspond pas vraiment à une
642 // destruction de thread. On ne fait donc rien pour cette notification
643 // TODO: Regarder comment on peut être notifié de la destruction effective
644 // du thread.
645#else
646 // Il faut toujours un verrou car on n'est pas certain que
647 // les méthodes appelées par l'observable soient thread-safe
648 // (et aussi TaskFactory::createThreadObservable() ne l'est pas)
649 std::scoped_lock sl(m_thread_created_mutex);
651 std::cout << "TBB: DESTROY THREAD"
652 << " id=" << std::this_thread::get_id()
653 << " arena_id=" << _currentTaskTreadIndex()
654 << " is_worker=" << is_worker
655 << '\n';
656 }
657 // TODO: jamais utilisé. Sera supprimé au passage à OneTBB.
658 TaskFactory::destroyThreadObservable()->notifyAllObservers();
659#endif
660 }
661 private:
662#ifdef ARCANE_USE_ONETBB
663#if TBB_VERSION_MAJOR>2021 || (TBB_VERSION_MAJOR==2021 && TBB_VERSION_MINOR>5)
664 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = oneapi::tbb::attach();
665#else
666 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = tbb::task_scheduler_handle::get();
667#endif
668#else
669 tbb::task_scheduler_init m_scheduler_init;
670#endif
671 public:
672 tbb::task_arena m_main_arena;
674 std::vector<tbb::task_arena*> m_sub_arena_list;
675 private:
676 TaskObserver m_task_observer;
677 std::mutex m_thread_created_mutex;
678 UniqueArray<TaskThreadInfo> m_thread_task_infos;
679#ifdef ARCANE_USE_ONETBB
680 tbb::concurrent_set<std::thread::id> m_constructed_thread_map;
681#endif
682 void _init()
683 {
685 std::cout << "TBB: TBBTaskImplementationInit nb_allowed_thread=" << m_nb_allowed_thread
686 << " id=" << std::this_thread::get_id()
687 << " version=" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR
688 << "\n";
689 }
690 m_thread_task_infos.resize(m_nb_allowed_thread);
691 m_task_observer.observe(true);
692 Integer max_arena_size = m_nb_allowed_thread;
693 // Limite artificiellement le nombre de tbb::task_arena
694 // pour éviter d'avoir trop d'objets alloués.
695 if (max_arena_size>512)
696 max_arena_size = 512;
697 m_sub_arena_list.resize(max_arena_size);
698 m_sub_arena_list[0] = m_sub_arena_list[1] = nullptr;
699 for( Integer i=2; i<max_arena_size; ++i )
700 m_sub_arena_list[i] = new tbb::task_arena(i);
701 }
702};
703
704/*---------------------------------------------------------------------------*/
705/*---------------------------------------------------------------------------*/
710{
711 public:
713 : m_functor(f), m_stat_info(stat_info), m_nb_allowed_thread(nb_allowed_thread){}
714 public:
715
716 void operator()(tbb::blocked_range<Integer>& range) const
717 {
718#ifdef ARCANE_CHECK
719 if (TaskFactory::verboseLevel()>=3){
720 std::ostringstream o;
721 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
722 << " id=" << std::this_thread::get_id()
723 << " max_allowed=" << m_nb_allowed_thread
724 << " range_begin=" << range.begin() << " range_size=" << range.size()
725 << "\n";
726 std::cout << o.str();
727 std::cout.flush();
728 }
729
731 if (tbb_index<0 || tbb_index>=m_nb_allowed_thread)
732 ARCANE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
733 tbb_index,m_nb_allowed_thread);
734#endif
735
736 if (m_stat_info)
737 m_stat_info->incrementNbChunk();
738 m_functor->executeFunctor(range.begin(),CheckedConvert::toInteger(range.size()));
739 }
740
741 private:
742 IRangeFunctor* m_functor;
743 ForLoopOneExecStat* m_stat_info = nullptr;
744 Int32 m_nb_allowed_thread;
745};
746
747/*---------------------------------------------------------------------------*/
748/*---------------------------------------------------------------------------*/
752template<int RankValue>
754{
755 public:
756
758 : m_functor(f), m_stat_info(stat_info), m_nb_allowed_thread(nb_allowed_thread){}
759
760 public:
761
762 void operator()(tbb::blocked_rangeNd<Int32,RankValue>& range) const
763 {
764#ifdef ARCANE_CHECK
765 if (TaskFactory::verboseLevel()>=3){
766 std::ostringstream o;
767 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
768 << " id=" << std::this_thread::get_id()
769 << " max_allowed=" << m_nb_allowed_thread
770 << " MDFor ";
771 for( Int32 i=0; i<RankValue; ++i ){
772 Int32 r0 = static_cast<Int32>(range.dim(i).begin());
773 Int32 r1 = static_cast<Int32>(range.dim(i).size());
774 o << " range" << i << " (begin=" << r0 << " size=" << r1 << ")";
775 }
776 o << "\n";
777 std::cout << o.str();
778 std::cout.flush();
779 }
780
782 if (tbb_index<0 || tbb_index>=m_nb_allowed_thread)
783 ARCANE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
784 tbb_index,m_nb_allowed_thread);
785#endif
786
787 if (m_stat_info)
788 m_stat_info->incrementNbChunk();
789 m_functor->executeFunctor(_fromTBBRange(range));
790 }
791
792 private:
793
794 IMDRangeFunctor<RankValue>* m_functor = nullptr;
795 ForLoopOneExecStat* m_stat_info = nullptr;
796 Int32 m_nb_allowed_thread;
797};
798
799/*---------------------------------------------------------------------------*/
800/*---------------------------------------------------------------------------*/
819{
820 public:
822 Integer begin_index,Integer size,Integer grain_size,Integer nb_thread)
823 : m_impl(impl), m_tbb_for(tbb_for), m_nb_thread(nb_thread), m_begin_index(begin_index), m_size(size),
824 m_grain_size(grain_size), m_nb_block(0), m_block_size(0), m_nb_block_per_thread(0)
825 {
826 if (m_nb_thread<1)
827 m_nb_thread = 1;
828
829 if (m_grain_size>0){
830 m_block_size = m_grain_size;
831 if (m_block_size>0){
832 m_nb_block = m_size / m_block_size;
833 if ((m_size % m_block_size)!=0)
834 ++m_nb_block;
835 }
836 else
837 m_nb_block = 1;
838 m_nb_block_per_thread = m_nb_block / m_nb_thread;
839 if ((m_nb_block % m_nb_thread) != 0)
840 ++m_nb_block_per_thread;
841 }
842 else{
843 if (m_nb_block<1)
844 m_nb_block = m_nb_thread;
845 m_block_size = m_size / m_nb_block;
846 m_nb_block_per_thread = 1;
847 }
848 if (TaskFactory::verboseLevel()>=2){
849 std::cout << "TBBDeterministicParallelFor: BEGIN=" << m_begin_index << " size=" << m_size
850 << " grain_size=" << m_grain_size
851 << " nb_block=" << m_nb_block << " nb_thread=" << m_nb_thread
852 << " nb_block_per_thread=" << m_nb_block_per_thread
853 << " block_size=" << m_block_size
854 << " block_size*nb_block=" << m_block_size*m_nb_block << '\n';
855 }
856 }
857 public:
858
865 void operator()(tbb::blocked_range<Integer>& range) const
866 {
867 Integer nb_iter = static_cast<Integer>(range.size());
868 for( Integer i=0; i<nb_iter; ++i ){
869 Integer task_id = range.begin() + i;
870 for ( Integer k=0, kn=m_nb_block_per_thread; k<kn; ++k ){
871 Integer block_id = task_id + (k * m_nb_thread);
872 if (block_id<m_nb_block)
873 _doBlock(task_id,block_id);
874 }
875 }
876 }
877
878 void _doBlock(Integer task_id,Integer block_id) const
879 {
880 TBBTaskImplementation::TaskInfoLockGuard guard(m_impl->currentTaskThreadInfo(),task_id);
881
882 Integer iter_begin = block_id * m_block_size;
883 Integer iter_size = m_block_size;
884 if ((block_id+1)==m_nb_block){
885 // Pour le dernier bloc, la taille est le nombre d'éléments restants
886 iter_size = m_size - iter_begin;
887 }
888 iter_begin += m_begin_index;
889#ifdef ARCANE_CHECK
890 if (TaskFactory::verboseLevel()>=3){
891 std::ostringstream o;
892 o << "TBB: DoBlock: BLOCK task_id=" << task_id << " block_id=" << block_id
893 << " iter_begin=" << iter_begin << " iter_size=" << iter_size << '\n';
894 std::cout << o.str();
895 std::cout.flush();
896 }
897#endif
898 if (iter_size>0){
899 auto r = tbb::blocked_range<int>(iter_begin,iter_begin + iter_size);
900 m_tbb_for(r);
901 }
902 }
903
904 private:
905
906 TBBTaskImplementation* m_impl;
907 const TBBParallelFor& m_tbb_for;
908 Integer m_nb_thread;
909 Integer m_begin_index;
910 Integer m_size;
911 Integer m_grain_size;
912 Integer m_nb_block;
913 Integer m_block_size;
914 Integer m_nb_block_per_thread;
915};
916
917/*---------------------------------------------------------------------------*/
918/*---------------------------------------------------------------------------*/
919
921{
922 public:
923
925 Integer begin,Integer size,IRangeFunctor* f,ForLoopOneExecStat* stat_info)
926 : m_impl(impl), m_begin(begin), m_size(size), m_functor(f), m_options(options), m_stat_info(stat_info){}
927
928 public:
929
930 void operator()() const
931 {
932 Integer nb_thread = m_options.maxThread();
933 TBBParallelFor pf(m_functor,nb_thread,m_stat_info);
934 Integer gsize = m_options.grainSize();
935 tbb::blocked_range<Integer> range(m_begin,m_begin+m_size);
936 if (TaskFactory::verboseLevel()>=1)
937 std::cout << "TBB: TBBTaskImplementationInit ParallelForExecute begin=" << m_begin
938 << " size=" << m_size << " gsize=" << gsize
939 << " partitioner=" << (int)m_options.partitioner()
940 << " nb_thread=" << nb_thread
941 << " has_stat_info=" << (m_stat_info!=nullptr)
942 << '\n';
943
944 if (gsize>0)
945 range = tbb::blocked_range<Integer>(m_begin,m_begin+m_size,gsize);
946
947 if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Static){
948 tbb::parallel_for(range,pf,tbb::static_partitioner());
949 }
950 else if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Deterministic){
951 tbb::blocked_range<Integer> range2(0,nb_thread,1);
952 TBBDeterministicParallelFor dpf(m_impl,pf,m_begin,m_size,gsize,nb_thread);
953 tbb::parallel_for(range2,dpf);
954 }
955 else
956 tbb::parallel_for(range,pf);
957 }
958 private:
959 TBBTaskImplementation* m_impl = nullptr;
960 Integer m_begin;
961 Integer m_size;
962 IRangeFunctor* m_functor = nullptr;
963 ParallelLoopOptions m_options;
964 ForLoopOneExecStat* m_stat_info = nullptr;
965};
966
967/*---------------------------------------------------------------------------*/
968/*---------------------------------------------------------------------------*/
969
970template<int RankValue>
972{
973 public:
974
976 const ParallelLoopOptions& options,
979 : m_impl(impl)
980 , m_tbb_range(_toTBBRange(range))
981 , m_functor(f)
982 , m_options(options)
983 , m_stat_info(stat_info)
984 {
985 // On ne peut pas modifier les valeurs d'une instance de tbb::blocked_rangeNd.
986 // Il faut donc en reconstruire une complètement.
988 Int32 gsize = m_options.grainSize();
989 if (gsize>0){
990 // Si la taille du grain est différent zéro, il faut la répartir
991 // sur l'ensemble des dimensions. On commence par la dernière.
992 // TODO: regarder pourquoi dans certains cas les performances sont
993 // inférieures à celles qu'on obtient en utilisant un partitionneur
994 // statique.
995 constexpr bool is_verbose = false;
996 std::array<Int32,RankValue> range_extents = range.extents().asStdArray();
997 double ratio = static_cast<double>(gsize) / static_cast<double>(range.nbElement());
998 if constexpr (is_verbose){
999 std::cout << "GSIZE=" << gsize << " rank=" << RankValue << " ratio=" << ratio;
1000 for(Int32 i=0; i<RankValue; ++i )
1001 std::cout << " range" << i << "=" << range_extents[i];
1002 std::cout << "\n";
1003 }
1004 Int32 index = RankValue - 1;
1005 Int32 remaining_grain = gsize;
1006 for( ; index>=0; --index ){
1007 Int32 current = range_extents[index];
1008 if constexpr (is_verbose)
1009 std::cout << "Check index=" << index << " remaining=" << remaining_grain << " current=" << current << "\n";
1010 if (remaining_grain>current){
1011 all_grain_sizes[index] = current;
1012 remaining_grain /= current;
1013 }
1014 else{
1016 break;
1017 }
1018 }
1019 for( Int32 i=0; i<index; ++i )
1020 all_grain_sizes[i] = 1;
1021 if constexpr (is_verbose){
1022 for(Int32 i=0; i<RankValue; ++i )
1023 std::cout << " grain" << i << "=" << all_grain_sizes[i];
1024 std::cout << "\n";
1025 }
1026 m_tbb_range = _toTBBRangeWithGrain(m_tbb_range,all_grain_sizes);
1027 }
1028 }
1029
1030 public:
1031
1032 void operator()() const
1033 {
1034 Integer nb_thread = m_options.maxThread();
1035 TBBMDParallelFor<RankValue> pf(m_functor,nb_thread,m_stat_info);
1036
1037 if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Static){
1038 tbb::parallel_for(m_tbb_range,pf,tbb::static_partitioner());
1039 }
1040 else if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Deterministic){
1041 // TODO: implémenter le mode déterministe
1042 ARCANE_THROW(NotImplementedException,"ParallelLoopOptions::Partitioner::Deterministic for multi-dimensionnal loops");
1043 //tbb::blocked_range<Integer> range2(0,nb_thread,1);
1044 //TBBDeterministicParallelFor dpf(m_impl,pf,m_begin,m_size,gsize,nb_thread);
1045 //tbb::parallel_for(range2,dpf);
1046 }
1047 else{
1048 tbb::parallel_for(m_tbb_range,pf);
1049 }
1050 }
1051 private:
1052 TBBTaskImplementation* m_impl = nullptr;
1053 tbb::blocked_rangeNd<Int32,RankValue> m_tbb_range;
1054 IMDRangeFunctor<RankValue>* m_functor = nullptr;
1055 ParallelLoopOptions m_options;
1056 ForLoopOneExecStat* m_stat_info = nullptr;
1057};
1058
1059/*---------------------------------------------------------------------------*/
1060/*---------------------------------------------------------------------------*/
1061
1062TBBTaskImplementation::
1063~TBBTaskImplementation()
1064{
1065 delete m_p;
1066}
1067
1068/*---------------------------------------------------------------------------*/
1069/*---------------------------------------------------------------------------*/
1070
1072initialize(Int32 nb_thread)
1073{
1074 if (nb_thread<0)
1075 nb_thread = 0;
1076 m_is_active = (nb_thread!=1);
1077 if (nb_thread!=0)
1078 m_p = new Impl(nb_thread);
1079 else
1080 m_p = new Impl();
1082 opts.setMaxThread(nbAllowedThread());
1084}
1085
1086/*---------------------------------------------------------------------------*/
1087/*---------------------------------------------------------------------------*/
1088
1090terminate()
1091{
1092 m_p->terminate();
1093}
1094
1095/*---------------------------------------------------------------------------*/
1096/*---------------------------------------------------------------------------*/
1097
1099nbAllowedThread() const
1100{
1101 return m_p->nbAllowedThread();
1102}
1103
1104/*---------------------------------------------------------------------------*/
1105/*---------------------------------------------------------------------------*/
1106
1108printInfos(std::ostream& o) const
1109{
1110#ifdef ARCANE_USE_ONETBB
1111 o << "OneTBBTaskImplementation"
1112 << " version=" << TBB_VERSION_STRING
1113 << " interface=" << TBB_INTERFACE_VERSION
1114 << " runtime_interface=" << TBB_runtime_interface_version();
1115#else
1116 o << "TBBTaskImplementation"
1117 << " version=" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR
1118 << " interface=" << TBB_INTERFACE_VERSION;
1119#endif
1120}
1121
1122/*---------------------------------------------------------------------------*/
1123/*---------------------------------------------------------------------------*/
1124
1125void TBBTaskImplementation::
1126_executeParallelFor(const ParallelFor1DLoopInfo& loop_info)
1127{
1128 ScopedExecInfo sei(loop_info.runInfo());
1129 ForLoopOneExecStat* stat_info = sei.statInfo();
1131
1132 Int32 begin = loop_info.beginIndex();
1133 Int32 size = loop_info.size();
1134 ParallelLoopOptions options = loop_info.runInfo().options().value_or(TaskFactory::defaultParallelLoopOptions());
1135 IRangeFunctor* f = loop_info.functor();
1136
1137 Integer max_thread = options.maxThread();
1138 Integer nb_allowed_thread = m_p->nbAllowedThread();
1139 if (max_thread<0)
1141
1143 std::cout << "TBB: TBBTaskImplementation executeParallelFor begin=" << begin
1144 << " size=" << size << " max_thread=" << max_thread
1145 << " grain_size=" << options.grainSize()
1146 << " nb_allowed=" << nb_allowed_thread << '\n';
1147
1148 // En exécution séquentielle, appelle directement la méthode \a f.
1149 if (max_thread==1 || max_thread==0){
1150 f->executeFunctor(begin,size);
1151 return;
1152 }
1153
1154 // Remplace les valeurs non initialisées de \a options par celles de \a m_default_loop_options
1155 ParallelLoopOptions true_options(options);
1156 true_options.mergeUnsetValues(TaskFactory::defaultParallelLoopOptions());
1157 true_options.setMaxThread(max_thread);
1158
1159 ParallelForExecute pfe(this,true_options,begin,size,f,stat_info);
1160
1161 tbb::task_arena* used_arena = nullptr;
1162 if (max_thread<nb_allowed_thread)
1163 used_arena = m_p->m_sub_arena_list[max_thread];
1164 if (!used_arena)
1165 used_arena = &(m_p->m_main_arena);
1166 used_arena->execute(pfe);
1167}
1168
1169/*---------------------------------------------------------------------------*/
1170/*---------------------------------------------------------------------------*/
1171
1172void TBBTaskImplementation::
1173executeParallelFor(const ParallelFor1DLoopInfo& loop_info)
1174{
1175 _executeParallelFor(loop_info);
1176}
1177
1178/*---------------------------------------------------------------------------*/
1179/*---------------------------------------------------------------------------*/
1186template<int RankValue> void TBBTaskImplementation::
1189 const ForLoopRunInfo& run_info)
1190{
1191 ParallelLoopOptions options;
1192 if (run_info.options().has_value())
1193 options = run_info.options().value();
1194
1195 ScopedExecInfo sei(run_info);
1196 ForLoopOneExecStat* stat_info = sei.statInfo();
1197 impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1198
1199 if (TaskFactory::verboseLevel()>=1){
1200 std::cout << "TBB: TBBTaskImplementation executeMDParallelFor nb_dim=" << RankValue
1201 << " nb_element=" << loop_ranges.nbElement()
1202 << " grain_size=" << options.grainSize()
1203 << " name=" << run_info.traceInfo().traceInfo()
1204 << " has_stat_info=" << (stat_info!=nullptr)
1205 << '\n';
1206 }
1207
1208 Integer max_thread = options.maxThread();
1209 // En exécution séquentielle, appelle directement la méthode \a f.
1210 if (max_thread==1 || max_thread==0){
1211 functor->executeFunctor(loop_ranges);
1212 return;
1213 }
1214
1215 // Remplace les valeurs non initialisées de \a options par celles de \a m_default_loop_options
1218
1219 Integer nb_allowed_thread = m_p->nbAllowedThread();
1220 if (max_thread<0)
1222 tbb::task_arena* used_arena = nullptr;
1225 if (!used_arena)
1226 used_arena = &(m_p->m_main_arena);
1227
1228 // Pour l'instant pour la dimension 1, utilise le 'ParallelForExecute' historique
1229 if constexpr (RankValue==1){
1231 auto x1 = [&](Integer begin,Integer size)
1232 {
1233 functor->executeFunctor(makeLoopRanges(ForLoopRange(begin,size)));
1234 //functor->executeFunctor(ComplexForLoopRanges<1>(begin,size));
1235 };
1236 LambdaRangeFunctorT<decltype(x1)> functor_1d(x1);
1237 Integer begin1 = CheckedConvert::toInteger(range_1d.dim(0).begin());
1238 Integer size1 = CheckedConvert::toInteger(range_1d.dim(0).size());
1240 used_arena->execute(pfe);
1241 }
1242 else{
1244 used_arena->execute(pfe);
1245 }
1246}
1247
1248/*---------------------------------------------------------------------------*/
1249/*---------------------------------------------------------------------------*/
1250
1251void TBBTaskImplementation::
1252executeParallelFor(Integer begin,Integer size,Integer grain_size,IRangeFunctor* f)
1253{
1255 opts.setGrainSize(grain_size);
1257 executeParallelFor(ParallelFor1DLoopInfo(begin,size,f,run_info));
1258}
1259
1260/*---------------------------------------------------------------------------*/
1261/*---------------------------------------------------------------------------*/
1262
1263void TBBTaskImplementation::
1264executeParallelFor(Integer begin,Integer size,const ParallelLoopOptions& options,IRangeFunctor* f)
1265{
1266 executeParallelFor(ParallelFor1DLoopInfo(begin,size,f,ForLoopRunInfo(options)));
1267}
1268
1269/*---------------------------------------------------------------------------*/
1270/*---------------------------------------------------------------------------*/
1271
1274{
1276 if (thread_id>=0)
1277 return m_p->threadTaskInfo(thread_id);
1278 return nullptr;
1279}
1280
1281/*---------------------------------------------------------------------------*/
1282/*---------------------------------------------------------------------------*/
1283
1285currentTaskIndex() const
1286{
1288#ifdef ARCANE_USE_ONETBB
1289 if (thread_id<0 || thread_id>=m_p->nbAllowedThread())
1290 return 0;
1291#endif
1293 if (tti){
1294 Int32 task_index = tti->taskIndex();
1295 if (task_index>=0)
1296 return task_index;
1297 }
1298 return thread_id;
1299}
1300
1301/*---------------------------------------------------------------------------*/
1302/*---------------------------------------------------------------------------*/
1303
1304#ifdef ARCANE_USE_ONETBB
1305
1306/*---------------------------------------------------------------------------*/
1307/*---------------------------------------------------------------------------*/
1308
1309void OneTBBTask::
1310launchAndWait()
1311{
1312 tbb::task_group task_group;
1313 task_group.run(taskFunctor());
1314 task_group.wait();
1315 delete this;
1316}
1317
1318/*---------------------------------------------------------------------------*/
1319/*---------------------------------------------------------------------------*/
1320
1321void OneTBBTask::
1322launchAndWait(ConstArrayView<ITask*> tasks)
1323{
1324 tbb::task_group task_group;
1325 Integer n = tasks.size();
1326 if (n==0)
1327 return;
1328
1329 //set_ref_count(n+1);
1330 for( Integer i=0; i<n; ++i ){
1331 OneTBBTask* t = static_cast<OneTBBTask*>(tasks[i]);
1332 task_group.run(t->taskFunctor());
1333 }
1334 task_group.wait();
1335 for( Integer i=0; i<n; ++i ){
1336 OneTBBTask* t = static_cast<OneTBBTask*>(tasks[i]);
1337 delete t;
1338 }
1339}
1340
1341/*---------------------------------------------------------------------------*/
1342/*---------------------------------------------------------------------------*/
1343
1344ITask* OneTBBTask::
1345_createChildTask(ITaskFunctor* functor)
1346{
1347 OneTBBTask* t = new OneTBBTask(functor);
1348 return t;
1349}
1350
1351/*---------------------------------------------------------------------------*/
1352/*---------------------------------------------------------------------------*/
1353
1354#else // ARCANE_USE_ONETBB
1355
1356/*---------------------------------------------------------------------------*/
1357/*---------------------------------------------------------------------------*/
1358
1361{
1362 task::spawn_root_and_wait(*this);
1363}
1364
1365/*---------------------------------------------------------------------------*/
1366/*---------------------------------------------------------------------------*/
1367
1370{
1371 Integer n = tasks.size();
1372 if (n==0)
1373 return;
1374
1375 set_ref_count(n+1);
1376 for( Integer i=0; i<n-1; ++i ){
1377 TBBTask* t = static_cast<TBBTask*>(tasks[i]);
1378 spawn(*t);
1379 }
1380 spawn_and_wait_for_all(*static_cast<TBBTask*>(tasks[n-1]));
1381}
1382
1383/*---------------------------------------------------------------------------*/
1384/*---------------------------------------------------------------------------*/
1385
1386ITask* LegacyTBBTask::
1387_createChildTask(ITaskFunctor* functor)
1388{
1389 TBBTask* t = new(allocate_child()) TBBTask(functor);
1390 return t;
1391}
1392
1393/*---------------------------------------------------------------------------*/
1394/*---------------------------------------------------------------------------*/
1395
1396#endif // ARCANE_USE_ONETBB
1397
1398/*---------------------------------------------------------------------------*/
1399/*---------------------------------------------------------------------------*/
1400
1401// TODO: a supprimer maintenant qu'on utilise 'DependencyInjection'
1402ARCANE_REGISTER_APPLICATION_FACTORY(TBBTaskImplementation,ITaskImplementation,
1403 TBBTaskImplementation);
1404
1405ARCANE_DI_REGISTER_PROVIDER(TBBTaskImplementation,
1406 DependencyInjection::ProviderProperty("TBBTaskImplementation"),
1407 ARCANE_DI_INTERFACES(ITaskImplementation),
1408 ARCANE_DI_EMPTY_CONSTRUCTOR());
1409
1410/*---------------------------------------------------------------------------*/
1411/*---------------------------------------------------------------------------*/
1412
1413} // End namespace Arcane
1414
1415/*---------------------------------------------------------------------------*/
1416/*---------------------------------------------------------------------------*/
#define ARCANE_THROW(exception_class,...)
Macro pour envoyer une exception avec formattage.
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
#define ARCANE_ALIGNAS_PACKED(value)
Macro pour garantir le compactage et l'alignement d'une classe sur value octets.
Classes, Types et macros pour gérer la concurrence.
#define ARCANE_REGISTER_APPLICATION_FACTORY(aclass, ainterface, aname)
Enregistre un service de fabrique pour la classe aclass.
Interval d'itération complexe.
Classe pour gérer le profiling d'une seule exécution d'une boucle.
Definition Profiling.h:93
Intervalle d'itération pour une boucle.
Informations d'exécution d'une boucle.
Interface d'un fonctor sur un interval d'itération multi-dimensionnel de dimension RankValue.
Interface d'un fonctor sur un interval d'itération.
virtual void executeFunctor(Integer begin, Integer size)=0
Exécute la méthode associée.
Interface d'un fonctor pour une tâche.
Implémentation d'une fabrique de tâches.
Interface d'une tâche concourante.
Fonctor sur un interval d'itération instancié via une lambda fonction.
void launchAndWait() override
Lance la tâche et bloque jusqu'à ce qu'elle se termine.
Lecteur des fichiers de maillage via la bibliothèque LIMA.
Definition Lima.cc:149
Caractéristiques d'un boucle 1D multi-thread.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
Int32 maxThread() const
Nombre maximal de threads autorisés.
static impl::ForLoopStatInfoList * _threadLocalForLoopInstance()
Definition Profiling.cc:194
static bool hasProfiling()
Indique si le profilage est actif.
Definition Profiling.h:173
Structure contenant les informations pour créer un service.
Implémentation déterministe de ParallelFor.
void operator()(tbb::blocked_range< Integer > &range) const
Opérateur pour un thread donné.
Exécuteur pour une boucle multi-dimension.
Exécuteur pour une boucle 1D.
std::vector< tbb::task_arena * > m_sub_arena_list
Tableau dont le i-ème élément contient la tbb::task_arena pour i thread.
Classe pour positionner TaskThreadInfo::taskIndex().
Int32 currentTaskThreadIndex() const final
Implémentation de TaskFactory::currentTaskThreadIndex()
void initialize(Int32 nb_thread) override
void executeParallelFor(const ComplexForLoopRanges< 1 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 1 > *functor) final
Exécute une boucle 1D en concurrence.
ITask * createRootTask(ITaskFunctor *f) override
Créé une tâche racine. L'implémentation doit recopier la valeur de f qui est soit un TaskFunctor,...
void printInfos(std::ostream &o) const final
Affiche les informations sur le runtime utilisé
void executeParallelFor(const ComplexForLoopRanges< 3 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 3 > *functor) final
Exécute une boucle 3D en concurrence.
void executeParallelFor(const ComplexForLoopRanges< 4 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 4 > *functor) final
Exécute une boucle 4D en concurrence.
TaskThreadInfo * currentTaskThreadInfo() const
Instance de TaskThreadInfo associé au thread courant.
bool isActive() const final
Indique si l'implémentation est active.
void _executeMDParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, IMDRangeFunctor< RankValue > *functor, const ForLoopRunInfo &run_info)
Exécution d'une boucle N-dimensions.
Int32 currentTaskIndex() const final
Implémentation de TaskFactory::currentTaskIndex()
void executeParallelFor(const ComplexForLoopRanges< 2 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 2 > *functor) final
Exécute une boucle 2D en concurrence.
Int32 nbAllowedThread() const final
Nombre de threads utilisés au maximum pour gérer les tâches.
Contexte d'éxecution d'une tâche.
static void notifyThreadCreated()
Notifie tous les observateurs de création de thread.
static IObservable * destroyThreadObservable()
Observable appelé lors de la destruction d'un thread pour une tâche.
static const ParallelLoopOptions & defaultParallelLoopOptions()
Valeurs par défaut d'exécution d'une boucle parallèle.
static Integer verboseLevel()
Niveau de verbosité
static void setDefaultParallelLoopOptions(const ParallelLoopOptions &v)
Positionne les valeurs par défaut d'exécution d'une boucle parallèle.
Classe permettant de récupérer le temps passé entre l'appel au constructeur et au destructeur.
Definition Profiling.h:38
Allocateur mémoire avec alignement mémoire spécifique.
Exception lorsqu'une fonction n'est pas implémentée.
Integer toInteger(Real r)
Converti un Int64 en un Integer.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
SimpleForLoopRanges< 1 > makeLoopRanges(Int32 n1)
Créé un intervalle d'itération [0,n1[.
Int32 Integer
Type représentant un entier.