Arcane  v4.1.2.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
TBBTaskImplementation.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* TBBTaskImplementation.cc (C) 2000-2025 */
9/* */
10/* Implémentation des tâches utilisant TBB (Intel Threads Building Blocks). */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/base/NotImplementedException.h"
15#include "arccore/base/IFunctor.h"
16#include "arccore/base/ForLoopRanges.h"
17#include "arccore/base/IObservable.h"
18#include "arccore/base/PlatformUtils.h"
19#include "arccore/base/FixedArray.h"
20#include "arccore/base/Profiling.h"
21#include "arccore/base/CheckedConvert.h"
22#include "arccore/base/FixedArray.h"
23#include "arccore/base/ForLoopRunInfo.h"
24#include "arccore/base/internal/DependencyInjection.h"
25
26#include "arccore/concurrency/IThreadImplementation.h"
27#include "arccore/concurrency/Task.h"
28#include "arccore/concurrency/ITaskImplementation.h"
29#include "arccore/concurrency/TaskFactory.h"
30#include "arccore/concurrency/ParallelFor.h"
31#include "arccore/concurrency/internal/TaskFactoryInternal.h"
32
33#include <new>
34#include <stack>
35#include <vector>
36
37// Il faut définir cette macro pour que la classe 'blocked_rangeNd' soit disponible
38
39#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
40
41// la macro 'ARCCORE_USE_ONETBB' est définie dans le CMakeLists.txt
42// si on compile avec la version OneTBB version 2021+
43// (https://github.com/oneapi-src/oneTBB.git)
44// A terme ce sera la seule version supportée par Arcane.
45
46// Nécessaire pour avoir accès à task_scheduler_handle
47#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
48#include <tbb/tbb.h>
49#include <oneapi/tbb/concurrent_set.h>
50#include <oneapi/tbb/global_control.h>
51
52#include <thread>
53#include <mutex>
54
55/*---------------------------------------------------------------------------*/
56/*---------------------------------------------------------------------------*/
57
58namespace Arcane
59{
60
62
63// TODO: utiliser un pool mémoire spécifique pour gérer les
64// OneTBBTask pour optimiser les new/delete des instances de cette classe.
65// Auparavant avec les anciennes versions de TBB cela était géré avec
66// la méthode 'tbb::task::allocate_child()'.
67
68/*---------------------------------------------------------------------------*/
69/*---------------------------------------------------------------------------*/
70
71#if (TBB_VERSION_MAJOR > 2022) || (TBB_VERSION_MAJOR == 2022 && TBB_VERSION_MINOR > 0) || defined __TBB_blocked_nd_range_H
72
73// La classe "blocked_rangeNd" a été retirée dans la version
74// 2022.0.0 et remplacée par "blocked_nd_range".
75template <typename Value, unsigned int N>
76using blocked_nd_range = tbb::blocked_nd_range<Value, N>;
77
78#else
79
80template <typename Value, unsigned int N>
81using blocked_nd_range = tbb::blocked_rangeNd<Value, N>;
82
83#endif
84
85/*---------------------------------------------------------------------------*/
86/*---------------------------------------------------------------------------*/
87
88namespace
89{
90 constexpr Int32 cache_line_size = 64;
91 // Positif si on récupère les statistiques d'exécution
92 bool isStatActive()
93 {
95 }
96
101 class ScopedExecInfo
102 {
103 public:
104
105 explicit ScopedExecInfo(const ForLoopRunInfo& run_info)
106 : m_run_info(run_info)
107 {
108 // Si run_info.execInfo() n'est pas nul, on l'utilise.
109 // Cela signifie que c'est l'appelant de qui va gérer les statistiques
110 // d'exécution. Sinon, on utilise \a m_stat_info si les statistiques
111 // d'exécution sont demandées.
112 ForLoopOneExecStat* ptr = run_info.execStat();
113 if (ptr) {
114 m_stat_info_ptr = ptr;
115 m_use_own_run_info = false;
116 }
117 else
118 m_stat_info_ptr = isStatActive() ? &m_stat_info : nullptr;
119 }
120 ~ScopedExecInfo()
121 {
122#ifdef PRINT_STAT_INFO
123 if (m_stat_info_ptr) {
124 bool is_valid = m_run_info.traceInfo().isValid();
125 if (!is_valid)
126 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
127 << " stack=" << platform::getStackTrace()
128 << "\n";
129 else
130 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
131 << " trace_name=" << m_run_info.traceInfo().traceInfo().name() << "\n";
132 }
133#endif
134 if (m_stat_info_ptr && m_use_own_run_info) {
135 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*m_stat_info_ptr, m_run_info.traceInfo());
136 }
137 }
138
139 public:
140
141 ForLoopOneExecStat* statInfo() const { return m_stat_info_ptr; }
142 bool isOwn() const { return m_use_own_run_info; }
143
144 private:
145
146 ForLoopOneExecStat m_stat_info;
147 ForLoopOneExecStat* m_stat_info_ptr = nullptr;
148 ForLoopRunInfo m_run_info;
150 bool m_use_own_run_info = true;
151 };
152
153 /*---------------------------------------------------------------------------*/
154 /*---------------------------------------------------------------------------*/
155
156 inline int _currentTaskTreadIndex()
157 {
158 // NOTE: Avec OneTBB 2021, la valeur n'est plus '0' si on appelle cette méthode
159 // depuis un thread en dehors d'un task_arena. Avec la version 2021,
160 // la valeur est 65535.
161 // NOTE: Il semble que cela soit un bug de la 2021.3.
162 return tbb::this_task_arena::current_thread_index();
163 }
164
165 inline blocked_nd_range<Int32, 1>
166 _toTBBRange(const ComplexForLoopRanges<1>& r)
167 {
168 return { { r.lowerBound<0>(), r.upperBound<0>() } };
169 }
170
171 inline blocked_nd_range<Int32, 2>
172 _toTBBRange(const ComplexForLoopRanges<2>& r)
173 {
174 return { { r.lowerBound<0>(), r.upperBound<0>() },
175 { r.lowerBound<1>(), r.upperBound<1>() } };
176 }
177
178 inline blocked_nd_range<Int32, 3>
179 _toTBBRange(const ComplexForLoopRanges<3>& r)
180 {
181 return { { r.lowerBound<0>(), r.upperBound<0>() },
182 { r.lowerBound<1>(), r.upperBound<1>() },
183 { r.lowerBound<2>(), r.upperBound<2>() } };
184 }
185
186 inline blocked_nd_range<Int32, 4>
187 _toTBBRange(const ComplexForLoopRanges<4>& r)
188 {
189 return { { r.lowerBound<0>(), r.upperBound<0>() },
190 { r.lowerBound<1>(), r.upperBound<1>() },
191 { r.lowerBound<2>(), r.upperBound<2>() },
192 { r.lowerBound<3>(), r.upperBound<3>() } };
193 }
194
195 /*---------------------------------------------------------------------------*/
196 /*---------------------------------------------------------------------------*/
197
198 inline blocked_nd_range<Int32, 2>
199 _toTBBRangeWithGrain(const blocked_nd_range<Int32, 2>& r, FixedArray<size_t, 2> grain_sizes)
200 {
201 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
202 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] } };
203 }
204
205 inline blocked_nd_range<Int32, 3>
206 _toTBBRangeWithGrain(const blocked_nd_range<Int32, 3>& r, FixedArray<size_t, 3> grain_sizes)
207 {
208 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
209 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] },
210 { r.dim(2).begin(), r.dim(2).end(), grain_sizes[2] } };
211 }
212
213 inline blocked_nd_range<Int32, 4>
214 _toTBBRangeWithGrain(const blocked_nd_range<Int32, 4>& r, FixedArray<size_t, 4> grain_sizes)
215 {
216 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
217 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] },
218 { r.dim(2).begin(), r.dim(2).end(), grain_sizes[2] },
219 { r.dim(3).begin(), r.dim(3).end(), grain_sizes[3] } };
220 }
221
222 /*---------------------------------------------------------------------------*/
223 /*---------------------------------------------------------------------------*/
224
226 _fromTBBRange(const blocked_nd_range<Int32, 2>& r)
227 {
228 using BoundsType = ArrayBounds<MDDim2>;
229 using ArrayExtentType = BoundsType::ArrayExtentType;
230
231 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin()));
232 auto s0 = static_cast<Int32>(r.dim(0).size());
233 auto s1 = static_cast<Int32>(r.dim(1).size());
234 BoundsType sizes(ArrayExtentType(s0, s1));
235 return { lower_bounds, sizes };
236 }
237
239 _fromTBBRange(const blocked_nd_range<Int32, 3>& r)
240 {
241 using BoundsType = ArrayBounds<MDDim3>;
242 using ArrayExtentType = BoundsType::ArrayExtentType;
243
244 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin(), r.dim(2).begin()));
245 auto s0 = static_cast<Int32>(r.dim(0).size());
246 auto s1 = static_cast<Int32>(r.dim(1).size());
247 auto s2 = static_cast<Int32>(r.dim(2).size());
248 BoundsType sizes(ArrayExtentType(s0, s1, s2));
249 return { lower_bounds, sizes };
250 }
251
253 _fromTBBRange(const blocked_nd_range<Int32, 4>& r)
254 {
255 using BoundsType = ArrayBounds<MDDim4>;
256 using ArrayExtentType = typename BoundsType::ArrayExtentType;
257
258 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin(), r.dim(2).begin(), r.dim(3).begin()));
259 auto s0 = static_cast<Int32>(r.dim(0).size());
260 auto s1 = static_cast<Int32>(r.dim(1).size());
261 auto s2 = static_cast<Int32>(r.dim(2).size());
262 auto s3 = static_cast<Int32>(r.dim(3).size());
263 BoundsType sizes(ArrayExtentType(s0, s1, s2, s3));
264 return { lower_bounds, sizes };
265 }
266
267} // namespace
268
269/*---------------------------------------------------------------------------*/
270/*---------------------------------------------------------------------------*/
271
272class OneTBBTaskFunctor
273{
274 public:
275
276 OneTBBTaskFunctor(ITaskFunctor* functor, ITask* task)
277 : m_functor(functor)
278 , m_task(task)
279 {}
280
281 public:
282
283 void operator()() const
284 {
285 if (m_functor) {
286 ITaskFunctor* tf = m_functor;
287 m_functor = nullptr;
288 TaskContext task_context(m_task);
289 //cerr << "FUNC=" << typeid(*tf).name();
290 tf->executeFunctor(task_context);
291 }
292 }
293
294 public:
295
296 mutable ITaskFunctor* m_functor;
297 ITask* m_task;
298};
299
300/*---------------------------------------------------------------------------*/
301/*---------------------------------------------------------------------------*/
302
303class OneTBBTask
304: public ITask
305{
306 public:
307
308 static const int FUNCTOR_CLASS_SIZE = 32;
309
310 public:
311
312 explicit OneTBBTask(ITaskFunctor* f)
313 : m_functor(f)
314 {
315 m_functor = f->clone(m_functor_buf.data(), FUNCTOR_CLASS_SIZE);
316 }
317
318 public:
319
320 OneTBBTaskFunctor taskFunctor() { return OneTBBTaskFunctor(m_functor, this); }
321 void launchAndWait() override;
322 void launchAndWait(ConstArrayView<ITask*> tasks) override;
323
324 protected:
325
326 ITask* _createChildTask(ITaskFunctor* functor) override;
327
328 public:
329
330 ITaskFunctor* m_functor = nullptr;
332};
333using TBBTask = OneTBBTask;
334
335/*---------------------------------------------------------------------------*/
336/*---------------------------------------------------------------------------*/
337/*
338 * Ne pas utiliser l'observer locale au task_arena.
339 * Utiliser l'observer global au scheduler.
340 * Pour l'id, utiliser tbb::this_task_arena::current_thread_index().
341 */
342class TBBTaskImplementation
343: public ITaskImplementation
344{
345 class Impl;
346 class ParallelForExecute;
347 template <int RankValue>
349
350 public:
351
352 // Pour des raisons de performance, s'aligne sur une ligne de cache
353 // et utilise un padding.
354 class ARCCORE_ALIGNAS_PACKED(64) TaskThreadInfo
355 {
356 public:
357
358 TaskThreadInfo()
359 : m_task_index(-1)
360 {}
361
362 public:
363
364 void setTaskIndex(Integer v) { m_task_index = v; }
365 Integer taskIndex() const { return m_task_index; }
366
367 private:
368
369 Integer m_task_index;
370 };
378 class TaskInfoLockGuard
379 {
380 public:
381
382 TaskInfoLockGuard(TaskThreadInfo* tti, Integer task_index)
383 : m_tti(tti)
384 , m_old_task_index(-1)
385 {
386 if (tti) {
387 m_old_task_index = tti->taskIndex();
388 tti->setTaskIndex(task_index);
389 }
390 }
391 ~TaskInfoLockGuard()
392 {
393 if (m_tti)
394 m_tti->setTaskIndex(m_old_task_index);
395 }
396
397 private:
398
399 TaskThreadInfo* m_tti;
400 Integer m_old_task_index;
401 };
402
403 public:
404
405 TBBTaskImplementation() = default;
406 ~TBBTaskImplementation() override;
407
408 public:
409
410 void build() {}
411 void initialize(Int32 nb_thread) override;
412 void terminate() override;
413
415 {
416 OneTBBTask* t = new OneTBBTask(f);
417 return t;
418 }
419
420 void executeParallelFor(Int32 begin, Int32 size, const ParallelLoopOptions& options, IRangeFunctor* f) final;
421 void executeParallelFor(Int32 begin, Int32 size, Integer grain_size, IRangeFunctor* f) final;
422 void executeParallelFor(Int32 begin, Int32 size, IRangeFunctor* f) final
423 {
425 }
426 void executeParallelFor(const ParallelFor1DLoopInfo& loop_info) override;
427
429 const ForLoopRunInfo& run_info,
430 IMDRangeFunctor<1>* functor) final
431 {
432 _executeMDParallelFor<1>(loop_ranges, functor, run_info);
433 }
435 const ForLoopRunInfo& run_info,
436 IMDRangeFunctor<2>* functor) final
437 {
438 _executeMDParallelFor<2>(loop_ranges, functor, run_info);
439 }
441 const ForLoopRunInfo& run_info,
442 IMDRangeFunctor<3>* functor) final
443 {
444 _executeMDParallelFor<3>(loop_ranges, functor, run_info);
445 }
447 const ForLoopRunInfo& run_info,
448 IMDRangeFunctor<4>* functor) final
449 {
450 _executeMDParallelFor<4>(loop_ranges, functor, run_info);
451 }
452
453 bool isActive() const final
454 {
455 return m_is_active;
456 }
457
459 {
460 return (nbAllowedThread() <= 1) ? 0 : _currentTaskTreadIndex();
461 }
462
463 Int32 currentTaskIndex() const final;
464
465 void printInfos(std::ostream& o) const final;
466
467 public:
468
475 TaskThreadInfo* currentTaskThreadInfo() const;
476
477 private:
478
479 bool m_is_active = false;
480 Impl* m_p = nullptr;
481
482 private:
483
484 template <int RankValue> void
485 _executeMDParallelFor(const ComplexForLoopRanges<RankValue>& loop_ranges,
486 IMDRangeFunctor<RankValue>* functor,
487 const ForLoopRunInfo& run_info);
488 void _executeParallelFor(const ParallelFor1DLoopInfo& loop_info);
489};
490
491/*---------------------------------------------------------------------------*/
492/*---------------------------------------------------------------------------*/
493
494class TBBTaskImplementation::Impl
495{
496 class TaskObserver
497 : public tbb::task_scheduler_observer
498 {
499 public:
500
501 explicit TaskObserver(TBBTaskImplementation::Impl* p)
502 : tbb::task_scheduler_observer(p->m_main_arena)
503 , m_p(p)
504 {
505 }
506 void on_scheduler_entry(bool is_worker) override
507 {
508 m_p->notifyThreadCreated(is_worker);
509 }
510 void on_scheduler_exit(bool is_worker) override
511 {
512 m_p->notifyThreadDestroyed(is_worker);
513 }
515 };
516
517 public:
518
519 Impl()
520 : m_task_observer(this)
521 , m_thread_task_infos(cache_line_size)
522 {
523 m_nb_allowed_thread = tbb::info::default_concurrency();
524 _init();
525 }
526 Impl(Int32 nb_thread)
527 : m_main_arena(nb_thread)
528 , m_task_observer(this)
529 , m_thread_task_infos(cache_line_size)
530 {
531 m_nb_allowed_thread = nb_thread;
532 _init();
533 }
534
535 public:
536
537 Int32 nbAllowedThread() const { return m_nb_allowed_thread; }
538 TaskThreadInfo* threadTaskInfo(Integer index) { return &m_thread_task_infos[index]; }
539
540 private:
541
542 Int32 m_nb_allowed_thread = 0;
543
544 public:
545
546 void terminate()
547 {
548 for (auto x : m_sub_arena_list) {
549 if (x)
550 x->terminate();
551 delete x;
552 }
553 m_sub_arena_list.clear();
554 m_main_arena.terminate();
555 m_task_observer.observe(false);
556 oneapi::tbb::finalize(m_task_scheduler_handle);
557 }
558
559 public:
560
561 void notifyThreadCreated(bool is_worker)
562 {
563 std::thread::id my_thread_id = std::this_thread::get_id();
564
565 // Avec OneTBB, cette méthode est appelée à chaque fois qu'on rentre
566 // dans notre 'task_arena'. Comme il ne faut appeler qu'une seule
567 // fois la méthode de notification on utilise un ensemble pour
568 // conserver la liste des threads déjà créés.
569 // NOTE: On ne peut pas utiliser cette méthode avec la version TBB historique
570 // (2018) car cette méthode 'contains' n'existe pas
571 if (m_constructed_thread_map.contains(my_thread_id))
572 return;
573 m_constructed_thread_map.insert(my_thread_id);
574
575 {
576 if (TaskFactory::verboseLevel() >= 1) {
577 std::ostringstream ostr;
578 ostr << "TBB: CREATE THREAD"
579 << " nb_allowed=" << m_nb_allowed_thread
580 << " tbb_default_allowed=" << tbb::info::default_concurrency()
581 << " id=" << my_thread_id
582 << " arena_id=" << _currentTaskTreadIndex()
583 << " is_worker=" << is_worker
584 << "\n";
585 std::cout << ostr.str();
586 }
588 }
589 }
590
591 void notifyThreadDestroyed([[maybe_unused]] bool is_worker)
592 {
593 // Avec OneTBB, cette méthode est appelée à chaque fois qu'on sort
594 // de l'arène principale. Du coup elle ne correspond pas vraiment à une
595 // destruction de thread. On ne fait donc rien pour cette notification
596 // TODO: Regarder comment on peut être notifié de la destruction effective
597 // du thread.
598 }
599
600 private:
601
602#if TBB_VERSION_MAJOR > 2021 || (TBB_VERSION_MAJOR == 2021 && TBB_VERSION_MINOR > 5)
603 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = oneapi::tbb::attach();
604#else
605 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = tbb::task_scheduler_handle::get();
606#endif
607
608 public:
609
610 tbb::task_arena m_main_arena;
612 std::vector<tbb::task_arena*> m_sub_arena_list;
613
614 private:
615
616 TaskObserver m_task_observer;
617 std::mutex m_thread_created_mutex;
618 std::vector<TaskThreadInfo> m_thread_task_infos;
619 tbb::concurrent_set<std::thread::id> m_constructed_thread_map;
620 void _init()
621 {
622 ConcurrencyBase::_setMaxAllowedThread(m_nb_allowed_thread);
623
624 if (TaskFactory::verboseLevel() >= 1) {
625 std::cout << "TBB: TBBTaskImplementationInit nb_allowed_thread=" << m_nb_allowed_thread
626 << " id=" << std::this_thread::get_id()
627 << " version=" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR
628 << "\n";
629 }
630 m_thread_task_infos.resize(m_nb_allowed_thread);
631 m_task_observer.observe(true);
632 Integer max_arena_size = m_nb_allowed_thread;
633 // Limite artificiellement le nombre de tbb::task_arena
634 // pour éviter d'avoir trop d'objets alloués.
635 if (max_arena_size > 512)
636 max_arena_size = 512;
637 if (max_arena_size < 2)
638 max_arena_size = 2;
639 m_sub_arena_list.resize(max_arena_size);
640 m_sub_arena_list[0] = m_sub_arena_list[1] = nullptr;
641 for (Integer i = 2; i < max_arena_size; ++i)
642 m_sub_arena_list[i] = new tbb::task_arena(i);
643 }
644};
645
646/*---------------------------------------------------------------------------*/
647/*---------------------------------------------------------------------------*/
651class TBBParallelFor
652{
653 public:
654
655 TBBParallelFor(IRangeFunctor* f, Int32 nb_allowed_thread, ForLoopOneExecStat* stat_info)
656 : m_functor(f)
657 , m_stat_info(stat_info)
658 , m_nb_allowed_thread(nb_allowed_thread)
659 {}
660
661 public:
662
663 void operator()(tbb::blocked_range<Integer>& range) const
664 {
665#ifdef ARCCORE_CHECK
666 if (TaskFactory::verboseLevel() >= 3) {
667 std::ostringstream o;
668 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
669 << " id=" << std::this_thread::get_id()
670 << " max_allowed=" << m_nb_allowed_thread
671 << " range_begin=" << range.begin() << " range_size=" << range.size()
672 << "\n";
673 std::cout << o.str();
674 std::cout.flush();
675 }
676
677 int tbb_index = _currentTaskTreadIndex();
678 if (tbb_index < 0 || tbb_index >= m_nb_allowed_thread)
679 ARCCORE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
680 tbb_index, m_nb_allowed_thread);
681#endif
682
683 if (m_stat_info)
684 m_stat_info->incrementNbChunk();
685 m_functor->executeFunctor(range.begin(), CheckedConvert::toInteger(range.size()));
686 }
687
688 private:
689
690 IRangeFunctor* m_functor;
691 ForLoopOneExecStat* m_stat_info = nullptr;
692 Int32 m_nb_allowed_thread;
693};
694
695/*---------------------------------------------------------------------------*/
696/*---------------------------------------------------------------------------*/
700template <int RankValue>
701class TBBMDParallelFor
702{
703 public:
704
705 TBBMDParallelFor(IMDRangeFunctor<RankValue>* f, Int32 nb_allowed_thread, ForLoopOneExecStat* stat_info)
706 : m_functor(f)
707 , m_stat_info(stat_info)
708 , m_nb_allowed_thread(nb_allowed_thread)
709 {}
710
711 public:
712
713 void operator()(blocked_nd_range<Int32, RankValue>& range) const
714 {
715#ifdef ARCCORE_CHECK
716 if (TaskFactory::verboseLevel() >= 3) {
717 std::ostringstream o;
718 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
719 << " id=" << std::this_thread::get_id()
720 << " max_allowed=" << m_nb_allowed_thread
721 << " MDFor ";
722 for (Int32 i = 0; i < RankValue; ++i) {
723 auto r0 = static_cast<Int32>(range.dim(i).begin());
724 auto r1 = static_cast<Int32>(range.dim(i).size());
725 o << " range" << i << " (begin=" << r0 << " size=" << r1 << ")";
726 }
727 o << "\n";
728 std::cout << o.str();
729 std::cout.flush();
730 }
731
732 int tbb_index = _currentTaskTreadIndex();
733 if (tbb_index < 0 || tbb_index >= m_nb_allowed_thread)
734 ARCCORE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
735 tbb_index, m_nb_allowed_thread);
736#endif
737
738 if (m_stat_info)
739 m_stat_info->incrementNbChunk();
740 m_functor->executeFunctor(_fromTBBRange(range));
741 }
742
743 private:
744
745 IMDRangeFunctor<RankValue>* m_functor = nullptr;
746 ForLoopOneExecStat* m_stat_info = nullptr;
747 Int32 m_nb_allowed_thread;
748};
749
750/*---------------------------------------------------------------------------*/
751/*---------------------------------------------------------------------------*/
769class TBBDeterministicParallelFor
770{
771 public:
772
773 TBBDeterministicParallelFor(TBBTaskImplementation* impl, const TBBParallelFor& tbb_for,
774 Integer begin_index, Integer size, Integer grain_size, Integer nb_thread)
775 : m_impl(impl)
776 , m_tbb_for(tbb_for)
777 , m_nb_thread(nb_thread)
778 , m_begin_index(begin_index)
779 , m_size(size)
780 , m_grain_size(grain_size)
781 , m_nb_block(0)
782 , m_block_size(0)
783 , m_nb_block_per_thread(0)
784 {
785 if (m_nb_thread < 1)
786 m_nb_thread = 1;
787
788 if (m_grain_size > 0) {
789 m_block_size = m_grain_size;
790 if (m_block_size > 0) {
791 m_nb_block = m_size / m_block_size;
792 if ((m_size % m_block_size) != 0)
793 ++m_nb_block;
794 }
795 else
796 m_nb_block = 1;
797 m_nb_block_per_thread = m_nb_block / m_nb_thread;
798 if ((m_nb_block % m_nb_thread) != 0)
799 ++m_nb_block_per_thread;
800 }
801 else {
802 if (m_nb_block < 1)
803 m_nb_block = m_nb_thread;
804 m_block_size = m_size / m_nb_block;
805 m_nb_block_per_thread = 1;
806 }
807 if (TaskFactory::verboseLevel() >= 2) {
808 std::cout << "TBBDeterministicParallelFor: BEGIN=" << m_begin_index << " size=" << m_size
809 << " grain_size=" << m_grain_size
810 << " nb_block=" << m_nb_block << " nb_thread=" << m_nb_thread
811 << " nb_block_per_thread=" << m_nb_block_per_thread
812 << " block_size=" << m_block_size
813 << " block_size*nb_block=" << m_block_size * m_nb_block << '\n';
814 }
815 }
816
817 public:
818
825 void operator()(tbb::blocked_range<Integer>& range) const
826 {
827 auto nb_iter = static_cast<Integer>(range.size());
828 for (Integer i = 0; i < nb_iter; ++i) {
829 Integer task_id = range.begin() + i;
830 for (Integer k = 0, kn = m_nb_block_per_thread; k < kn; ++k) {
831 Integer block_id = task_id + (k * m_nb_thread);
832 if (block_id < m_nb_block)
833 _doBlock(task_id, block_id);
834 }
835 }
836 }
837
838 void _doBlock(Integer task_id, Integer block_id) const
839 {
840 TBBTaskImplementation::TaskInfoLockGuard guard(m_impl->currentTaskThreadInfo(), task_id);
841
842 Integer iter_begin = block_id * m_block_size;
843 Integer iter_size = m_block_size;
844 if ((block_id + 1) == m_nb_block) {
845 // Pour le dernier bloc, la taille est le nombre d'éléments restants
846 iter_size = m_size - iter_begin;
847 }
848 iter_begin += m_begin_index;
849#ifdef ARCCORE_CHECK
850 if (TaskFactory::verboseLevel() >= 3) {
851 std::ostringstream o;
852 o << "TBB: DoBlock: BLOCK task_id=" << task_id << " block_id=" << block_id
853 << " iter_begin=" << iter_begin << " iter_size=" << iter_size << '\n';
854 std::cout << o.str();
855 std::cout.flush();
856 }
857#endif
858 if (iter_size > 0) {
859 auto r = tbb::blocked_range<int>(iter_begin, iter_begin + iter_size);
860 m_tbb_for(r);
861 }
862 }
863
864 private:
865
866 TBBTaskImplementation* m_impl;
867 const TBBParallelFor& m_tbb_for;
868 Integer m_nb_thread;
869 Integer m_begin_index;
870 Integer m_size;
871 Integer m_grain_size;
872 Integer m_nb_block;
873 Integer m_block_size;
874 Integer m_nb_block_per_thread;
875};
876
877/*---------------------------------------------------------------------------*/
878/*---------------------------------------------------------------------------*/
879
881{
882 public:
883
884 ParallelForExecute(TBBTaskImplementation* impl, const ParallelLoopOptions& options,
885 Integer begin, Integer size, IRangeFunctor* f, ForLoopOneExecStat* stat_info)
886 : m_impl(impl)
887 , m_begin(begin)
888 , m_size(size)
889 , m_functor(f)
890 , m_options(options)
891 , m_stat_info(stat_info)
892 {}
893
894 public:
895
896 void operator()() const
897 {
898 Integer nb_thread = m_options.maxThread();
899 TBBParallelFor pf(m_functor, nb_thread, m_stat_info);
900 Integer gsize = m_options.grainSize();
901 tbb::blocked_range<Integer> range(m_begin, m_begin + m_size);
902 if (TaskFactory::verboseLevel() >= 1)
903 std::cout << "TBB: TBBTaskImplementationInit ParallelForExecute begin=" << m_begin
904 << " size=" << m_size << " gsize=" << gsize
905 << " partitioner=" << (int)m_options.partitioner()
906 << " nb_thread=" << nb_thread
907 << " has_stat_info=" << (m_stat_info != nullptr)
908 << '\n';
909
910 if (gsize > 0)
911 range = tbb::blocked_range<Integer>(m_begin, m_begin + m_size, gsize);
912
913 if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Static) {
914 tbb::parallel_for(range, pf, tbb::static_partitioner());
915 }
916 else if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Deterministic) {
917 tbb::blocked_range<Integer> range2(0, nb_thread, 1);
918 TBBDeterministicParallelFor dpf(m_impl, pf, m_begin, m_size, gsize, nb_thread);
919 tbb::parallel_for(range2, dpf);
920 }
921 else
922 tbb::parallel_for(range, pf);
923 }
924
925 private:
926
927 TBBTaskImplementation* m_impl = nullptr;
928 Integer m_begin;
929 Integer m_size;
930 IRangeFunctor* m_functor = nullptr;
931 ParallelLoopOptions m_options;
932 ForLoopOneExecStat* m_stat_info = nullptr;
933};
934
935/*---------------------------------------------------------------------------*/
936/*---------------------------------------------------------------------------*/
937
938template <int RankValue>
940{
941 public:
942
943 MDParallelForExecute(TBBTaskImplementation* impl,
944 const ParallelLoopOptions& options,
946 IMDRangeFunctor<RankValue>* f, [[maybe_unused]] ForLoopOneExecStat* stat_info)
947 : m_impl(impl)
948 , m_tbb_range(_toTBBRange(range))
949 , m_functor(f)
950 , m_options(options)
951 , m_stat_info(stat_info)
952 {
953 // On ne peut pas modifier les valeurs d'une instance de tbb::blocked_rangeNd.
954 // Il faut donc en reconstruire une complètement.
955 FixedArray<size_t, RankValue> all_grain_sizes;
956 Int32 gsize = m_options.grainSize();
957 if (gsize > 0) {
958 // Si la taille du grain est différent zéro, il faut la répartir
959 // sur l'ensemble des dimensions. On commence par la dernière.
960 // TODO: regarder pourquoi dans certains cas les performances sont
961 // inférieures à celles qu'on obtient en utilisant un partitionneur
962 // statique.
963 constexpr bool is_verbose = false;
964 std::array<Int32, RankValue> range_extents = range.extents().asStdArray();
965 double ratio = static_cast<double>(gsize) / static_cast<double>(range.nbElement());
966 if constexpr (is_verbose) {
967 std::cout << "GSIZE=" << gsize << " rank=" << RankValue << " ratio=" << ratio;
968 for (Int32 i = 0; i < RankValue; ++i)
969 std::cout << " range" << i << "=" << range_extents[i];
970 std::cout << "\n";
971 }
972 Int32 index = RankValue - 1;
973 Int32 remaining_grain = gsize;
974 for (; index >= 0; --index) {
975 Int32 current = range_extents[index];
976 if constexpr (is_verbose)
977 std::cout << "Check index=" << index << " remaining=" << remaining_grain << " current=" << current << "\n";
978 if (remaining_grain > current) {
979 all_grain_sizes[index] = current;
980 remaining_grain /= current;
981 }
982 else {
983 all_grain_sizes[index] = remaining_grain;
984 break;
985 }
986 }
987 for (Int32 i = 0; i < index; ++i)
988 all_grain_sizes[i] = 1;
989 if constexpr (is_verbose) {
990 for (Int32 i = 0; i < RankValue; ++i)
991 std::cout << " grain" << i << "=" << all_grain_sizes[i];
992 std::cout << "\n";
993 }
994 m_tbb_range = _toTBBRangeWithGrain(m_tbb_range, all_grain_sizes);
995 }
996 }
997
998 public:
999
1000 void operator()() const
1001 {
1002 Integer nb_thread = m_options.maxThread();
1003 TBBMDParallelFor<RankValue> pf(m_functor, nb_thread, m_stat_info);
1004
1005 if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Static) {
1006 tbb::parallel_for(m_tbb_range, pf, tbb::static_partitioner());
1007 }
1008 else if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Deterministic) {
1009 // TODO: implémenter le mode déterministe
1010 ARCCORE_THROW(NotImplementedException, "ParallelLoopOptions::Partitioner::Deterministic for multi-dimensionnal loops");
1011 //tbb::blocked_range<Integer> range2(0,nb_thread,1);
1012 //TBBDeterministicParallelFor dpf(m_impl,pf,m_begin,m_size,gsize,nb_thread);
1013 //tbb::parallel_for(range2,dpf);
1014 }
1015 else {
1016 tbb::parallel_for(m_tbb_range, pf);
1017 }
1018 }
1019
1020 private:
1021
1022 TBBTaskImplementation* m_impl = nullptr;
1023 blocked_nd_range<Int32, RankValue> m_tbb_range;
1024 IMDRangeFunctor<RankValue>* m_functor = nullptr;
1025 ParallelLoopOptions m_options;
1026 ForLoopOneExecStat* m_stat_info = nullptr;
1027};
1028
1029/*---------------------------------------------------------------------------*/
1030/*---------------------------------------------------------------------------*/
1031
1032TBBTaskImplementation::
1033~TBBTaskImplementation()
1034{
1035 delete m_p;
1036}
1037
1038/*---------------------------------------------------------------------------*/
1039/*---------------------------------------------------------------------------*/
1040
1042initialize(Int32 nb_thread)
1043{
1044 if (nb_thread < 0)
1045 nb_thread = 0;
1046 m_is_active = (nb_thread != 1);
1047 if (nb_thread != 0)
1048 m_p = new Impl(nb_thread);
1049 else
1050 m_p = new Impl();
1054}
1055
1056/*---------------------------------------------------------------------------*/
1057/*---------------------------------------------------------------------------*/
1058
1060terminate()
1061{
1062 m_p->terminate();
1063}
1064
1065/*---------------------------------------------------------------------------*/
1066/*---------------------------------------------------------------------------*/
1067
1069printInfos(std::ostream& o) const
1070{
1071 o << "OneTBBTaskImplementation"
1072 << " version=" << TBB_VERSION_STRING
1073 << " interface=" << TBB_INTERFACE_VERSION
1074 << " runtime_interface=" << TBB_runtime_interface_version();
1075}
1076
1077/*---------------------------------------------------------------------------*/
1078/*---------------------------------------------------------------------------*/
1079
1080void TBBTaskImplementation::
1081_executeParallelFor(const ParallelFor1DLoopInfo& loop_info)
1082{
1083 ScopedExecInfo sei(loop_info.runInfo());
1084 ForLoopOneExecStat* stat_info = sei.statInfo();
1085 ::Arcane::Impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1086
1087 Int32 begin = loop_info.beginIndex();
1088 Int32 size = loop_info.size();
1089 ParallelLoopOptions options = loop_info.runInfo().options().value_or(TaskFactory::defaultParallelLoopOptions());
1090 IRangeFunctor* f = loop_info.functor();
1092
1093 Integer max_thread = options.maxThread();
1094 Integer nb_allowed_thread = m_p->nbAllowedThread();
1095 if (max_thread < 0)
1096 max_thread = nb_allowed_thread;
1097
1098 if (TaskFactory::verboseLevel() >= 1)
1099 std::cout << "TBB: TBBTaskImplementation executeParallelFor begin=" << begin
1100 << " size=" << size << " max_thread=" << max_thread
1101 << " grain_size=" << options.grainSize()
1102 << " nb_allowed=" << nb_allowed_thread << '\n';
1103
1104 // En exécution séquentielle, appelle directement la méthode \a f.
1105 if (max_thread == 1 || max_thread == 0) {
1106 f->executeFunctor(begin, size);
1107 return;
1108 }
1109
1110 // Remplace les valeurs non initialisées de \a options par celles de \a m_default_loop_options
1111 ParallelLoopOptions true_options(options);
1112 true_options.mergeUnsetValues(TaskFactory::defaultParallelLoopOptions());
1113 true_options.setMaxThread(max_thread);
1114
1115 ParallelForExecute pfe(this, true_options, begin, size, f, stat_info);
1116
1117 tbb::task_arena* used_arena = nullptr;
1118 if (max_thread < nb_allowed_thread && max_thread < m_p->m_sub_arena_list.size())
1119 used_arena = m_p->m_sub_arena_list[max_thread];
1120 if (!used_arena)
1121 used_arena = &(m_p->m_main_arena);
1122 used_arena->execute(pfe);
1123}
1124
1125/*---------------------------------------------------------------------------*/
1126/*---------------------------------------------------------------------------*/
1127
1130{
1131 _executeParallelFor(loop_info);
1132}
1133
1134/*---------------------------------------------------------------------------*/
1135/*---------------------------------------------------------------------------*/
1142template <int RankValue> void TBBTaskImplementation::
1145 const ForLoopRunInfo& run_info)
1146{
1147 ParallelLoopOptions options;
1148 if (run_info.options().has_value())
1149 options = run_info.options().value();
1150
1151 ScopedExecInfo sei(run_info);
1152 ForLoopOneExecStat* stat_info = sei.statInfo();
1153 ::Arcane::Impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1154
1155 if (TaskFactory::verboseLevel() >= 1) {
1156 std::cout << "TBB: TBBTaskImplementation executeMDParallelFor nb_dim=" << RankValue
1157 << " nb_element=" << loop_ranges.nbElement()
1158 << " grain_size=" << options.grainSize()
1159 << " name=" << run_info.traceInfo().traceInfo()
1160 << " has_stat_info=" << (stat_info != nullptr)
1161 << '\n';
1162 }
1163
1164 Integer max_thread = options.maxThread();
1165 // En exécution séquentielle, appelle directement la méthode \a f.
1166 if (max_thread == 1 || max_thread == 0) {
1167 functor->executeFunctor(loop_ranges);
1168 return;
1169 }
1170
1171 // Remplace les valeurs non initialisées de \a options par celles de \a m_default_loop_options
1172 ParallelLoopOptions true_options(options);
1174
1175 Integer nb_allowed_thread = m_p->nbAllowedThread();
1176 if (max_thread < 0)
1177 max_thread = nb_allowed_thread;
1178 tbb::task_arena* used_arena = nullptr;
1179 if (max_thread < nb_allowed_thread)
1180 used_arena = m_p->m_sub_arena_list[max_thread];
1181 if (!used_arena)
1182 used_arena = &(m_p->m_main_arena);
1183
1184 // Pour l'instant pour la dimension 1, utilise le 'ParallelForExecute' historique
1185 if constexpr (RankValue == 1) {
1186 auto range_1d = _toTBBRange(loop_ranges);
1187 auto x1 = [&](Integer begin, Integer size) {
1188 functor->executeFunctor(makeLoopRanges(ForLoopRange(begin, size)));
1189 //functor->executeFunctor(ComplexForLoopRanges<1>(begin,size));
1190 };
1191 LambdaRangeFunctorT<decltype(x1)> functor_1d(x1);
1192 Integer begin1 = CheckedConvert::toInteger(range_1d.dim(0).begin());
1193 Integer size1 = CheckedConvert::toInteger(range_1d.dim(0).size());
1194 ParallelForExecute pfe(this, true_options, begin1, size1, &functor_1d, stat_info);
1195 used_arena->execute(pfe);
1196 }
1197 else {
1198 MDParallelForExecute<RankValue> pfe(this, true_options, loop_ranges, functor, stat_info);
1199 used_arena->execute(pfe);
1200 }
1201}
1202
1203/*---------------------------------------------------------------------------*/
1204/*---------------------------------------------------------------------------*/
1205
1207executeParallelFor(Integer begin, Integer size, Integer grain_size, IRangeFunctor* f)
1208{
1210 opts.setGrainSize(grain_size);
1211 ForLoopRunInfo run_info(opts);
1212 executeParallelFor(ParallelFor1DLoopInfo(begin, size, f, run_info));
1213}
1214
1215/*---------------------------------------------------------------------------*/
1216/*---------------------------------------------------------------------------*/
1217
1223
1224/*---------------------------------------------------------------------------*/
1225/*---------------------------------------------------------------------------*/
1226
1227TBBTaskImplementation::TaskThreadInfo* TBBTaskImplementation::
1229{
1230 Int32 thread_id = currentTaskThreadIndex();
1231 if (thread_id >= 0)
1232 return m_p->threadTaskInfo(thread_id);
1233 return nullptr;
1234}
1235
1236/*---------------------------------------------------------------------------*/
1237/*---------------------------------------------------------------------------*/
1238
1240currentTaskIndex() const
1241{
1242 Int32 thread_id = currentTaskThreadIndex();
1243 // Ce test avait été ajouté pour coutourner un bug dans une des versions
1244 // de OneTBB. Il est surement inutile aujourd'hui (2025)
1245 if (thread_id < 0 || thread_id >= m_p->nbAllowedThread())
1246 return 0;
1247 TBBTaskImplementation::TaskThreadInfo* tti = currentTaskThreadInfo();
1248 if (tti) {
1249 Int32 task_index = tti->taskIndex();
1250 if (task_index >= 0)
1251 return task_index;
1252 }
1253 return thread_id;
1254}
1255
1256/*---------------------------------------------------------------------------*/
1257/*---------------------------------------------------------------------------*/
1258
1261{
1262 tbb::task_group task_group;
1263 task_group.run(taskFunctor());
1264 task_group.wait();
1265 delete this;
1266}
1267
1268/*---------------------------------------------------------------------------*/
1269/*---------------------------------------------------------------------------*/
1270
1273{
1274 tbb::task_group task_group;
1275 Integer n = tasks.size();
1276 if (n == 0)
1277 return;
1278
1279 //set_ref_count(n+1);
1280 for (Integer i = 0; i < n; ++i) {
1281 auto* t = static_cast<OneTBBTask*>(tasks[i]);
1282 task_group.run(t->taskFunctor());
1283 }
1284 task_group.wait();
1285 for (Integer i = 0; i < n; ++i) {
1286 auto* t = static_cast<OneTBBTask*>(tasks[i]);
1287 delete t;
1288 }
1289}
1290
1291/*---------------------------------------------------------------------------*/
1292/*---------------------------------------------------------------------------*/
1293
1294ITask* OneTBBTask::
1295_createChildTask(ITaskFunctor* functor)
1296{
1297 auto* t = new OneTBBTask(functor);
1298 return t;
1299}
1300
1301/*---------------------------------------------------------------------------*/
1302/*---------------------------------------------------------------------------*/
1303
1304ARCANE_DI_REGISTER_PROVIDER(TBBTaskImplementation,
1305 DependencyInjection::ProviderProperty("TBBTaskImplementation"),
1306 ARCANE_DI_INTERFACES(ITaskImplementation),
1307 ARCANE_DI_EMPTY_CONSTRUCTOR());
1308
1309/*---------------------------------------------------------------------------*/
1310/*---------------------------------------------------------------------------*/
1311
1312} // End namespace Arcane
1313
1314/*---------------------------------------------------------------------------*/
1315/*---------------------------------------------------------------------------*/
#define ARCCORE_FATAL(...)
Macro envoyant une exception FatalErrorException.
#define ARCCORE_THROW(exception_class,...)
Macro pour envoyer une exception avec formattage.
#define ARCCORE_CHECK_POINTER(ptr)
Macro retournant le pointeur ptr s'il est non nul ou lancant une exception s'il est nul.
static void _setMaxAllowedThread(Int32 v)
Positionne le nombre maximum de thread à utiliser.
Vue constante d'un tableau de type T.
constexpr Integer size() const noexcept
Nombre d'éléments du tableau.
Classe pour gérer le profiling d'une seule exécution d'une boucle.
Intervalle d'itération pour une boucle.
Informations d'exécution d'une boucle.
Interface d'un fonctor sur un interval d'itération multi-dimensionnel de dimension RankValue.
Interface d'un fonctor sur un interval d'itération.
virtual void executeFunctor(Int32 begin, Int32 size)=0
Exécute la méthode associée.
Interface d'un fonctor pour une tâche.
Definition Task.h:73
virtual void executeFunctor(const TaskContext &tc)=0
Exécute la méthode associé
Implémentation d'une fabrique de tâches.
Int32 nbAllowedThread() const
Nombre de threads utilisés au maximum pour gérer les tâches.
Interface d'une tâche concourante.
Definition Task.h:188
Classe permettant de récupérer le temps passé entre l'appel au constructeur et au destructeur.
Fonctor sur un interval d'itération instancié via une lambda fonction.
Exception lorsqu'une fonction n'est pas implémentée.
void launchAndWait() override
Lance la tâche et bloque jusqu'à ce qu'elle se termine.
Caractéristiques d'un boucle 1D multi-thread.
Definition ParallelFor.h:34
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
void mergeUnsetValues(const ParallelLoopOptions &po)
Fusionne les valeurs non modifiées de l'instance par celles de po.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
void setMaxThread(Integer v)
Positionne le nombre maximal de threads autorisé.
@ Deterministic
Utilise un partitionnement et un ordonnancement statique.
static Impl::ForLoopStatInfoList * _threadLocalForLoopInstance()
Definition Profiling.cc:194
static bool hasProfiling()
Indique si le profilage est actif.
Implémentation déterministe de ParallelFor.
void operator()(tbb::blocked_range< Integer > &range) const
Opérateur pour un thread donné.
Exécuteur pour une boucle multi-dimension.
Exécuteur pour une boucle 1D.
std::vector< tbb::task_arena * > m_sub_arena_list
Tableau dont le i-ème élément contient la tbb::task_arena pour i thread.
Classe pour positionner TaskThreadInfo::taskIndex().
Int32 currentTaskThreadIndex() const final
Implémentation de TaskFactory::currentTaskThreadIndex()
void initialize(Int32 nb_thread) override
void executeParallelFor(const ComplexForLoopRanges< 1 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 1 > *functor) final
Exécute une boucle 1D en concurrence.
void executeParallelFor(Int32 begin, Int32 size, IRangeFunctor *f) final
Exécute le fonctor f en concurrence.
ITask * createRootTask(ITaskFunctor *f) override
Créé une tâche racine. L'implémentation doit recopier la valeur de f qui est soit un TaskFunctor,...
void printInfos(std::ostream &o) const final
Affiche les informations sur le runtime utilisé
void executeParallelFor(Int32 begin, Int32 size, const ParallelLoopOptions &options, IRangeFunctor *f) final
Exécute le fonctor f en concurrence.
void executeParallelFor(const ComplexForLoopRanges< 3 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 3 > *functor) final
Exécute une boucle 3D en concurrence.
void executeParallelFor(const ComplexForLoopRanges< 4 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 4 > *functor) final
Exécute une boucle 4D en concurrence.
TaskThreadInfo * currentTaskThreadInfo() const
Instance de TaskThreadInfo associé au thread courant.
bool isActive() const final
Indique si l'implémentation est active.
void _executeMDParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, IMDRangeFunctor< RankValue > *functor, const ForLoopRunInfo &run_info)
Exécution d'une boucle N-dimensions.
Int32 currentTaskIndex() const final
Implémentation de TaskFactory::currentTaskIndex()
void executeParallelFor(const ComplexForLoopRanges< 2 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 2 > *functor) final
Exécute une boucle 2D en concurrence.
Contexte d'éxecution d'une tâche.
Definition Task.h:48
static void notifyThreadCreated()
Notifie tous les observateurs de création de thread.
static const ParallelLoopOptions & defaultParallelLoopOptions()
Valeurs par défaut d'exécution d'une boucle parallèle.
static Integer verboseLevel()
Niveau de verbosité
static void setDefaultParallelLoopOptions(const ParallelLoopOptions &v)
Positionne les valeurs par défaut d'exécution d'une boucle parallèle.
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
ARCCORE_BASE_EXPORT String getStackTrace()
Retourne une chaîne de caractere contenant la pile d'appel.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Int32 Integer
Type représentant un entier.
SimpleForLoopRanges< 1 > makeLoopRanges(Int32 n1)
Créé un intervalle d'itération [0,n1[.
std::int32_t Int32
Type entier signé sur 32 bits.