Arcane  v4.1.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
TBBTaskImplementation.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* TBBTaskImplementation.cc (C) 2000-2025 */
9/* */
10/* Implémentation des tâches utilisant TBB (Intel Threads Building Blocks). */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/base/NotImplementedException.h"
15#include "arccore/base/IFunctor.h"
16#include "arccore/base/ForLoopRanges.h"
17#include "arccore/base/IObservable.h"
18#include "arccore/base/PlatformUtils.h"
19#include "arccore/base/FixedArray.h"
20#include "arccore/base/Profiling.h"
21#include "arccore/base/CheckedConvert.h"
22#include "arccore/base/FixedArray.h"
23#include "arccore/base/ForLoopRunInfo.h"
24#include "arccore/base/internal/DependencyInjection.h"
25
26#include "arccore/concurrency/IThreadImplementation.h"
27#include "arccore/concurrency/Task.h"
28#include "arccore/concurrency/ITaskImplementation.h"
29#include "arccore/concurrency/TaskFactory.h"
30#include "arccore/concurrency/ParallelFor.h"
31#include "arccore/concurrency/internal/TaskFactoryInternal.h"
32
33#include "arcane/utils/Array.h"
34
35#include <new>
36#include <stack>
37
38// Il faut définir cette macro pour que la classe 'blocked_rangeNd' soit disponible
39
40#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
41
42// la macro 'ARCANE_USE_ONETBB' est définie dans le CMakeLists.txt
43// si on compile avec la version OneTBB version 2021+
44// (https://github.com/oneapi-src/oneTBB.git)
45// A terme ce sera la seule version supportée par Arcane.
46
47// Nécessaire pour avoir accès à task_scheduler_handle
48#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
49#include <tbb/tbb.h>
50#include <oneapi/tbb/concurrent_set.h>
51#include <oneapi/tbb/global_control.h>
52
53#include <thread>
54#include <mutex>
55
56/*---------------------------------------------------------------------------*/
57/*---------------------------------------------------------------------------*/
58
59namespace Arcane
60{
61
63
64// TODO: utiliser un pool mémoire spécifique pour gérer les
65// OneTBBTask pour optimiser les new/delete des instances de cette classe.
66// Auparavant avec les anciennes versions de TBB cela était géré avec
67// la méthode 'tbb::task::allocate_child()'.
68
69/*---------------------------------------------------------------------------*/
70/*---------------------------------------------------------------------------*/
71
72#if (TBB_VERSION_MAJOR > 2022) || (TBB_VERSION_MAJOR == 2022 && TBB_VERSION_MINOR > 0) || defined __TBB_blocked_nd_range_H
73
74// La classe "blocked_rangeNd" a été retirée dans la version
75// 2022.0.0 et remplacée par "blocked_nd_range".
76template <typename Value, unsigned int N>
77using blocked_nd_range = tbb::blocked_nd_range<Value, N>;
78
79#else
80
81template <typename Value, unsigned int N>
82using blocked_nd_range = tbb::blocked_rangeNd<Value, N>;
83
84#endif
85
86/*---------------------------------------------------------------------------*/
87/*---------------------------------------------------------------------------*/
88
89namespace
90{
91
92 // Positif si on récupère les statistiques d'exécution
93 bool isStatActive()
94 {
96 }
97
102 class ScopedExecInfo
103 {
104 public:
105
106 explicit ScopedExecInfo(const ForLoopRunInfo& run_info)
107 : m_run_info(run_info)
108 {
109 // Si run_info.execInfo() n'est pas nul, on l'utilise.
110 // Cela signifie que c'est l'appelant de qui va gérer les statistiques
111 // d'exécution. Sinon, on utilise \a m_stat_info si les statistiques
112 // d'exécution sont demandées.
113 ForLoopOneExecStat* ptr = run_info.execStat();
114 if (ptr) {
115 m_stat_info_ptr = ptr;
116 m_use_own_run_info = false;
117 }
118 else
119 m_stat_info_ptr = isStatActive() ? &m_stat_info : nullptr;
120 }
121 ~ScopedExecInfo()
122 {
123#ifdef PRINT_STAT_INFO
124 if (m_stat_info_ptr) {
125 bool is_valid = m_run_info.traceInfo().isValid();
126 if (!is_valid)
127 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
128 << " stack=" << platform::getStackTrace()
129 << "\n";
130 else
131 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
132 << " trace_name=" << m_run_info.traceInfo().traceInfo().name() << "\n";
133 }
134#endif
135 if (m_stat_info_ptr && m_use_own_run_info) {
136 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*m_stat_info_ptr, m_run_info.traceInfo());
137 }
138 }
139
140 public:
141
142 ForLoopOneExecStat* statInfo() const { return m_stat_info_ptr; }
143 bool isOwn() const { return m_use_own_run_info; }
144
145 private:
146
147 ForLoopOneExecStat m_stat_info;
148 ForLoopOneExecStat* m_stat_info_ptr = nullptr;
149 ForLoopRunInfo m_run_info;
151 bool m_use_own_run_info = true;
152 };
153
154 /*---------------------------------------------------------------------------*/
155 /*---------------------------------------------------------------------------*/
156
157 inline int _currentTaskTreadIndex()
158 {
159 // NOTE: Avec OneTBB 2021, la valeur n'est plus '0' si on appelle cette méthode
160 // depuis un thread en dehors d'un task_arena. Avec la version 2021,
161 // la valeur est 65535.
162 // NOTE: Il semble que cela soit un bug de la 2021.3.
163 return tbb::this_task_arena::current_thread_index();
164 }
165
166 inline blocked_nd_range<Int32, 1>
167 _toTBBRange(const ComplexForLoopRanges<1>& r)
168 {
169 return { { r.lowerBound<0>(), r.upperBound<0>() } };
170 }
171
172 inline blocked_nd_range<Int32, 2>
173 _toTBBRange(const ComplexForLoopRanges<2>& r)
174 {
175 return { { r.lowerBound<0>(), r.upperBound<0>() },
176 { r.lowerBound<1>(), r.upperBound<1>() } };
177 }
178
179 inline blocked_nd_range<Int32, 3>
180 _toTBBRange(const ComplexForLoopRanges<3>& r)
181 {
182 return { { r.lowerBound<0>(), r.upperBound<0>() },
183 { r.lowerBound<1>(), r.upperBound<1>() },
184 { r.lowerBound<2>(), r.upperBound<2>() } };
185 }
186
187 inline blocked_nd_range<Int32, 4>
188 _toTBBRange(const ComplexForLoopRanges<4>& r)
189 {
190 return { { r.lowerBound<0>(), r.upperBound<0>() },
191 { r.lowerBound<1>(), r.upperBound<1>() },
192 { r.lowerBound<2>(), r.upperBound<2>() },
193 { r.lowerBound<3>(), r.upperBound<3>() } };
194 }
195
196 /*---------------------------------------------------------------------------*/
197 /*---------------------------------------------------------------------------*/
198
199 inline blocked_nd_range<Int32, 2>
200 _toTBBRangeWithGrain(const blocked_nd_range<Int32, 2>& r, FixedArray<size_t, 2> grain_sizes)
201 {
202 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
203 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] } };
204 }
205
206 inline blocked_nd_range<Int32, 3>
207 _toTBBRangeWithGrain(const blocked_nd_range<Int32, 3>& r, FixedArray<size_t, 3> grain_sizes)
208 {
209 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
210 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] },
211 { r.dim(2).begin(), r.dim(2).end(), grain_sizes[2] } };
212 }
213
214 inline blocked_nd_range<Int32, 4>
215 _toTBBRangeWithGrain(const blocked_nd_range<Int32, 4>& r, FixedArray<size_t, 4> grain_sizes)
216 {
217 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
218 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] },
219 { r.dim(2).begin(), r.dim(2).end(), grain_sizes[2] },
220 { r.dim(3).begin(), r.dim(3).end(), grain_sizes[3] } };
221 }
222
223 /*---------------------------------------------------------------------------*/
224 /*---------------------------------------------------------------------------*/
225
227 _fromTBBRange(const blocked_nd_range<Int32, 2>& r)
228 {
229 using BoundsType = ArrayBounds<MDDim2>;
230 using ArrayExtentType = BoundsType::ArrayExtentType;
231
232 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin()));
233 auto s0 = static_cast<Int32>(r.dim(0).size());
234 auto s1 = static_cast<Int32>(r.dim(1).size());
235 BoundsType sizes(ArrayExtentType(s0, s1));
236 return { lower_bounds, sizes };
237 }
238
240 _fromTBBRange(const blocked_nd_range<Int32, 3>& r)
241 {
242 using BoundsType = ArrayBounds<MDDim3>;
243 using ArrayExtentType = BoundsType::ArrayExtentType;
244
245 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin(), r.dim(2).begin()));
246 auto s0 = static_cast<Int32>(r.dim(0).size());
247 auto s1 = static_cast<Int32>(r.dim(1).size());
248 auto s2 = static_cast<Int32>(r.dim(2).size());
249 BoundsType sizes(ArrayExtentType(s0, s1, s2));
250 return { lower_bounds, sizes };
251 }
252
254 _fromTBBRange(const blocked_nd_range<Int32, 4>& r)
255 {
256 using BoundsType = ArrayBounds<MDDim4>;
257 using ArrayExtentType = typename BoundsType::ArrayExtentType;
258
259 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin(), r.dim(2).begin(), r.dim(3).begin()));
260 auto s0 = static_cast<Int32>(r.dim(0).size());
261 auto s1 = static_cast<Int32>(r.dim(1).size());
262 auto s2 = static_cast<Int32>(r.dim(2).size());
263 auto s3 = static_cast<Int32>(r.dim(3).size());
264 BoundsType sizes(ArrayExtentType(s0, s1, s2, s3));
265 return { lower_bounds, sizes };
266 }
267
268} // namespace
269
270/*---------------------------------------------------------------------------*/
271/*---------------------------------------------------------------------------*/
272
273class OneTBBTaskFunctor
274{
275 public:
276
277 OneTBBTaskFunctor(ITaskFunctor* functor, ITask* task)
278 : m_functor(functor)
279 , m_task(task)
280 {}
281
282 public:
283
284 void operator()() const
285 {
286 if (m_functor) {
287 ITaskFunctor* tf = m_functor;
288 m_functor = nullptr;
289 TaskContext task_context(m_task);
290 //cerr << "FUNC=" << typeid(*tf).name();
291 tf->executeFunctor(task_context);
292 }
293 }
294
295 public:
296
297 mutable ITaskFunctor* m_functor;
298 ITask* m_task;
299};
300
301/*---------------------------------------------------------------------------*/
302/*---------------------------------------------------------------------------*/
303
304class OneTBBTask
305: public ITask
306{
307 public:
308
309 static const int FUNCTOR_CLASS_SIZE = 32;
310
311 public:
312
313 explicit OneTBBTask(ITaskFunctor* f)
314 : m_functor(f)
315 {
316 m_functor = f->clone(m_functor_buf.data(), FUNCTOR_CLASS_SIZE);
317 }
318
319 public:
320
321 OneTBBTaskFunctor taskFunctor() { return OneTBBTaskFunctor(m_functor, this); }
322 void launchAndWait() override;
323 void launchAndWait(ConstArrayView<ITask*> tasks) override;
324
325 protected:
326
327 ITask* _createChildTask(ITaskFunctor* functor) override;
328
329 public:
330
331 ITaskFunctor* m_functor = nullptr;
333};
334using TBBTask = OneTBBTask;
335
336/*---------------------------------------------------------------------------*/
337/*---------------------------------------------------------------------------*/
338/*
339 * Ne pas utiliser l'observer locale au task_arena.
340 * Utiliser l'observer global au scheduler.
341 * Pour l'id, utiliser tbb::this_task_arena::current_thread_index().
342 */
343class TBBTaskImplementation
344: public ITaskImplementation
345{
346 class Impl;
347 class ParallelForExecute;
348 template <int RankValue>
350
351 public:
352
353 // Pour des raisons de performance, s'aligne sur une ligne de cache
354 // et utilise un padding.
355 class ARCANE_ALIGNAS_PACKED(64) TaskThreadInfo
356 {
357 public:
358
359 TaskThreadInfo()
360 : m_task_index(-1)
361 {}
362
363 public:
364
365 void setTaskIndex(Integer v) { m_task_index = v; }
366 Integer taskIndex() const { return m_task_index; }
367
368 private:
369
370 Integer m_task_index;
371 };
372
379 class TaskInfoLockGuard
380 {
381 public:
382
383 TaskInfoLockGuard(TaskThreadInfo* tti, Integer task_index)
384 : m_tti(tti)
385 , m_old_task_index(-1)
386 {
387 if (tti) {
388 m_old_task_index = tti->taskIndex();
389 tti->setTaskIndex(task_index);
390 }
391 }
392 ~TaskInfoLockGuard()
393 {
394 if (m_tti)
395 m_tti->setTaskIndex(m_old_task_index);
396 }
397
398 private:
399
400 TaskThreadInfo* m_tti;
401 Integer m_old_task_index;
402 };
403
404 public:
405
406 TBBTaskImplementation() = default;
407 ~TBBTaskImplementation() override;
408
409 public:
410
411 void build() {}
412 void initialize(Int32 nb_thread) override;
413 void terminate() override;
414
416 {
417 OneTBBTask* t = new OneTBBTask(f);
418 return t;
419 }
420
421 void executeParallelFor(Int32 begin, Int32 size, const ParallelLoopOptions& options, IRangeFunctor* f) final;
422 void executeParallelFor(Int32 begin, Int32 size, Integer grain_size, IRangeFunctor* f) final;
423 void executeParallelFor(Int32 begin, Int32 size, IRangeFunctor* f) final
424 {
426 }
427 void executeParallelFor(const ParallelFor1DLoopInfo& loop_info) override;
428
430 const ForLoopRunInfo& run_info,
431 IMDRangeFunctor<1>* functor) final
432 {
433 _executeMDParallelFor<1>(loop_ranges, functor, run_info);
434 }
436 const ForLoopRunInfo& run_info,
437 IMDRangeFunctor<2>* functor) final
438 {
439 _executeMDParallelFor<2>(loop_ranges, functor, run_info);
440 }
442 const ForLoopRunInfo& run_info,
443 IMDRangeFunctor<3>* functor) final
444 {
445 _executeMDParallelFor<3>(loop_ranges, functor, run_info);
446 }
448 const ForLoopRunInfo& run_info,
449 IMDRangeFunctor<4>* functor) final
450 {
451 _executeMDParallelFor<4>(loop_ranges, functor, run_info);
452 }
453
454 bool isActive() const final
455 {
456 return m_is_active;
457 }
458
460 {
461 return (nbAllowedThread() <= 1) ? 0 : _currentTaskTreadIndex();
462 }
463
464 Int32 currentTaskIndex() const final;
465
466 void printInfos(std::ostream& o) const final;
467
468 public:
469
476 TaskThreadInfo* currentTaskThreadInfo() const;
477
478 private:
479
480 bool m_is_active = false;
481 Impl* m_p = nullptr;
482
483 private:
484
485 template <int RankValue> void
486 _executeMDParallelFor(const ComplexForLoopRanges<RankValue>& loop_ranges,
487 IMDRangeFunctor<RankValue>* functor,
488 const ForLoopRunInfo& run_info);
489 void _executeParallelFor(const ParallelFor1DLoopInfo& loop_info);
490};
491
492/*---------------------------------------------------------------------------*/
493/*---------------------------------------------------------------------------*/
494
495class TBBTaskImplementation::Impl
496{
497 class TaskObserver
498 : public tbb::task_scheduler_observer
499 {
500 public:
501
502 explicit TaskObserver(TBBTaskImplementation::Impl* p)
503 : tbb::task_scheduler_observer(p->m_main_arena)
504 , m_p(p)
505 {
506 }
507 void on_scheduler_entry(bool is_worker) override
508 {
509 m_p->notifyThreadCreated(is_worker);
510 }
511 void on_scheduler_exit(bool is_worker) override
512 {
513 m_p->notifyThreadDestroyed(is_worker);
514 }
516 };
517
518 public:
519
520 Impl()
521 : m_task_observer(this)
522 , m_thread_task_infos(AlignedMemoryAllocator::CacheLine())
523 {
524 m_nb_allowed_thread = tbb::info::default_concurrency();
525 _init();
526 }
527 Impl(Int32 nb_thread)
528 : m_main_arena(nb_thread)
529 , m_task_observer(this)
530 , m_thread_task_infos(AlignedMemoryAllocator::CacheLine())
531 {
532 m_nb_allowed_thread = nb_thread;
533 _init();
534 }
535
536 public:
537
538 Int32 nbAllowedThread() const { return m_nb_allowed_thread; }
539 TaskThreadInfo* threadTaskInfo(Integer index) { return &m_thread_task_infos[index]; }
540
541 private:
542
543 Int32 m_nb_allowed_thread = 0;
544
545 public:
546
547 void terminate()
548 {
549 for (auto x : m_sub_arena_list) {
550 if (x)
551 x->terminate();
552 delete x;
553 }
554 m_sub_arena_list.clear();
555 m_main_arena.terminate();
556 m_task_observer.observe(false);
557 oneapi::tbb::finalize(m_task_scheduler_handle);
558 }
559
560 public:
561
562 void notifyThreadCreated(bool is_worker)
563 {
564 std::thread::id my_thread_id = std::this_thread::get_id();
565
566 // Avec OneTBB, cette méthode est appelée à chaque fois qu'on rentre
567 // dans notre 'task_arena'. Comme il ne faut appeler qu'une seule
568 // fois la méthode de notification on utilise un ensemble pour
569 // conserver la liste des threads déjà créés.
570 // NOTE: On ne peut pas utiliser cette méthode avec la version TBB historique
571 // (2018) car cette méthode 'contains' n'existe pas
572 if (m_constructed_thread_map.contains(my_thread_id))
573 return;
574 m_constructed_thread_map.insert(my_thread_id);
575
576 {
577 if (TaskFactory::verboseLevel() >= 1) {
578 std::ostringstream ostr;
579 ostr << "TBB: CREATE THREAD"
580 << " nb_allowed=" << m_nb_allowed_thread
581 << " tbb_default_allowed=" << tbb::info::default_concurrency()
582 << " id=" << my_thread_id
583 << " arena_id=" << _currentTaskTreadIndex()
584 << " is_worker=" << is_worker
585 << "\n";
586 std::cout << ostr.str();
587 }
589 }
590 }
591
592 void notifyThreadDestroyed([[maybe_unused]] bool is_worker)
593 {
594 // Avec OneTBB, cette méthode est appelée à chaque fois qu'on sort
595 // de l'arène principale. Du coup elle ne correspond pas vraiment à une
596 // destruction de thread. On ne fait donc rien pour cette notification
597 // TODO: Regarder comment on peut être notifié de la destruction effective
598 // du thread.
599 }
600
601 private:
602
603#if TBB_VERSION_MAJOR > 2021 || (TBB_VERSION_MAJOR == 2021 && TBB_VERSION_MINOR > 5)
604 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = oneapi::tbb::attach();
605#else
606 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = tbb::task_scheduler_handle::get();
607#endif
608
609 public:
610
611 tbb::task_arena m_main_arena;
614
615 private:
616
617 TaskObserver m_task_observer;
618 std::mutex m_thread_created_mutex;
619 UniqueArray<TaskThreadInfo> m_thread_task_infos;
620 tbb::concurrent_set<std::thread::id> m_constructed_thread_map;
621 void _init()
622 {
623 ConcurrencyBase::_setMaxAllowedThread(m_nb_allowed_thread);
624
625 if (TaskFactory::verboseLevel() >= 1) {
626 std::cout << "TBB: TBBTaskImplementationInit nb_allowed_thread=" << m_nb_allowed_thread
627 << " id=" << std::this_thread::get_id()
628 << " version=" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR
629 << "\n";
630 }
631 m_thread_task_infos.resize(m_nb_allowed_thread);
632 m_task_observer.observe(true);
633 Integer max_arena_size = m_nb_allowed_thread;
634 // Limite artificiellement le nombre de tbb::task_arena
635 // pour éviter d'avoir trop d'objets alloués.
636 if (max_arena_size > 512)
637 max_arena_size = 512;
638 if (max_arena_size < 2)
639 max_arena_size = 2;
640 m_sub_arena_list.resize(max_arena_size);
641 m_sub_arena_list[0] = m_sub_arena_list[1] = nullptr;
642 for (Integer i = 2; i < max_arena_size; ++i)
643 m_sub_arena_list[i] = new tbb::task_arena(i);
644 }
645};
646
647/*---------------------------------------------------------------------------*/
648/*---------------------------------------------------------------------------*/
652class TBBParallelFor
653{
654 public:
655
656 TBBParallelFor(IRangeFunctor* f, Int32 nb_allowed_thread, ForLoopOneExecStat* stat_info)
657 : m_functor(f)
658 , m_stat_info(stat_info)
659 , m_nb_allowed_thread(nb_allowed_thread)
660 {}
661
662 public:
663
664 void operator()(tbb::blocked_range<Integer>& range) const
665 {
666#ifdef ARCANE_CHECK
667 if (TaskFactory::verboseLevel() >= 3) {
668 std::ostringstream o;
669 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
670 << " id=" << std::this_thread::get_id()
671 << " max_allowed=" << m_nb_allowed_thread
672 << " range_begin=" << range.begin() << " range_size=" << range.size()
673 << "\n";
674 std::cout << o.str();
675 std::cout.flush();
676 }
677
678 int tbb_index = _currentTaskTreadIndex();
679 if (tbb_index < 0 || tbb_index >= m_nb_allowed_thread)
680 ARCANE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
681 tbb_index, m_nb_allowed_thread);
682#endif
683
684 if (m_stat_info)
685 m_stat_info->incrementNbChunk();
686 m_functor->executeFunctor(range.begin(), CheckedConvert::toInteger(range.size()));
687 }
688
689 private:
690
691 IRangeFunctor* m_functor;
692 ForLoopOneExecStat* m_stat_info = nullptr;
693 Int32 m_nb_allowed_thread;
694};
695
696/*---------------------------------------------------------------------------*/
697/*---------------------------------------------------------------------------*/
701template <int RankValue>
702class TBBMDParallelFor
703{
704 public:
705
706 TBBMDParallelFor(IMDRangeFunctor<RankValue>* f, Int32 nb_allowed_thread, ForLoopOneExecStat* stat_info)
707 : m_functor(f)
708 , m_stat_info(stat_info)
709 , m_nb_allowed_thread(nb_allowed_thread)
710 {}
711
712 public:
713
714 void operator()(blocked_nd_range<Int32, RankValue>& range) const
715 {
716#ifdef ARCANE_CHECK
717 if (TaskFactory::verboseLevel() >= 3) {
718 std::ostringstream o;
719 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
720 << " id=" << std::this_thread::get_id()
721 << " max_allowed=" << m_nb_allowed_thread
722 << " MDFor ";
723 for (Int32 i = 0; i < RankValue; ++i) {
724 auto r0 = static_cast<Int32>(range.dim(i).begin());
725 auto r1 = static_cast<Int32>(range.dim(i).size());
726 o << " range" << i << " (begin=" << r0 << " size=" << r1 << ")";
727 }
728 o << "\n";
729 std::cout << o.str();
730 std::cout.flush();
731 }
732
733 int tbb_index = _currentTaskTreadIndex();
734 if (tbb_index < 0 || tbb_index >= m_nb_allowed_thread)
735 ARCANE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
736 tbb_index, m_nb_allowed_thread);
737#endif
738
739 if (m_stat_info)
740 m_stat_info->incrementNbChunk();
741 m_functor->executeFunctor(_fromTBBRange(range));
742 }
743
744 private:
745
746 IMDRangeFunctor<RankValue>* m_functor = nullptr;
747 ForLoopOneExecStat* m_stat_info = nullptr;
748 Int32 m_nb_allowed_thread;
749};
750
751/*---------------------------------------------------------------------------*/
752/*---------------------------------------------------------------------------*/
770class TBBDeterministicParallelFor
771{
772 public:
773
774 TBBDeterministicParallelFor(TBBTaskImplementation* impl, const TBBParallelFor& tbb_for,
775 Integer begin_index, Integer size, Integer grain_size, Integer nb_thread)
776 : m_impl(impl)
777 , m_tbb_for(tbb_for)
778 , m_nb_thread(nb_thread)
779 , m_begin_index(begin_index)
780 , m_size(size)
781 , m_grain_size(grain_size)
782 , m_nb_block(0)
783 , m_block_size(0)
784 , m_nb_block_per_thread(0)
785 {
786 if (m_nb_thread < 1)
787 m_nb_thread = 1;
788
789 if (m_grain_size > 0) {
790 m_block_size = m_grain_size;
791 if (m_block_size > 0) {
792 m_nb_block = m_size / m_block_size;
793 if ((m_size % m_block_size) != 0)
794 ++m_nb_block;
795 }
796 else
797 m_nb_block = 1;
798 m_nb_block_per_thread = m_nb_block / m_nb_thread;
799 if ((m_nb_block % m_nb_thread) != 0)
800 ++m_nb_block_per_thread;
801 }
802 else {
803 if (m_nb_block < 1)
804 m_nb_block = m_nb_thread;
805 m_block_size = m_size / m_nb_block;
806 m_nb_block_per_thread = 1;
807 }
808 if (TaskFactory::verboseLevel() >= 2) {
809 std::cout << "TBBDeterministicParallelFor: BEGIN=" << m_begin_index << " size=" << m_size
810 << " grain_size=" << m_grain_size
811 << " nb_block=" << m_nb_block << " nb_thread=" << m_nb_thread
812 << " nb_block_per_thread=" << m_nb_block_per_thread
813 << " block_size=" << m_block_size
814 << " block_size*nb_block=" << m_block_size * m_nb_block << '\n';
815 }
816 }
817
818 public:
819
826 void operator()(tbb::blocked_range<Integer>& range) const
827 {
828 auto nb_iter = static_cast<Integer>(range.size());
829 for (Integer i = 0; i < nb_iter; ++i) {
830 Integer task_id = range.begin() + i;
831 for (Integer k = 0, kn = m_nb_block_per_thread; k < kn; ++k) {
832 Integer block_id = task_id + (k * m_nb_thread);
833 if (block_id < m_nb_block)
834 _doBlock(task_id, block_id);
835 }
836 }
837 }
838
839 void _doBlock(Integer task_id, Integer block_id) const
840 {
841 TBBTaskImplementation::TaskInfoLockGuard guard(m_impl->currentTaskThreadInfo(), task_id);
842
843 Integer iter_begin = block_id * m_block_size;
844 Integer iter_size = m_block_size;
845 if ((block_id + 1) == m_nb_block) {
846 // Pour le dernier bloc, la taille est le nombre d'éléments restants
847 iter_size = m_size - iter_begin;
848 }
849 iter_begin += m_begin_index;
850#ifdef ARCANE_CHECK
851 if (TaskFactory::verboseLevel() >= 3) {
852 std::ostringstream o;
853 o << "TBB: DoBlock: BLOCK task_id=" << task_id << " block_id=" << block_id
854 << " iter_begin=" << iter_begin << " iter_size=" << iter_size << '\n';
855 std::cout << o.str();
856 std::cout.flush();
857 }
858#endif
859 if (iter_size > 0) {
860 auto r = tbb::blocked_range<int>(iter_begin, iter_begin + iter_size);
861 m_tbb_for(r);
862 }
863 }
864
865 private:
866
867 TBBTaskImplementation* m_impl;
868 const TBBParallelFor& m_tbb_for;
869 Integer m_nb_thread;
870 Integer m_begin_index;
871 Integer m_size;
872 Integer m_grain_size;
873 Integer m_nb_block;
874 Integer m_block_size;
875 Integer m_nb_block_per_thread;
876};
877
878/*---------------------------------------------------------------------------*/
879/*---------------------------------------------------------------------------*/
880
882{
883 public:
884
885 ParallelForExecute(TBBTaskImplementation* impl, const ParallelLoopOptions& options,
886 Integer begin, Integer size, IRangeFunctor* f, ForLoopOneExecStat* stat_info)
887 : m_impl(impl)
888 , m_begin(begin)
889 , m_size(size)
890 , m_functor(f)
891 , m_options(options)
892 , m_stat_info(stat_info)
893 {}
894
895 public:
896
897 void operator()() const
898 {
899 Integer nb_thread = m_options.maxThread();
900 TBBParallelFor pf(m_functor, nb_thread, m_stat_info);
901 Integer gsize = m_options.grainSize();
902 tbb::blocked_range<Integer> range(m_begin, m_begin + m_size);
903 if (TaskFactory::verboseLevel() >= 1)
904 std::cout << "TBB: TBBTaskImplementationInit ParallelForExecute begin=" << m_begin
905 << " size=" << m_size << " gsize=" << gsize
906 << " partitioner=" << (int)m_options.partitioner()
907 << " nb_thread=" << nb_thread
908 << " has_stat_info=" << (m_stat_info != nullptr)
909 << '\n';
910
911 if (gsize > 0)
912 range = tbb::blocked_range<Integer>(m_begin, m_begin + m_size, gsize);
913
914 if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Static) {
915 tbb::parallel_for(range, pf, tbb::static_partitioner());
916 }
917 else if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Deterministic) {
918 tbb::blocked_range<Integer> range2(0, nb_thread, 1);
919 TBBDeterministicParallelFor dpf(m_impl, pf, m_begin, m_size, gsize, nb_thread);
920 tbb::parallel_for(range2, dpf);
921 }
922 else
923 tbb::parallel_for(range, pf);
924 }
925
926 private:
927
928 TBBTaskImplementation* m_impl = nullptr;
929 Integer m_begin;
930 Integer m_size;
931 IRangeFunctor* m_functor = nullptr;
932 ParallelLoopOptions m_options;
933 ForLoopOneExecStat* m_stat_info = nullptr;
934};
935
936/*---------------------------------------------------------------------------*/
937/*---------------------------------------------------------------------------*/
938
939template <int RankValue>
941{
942 public:
943
944 MDParallelForExecute(TBBTaskImplementation* impl,
945 const ParallelLoopOptions& options,
947 IMDRangeFunctor<RankValue>* f, [[maybe_unused]] ForLoopOneExecStat* stat_info)
948 : m_impl(impl)
949 , m_tbb_range(_toTBBRange(range))
950 , m_functor(f)
951 , m_options(options)
952 , m_stat_info(stat_info)
953 {
954 // On ne peut pas modifier les valeurs d'une instance de tbb::blocked_rangeNd.
955 // Il faut donc en reconstruire une complètement.
956 FixedArray<size_t, RankValue> all_grain_sizes;
957 Int32 gsize = m_options.grainSize();
958 if (gsize > 0) {
959 // Si la taille du grain est différent zéro, il faut la répartir
960 // sur l'ensemble des dimensions. On commence par la dernière.
961 // TODO: regarder pourquoi dans certains cas les performances sont
962 // inférieures à celles qu'on obtient en utilisant un partitionneur
963 // statique.
964 constexpr bool is_verbose = false;
965 std::array<Int32, RankValue> range_extents = range.extents().asStdArray();
966 double ratio = static_cast<double>(gsize) / static_cast<double>(range.nbElement());
967 if constexpr (is_verbose) {
968 std::cout << "GSIZE=" << gsize << " rank=" << RankValue << " ratio=" << ratio;
969 for (Int32 i = 0; i < RankValue; ++i)
970 std::cout << " range" << i << "=" << range_extents[i];
971 std::cout << "\n";
972 }
973 Int32 index = RankValue - 1;
974 Int32 remaining_grain = gsize;
975 for (; index >= 0; --index) {
976 Int32 current = range_extents[index];
977 if constexpr (is_verbose)
978 std::cout << "Check index=" << index << " remaining=" << remaining_grain << " current=" << current << "\n";
979 if (remaining_grain > current) {
980 all_grain_sizes[index] = current;
981 remaining_grain /= current;
982 }
983 else {
984 all_grain_sizes[index] = remaining_grain;
985 break;
986 }
987 }
988 for (Int32 i = 0; i < index; ++i)
989 all_grain_sizes[i] = 1;
990 if constexpr (is_verbose) {
991 for (Int32 i = 0; i < RankValue; ++i)
992 std::cout << " grain" << i << "=" << all_grain_sizes[i];
993 std::cout << "\n";
994 }
995 m_tbb_range = _toTBBRangeWithGrain(m_tbb_range, all_grain_sizes);
996 }
997 }
998
999 public:
1000
1001 void operator()() const
1002 {
1003 Integer nb_thread = m_options.maxThread();
1004 TBBMDParallelFor<RankValue> pf(m_functor, nb_thread, m_stat_info);
1005
1006 if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Static) {
1007 tbb::parallel_for(m_tbb_range, pf, tbb::static_partitioner());
1008 }
1009 else if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Deterministic) {
1010 // TODO: implémenter le mode déterministe
1011 ARCANE_THROW(NotImplementedException, "ParallelLoopOptions::Partitioner::Deterministic for multi-dimensionnal loops");
1012 //tbb::blocked_range<Integer> range2(0,nb_thread,1);
1013 //TBBDeterministicParallelFor dpf(m_impl,pf,m_begin,m_size,gsize,nb_thread);
1014 //tbb::parallel_for(range2,dpf);
1015 }
1016 else {
1017 tbb::parallel_for(m_tbb_range, pf);
1018 }
1019 }
1020
1021 private:
1022
1023 TBBTaskImplementation* m_impl = nullptr;
1024 blocked_nd_range<Int32, RankValue> m_tbb_range;
1025 IMDRangeFunctor<RankValue>* m_functor = nullptr;
1026 ParallelLoopOptions m_options;
1027 ForLoopOneExecStat* m_stat_info = nullptr;
1028};
1029
1030/*---------------------------------------------------------------------------*/
1031/*---------------------------------------------------------------------------*/
1032
1033TBBTaskImplementation::
1034~TBBTaskImplementation()
1035{
1036 delete m_p;
1037}
1038
1039/*---------------------------------------------------------------------------*/
1040/*---------------------------------------------------------------------------*/
1041
1043initialize(Int32 nb_thread)
1044{
1045 if (nb_thread < 0)
1046 nb_thread = 0;
1047 m_is_active = (nb_thread != 1);
1048 if (nb_thread != 0)
1049 m_p = new Impl(nb_thread);
1050 else
1051 m_p = new Impl();
1055}
1056
1057/*---------------------------------------------------------------------------*/
1058/*---------------------------------------------------------------------------*/
1059
1061terminate()
1062{
1063 m_p->terminate();
1064}
1065
1066/*---------------------------------------------------------------------------*/
1067/*---------------------------------------------------------------------------*/
1068
1070printInfos(std::ostream& o) const
1071{
1072 o << "OneTBBTaskImplementation"
1073 << " version=" << TBB_VERSION_STRING
1074 << " interface=" << TBB_INTERFACE_VERSION
1075 << " runtime_interface=" << TBB_runtime_interface_version();
1076}
1077
1078/*---------------------------------------------------------------------------*/
1079/*---------------------------------------------------------------------------*/
1080
1081void TBBTaskImplementation::
1082_executeParallelFor(const ParallelFor1DLoopInfo& loop_info)
1083{
1084 ScopedExecInfo sei(loop_info.runInfo());
1085 ForLoopOneExecStat* stat_info = sei.statInfo();
1086 ::Arcane::Impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1087
1088 Int32 begin = loop_info.beginIndex();
1089 Int32 size = loop_info.size();
1090 ParallelLoopOptions options = loop_info.runInfo().options().value_or(TaskFactory::defaultParallelLoopOptions());
1091 IRangeFunctor* f = loop_info.functor();
1093
1094 Integer max_thread = options.maxThread();
1095 Integer nb_allowed_thread = m_p->nbAllowedThread();
1096 if (max_thread < 0)
1097 max_thread = nb_allowed_thread;
1098
1099 if (TaskFactory::verboseLevel() >= 1)
1100 std::cout << "TBB: TBBTaskImplementation executeParallelFor begin=" << begin
1101 << " size=" << size << " max_thread=" << max_thread
1102 << " grain_size=" << options.grainSize()
1103 << " nb_allowed=" << nb_allowed_thread << '\n';
1104
1105 // En exécution séquentielle, appelle directement la méthode \a f.
1106 if (max_thread == 1 || max_thread == 0) {
1107 f->executeFunctor(begin, size);
1108 return;
1109 }
1110
1111 // Remplace les valeurs non initialisées de \a options par celles de \a m_default_loop_options
1112 ParallelLoopOptions true_options(options);
1113 true_options.mergeUnsetValues(TaskFactory::defaultParallelLoopOptions());
1114 true_options.setMaxThread(max_thread);
1115
1116 ParallelForExecute pfe(this, true_options, begin, size, f, stat_info);
1117
1118 tbb::task_arena* used_arena = nullptr;
1119 if (max_thread < nb_allowed_thread && max_thread < m_p->m_sub_arena_list.size())
1120 used_arena = m_p->m_sub_arena_list[max_thread];
1121 if (!used_arena)
1122 used_arena = &(m_p->m_main_arena);
1123 used_arena->execute(pfe);
1124}
1125
1126/*---------------------------------------------------------------------------*/
1127/*---------------------------------------------------------------------------*/
1128
1131{
1132 _executeParallelFor(loop_info);
1133}
1134
1135/*---------------------------------------------------------------------------*/
1136/*---------------------------------------------------------------------------*/
1143template <int RankValue> void TBBTaskImplementation::
1146 const ForLoopRunInfo& run_info)
1147{
1148 ParallelLoopOptions options;
1149 if (run_info.options().has_value())
1150 options = run_info.options().value();
1151
1152 ScopedExecInfo sei(run_info);
1153 ForLoopOneExecStat* stat_info = sei.statInfo();
1154 ::Arcane::Impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1155
1156 if (TaskFactory::verboseLevel() >= 1) {
1157 std::cout << "TBB: TBBTaskImplementation executeMDParallelFor nb_dim=" << RankValue
1158 << " nb_element=" << loop_ranges.nbElement()
1159 << " grain_size=" << options.grainSize()
1160 << " name=" << run_info.traceInfo().traceInfo()
1161 << " has_stat_info=" << (stat_info != nullptr)
1162 << '\n';
1163 }
1164
1165 Integer max_thread = options.maxThread();
1166 // En exécution séquentielle, appelle directement la méthode \a f.
1167 if (max_thread == 1 || max_thread == 0) {
1168 functor->executeFunctor(loop_ranges);
1169 return;
1170 }
1171
1172 // Remplace les valeurs non initialisées de \a options par celles de \a m_default_loop_options
1173 ParallelLoopOptions true_options(options);
1175
1176 Integer nb_allowed_thread = m_p->nbAllowedThread();
1177 if (max_thread < 0)
1178 max_thread = nb_allowed_thread;
1179 tbb::task_arena* used_arena = nullptr;
1180 if (max_thread < nb_allowed_thread)
1181 used_arena = m_p->m_sub_arena_list[max_thread];
1182 if (!used_arena)
1183 used_arena = &(m_p->m_main_arena);
1184
1185 // Pour l'instant pour la dimension 1, utilise le 'ParallelForExecute' historique
1186 if constexpr (RankValue == 1) {
1187 auto range_1d = _toTBBRange(loop_ranges);
1188 auto x1 = [&](Integer begin, Integer size) {
1189 functor->executeFunctor(makeLoopRanges(ForLoopRange(begin, size)));
1190 //functor->executeFunctor(ComplexForLoopRanges<1>(begin,size));
1191 };
1192 LambdaRangeFunctorT<decltype(x1)> functor_1d(x1);
1193 Integer begin1 = CheckedConvert::toInteger(range_1d.dim(0).begin());
1194 Integer size1 = CheckedConvert::toInteger(range_1d.dim(0).size());
1195 ParallelForExecute pfe(this, true_options, begin1, size1, &functor_1d, stat_info);
1196 used_arena->execute(pfe);
1197 }
1198 else {
1199 MDParallelForExecute<RankValue> pfe(this, true_options, loop_ranges, functor, stat_info);
1200 used_arena->execute(pfe);
1201 }
1202}
1203
1204/*---------------------------------------------------------------------------*/
1205/*---------------------------------------------------------------------------*/
1206
1208executeParallelFor(Integer begin, Integer size, Integer grain_size, IRangeFunctor* f)
1209{
1211 opts.setGrainSize(grain_size);
1212 ForLoopRunInfo run_info(opts);
1213 executeParallelFor(ParallelFor1DLoopInfo(begin, size, f, run_info));
1214}
1215
1216/*---------------------------------------------------------------------------*/
1217/*---------------------------------------------------------------------------*/
1218
1224
1225/*---------------------------------------------------------------------------*/
1226/*---------------------------------------------------------------------------*/
1227
1230{
1231 Int32 thread_id = currentTaskThreadIndex();
1232 if (thread_id >= 0)
1233 return m_p->threadTaskInfo(thread_id);
1234 return nullptr;
1235}
1236
1237/*---------------------------------------------------------------------------*/
1238/*---------------------------------------------------------------------------*/
1239
1241currentTaskIndex() const
1242{
1243 Int32 thread_id = currentTaskThreadIndex();
1244 // Ce test avait été ajouté pour coutourner un bug dans une des versions
1245 // de OneTBB. Il est surement inutile aujourd'hui (2025)
1246 if (thread_id < 0 || thread_id >= m_p->nbAllowedThread())
1247 return 0;
1249 if (tti) {
1250 Int32 task_index = tti->taskIndex();
1251 if (task_index >= 0)
1252 return task_index;
1253 }
1254 return thread_id;
1255}
1256
1257/*---------------------------------------------------------------------------*/
1258/*---------------------------------------------------------------------------*/
1259
1262{
1263 tbb::task_group task_group;
1264 task_group.run(taskFunctor());
1265 task_group.wait();
1266 delete this;
1267}
1268
1269/*---------------------------------------------------------------------------*/
1270/*---------------------------------------------------------------------------*/
1271
1274{
1275 tbb::task_group task_group;
1276 Integer n = tasks.size();
1277 if (n == 0)
1278 return;
1279
1280 //set_ref_count(n+1);
1281 for (Integer i = 0; i < n; ++i) {
1282 auto* t = static_cast<OneTBBTask*>(tasks[i]);
1283 task_group.run(t->taskFunctor());
1284 }
1285 task_group.wait();
1286 for (Integer i = 0; i < n; ++i) {
1287 auto* t = static_cast<OneTBBTask*>(tasks[i]);
1288 delete t;
1289 }
1290}
1291
1292/*---------------------------------------------------------------------------*/
1293/*---------------------------------------------------------------------------*/
1294
1295ITask* OneTBBTask::
1296_createChildTask(ITaskFunctor* functor)
1297{
1298 auto* t = new OneTBBTask(functor);
1299 return t;
1300}
1301
1302/*---------------------------------------------------------------------------*/
1303/*---------------------------------------------------------------------------*/
1304
1305ARCANE_DI_REGISTER_PROVIDER(TBBTaskImplementation,
1306 DependencyInjection::ProviderProperty("TBBTaskImplementation"),
1307 ARCANE_DI_INTERFACES(ITaskImplementation),
1308 ARCANE_DI_EMPTY_CONSTRUCTOR());
1309
1310/*---------------------------------------------------------------------------*/
1311/*---------------------------------------------------------------------------*/
1312
1313} // End namespace Arcane
1314
1315/*---------------------------------------------------------------------------*/
1316/*---------------------------------------------------------------------------*/
#define ARCANE_CHECK_POINTER(ptr)
Macro retournant le pointeur ptr s'il est non nul ou lancant une exception s'il est nul.
#define ARCANE_THROW(exception_class,...)
Macro pour envoyer une exception avec formattage.
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
#define ARCANE_ALIGNAS_PACKED(value)
Macro pour garantir le compactage et l'alignement d'une classe sur value octets.
Allocateur mémoire avec alignement mémoire spécifique.
static void _setMaxAllowedThread(Int32 v)
Positionne le nombre maximum de thread à utiliser.
Vue constante d'un tableau de type T.
constexpr Integer size() const noexcept
Nombre d'éléments du tableau.
Classe pour gérer le profiling d'une seule exécution d'une boucle.
Intervalle d'itération pour une boucle.
Informations d'exécution d'une boucle.
Interface d'un fonctor sur un interval d'itération multi-dimensionnel de dimension RankValue.
Interface d'un fonctor sur un interval d'itération.
virtual void executeFunctor(Int32 begin, Int32 size)=0
Exécute la méthode associée.
Interface d'un fonctor pour une tâche.
Definition Task.h:71
virtual void executeFunctor(const TaskContext &tc)=0
Exécute la méthode associé
Implémentation d'une fabrique de tâches.
Int32 nbAllowedThread() const
Nombre de threads utilisés au maximum pour gérer les tâches.
Interface d'une tâche concourante.
Definition Task.h:186
Classe permettant de récupérer le temps passé entre l'appel au constructeur et au destructeur.
Fonctor sur un interval d'itération instancié via une lambda fonction.
Exception lorsqu'une fonction n'est pas implémentée.
void launchAndWait() override
Lance la tâche et bloque jusqu'à ce qu'elle se termine.
Caractéristiques d'un boucle 1D multi-thread.
Definition ParallelFor.h:34
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
void mergeUnsetValues(const ParallelLoopOptions &po)
Fusionne les valeurs non modifiées de l'instance par celles de po.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
void setMaxThread(Integer v)
Positionne le nombre maximal de threads autorisé.
@ Deterministic
Utilise un partitionnement et un ordonnancement statique.
static Impl::ForLoopStatInfoList * _threadLocalForLoopInstance()
Definition Profiling.cc:194
static bool hasProfiling()
Indique si le profilage est actif.
Implémentation déterministe de ParallelFor.
void operator()(tbb::blocked_range< Integer > &range) const
Opérateur pour un thread donné.
Exécuteur pour une boucle multi-dimension.
Exécuteur pour une boucle 1D.
UniqueArray< tbb::task_arena * > m_sub_arena_list
Tableau dont le i-ème élément contient la tbb::task_arena pour i thread.
Classe pour positionner TaskThreadInfo::taskIndex().
Int32 currentTaskThreadIndex() const final
Implémentation de TaskFactory::currentTaskThreadIndex()
void initialize(Int32 nb_thread) override
void executeParallelFor(const ComplexForLoopRanges< 1 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 1 > *functor) final
Exécute une boucle 1D en concurrence.
void executeParallelFor(Int32 begin, Int32 size, IRangeFunctor *f) final
Exécute le fonctor f en concurrence.
ITask * createRootTask(ITaskFunctor *f) override
Créé une tâche racine. L'implémentation doit recopier la valeur de f qui est soit un TaskFunctor,...
void printInfos(std::ostream &o) const final
Affiche les informations sur le runtime utilisé
void executeParallelFor(Int32 begin, Int32 size, const ParallelLoopOptions &options, IRangeFunctor *f) final
Exécute le fonctor f en concurrence.
void executeParallelFor(const ComplexForLoopRanges< 3 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 3 > *functor) final
Exécute une boucle 3D en concurrence.
void executeParallelFor(const ComplexForLoopRanges< 4 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 4 > *functor) final
Exécute une boucle 4D en concurrence.
TaskThreadInfo * currentTaskThreadInfo() const
Instance de TaskThreadInfo associé au thread courant.
bool isActive() const final
Indique si l'implémentation est active.
void _executeMDParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, IMDRangeFunctor< RankValue > *functor, const ForLoopRunInfo &run_info)
Exécution d'une boucle N-dimensions.
Int32 currentTaskIndex() const final
Implémentation de TaskFactory::currentTaskIndex()
void executeParallelFor(const ComplexForLoopRanges< 2 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 2 > *functor) final
Exécute une boucle 2D en concurrence.
Contexte d'éxecution d'une tâche.
Definition Task.h:46
static void notifyThreadCreated()
Notifie tous les observateurs de création de thread.
static const ParallelLoopOptions & defaultParallelLoopOptions()
Valeurs par défaut d'exécution d'une boucle parallèle.
static Integer verboseLevel()
Niveau de verbosité
static void setDefaultParallelLoopOptions(const ParallelLoopOptions &v)
Positionne les valeurs par défaut d'exécution d'une boucle parallèle.
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
Vecteur 1D de données avec sémantique par valeur (style STL).
ARCCORE_BASE_EXPORT String getStackTrace()
Retourne une chaîne de caractere contenant la pile d'appel.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Int32 Integer
Type représentant un entier.
SimpleForLoopRanges< 1 > makeLoopRanges(Int32 n1)
Créé un intervalle d'itération [0,n1[.
std::int32_t Int32
Type entier signé sur 32 bits.