Arcane  4.1.12.0
User documentation
Loading...
Searching...
No Matches
TBBTaskImplementation.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* TBBTaskImplementation.cc (C) 2000-2025 */
9/* */
10/* Implementation of tasks using TBB (Intel Threads Building Blocks). */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/base/NotImplementedException.h"
15#include "arccore/base/IFunctor.h"
16#include "arccore/base/ForLoopRanges.h"
17#include "arccore/base/IObservable.h"
18#include "arccore/base/PlatformUtils.h"
19#include "arccore/base/FixedArray.h"
20#include "arccore/base/Profiling.h"
21#include "arccore/base/CheckedConvert.h"
22#include "arccore/base/FixedArray.h"
23#include "arccore/base/ForLoopRunInfo.h"
24#include "arccore/base/internal/DependencyInjection.h"
25
26#include "arccore/concurrency/IThreadImplementation.h"
27#include "arccore/concurrency/Task.h"
28#include "arccore/concurrency/ITaskImplementation.h"
29#include "arccore/concurrency/TaskFactory.h"
30#include "arccore/concurrency/ParallelFor.h"
31#include "arccore/concurrency/internal/TaskFactoryInternal.h"
32
33#include <new>
34#include <stack>
35#include <vector>
36
37// This macro must be defined for the class 'blocked_rangeNd' to be available
38
39#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
40
41// The macro 'ARCCORE_USE_ONETBB' is defined in CMakeLists.txt
42// if compiling with the OneTBB version 2021+
43// (https://github.com/oneapi-src/oneTBB.git)
44// Eventually, this will be the only version supported by Arcane.
45
46// Necessary to access task_scheduler_handle
47#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
48#include <tbb/tbb.h>
49#include <oneapi/tbb/concurrent_set.h>
50#include <oneapi/tbb/global_control.h>
51
52#include <thread>
53#include <mutex>
54
55/*---------------------------------------------------------------------------*/
56/*---------------------------------------------------------------------------*/
57
58namespace Arcane
59{
60
62
63// TODO: use a specific memory pool to manage the
64// OneTBBTask to optimize the new/delete of instances of this class.
65// Previously, with older versions of TBB, this was managed with
66// the method 'tbb::task::allocate_child()'.
67
68/*---------------------------------------------------------------------------*/
69/*---------------------------------------------------------------------------*/
70
71#if (TBB_VERSION_MAJOR > 2022) || (TBB_VERSION_MAJOR == 2022 && TBB_VERSION_MINOR > 0) || defined __TBB_blocked_nd_range_H
72
73// The class "blocked_rangeNd" was removed in version
74// 2022.0.0 and replaced by "blocked_nd_range".
75template <typename Value, unsigned int N>
76using blocked_nd_range = tbb::blocked_nd_range<Value, N>;
77
78#else
79
80template <typename Value, unsigned int N>
81using blocked_nd_range = tbb::blocked_rangeNd<Value, N>;
82
83#endif
84
85/*---------------------------------------------------------------------------*/
86/*---------------------------------------------------------------------------*/
87
88namespace
89{
90 constexpr Int32 cache_line_size = 64;
// True if the collection of execution statistics is enabled
92 bool isStatActive()
93 {
95 }
96
97 /*!
98 * \brief Class that ensures execution statistics are recorded
99 * even in case of an exception.
100 */
101 class ScopedExecInfo
102 {
103 public:
104
105 explicit ScopedExecInfo(const ForLoopRunInfo& run_info)
106 : m_run_info(run_info)
107 {
108 // If run_info.execInfo() is not null, we use it.
109 // This means that the caller will manage the execution statistics
110 // execution statistics. Otherwise, we use m_stat_info if execution statistics
111 // are requested.
112 ForLoopOneExecStat* ptr = run_info.execStat();
113 if (ptr) {
114 m_stat_info_ptr = ptr;
115 m_use_own_run_info = false;
116 }
117 else
118 m_stat_info_ptr = isStatActive() ? &m_stat_info : nullptr;
119 }
120 ~ScopedExecInfo()
121 {
122#ifdef PRINT_STAT_INFO
123 if (m_stat_info_ptr) {
124 bool is_valid = m_run_info.traceInfo().isValid();
125 if (!is_valid)
126 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
127 << " stack=" << platform::getStackTrace()
128 << "\n";
129 else
130 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
131 << " trace_name=" << m_run_info.traceInfo().traceInfo().name() << "\n";
132 }
133#endif
134 if (m_stat_info_ptr && m_use_own_run_info) {
135 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*m_stat_info_ptr, m_run_info.traceInfo());
136 }
137 }
138
139 public:
140
141 ForLoopOneExecStat* statInfo() const { return m_stat_info_ptr; }
142 bool isOwn() const { return m_use_own_run_info; }
143
144 private:
145
146 ForLoopOneExecStat m_stat_info;
147 ForLoopOneExecStat* m_stat_info_ptr = nullptr;
148 ForLoopRunInfo m_run_info;
149 //! Indicates if m_stat_info is used
150 bool m_use_own_run_info = true;
151 };
152
153 /*---------------------------------------------------------------------------*/
154 /*---------------------------------------------------------------------------*/
155
156 inline int _currentTaskTreadIndex()
157 {
158 // NOTE: With OneTBB 2021, the value is no longer '0' if this method is called
159 // from a thread outside of a task_arena. With version 2021,
160 // the value is 65535.
161 // NOTE: It seems this is a bug in 2021.3.
162 return tbb::this_task_arena::current_thread_index();
163 }
164
165 inline blocked_nd_range<Int32, 1>
166 _toTBBRange(const ComplexForLoopRanges<1>& r)
167 {
168 return { { r.lowerBound<0>(), r.upperBound<0>() } };
169 }
170
171 inline blocked_nd_range<Int32, 2>
172 _toTBBRange(const ComplexForLoopRanges<2>& r)
173 {
174 return { { r.lowerBound<0>(), r.upperBound<0>() },
175 { r.lowerBound<1>(), r.upperBound<1>() } };
176 }
177
178 inline blocked_nd_range<Int32, 3>
179 _toTBBRange(const ComplexForLoopRanges<3>& r)
180 {
181 return { { r.lowerBound<0>(), r.upperBound<0>() },
182 { r.lowerBound<1>(), r.upperBound<1>() },
183 { r.lowerBound<2>(), r.upperBound<2>() } };
184 }
185
186 inline blocked_nd_range<Int32, 4>
187 _toTBBRange(const ComplexForLoopRanges<4>& r)
188 {
189 return { { r.lowerBound<0>(), r.upperBound<0>() },
190 { r.lowerBound<1>(), r.upperBound<1>() },
191 { r.lowerBound<2>(), r.upperBound<2>() },
192 { r.lowerBound<3>(), r.upperBound<3>() } };
193 }
194
195 /*---------------------------------------------------------------------------*/
196 /*---------------------------------------------------------------------------*/
197
198 inline blocked_nd_range<Int32, 2>
199 _toTBBRangeWithGrain(const blocked_nd_range<Int32, 2>& r, FixedArray<size_t, 2> grain_sizes)
200 {
201 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
202 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] } };
203 }
204
205 inline blocked_nd_range<Int32, 3>
206 _toTBBRangeWithGrain(const blocked_nd_range<Int32, 3>& r, FixedArray<size_t, 3> grain_sizes)
207 {
208 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
209 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] },
210 { r.dim(2).begin(), r.dim(2).end(), grain_sizes[2] } };
211 }
212
213 inline blocked_nd_range<Int32, 4>
214 _toTBBRangeWithGrain(const blocked_nd_range<Int32, 4>& r, FixedArray<size_t, 4> grain_sizes)
215 {
216 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
217 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] },
218 { r.dim(2).begin(), r.dim(2).end(), grain_sizes[2] },
219 { r.dim(3).begin(), r.dim(3).end(), grain_sizes[3] } };
220 }
221
222 /*---------------------------------------------------------------------------*/
223 /*---------------------------------------------------------------------------*/
224
226 _fromTBBRange(const blocked_nd_range<Int32, 2>& r)
227 {
228 using BoundsType = ArrayBounds<MDDim2>;
229 using ArrayExtentType = BoundsType::ArrayExtentType;
230
231 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin()));
232 auto s0 = static_cast<Int32>(r.dim(0).size());
233 auto s1 = static_cast<Int32>(r.dim(1).size());
234 BoundsType sizes(ArrayExtentType(s0, s1));
235 return { lower_bounds, sizes };
236 }
237
239 _fromTBBRange(const blocked_nd_range<Int32, 3>& r)
240 {
241 using BoundsType = ArrayBounds<MDDim3>;
242 using ArrayExtentType = BoundsType::ArrayExtentType;
243
244 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin(), r.dim(2).begin()));
245 auto s0 = static_cast<Int32>(r.dim(0).size());
246 auto s1 = static_cast<Int32>(r.dim(1).size());
247 auto s2 = static_cast<Int32>(r.dim(2).size());
248 BoundsType sizes(ArrayExtentType(s0, s1, s2));
249 return { lower_bounds, sizes };
250 }
251
253 _fromTBBRange(const blocked_nd_range<Int32, 4>& r)
254 {
255 using BoundsType = ArrayBounds<MDDim4>;
256 using ArrayExtentType = typename BoundsType::ArrayExtentType;
257
258 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin(), r.dim(2).begin(), r.dim(3).begin()));
259 auto s0 = static_cast<Int32>(r.dim(0).size());
260 auto s1 = static_cast<Int32>(r.dim(1).size());
261 auto s2 = static_cast<Int32>(r.dim(2).size());
262 auto s3 = static_cast<Int32>(r.dim(3).size());
263 BoundsType sizes(ArrayExtentType(s0, s1, s2, s3));
264 return { lower_bounds, sizes };
265 }
266
267} // namespace
268
269/*---------------------------------------------------------------------------*/
270/*---------------------------------------------------------------------------*/
271
272class OneTBBTaskFunctor
273{
274 public:
275
276 OneTBBTaskFunctor(ITaskFunctor* functor, ITask* task)
277 : m_functor(functor)
278 , m_task(task)
279 {}
280
281 public:
282
283 void operator()() const
284 {
285 if (m_functor) {
286 ITaskFunctor* tf = m_functor;
287 m_functor = nullptr;
288 TaskContext task_context(m_task);
289 //cerr << "FUNC=" << typeid(*tf).name();
290 tf->executeFunctor(task_context);
291 }
292 }
293
294 public:
295
296 mutable ITaskFunctor* m_functor;
297 ITask* m_task;
298};
299
300/*---------------------------------------------------------------------------*/
301/*---------------------------------------------------------------------------*/
302
303class OneTBBTask
304: public ITask
305{
306 public:
307
308 static const int FUNCTOR_CLASS_SIZE = 32;
309
310 public:
311
312 explicit OneTBBTask(ITaskFunctor* f)
313 : m_functor(f)
314 {
315 m_functor = f->clone(m_functor_buf.data(), FUNCTOR_CLASS_SIZE);
316 }
317
318 public:
319
320 OneTBBTaskFunctor taskFunctor() { return OneTBBTaskFunctor(m_functor, this); }
321 void launchAndWait() override;
322 void launchAndWait(ConstArrayView<ITask*> tasks) override;
323
324 protected:
325
326 ITask* _createChildTask(ITaskFunctor* functor) override;
327
328 public:
329
330 ITaskFunctor* m_functor = nullptr;
332};
333using TBBTask = OneTBBTask;
334
335/*---------------------------------------------------------------------------*/
336/*---------------------------------------------------------------------------*/
337
338/*
339 * Do not use the local observer on the task_arena.
340 * Use the global observer on the scheduler.
341 * For the ID, use tbb::this_task_arena::current_thread_index().
342 */
343class TBBTaskImplementation
344: public ITaskImplementation
345{
346 class Impl;
347 class ParallelForExecute;
348 template <int RankValue>
349 class MDParallelForExecute;
350
351 public:
352
353 // For performance reasons, aligns to a cache line
354 // and uses padding.
355 class ARCCORE_ALIGNAS_PACKED(64) TaskThreadInfo
356 {
357 public:
358
359 TaskThreadInfo()
360 : m_task_index(-1)
361 {}
362
363 public:
364
365 void setTaskIndex(Integer v) { m_task_index = v; }
366 Integer taskIndex() const { return m_task_index; }
367
368 private:
369
370 Integer m_task_index;
371 };
372
373 /*!
374 * \brief Class for positioning TaskThreadInfo::taskIndex().
375 *
376 * Allows positioning the value of TaskThreadInfo::taskIndex()
377 * during construction and restoring the previous value
378 * in the destructor.
379 */
380 class TaskInfoLockGuard
381 {
382 public:
383
384 TaskInfoLockGuard(TaskThreadInfo* tti, Integer task_index)
385 : m_tti(tti)
386 , m_old_task_index(-1)
387 {
388 if (tti) {
389 m_old_task_index = tti->taskIndex();
390 tti->setTaskIndex(task_index);
391 }
392 }
393 ~TaskInfoLockGuard()
394 {
395 if (m_tti)
396 m_tti->setTaskIndex(m_old_task_index);
397 }
398
399 private:
400
401 TaskThreadInfo* m_tti;
402 Integer m_old_task_index;
403 };
404
405 public:
406
407 TBBTaskImplementation() = default;
408 ~TBBTaskImplementation() override;
409
410 public:
411
412 void build() {}
413 void initialize(Int32 nb_thread) override;
414 void terminate() override;
415
417 {
418 OneTBBTask* t = new OneTBBTask(f);
419 return t;
420 }
421
422 void executeParallelFor(Int32 begin, Int32 size, const ParallelLoopOptions& options, IRangeFunctor* f) final;
423 void executeParallelFor(Int32 begin, Int32 size, Integer grain_size, IRangeFunctor* f) final;
424 void executeParallelFor(Int32 begin, Int32 size, IRangeFunctor* f) final
425 {
427 }
428 void executeParallelFor(const ParallelFor1DLoopInfo& loop_info) override;
429
431 const ForLoopRunInfo& run_info,
432 IMDRangeFunctor<1>* functor) final
433 {
434 _executeMDParallelFor<1>(loop_ranges, functor, run_info);
435 }
437 const ForLoopRunInfo& run_info,
438 IMDRangeFunctor<2>* functor) final
439 {
440 _executeMDParallelFor<2>(loop_ranges, functor, run_info);
441 }
443 const ForLoopRunInfo& run_info,
444 IMDRangeFunctor<3>* functor) final
445 {
446 _executeMDParallelFor<3>(loop_ranges, functor, run_info);
447 }
449 const ForLoopRunInfo& run_info,
450 IMDRangeFunctor<4>* functor) final
451 {
452 _executeMDParallelFor<4>(loop_ranges, functor, run_info);
453 }
454
455 bool isActive() const final
456 {
457 return m_is_active;
458 }
459
461 {
462 return (nbAllowedThread() <= 1) ? 0 : _currentTaskTreadIndex();
463 }
464
465 Int32 currentTaskIndex() const final;
466
467 void printInfos(std::ostream& o) const final;
468
469 public:
470
471 /*!
472 * \brief Instance of \a TaskThreadInfo associated with the current thread.
473 *
474 * May be null if the current thread is not associated with a TBB thread
475 * or if it is outside the execution of a task or a parallel loop.
476 */
477 TaskThreadInfo* currentTaskThreadInfo() const;
478
479 private:
480
481 bool m_is_active = false;
482 Impl* m_p = nullptr;
483
484 private:
485
486 template <int RankValue> void
487 _executeMDParallelFor(const ComplexForLoopRanges<RankValue>& loop_ranges,
488 IMDRangeFunctor<RankValue>* functor,
489 const ForLoopRunInfo& run_info);
490 void _executeParallelFor(const ParallelFor1DLoopInfo& loop_info);
491};
492
493/*---------------------------------------------------------------------------*/
494/*---------------------------------------------------------------------------*/
495
496class TBBTaskImplementation::Impl
497{
498 class TaskObserver
499 : public tbb::task_scheduler_observer
500 {
501 public:
502
503 explicit TaskObserver(TBBTaskImplementation::Impl* p)
504 : tbb::task_scheduler_observer(p->m_main_arena)
505 , m_p(p)
506 {
507 }
508 void on_scheduler_entry(bool is_worker) override
509 {
510 m_p->notifyThreadCreated(is_worker);
511 }
512 void on_scheduler_exit(bool is_worker) override
513 {
514 m_p->notifyThreadDestroyed(is_worker);
515 }
517 };
518
519 public:
520
521 Impl()
522 : m_task_observer(this)
523 , m_thread_task_infos(cache_line_size)
524 {
525 m_nb_allowed_thread = tbb::info::default_concurrency();
526 _init();
527 }
528 Impl(Int32 nb_thread)
529 : m_main_arena(nb_thread)
530 , m_task_observer(this)
531 , m_thread_task_infos(cache_line_size)
532 {
533 m_nb_allowed_thread = nb_thread;
534 _init();
535 }
536
537 public:
538
539 Int32 nbAllowedThread() const { return m_nb_allowed_thread; }
540 TaskThreadInfo* threadTaskInfo(Integer index) { return &m_thread_task_infos[index]; }
541
542 private:
543
544 Int32 m_nb_allowed_thread = 0;
545
546 public:
547
548 void terminate()
549 {
550 for (auto x : m_sub_arena_list) {
551 if (x)
552 x->terminate();
553 delete x;
554 }
555 m_sub_arena_list.clear();
556 m_main_arena.terminate();
557 m_task_observer.observe(false);
558 oneapi::tbb::finalize(m_task_scheduler_handle);
559 }
560
561 public:
562
563 void notifyThreadCreated(bool is_worker)
564 {
565 std::thread::id my_thread_id = std::this_thread::get_id();
566
567 // With OneTBB, this method is called every time we enter
568 // our 'task_arena'. Since the notification method should only be called once,
569 // we use a set to keep track of the threads already created.
570 // NOTE: This method cannot be used with the historical TBB version
571 // (2018) because this 'contains' method does not exist
572 if (m_constructed_thread_map.contains(my_thread_id))
573 return;
574 m_constructed_thread_map.insert(my_thread_id);
575
576 {
577 if (TaskFactory::verboseLevel() >= 1) {
578 std::ostringstream ostr;
579 ostr << "TBB: CREATE THREAD"
580 << " nb_allowed=" << m_nb_allowed_thread
581 << " tbb_default_allowed=" << tbb::info::default_concurrency()
582 << " id=" << my_thread_id
583 << " arena_id=" << _currentTaskTreadIndex()
584 << " is_worker=" << is_worker
585 << "\n";
586 std::cout << ostr.str();
587 }
588 TaskFactoryInternal::notifyThreadCreated();
589 }
590 }
591
//! Called when a thread leaves the arena; intentionally does nothing.
void notifyThreadDestroyed([[maybe_unused]] bool is_worker)
{
  // With OneTBB, this method is called every time a thread exits the main
  // arena, which does not correspond to the real destruction of the thread.
  // The notification is therefore ignored.
  // TODO: find a way to be notified of the actual thread destruction.
}
599
600 private:
601
602#if TBB_VERSION_MAJOR > 2021 || (TBB_VERSION_MAJOR == 2021 && TBB_VERSION_MINOR > 5)
603 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = oneapi::tbb::attach();
604#else
605 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = tbb::task_scheduler_handle::get();
606#endif
607
608 public:
609
610 tbb::task_arena m_main_arena;
611 //! Array whose i-th element contains the tbb::task_arena for \a i thread.
612 std::vector<tbb::task_arena*> m_sub_arena_list;
613
614 private:
615
616 TaskObserver m_task_observer;
617 std::mutex m_thread_created_mutex;
618 std::vector<TaskThreadInfo> m_thread_task_infos;
619 tbb::concurrent_set<std::thread::id> m_constructed_thread_map;
620 void _init()
621 {
622 ConcurrencyBase::_setMaxAllowedThread(m_nb_allowed_thread);
623
624 if (TaskFactory::verboseLevel() >= 1) {
625 std::cout << "TBB: TBBTaskImplementationInit nb_allowed_thread=" << m_nb_allowed_thread
626 << " id=" << std::this_thread::get_id()
627 << " version=" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR
628 << "\n";
629 }
630 m_thread_task_infos.resize(m_nb_allowed_thread);
631 m_task_observer.observe(true);
632 Integer max_arena_size = m_nb_allowed_thread;
633 // Artificially limit the number of tbb::task_arena
634 // to avoid having too many allocated objects.
635 if (max_arena_size > 512)
636 max_arena_size = 512;
637 if (max_arena_size < 2)
638 max_arena_size = 2;
639 m_sub_arena_list.resize(max_arena_size);
640 m_sub_arena_list[0] = m_sub_arena_list[1] = nullptr;
641 for (Integer i = 2; i < max_arena_size; ++i)
642 m_sub_arena_list[i] = new tbb::task_arena(i);
643 }
644};
645
646/*---------------------------------------------------------------------------*/
647/*---------------------------------------------------------------------------*/
648
649/*!
650 * \brief Executor for a 1D loop.
651 */
652class TBBParallelFor
653{
654 public:
655
656 TBBParallelFor(IRangeFunctor* f, Int32 nb_allowed_thread, ForLoopOneExecStat* stat_info)
657 : m_functor(f)
658 , m_stat_info(stat_info)
659 , m_nb_allowed_thread(nb_allowed_thread)
660 {}
661
662 public:
663
664 void operator()(tbb::blocked_range<Integer>& range) const
665 {
666#ifdef ARCCORE_CHECK
667 if (TaskFactory::verboseLevel() >= 3) {
668 std::ostringstream o;
669 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
670 << " id=" << std::this_thread::get_id()
671 << " max_allowed=" << m_nb_allowed_thread
672 << " range_begin=" << range.begin() << " range_size=" << range.size()
673 << "\n";
674 std::cout << o.str();
675 std::cout.flush();
676 }
677
678 int tbb_index = _currentTaskTreadIndex();
679 if (tbb_index < 0 || tbb_index >= m_nb_allowed_thread)
680 ARCCORE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
681 tbb_index, m_nb_allowed_thread);
682#endif
683
684 if (m_stat_info)
685 m_stat_info->incrementNbChunk();
686 m_functor->executeFunctor(range.begin(), CheckedConvert::toInteger(range.size()));
687 }
688
689 private:
690
691 IRangeFunctor* m_functor;
692 ForLoopOneExecStat* m_stat_info = nullptr;
693 Int32 m_nb_allowed_thread;
694};
695
696/*---------------------------------------------------------------------------*/
697/*---------------------------------------------------------------------------*/
698
699/*!
700 * \brief Executor for a multi-dimensional loop.
701 */
702template <int RankValue>
703class TBBMDParallelFor
704{
705 public:
706
707 TBBMDParallelFor(IMDRangeFunctor<RankValue>* f, Int32 nb_allowed_thread, ForLoopOneExecStat* stat_info)
708 : m_functor(f)
709 , m_stat_info(stat_info)
710 , m_nb_allowed_thread(nb_allowed_thread)
711 {}
712
713 public:
714
715 void operator()(blocked_nd_range<Int32, RankValue>& range) const
716 {
717#ifdef ARCCORE_CHECK
718 if (TaskFactory::verboseLevel() >= 3) {
719 std::ostringstream o;
720 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
721 << " id=" << std::this_thread::get_id()
722 << " max_allowed=" << m_nb_allowed_thread
723 << " MDFor ";
724 for (Int32 i = 0; i < RankValue; ++i) {
725 auto r0 = static_cast<Int32>(range.dim(i).begin());
726 auto r1 = static_cast<Int32>(range.dim(i).size());
727 o << " range" << i << " (begin=" << r0 << " size=" << r1 << ")";
728 }
729 o << "\n";
730 std::cout << o.str();
731 std::cout.flush();
732 }
733
734 int tbb_index = _currentTaskTreadIndex();
735 if (tbb_index < 0 || tbb_index >= m_nb_allowed_thread)
736 ARCCORE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
737 tbb_index, m_nb_allowed_thread);
738#endif
739
740 if (m_stat_info)
741 m_stat_info->incrementNbChunk();
742 m_functor->executeFunctor(_fromTBBRange(range));
743 }
744
745 private:
746
747 IMDRangeFunctor<RankValue>* m_functor = nullptr;
748 ForLoopOneExecStat* m_stat_info = nullptr;
749 Int32 m_nb_allowed_thread;
750};
751
752/*---------------------------------------------------------------------------*/
753/*---------------------------------------------------------------------------*/
754
755/*!
756 * \brief Deterministic implementation of ParallelFor.
757 *
758 * The implementation is deterministic in the sense that it only depends on
759 * the iteration interval (m_begin_index and m_size),
760 * the specified number of threads (\a m_nb_thread), and the grain size
761 * (\a m_grain_size).
762 *
763 * The algorithm used is similar to the one used by OpenMP for a
764 * parallel for with the static option: the iteration interval
765 * is divided into several blocks and each block is assigned to a task based
766 * on a round-robin algorithm.
767 * To determine the number of blocks, two cases are possible:
768 * - if \a m_grain_size is not specified, the iteration interval
769 * is divided into a number of blocks equal to the number of threads used.
770 * - if \a m_grain_size is specified, the number of blocks will be equal
771 * to \a m_size divided by \a m_grain_size.
772 */
773class TBBDeterministicParallelFor
774{
775 public:
776
777 TBBDeterministicParallelFor(TBBTaskImplementation* impl, const TBBParallelFor& tbb_for,
778 Integer begin_index, Integer size, Integer grain_size, Integer nb_thread)
779 : m_impl(impl)
780 , m_tbb_for(tbb_for)
781 , m_nb_thread(nb_thread)
782 , m_begin_index(begin_index)
783 , m_size(size)
784 , m_grain_size(grain_size)
785 , m_nb_block(0)
786 , m_block_size(0)
787 , m_nb_block_per_thread(0)
788 {
789 if (m_nb_thread < 1)
790 m_nb_thread = 1;
791
792 if (m_grain_size > 0) {
793 m_block_size = m_grain_size;
794 if (m_block_size > 0) {
795 m_nb_block = m_size / m_block_size;
796 if ((m_size % m_block_size) != 0)
797 ++m_nb_block;
798 }
799 else
800 m_nb_block = 1;
801 m_nb_block_per_thread = m_nb_block / m_nb_thread;
802 if ((m_nb_block % m_nb_thread) != 0)
803 ++m_nb_block_per_thread;
804 }
805 else {
806 if (m_nb_block < 1)
807 m_nb_block = m_nb_thread;
808 m_block_size = m_size / m_nb_block;
809 m_nb_block_per_thread = 1;
810 }
811 if (TaskFactory::verboseLevel() >= 2) {
812 std::cout << "TBBDeterministicParallelFor: BEGIN=" << m_begin_index << " size=" << m_size
813 << " grain_size=" << m_grain_size
814 << " nb_block=" << m_nb_block << " nb_thread=" << m_nb_thread
815 << " nb_block_per_thread=" << m_nb_block_per_thread
816 << " block_size=" << m_block_size
817 << " block_size*nb_block=" << m_block_size * m_nb_block << '\n';
818 }
819 }
820
821 public:
822
823 /*!
824 * \brief Operator for a given thread.
825 *
826 * Generally, range.size() will be one, because a thread will only
827 * process one iteration, but this is not guaranteed by TBB.
828 */
829 void operator()(tbb::blocked_range<Integer>& range) const
830 {
831 auto nb_iter = static_cast<Integer>(range.size());
832 for (Integer i = 0; i < nb_iter; ++i) {
833 Integer task_id = range.begin() + i;
834 for (Integer k = 0, kn = m_nb_block_per_thread; k < kn; ++k) {
835 Integer block_id = task_id + (k * m_nb_thread);
836 if (block_id < m_nb_block)
837 _doBlock(task_id, block_id);
838 }
839 }
840 }
841
842 void _doBlock(Integer task_id, Integer block_id) const
843 {
844 TBBTaskImplementation::TaskInfoLockGuard guard(m_impl->currentTaskThreadInfo(), task_id);
845
846 Integer iter_begin = block_id * m_block_size;
847 Integer iter_size = m_block_size;
848 if ((block_id + 1) == m_nb_block) {
849 // For the last block, the size is the number of remaining elements
850 iter_size = m_size - iter_begin;
851 }
852 iter_begin += m_begin_index;
853#ifdef ARCCORE_CHECK
854 if (TaskFactory::verboseLevel() >= 3) {
855 std::ostringstream o;
856 o << "TBB: DoBlock: BLOCK task_id=" << task_id << " block_id=" << block_id
857 << " iter_begin=" << iter_begin << " iter_size=" << iter_size << '\n';
858 std::cout << o.str();
859 std::cout.flush();
860 }
861#endif
862 if (iter_size > 0) {
863 auto r = tbb::blocked_range<int>(iter_begin, iter_begin + iter_size);
864 m_tbb_for(r);
865 }
866 }
867
868 private:
869
870 TBBTaskImplementation* m_impl;
871 const TBBParallelFor& m_tbb_for;
872 Integer m_nb_thread;
873 Integer m_begin_index;
874 Integer m_size;
875 Integer m_grain_size;
876 Integer m_nb_block;
877 Integer m_block_size;
878 Integer m_nb_block_per_thread;
879};
880
881/*---------------------------------------------------------------------------*/
882/*---------------------------------------------------------------------------*/
883
885{
886 public:
887
888 ParallelForExecute(TBBTaskImplementation* impl, const ParallelLoopOptions& options,
889 Integer begin, Integer size, IRangeFunctor* f, ForLoopOneExecStat* stat_info)
890 : m_impl(impl)
891 , m_begin(begin)
892 , m_size(size)
893 , m_functor(f)
894 , m_options(options)
895 , m_stat_info(stat_info)
896 {}
897
898 public:
899
900 void operator()() const
901 {
902 Integer nb_thread = m_options.maxThread();
903 TBBParallelFor pf(m_functor, nb_thread, m_stat_info);
904 Integer gsize = m_options.grainSize();
905 tbb::blocked_range<Integer> range(m_begin, m_begin + m_size);
906 if (TaskFactory::verboseLevel() >= 1)
907 std::cout << "TBB: TBBTaskImplementationInit ParallelForExecute begin=" << m_begin
908 << " size=" << m_size << " gsize=" << gsize
909 << " partitioner=" << (int)m_options.partitioner()
910 << " nb_thread=" << nb_thread
911 << " has_stat_info=" << (m_stat_info != nullptr)
912 << '\n';
913
914 if (gsize > 0)
915 range = tbb::blocked_range<Integer>(m_begin, m_begin + m_size, gsize);
916
917 if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Static) {
918 tbb::parallel_for(range, pf, tbb::static_partitioner());
919 }
920 else if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Deterministic) {
921 tbb::blocked_range<Integer> range2(0, nb_thread, 1);
922 TBBDeterministicParallelFor dpf(m_impl, pf, m_begin, m_size, gsize, nb_thread);
923 tbb::parallel_for(range2, dpf);
924 }
925 else
926 tbb::parallel_for(range, pf);
927 }
928
929 private:
930
931 TBBTaskImplementation* m_impl = nullptr;
932 Integer m_begin;
933 Integer m_size;
934 IRangeFunctor* m_functor = nullptr;
935 ParallelLoopOptions m_options;
936 ForLoopOneExecStat* m_stat_info = nullptr;
937};
938
939/*---------------------------------------------------------------------------*/
940/*---------------------------------------------------------------------------*/
941
942template <int RankValue>
943class TBBTaskImplementation::MDParallelForExecute
944{
945 public:
946
947 MDParallelForExecute(TBBTaskImplementation* impl,
948 const ParallelLoopOptions& options,
950 IMDRangeFunctor<RankValue>* f, [[maybe_unused]] ForLoopOneExecStat* stat_info)
951 : m_impl(impl)
952 , m_tbb_range(_toTBBRange(range))
953 , m_functor(f)
954 , m_options(options)
955 , m_stat_info(stat_info)
956 {
957 // We cannot modify the values of a tbb::blocked_rangeNd instance.
958 // We must therefore reconstruct it completely.
959 FixedArray<size_t, RankValue> all_grain_sizes;
960 Int32 gsize = m_options.grainSize();
961 if (gsize > 0) {
962 // If the grain size is not zero, it must be distributed
963 // across all dimensions. We start with the last one.
964 // TODO: check why performance is sometimes
965 // lower than what we get using a static partitioner.
966 constexpr bool is_verbose = false;
967 std::array<Int32, RankValue> range_extents = range.extents().asStdArray();
968 double ratio = static_cast<double>(gsize) / static_cast<double>(range.nbElement());
969 if constexpr (is_verbose) {
970 std::cout << "GSIZE=" << gsize << " rank=" << RankValue << " ratio=" << ratio;
971 for (Int32 i = 0; i < RankValue; ++i)
972 std::cout << " range" << i << "=" << range_extents[i];
973 std::cout << "\n";
974 }
975 Int32 index = RankValue - 1;
976 Int32 remaining_grain = gsize;
977 for (; index >= 0; --index) {
978 Int32 current = range_extents[index];
979 if constexpr (is_verbose)
980 std::cout << "Check index=" << index << " remaining=" << remaining_grain << " current=" << current << "\n";
981 if (remaining_grain > current) {
982 all_grain_sizes[index] = current;
983 remaining_grain /= current;
984 }
985 else {
986 all_grain_sizes[index] = remaining_grain;
987 break;
988 }
989 }
990 for (Int32 i = 0; i < index; ++i)
991 all_grain_sizes[i] = 1;
992 if constexpr (is_verbose) {
993 for (Int32 i = 0; i < RankValue; ++i)
994 std::cout << " grain" << i << "=" << all_grain_sizes[i];
995 std::cout << "\n";
996 }
997 m_tbb_range = _toTBBRangeWithGrain(m_tbb_range, all_grain_sizes);
998 }
999 }
1000
1001 public:
1002
1003 void operator()() const
1004 {
1005 Integer nb_thread = m_options.maxThread();
1006 TBBMDParallelFor<RankValue> pf(m_functor, nb_thread, m_stat_info);
1007
1008 if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Static) {
1009 tbb::parallel_for(m_tbb_range, pf, tbb::static_partitioner());
1010 }
1011 else if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Deterministic) {
1012 // TODO: implement deterministic mode
1013 ARCCORE_THROW(NotImplementedException, "ParallelLoopOptions::Partitioner::Deterministic for multi-dimensionnal loops");
1014 //tbb::blocked_range<Integer> range2(0,nb_thread,1);
1015 //TBBDeterministicParallelFor dpf(m_impl,pf,m_begin,m_size,gsize,nb_thread);
1016 //tbb::parallel_for(range2,dpf);
1017 }
1018 else {
1019 tbb::parallel_for(m_tbb_range, pf);
1020 }
1021 }
1022
1023 private:
1024
1025 TBBTaskImplementation* m_impl = nullptr;
1026 blocked_nd_range<Int32, RankValue> m_tbb_range;
1027 IMDRangeFunctor<RankValue>* m_functor = nullptr;
1028 ParallelLoopOptions m_options;
1029 ForLoopOneExecStat* m_stat_info = nullptr;
1030};
1031
1032/*---------------------------------------------------------------------------*/
1033/*---------------------------------------------------------------------------*/
1034
1035TBBTaskImplementation::
1036~TBBTaskImplementation()
1037{
1038 delete m_p;
1039}
1040
1041/*---------------------------------------------------------------------------*/
1042/*---------------------------------------------------------------------------*/
1043
1044void TBBTaskImplementation::
1045initialize(Int32 nb_thread)
1046{
1047 if (nb_thread < 0)
1048 nb_thread = 0;
1049 m_is_active = (nb_thread != 1);
1050 if (nb_thread != 0)
1051 m_p = new Impl(nb_thread);
1052 else
1053 m_p = new Impl();
1054 ParallelLoopOptions opts = TaskFactory::defaultParallelLoopOptions();
1055 opts.setMaxThread(nbAllowedThread());
1057}
1058
1059/*---------------------------------------------------------------------------*/
1060/*---------------------------------------------------------------------------*/
1061
1062void TBBTaskImplementation::
1063terminate()
1064{
1065 m_p->terminate();
1066}
1067
1068/*---------------------------------------------------------------------------*/
1069/*---------------------------------------------------------------------------*/
1070
1072printInfos(std::ostream& o) const
1073{
1074 o << "OneTBBTaskImplementation"
1075 << " version=" << TBB_VERSION_STRING
1076 << " interface=" << TBB_INTERFACE_VERSION
1077 << " runtime_interface=" << TBB_runtime_interface_version();
1078}
1079
1080/*---------------------------------------------------------------------------*/
1081/*---------------------------------------------------------------------------*/
1082
1083void TBBTaskImplementation::
1084_executeParallelFor(const ParallelFor1DLoopInfo& loop_info)
1085{
1086 ScopedExecInfo sei(loop_info.runInfo());
1087 ForLoopOneExecStat* stat_info = sei.statInfo();
1088 ::Arcane::Impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1089
1090 Int32 begin = loop_info.beginIndex();
1091 Int32 size = loop_info.size();
1092 ParallelLoopOptions options = loop_info.runInfo().options().value_or(TaskFactory::defaultParallelLoopOptions());
1093 IRangeFunctor* f = loop_info.functor();
1095
1096 Integer max_thread = options.maxThread();
1097 Integer nb_allowed_thread = m_p->nbAllowedThread();
1098 if (max_thread < 0)
1099 max_thread = nb_allowed_thread;
1100
1101 if (TaskFactory::verboseLevel() >= 1)
1102 std::cout << "TBB: TBBTaskImplementation executeParallelFor begin=" << begin
1103 << " size=" << size << " max_thread=" << max_thread
1104 << " grain_size=" << options.grainSize()
1105 << " nb_allowed=" << nb_allowed_thread << '\n';
1106
1107 // In sequential execution, call the method \a f directly.
1108 if (max_thread == 1 || max_thread == 0) {
1109 f->executeFunctor(begin, size);
1110 return;
1111 }
1112
1113 // Replace the uninitialized values of \a options with those of \a m_default_loop_options
1114 ParallelLoopOptions true_options(options);
1115 true_options.mergeUnsetValues(TaskFactory::defaultParallelLoopOptions());
1116 true_options.setMaxThread(max_thread);
1117
1118 ParallelForExecute pfe(this, true_options, begin, size, f, stat_info);
1119
1120 tbb::task_arena* used_arena = nullptr;
1121 if (max_thread < nb_allowed_thread && max_thread < m_p->m_sub_arena_list.size())
1122 used_arena = m_p->m_sub_arena_list[max_thread];
1123 if (!used_arena)
1124 used_arena = &(m_p->m_main_arena);
1125 used_arena->execute(pfe);
1126}
1127
1128/*---------------------------------------------------------------------------*/
1129/*---------------------------------------------------------------------------*/
1130
1133{
1134 _executeParallelFor(loop_info);
1135}
1136
1137/*---------------------------------------------------------------------------*/
1138/*---------------------------------------------------------------------------*/
1139
1140/*!
1141 * \brief Execution of an N-dimensional loop.
1142 *
1143 * \warning The current implementation does not take into account \a options
1144 * for loops other than one dimension.
1145 */
1146template <int RankValue> void TBBTaskImplementation::
1147_executeMDParallelFor(const ComplexForLoopRanges<RankValue>& loop_ranges,
1149 const ForLoopRunInfo& run_info)
1150{
1151 ParallelLoopOptions options;
1152 if (run_info.options().has_value())
1153 options = run_info.options().value();
1154
1155 ScopedExecInfo sei(run_info);
1156 ForLoopOneExecStat* stat_info = sei.statInfo();
1157 ::Arcane::Impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1158
1159 if (TaskFactory::verboseLevel() >= 1) {
1160 std::cout << "TBB: TBBTaskImplementation executeMDParallelFor nb_dim=" << RankValue
1161 << " nb_element=" << loop_ranges.nbElement()
1162 << " grain_size=" << options.grainSize()
1163 << " name=" << run_info.traceInfo().traceInfo()
1164 << " has_stat_info=" << (stat_info != nullptr)
1165 << '\n';
1166 }
1167
1168 Integer max_thread = options.maxThread();
1169 // In sequential execution, call the method \a f directly.
1170 if (max_thread == 1 || max_thread == 0) {
1171 functor->executeFunctor(loop_ranges);
1172 return;
1173 }
1174
1175 // Replace the uninitialized values of \a options with those of \a m_default_loop_options
1176 ParallelLoopOptions true_options(options);
1177 true_options.mergeUnsetValues(TaskFactory::defaultParallelLoopOptions());
1178
1179 Integer nb_allowed_thread = m_p->nbAllowedThread();
1180 if (max_thread < 0)
1181 max_thread = nb_allowed_thread;
1182 tbb::task_arena* used_arena = nullptr;
1183 if (max_thread < nb_allowed_thread)
1184 used_arena = m_p->m_sub_arena_list[max_thread];
1185 if (!used_arena)
1186 used_arena = &(m_p->m_main_arena);
1187
1188 // For now for dimension 1, use the historical 'ParallelForExecute'
1189 if constexpr (RankValue == 1) {
1190 auto range_1d = _toTBBRange(loop_ranges);
1191 auto x1 = [&](Integer begin, Integer size) {
1192 functor->executeFunctor(makeLoopRanges(ForLoopRange(begin, size)));
1193 //functor->executeFunctor(ComplexForLoopRanges<1>(begin,size));
1194 };
1195 LambdaRangeFunctorT<decltype(x1)> functor_1d(x1);
1196 Integer begin1 = CheckedConvert::toInteger(range_1d.dim(0).begin());
1197 Integer size1 = CheckedConvert::toInteger(range_1d.dim(0).size());
1198 ParallelForExecute pfe(this, true_options, begin1, size1, &functor_1d, stat_info);
1199 used_arena->execute(pfe);
1200 }
1201 else {
1202 MDParallelForExecute<RankValue> pfe(this, true_options, loop_ranges, functor, stat_info);
1203 used_arena->execute(pfe);
1204 }
1205}
1206
1207/*---------------------------------------------------------------------------*/
1208/*---------------------------------------------------------------------------*/
1209
1211executeParallelFor(Integer begin, Integer size, Integer grain_size, IRangeFunctor* f)
1212{
1214 opts.setGrainSize(grain_size);
1215 ForLoopRunInfo run_info(opts);
1216 executeParallelFor(ParallelFor1DLoopInfo(begin, size, f, run_info));
1217}
1218
1219/*---------------------------------------------------------------------------*/
1220/*---------------------------------------------------------------------------*/
1221
1227
1228/*---------------------------------------------------------------------------*/
1229/*---------------------------------------------------------------------------*/
1230
1231TBBTaskImplementation::TaskThreadInfo* TBBTaskImplementation::
1233{
1234 Int32 thread_id = currentTaskThreadIndex();
1235 if (thread_id >= 0)
1236 return m_p->threadTaskInfo(thread_id);
1237 return nullptr;
1238}
1239
1240/*---------------------------------------------------------------------------*/
1241/*---------------------------------------------------------------------------*/
1242
1244currentTaskIndex() const
1245{
1246 Int32 thread_id = currentTaskThreadIndex();
1247 // This test was added to bypass a bug in one of the versions
1248 // of OneTBB. It is probably useless today (2025)
1249 if (thread_id < 0 || thread_id >= m_p->nbAllowedThread())
1250 return 0;
1251 TBBTaskImplementation::TaskThreadInfo* tti = currentTaskThreadInfo();
1252 if (tti) {
1253 Int32 task_index = tti->taskIndex();
1254 if (task_index >= 0)
1255 return task_index;
1256 }
1257 return thread_id;
1258}
1259
1260/*---------------------------------------------------------------------------*/
1261/*---------------------------------------------------------------------------*/
1262
1265{
1266 tbb::task_group task_group;
1267 task_group.run(taskFunctor());
1268 task_group.wait();
1269 delete this;
1270}
1271
1272/*---------------------------------------------------------------------------*/
1273/*---------------------------------------------------------------------------*/
1274
1277{
1278 tbb::task_group task_group;
1279 Integer n = tasks.size();
1280 if (n == 0)
1281 return;
1282
1283 //set_ref_count(n+1);
1284 for (Integer i = 0; i < n; ++i) {
1285 auto* t = static_cast<OneTBBTask*>(tasks[i]);
1286 task_group.run(t->taskFunctor());
1287 }
1288 task_group.wait();
1289 for (Integer i = 0; i < n; ++i) {
1290 auto* t = static_cast<OneTBBTask*>(tasks[i]);
1291 delete t;
1292 }
1293}
1294
1295/*---------------------------------------------------------------------------*/
1296/*---------------------------------------------------------------------------*/
1297
1298ITask* OneTBBTask::
1299_createChildTask(ITaskFunctor* functor)
1300{
1301 auto* t = new OneTBBTask(functor);
1302 return t;
1303}
1304
1305/*---------------------------------------------------------------------------*/
1306/*---------------------------------------------------------------------------*/
1307
1308ARCANE_DI_REGISTER_PROVIDER(TBBTaskImplementation,
1309 DependencyInjection::ProviderProperty("TBBTaskImplementation"),
1310 ARCANE_DI_INTERFACES(ITaskImplementation),
1311 ARCANE_DI_EMPTY_CONSTRUCTOR());
1312
1313/*---------------------------------------------------------------------------*/
1314/*---------------------------------------------------------------------------*/
1315
1316} // End namespace Arcane
1317
1318/*---------------------------------------------------------------------------*/
1319/*---------------------------------------------------------------------------*/
#define ARCCORE_FATAL(...)
Macro throwing a FatalErrorException.
#define ARCCORE_THROW(exception_class,...)
Macro to throw an exception with formatting.
#define ARCCORE_CHECK_POINTER(ptr)
Macro that returns the pointer ptr if it is not null or throws an exception if it is null.
Constant view of an array of type T.
constexpr Integer size() const noexcept
Number of elements in the array.
Class to manage the profiling of a single loop execution.
Loop execution information.
Interface of a functor on a multi-dimensional iteration interval of dimension RankValue.
virtual void executeFunctor(const ComplexForLoopRanges< RankValue > &loop_range)=0
Executes the associated method.
Interface of a functor on an iteration interval.
virtual void executeFunctor(Int32 begin, Int32 size)=0
Executes the associated method.
virtual void executeFunctor(const TaskContext &tc)=0
Executes the associated method.
Int32 nbAllowedThread() const
Maximum number of threads used to manage tasks.
Interface for a concurrent task.
Definition Task.h:194
Class allowing retrieval of the time spent between the constructor call and the destructor call.
void launchAndWait() override
Launches the task and blocks until it finishes.
Characteristics of a multi-thread 1D loop.
Definition ParallelFor.h:35
Execution options for a parallel loop in multi-threading.
Integer grainSize() const
Size of an iteration interval.
Int32 maxThread() const
Maximum number of allowed threads.
void setGrainSize(Integer v)
Sets the size (approximate) of an iteration interval.
static bool hasProfiling()
Indicates if profiling is active.
Deterministic implementation of ParallelFor.
void operator()(tbb::blocked_range< Integer > &range) const
Operator for a given thread.
Executor for a 1D loop.
std::vector< tbb::task_arena * > m_sub_arena_list
Array whose i-th element contains the tbb::task_arena for i thread.
Class for positioning TaskThreadInfo::taskIndex().
Int32 currentTaskThreadIndex() const final
Implementation of TaskFactory::currentTaskThreadIndex().
void executeParallelFor(const ComplexForLoopRanges< 1 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 1 > *functor) final
Executes a 1D loop in parallel.
void executeParallelFor(Int32 begin, Int32 size, IRangeFunctor *f) final
Executes the functor f in parallel.
ITask * createRootTask(ITaskFunctor *f) override
Creates a root task. The implementation must copy the value of f, which is either a TaskFunctor or a ...
void printInfos(std::ostream &o) const final
Prints information about the runtime used.
void executeParallelFor(Int32 begin, Int32 size, const ParallelLoopOptions &options, IRangeFunctor *f) final
Executes the functor f in parallel.
void executeParallelFor(const ComplexForLoopRanges< 3 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 3 > *functor) final
Executes a 3D loop in parallel.
void executeParallelFor(const ComplexForLoopRanges< 4 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 4 > *functor) final
Executes a 4D loop in parallel.
TaskThreadInfo * currentTaskThreadInfo() const
Instance of TaskThreadInfo associated with the current thread.
bool isActive() const final
Indicates if the implementation is active.
Int32 currentTaskIndex() const final
Implementation of TaskFactory::currentTaskIndex().
void executeParallelFor(const ComplexForLoopRanges< 2 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 2 > *functor) final
Executes a 2D loop in parallel.
Execution context of a task.
Definition Task.h:50
static const ParallelLoopOptions & defaultParallelLoopOptions()
Default parallel loop execution options.
static Integer verboseLevel()
Verbosity level.
static void setDefaultParallelLoopOptions(const ParallelLoopOptions &v)
Sets the default parallel loop execution options.
static Int32 currentTaskThreadIndex()
Index (between 0 and nbAllowedThread()-1) of the thread executing the current task.
String getStackTrace()
Returns a string containing the call stack.
-- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature --
Int32 Integer
Type representing an integer.
SimpleForLoopRanges< 1 > makeLoopRanges(Int32 n1)
Creates an iteration range [0,n1[, [0,n2[.
std::int32_t Int32
Signed integer type of 32 bits.