14#include "arccore/base/NotImplementedException.h"
15#include "arccore/base/IFunctor.h"
16#include "arccore/base/ForLoopRanges.h"
17#include "arccore/base/IObservable.h"
18#include "arccore/base/PlatformUtils.h"
19#include "arccore/base/FixedArray.h"
20#include "arccore/base/Profiling.h"
21#include "arccore/base/CheckedConvert.h"
22#include "arccore/base/FixedArray.h"
23#include "arccore/base/ForLoopRunInfo.h"
24#include "arccore/base/internal/DependencyInjection.h"
26#include "arccore/concurrency/IThreadImplementation.h"
27#include "arccore/concurrency/Task.h"
28#include "arccore/concurrency/ITaskImplementation.h"
29#include "arccore/concurrency/TaskFactory.h"
30#include "arccore/concurrency/ParallelFor.h"
31#include "arccore/concurrency/internal/TaskFactoryInternal.h"
39#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
47#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
49#include <oneapi/tbb/concurrent_set.h>
50#include <oneapi/tbb/global_control.h>
71#if (TBB_VERSION_MAJOR > 2022) || (TBB_VERSION_MAJOR == 2022 && TBB_VERSION_MINOR > 0) || defined __TBB_blocked_nd_range_H
75template <
typename Value,
unsigned int N>
76using blocked_nd_range = tbb::blocked_nd_range<Value, N>;
80template <
typename Value,
unsigned int N>
81using blocked_nd_range = tbb::blocked_rangeNd<Value, N>;
90 constexpr Int32 cache_line_size = 64;
105 explicit ScopedExecInfo(
const ForLoopRunInfo& run_info)
106 : m_run_info(run_info)
112 ForLoopOneExecStat* ptr = run_info.execStat();
114 m_stat_info_ptr = ptr;
115 m_use_own_run_info =
false;
118 m_stat_info_ptr = isStatActive() ? &m_stat_info :
nullptr;
122#ifdef PRINT_STAT_INFO
123 if (m_stat_info_ptr) {
124 bool is_valid = m_run_info.traceInfo().isValid();
126 std::cout <<
"ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
130 std::cout <<
"ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
131 <<
" trace_name=" << m_run_info.traceInfo().traceInfo().name() <<
"\n";
134 if (m_stat_info_ptr && m_use_own_run_info) {
135 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*m_stat_info_ptr, m_run_info.traceInfo());
// Returns the pointer to the loop-execution statistics tracked by this
// instance. The pointer may be null: the member defaults to nullptr and the
// constructor assigns nullptr when isStatActive() is false.
141 ForLoopOneExecStat* statInfo()
const {
return m_stat_info_ptr; }
// Returns the m_use_own_run_info flag. The flag defaults to true and is set
// to false in the constructor on the path that adopts the ForLoopOneExecStat
// obtained from run_info.execStat().
// NOTE(review): the condition guarding that path is not visible in this
// listing — confirm against the full constructor.
142 bool isOwn()
const {
return m_use_own_run_info; }
146 ForLoopOneExecStat m_stat_info;
147 ForLoopOneExecStat* m_stat_info_ptr =
nullptr;
148 ForLoopRunInfo m_run_info;
150 bool m_use_own_run_info =
true;
156 inline int _currentTaskTreadIndex()
162 return tbb::this_task_arena::current_thread_index();
165 inline blocked_nd_range<Int32, 1>
168 return { { r.lowerBound<0>(), r.upperBound<0>() } };
171 inline blocked_nd_range<Int32, 2>
174 return { { r.lowerBound<0>(), r.upperBound<0>() },
175 { r.lowerBound<1>(), r.upperBound<1>() } };
178 inline blocked_nd_range<Int32, 3>
181 return { { r.lowerBound<0>(), r.upperBound<0>() },
182 { r.lowerBound<1>(), r.upperBound<1>() },
183 { r.lowerBound<2>(), r.upperBound<2>() } };
186 inline blocked_nd_range<Int32, 4>
189 return { { r.lowerBound<0>(), r.upperBound<0>() },
190 { r.lowerBound<1>(), r.upperBound<1>() },
191 { r.lowerBound<2>(), r.upperBound<2>() },
192 { r.lowerBound<3>(), r.upperBound<3>() } };
198 inline blocked_nd_range<Int32, 2>
201 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
202 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] } };
205 inline blocked_nd_range<Int32, 3>
208 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
209 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] },
210 { r.dim(2).begin(), r.dim(2).end(), grain_sizes[2] } };
213 inline blocked_nd_range<Int32, 4>
216 return { { r.dim(0).begin(), r.dim(0).end(), grain_sizes[0] },
217 { r.dim(1).begin(), r.dim(1).end(), grain_sizes[1] },
218 { r.dim(2).begin(), r.dim(2).end(), grain_sizes[2] },
219 { r.dim(3).begin(), r.dim(3).end(), grain_sizes[3] } };
226 _fromTBBRange(
const blocked_nd_range<Int32, 2>& r)
229 using ArrayExtentType = BoundsType::ArrayExtentType;
231 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin()));
232 auto s0 =
static_cast<Int32>(r.dim(0).size());
233 auto s1 =
static_cast<Int32>(r.dim(1).size());
234 BoundsType sizes(ArrayExtentType(s0, s1));
235 return { lower_bounds, sizes };
239 _fromTBBRange(
const blocked_nd_range<Int32, 3>& r)
242 using ArrayExtentType = BoundsType::ArrayExtentType;
244 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin(), r.dim(2).begin()));
245 auto s0 =
static_cast<Int32>(r.dim(0).size());
246 auto s1 =
static_cast<Int32>(r.dim(1).size());
247 auto s2 =
static_cast<Int32>(r.dim(2).size());
248 BoundsType sizes(ArrayExtentType(s0, s1, s2));
249 return { lower_bounds, sizes };
253 _fromTBBRange(
const blocked_nd_range<Int32, 4>& r)
256 using ArrayExtentType =
typename BoundsType::ArrayExtentType;
258 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(), r.dim(1).begin(), r.dim(2).begin(), r.dim(3).begin()));
259 auto s0 =
static_cast<Int32>(r.dim(0).size());
260 auto s1 =
static_cast<Int32>(r.dim(1).size());
261 auto s2 =
static_cast<Int32>(r.dim(2).size());
262 auto s3 =
static_cast<Int32>(r.dim(3).size());
263 BoundsType sizes(ArrayExtentType(s0, s1, s2, s3));
264 return { lower_bounds, sizes };
272class OneTBBTaskFunctor
283 void operator()()
const
308 static const int FUNCTOR_CLASS_SIZE = 32;
315 m_functor = f->clone(m_functor_buf.data(), FUNCTOR_CLASS_SIZE);
343class TBBTaskImplementation
348 template <
int RankValue>
349 class MDParallelForExecute;
355 class ARCCORE_ALIGNAS_PACKED(64) TaskThreadInfo
// Stores v as the task index recorded in this TaskThreadInfo.
365 void setTaskIndex(
Integer v) { m_task_index = v; }
// Returns the task index currently stored in this TaskThreadInfo
// (the value of m_task_index).
366 Integer taskIndex()
const {
return m_task_index; }
380 class TaskInfoLockGuard
384 TaskInfoLockGuard(TaskThreadInfo* tti,
Integer task_index)
386 , m_old_task_index(-1)
389 m_old_task_index = tti->taskIndex();
390 tti->setTaskIndex(task_index);
396 m_tti->setTaskIndex(m_old_task_index);
401 TaskThreadInfo* m_tti;
407 TBBTaskImplementation() =
default;
408 ~TBBTaskImplementation()
override;
413 void initialize(
Int32 nb_thread)
override;
414 void terminate()
override;
434 _executeMDParallelFor<1>(loop_ranges, functor, run_info);
440 _executeMDParallelFor<2>(loop_ranges, functor, run_info);
446 _executeMDParallelFor<3>(loop_ranges, functor, run_info);
452 _executeMDParallelFor<4>(loop_ranges, functor, run_info);
481 bool m_is_active = false;
486 template <
int RankValue>
void
496class TBBTaskImplementation::Impl
499 :
public tbb::task_scheduler_observer
504 : tbb::task_scheduler_observer(p->m_main_arena)
508 void on_scheduler_entry(
bool is_worker)
override
510 m_p->notifyThreadCreated(is_worker);
512 void on_scheduler_exit(
bool is_worker)
override
514 m_p->notifyThreadDestroyed(is_worker);
522 : m_task_observer(
this)
523 , m_thread_task_infos(cache_line_size)
525 m_nb_allowed_thread = tbb::info::default_concurrency();
528 Impl(
Int32 nb_thread)
529 : m_main_arena(nb_thread)
530 , m_task_observer(
this)
531 , m_thread_task_infos(cache_line_size)
533 m_nb_allowed_thread = nb_thread;
// Returns a pointer to the TaskThreadInfo stored at position `index` in
// m_thread_task_infos. No bounds check is done here (plain operator[] on the
// vector), so the caller must pass a valid index.
540 TaskThreadInfo* threadTaskInfo(
Integer index) {
return &m_thread_task_infos[index]; }
544 Int32 m_nb_allowed_thread = 0;
556 m_main_arena.terminate();
557 m_task_observer.observe(
false);
558 oneapi::tbb::finalize(m_task_scheduler_handle);
563 void notifyThreadCreated(
bool is_worker)
565 std::thread::id my_thread_id = std::this_thread::get_id();
572 if (m_constructed_thread_map.contains(my_thread_id))
574 m_constructed_thread_map.insert(my_thread_id);
578 std::ostringstream ostr;
579 ostr <<
"TBB: CREATE THREAD"
580 <<
" nb_allowed=" << m_nb_allowed_thread
581 <<
" tbb_default_allowed=" << tbb::info::default_concurrency()
582 <<
" id=" << my_thread_id
583 <<
" arena_id=" << _currentTaskTreadIndex()
584 <<
" is_worker=" << is_worker
586 std::cout << ostr.str();
588 TaskFactoryInternal::notifyThreadCreated();
592 void notifyThreadDestroyed([[maybe_unused]]
bool is_worker)
602#if TBB_VERSION_MAJOR > 2021 || (TBB_VERSION_MAJOR == 2021 && TBB_VERSION_MINOR > 5)
603 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = oneapi::tbb::attach();
605 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = tbb::task_scheduler_handle::get();
610 tbb::task_arena m_main_arena;
616 TaskObserver m_task_observer;
617 std::mutex m_thread_created_mutex;
618 std::vector<TaskThreadInfo> m_thread_task_infos;
619 tbb::concurrent_set<std::thread::id> m_constructed_thread_map;
622 ConcurrencyBase::_setMaxAllowedThread(m_nb_allowed_thread);
625 std::cout <<
"TBB: TBBTaskImplementationInit nb_allowed_thread=" << m_nb_allowed_thread
626 <<
" id=" << std::this_thread::get_id()
627 <<
" version=" << TBB_VERSION_MAJOR <<
"." << TBB_VERSION_MINOR
630 m_thread_task_infos.resize(m_nb_allowed_thread);
631 m_task_observer.observe(
true);
632 Integer max_arena_size = m_nb_allowed_thread;
635 if (max_arena_size > 512)
636 max_arena_size = 512;
637 if (max_arena_size < 2)
641 for (
Integer i = 2; i < max_arena_size; ++i)
658 , m_stat_info(stat_info)
659 , m_nb_allowed_thread(nb_allowed_thread)
664 void operator()(tbb::blocked_range<Integer>& range)
const
668 std::ostringstream o;
670 <<
" id=" << std::this_thread::get_id()
671 <<
" max_allowed=" << m_nb_allowed_thread
672 <<
" range_begin=" << range.begin() <<
" range_size=" << range.size()
674 std::cout << o.str();
678 int tbb_index = _currentTaskTreadIndex();
679 if (tbb_index < 0 || tbb_index >= m_nb_allowed_thread)
680 ARCCORE_FATAL(
"Invalid index for thread idx={0} valid_interval=[0..{1}[",
681 tbb_index, m_nb_allowed_thread);
685 m_stat_info->incrementNbChunk();
686 m_functor->executeFunctor(range.begin(), CheckedConvert::toInteger(range.size()));
693 Int32 m_nb_allowed_thread;
702template <
int RankValue>
703class TBBMDParallelFor
709 , m_stat_info(stat_info)
710 , m_nb_allowed_thread(nb_allowed_thread)
715 void operator()(blocked_nd_range<Int32, RankValue>& range)
const
719 std::ostringstream o;
721 <<
" id=" << std::this_thread::get_id()
722 <<
" max_allowed=" << m_nb_allowed_thread
724 for (
Int32 i = 0; i < RankValue; ++i) {
725 auto r0 =
static_cast<Int32>(range.dim(i).begin());
726 auto r1 =
static_cast<Int32>(range.dim(i).size());
727 o <<
" range" << i <<
" (begin=" << r0 <<
" size=" << r1 <<
")";
730 std::cout << o.str();
734 int tbb_index = _currentTaskTreadIndex();
735 if (tbb_index < 0 || tbb_index >= m_nb_allowed_thread)
736 ARCCORE_FATAL(
"Invalid index for thread idx={0} valid_interval=[0..{1}[",
737 tbb_index, m_nb_allowed_thread);
741 m_stat_info->incrementNbChunk();
742 m_functor->executeFunctor(_fromTBBRange(range));
749 Int32 m_nb_allowed_thread;
773class TBBDeterministicParallelFor
781 , m_nb_thread(nb_thread)
782 , m_begin_index(begin_index)
784 , m_grain_size(grain_size)
787 , m_nb_block_per_thread(0)
792 if (m_grain_size > 0) {
793 m_block_size = m_grain_size;
794 if (m_block_size > 0) {
795 m_nb_block = m_size / m_block_size;
796 if ((m_size % m_block_size) != 0)
801 m_nb_block_per_thread = m_nb_block / m_nb_thread;
802 if ((m_nb_block % m_nb_thread) != 0)
803 ++m_nb_block_per_thread;
807 m_nb_block = m_nb_thread;
808 m_block_size = m_size / m_nb_block;
809 m_nb_block_per_thread = 1;
812 std::cout <<
"TBBDeterministicParallelFor: BEGIN=" << m_begin_index <<
" size=" << m_size
813 <<
" grain_size=" << m_grain_size
814 <<
" nb_block=" << m_nb_block <<
" nb_thread=" << m_nb_thread
815 <<
" nb_block_per_thread=" << m_nb_block_per_thread
816 <<
" block_size=" << m_block_size
817 <<
" block_size*nb_block=" << m_block_size * m_nb_block <<
'\n';
831 auto nb_iter =
static_cast<Integer>(range.size());
832 for (
Integer i = 0; i < nb_iter; ++i) {
833 Integer task_id = range.begin() + i;
834 for (
Integer k = 0, kn = m_nb_block_per_thread; k < kn; ++k) {
835 Integer block_id = task_id + (k * m_nb_thread);
836 if (block_id < m_nb_block)
837 _doBlock(task_id, block_id);
846 Integer iter_begin = block_id * m_block_size;
847 Integer iter_size = m_block_size;
848 if ((block_id + 1) == m_nb_block) {
850 iter_size = m_size - iter_begin;
852 iter_begin += m_begin_index;
854 if (TaskFactory::verboseLevel() >= 3) {
855 std::ostringstream o;
856 o <<
"TBB: DoBlock: BLOCK task_id=" << task_id <<
" block_id=" << block_id
857 <<
" iter_begin=" << iter_begin <<
" iter_size=" << iter_size <<
'\n';
858 std::cout << o.str();
863 auto r = tbb::blocked_range<int>(iter_begin, iter_begin + iter_size);
870 TBBTaskImplementation* m_impl;
871 const TBBParallelFor& m_tbb_for;
895 , m_stat_info(stat_info)
900 void operator()()
const
902 Integer nb_thread = m_options.maxThread();
904 Integer gsize = m_options.grainSize();
905 tbb::blocked_range<Integer> range(m_begin, m_begin + m_size);
907 std::cout <<
"TBB: TBBTaskImplementationInit ParallelForExecute begin=" << m_begin
908 <<
" size=" << m_size <<
" gsize=" << gsize
909 <<
" partitioner=" << (int)m_options.partitioner()
910 <<
" nb_thread=" << nb_thread
911 <<
" has_stat_info=" << (m_stat_info !=
nullptr)
915 range = tbb::blocked_range<Integer>(m_begin, m_begin + m_size, gsize);
918 tbb::parallel_for(range, pf, tbb::static_partitioner());
921 tbb::blocked_range<Integer> range2(0, nb_thread, 1);
923 tbb::parallel_for(range2, dpf);
926 tbb::parallel_for(range, pf);
931 TBBTaskImplementation* m_impl =
nullptr;
942template <
int RankValue>
943class TBBTaskImplementation::MDParallelForExecute
947 MDParallelForExecute(TBBTaskImplementation* impl,
952 , m_tbb_range(_toTBBRange(range))
955 , m_stat_info(stat_info)
960 Int32 gsize = m_options.grainSize();
966 constexpr bool is_verbose =
false;
967 std::array<Int32, RankValue> range_extents = range.extents().asStdArray();
968 double ratio =
static_cast<double>(gsize) /
static_cast<double>(range.nbElement());
969 if constexpr (is_verbose) {
970 std::cout <<
"GSIZE=" << gsize <<
" rank=" << RankValue <<
" ratio=" << ratio;
971 for (
Int32 i = 0; i < RankValue; ++i)
972 std::cout <<
" range" << i <<
"=" << range_extents[i];
975 Int32 index = RankValue - 1;
976 Int32 remaining_grain = gsize;
977 for (; index >= 0; --index) {
978 Int32 current = range_extents[index];
979 if constexpr (is_verbose)
980 std::cout <<
"Check index=" << index <<
" remaining=" << remaining_grain <<
" current=" << current <<
"\n";
981 if (remaining_grain > current) {
982 all_grain_sizes[index] = current;
983 remaining_grain /= current;
986 all_grain_sizes[index] = remaining_grain;
990 for (Int32 i = 0; i < index; ++i)
991 all_grain_sizes[i] = 1;
992 if constexpr (is_verbose) {
993 for (Int32 i = 0; i < RankValue; ++i)
994 std::cout <<
" grain" << i <<
"=" << all_grain_sizes[i];
997 m_tbb_range = _toTBBRangeWithGrain(m_tbb_range, all_grain_sizes);
1003 void operator()()
const
1005 Integer nb_thread = m_options.maxThread();
1006 TBBMDParallelFor<RankValue> pf(m_functor, nb_thread, m_stat_info);
1008 if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Static) {
1009 tbb::parallel_for(m_tbb_range, pf, tbb::static_partitioner());
1011 else if (m_options.partitioner() == ParallelLoopOptions::Partitioner::Deterministic) {
1013 ARCCORE_THROW(NotImplementedException,
"ParallelLoopOptions::Partitioner::Deterministic for multi-dimensionnal loops");
1019 tbb::parallel_for(m_tbb_range, pf);
1025 TBBTaskImplementation* m_impl =
nullptr;
1026 blocked_nd_range<Int32, RankValue> m_tbb_range;
1027 IMDRangeFunctor<RankValue>* m_functor =
nullptr;
1028 ParallelLoopOptions m_options;
1029 ForLoopOneExecStat* m_stat_info =
nullptr;
1035TBBTaskImplementation::
1036~TBBTaskImplementation()
1044void TBBTaskImplementation::
1045initialize(
Int32 nb_thread)
1049 m_is_active = (nb_thread != 1);
1051 m_p =
new Impl(nb_thread);
1062void TBBTaskImplementation::
1074 o <<
"OneTBBTaskImplementation"
1075 <<
" version=" << TBB_VERSION_STRING
1076 <<
" interface=" << TBB_INTERFACE_VERSION
1077 <<
" runtime_interface=" << TBB_runtime_interface_version();
1083void TBBTaskImplementation::
1086 ScopedExecInfo sei(loop_info.runInfo());
1090 Int32 begin = loop_info.beginIndex();
1091 Int32 size = loop_info.size();
1097 Integer nb_allowed_thread = m_p->nbAllowedThread();
1099 max_thread = nb_allowed_thread;
1102 std::cout <<
"TBB: TBBTaskImplementation executeParallelFor begin=" << begin
1103 <<
" size=" << size <<
" max_thread=" << max_thread
1104 <<
" grain_size=" << options.
grainSize()
1105 <<
" nb_allowed=" << nb_allowed_thread <<
'\n';
1108 if (max_thread == 1 || max_thread == 0) {
1114 ParallelLoopOptions true_options(options);
1116 true_options.setMaxThread(max_thread);
1118 ParallelForExecute pfe(
this, true_options, begin, size, f, stat_info);
1120 tbb::task_arena* used_arena =
nullptr;
1121 if (max_thread < nb_allowed_thread && max_thread < m_p->m_sub_arena_list.size())
1124 used_arena = &(m_p->m_main_arena);
1125 used_arena->execute(pfe);
1134 _executeParallelFor(loop_info);
1146template <
int RankValue>
void TBBTaskImplementation::
1152 if (run_info.options().has_value())
1153 options = run_info.options().value();
1155 ScopedExecInfo sei(run_info);
1160 std::cout <<
"TBB: TBBTaskImplementation executeMDParallelFor nb_dim=" << RankValue
1161 <<
" nb_element=" << loop_ranges.nbElement()
1162 <<
" grain_size=" << options.
grainSize()
1163 <<
" name=" << run_info.traceInfo().traceInfo()
1164 <<
" has_stat_info=" << (stat_info !=
nullptr)
1170 if (max_thread == 1 || max_thread == 0) {
1176 ParallelLoopOptions true_options(options);
1179 Integer nb_allowed_thread = m_p->nbAllowedThread();
1181 max_thread = nb_allowed_thread;
1182 tbb::task_arena* used_arena =
nullptr;
1183 if (max_thread < nb_allowed_thread)
1184 used_arena = m_p->m_sub_arena_list[max_thread];
1186 used_arena = &(m_p->m_main_arena);
1189 if constexpr (RankValue == 1) {
1190 auto range_1d = _toTBBRange(loop_ranges);
1195 LambdaRangeFunctorT<
decltype(x1)> functor_1d(x1);
1196 Integer begin1 = CheckedConvert::toInteger(range_1d.dim(0).begin());
1197 Integer size1 = CheckedConvert::toInteger(range_1d.dim(0).size());
1199 used_arena->execute(pfe);
1202 MDParallelForExecute<RankValue> pfe(
this, true_options, loop_ranges, functor, stat_info);
1203 used_arena->execute(pfe);
1236 return m_p->threadTaskInfo(thread_id);
1249 if (thread_id < 0 || thread_id >= m_p->nbAllowedThread())
1253 Int32 task_index = tti->taskIndex();
1254 if (task_index >= 0)
1266 tbb::task_group task_group;
1267 task_group.run(taskFunctor());
1278 tbb::task_group task_group;
1284 for (
Integer i = 0; i < n; ++i) {
1285 auto* t =
static_cast<OneTBBTask*
>(tasks[i]);
1286 task_group.run(t->taskFunctor());
1289 for (
Integer i = 0; i < n; ++i) {
1290 auto* t =
static_cast<OneTBBTask*
>(tasks[i]);
1308ARCANE_DI_REGISTER_PROVIDER(TBBTaskImplementation,
1309 DependencyInjection::ProviderProperty(
"TBBTaskImplementation"),
1310 ARCANE_DI_INTERFACES(ITaskImplementation),
1311 ARCANE_DI_EMPTY_CONSTRUCTOR());
#define ARCCORE_FATAL(...)
Macro throwing a FatalErrorException.
#define ARCCORE_THROW(exception_class,...)
Macro to throw an exception with formatting.
#define ARCCORE_CHECK_POINTER(ptr)
Macro that returns the pointer ptr if it is not null or throws an exception if it is null.
Constant view of an array of type T.
constexpr Integer size() const noexcept
Number of elements in the array.
Class to manage the profiling of a single loop execution.
Loop execution information.
Interface of a functor on a multi-dimensional iteration interval of dimension RankValue.
virtual void executeFunctor(const ComplexForLoopRanges< RankValue > &loop_range)=0
Executes the associated method.
Interface of a functor on an iteration interval.
virtual void executeFunctor(Int32 begin, Int32 size)=0
Executes the associated method.
virtual void executeFunctor(const TaskContext &tc)=0
Executes the associated method.
Int32 nbAllowedThread() const
Maximum number of threads used to manage tasks.
Interface for a concurrent task.
Class that measures the time elapsed between its constructor call and its destructor call.
void launchAndWait() override
Launches the task and blocks until it finishes.
Characteristics of a multi-thread 1D loop.
Execution options for a parallel loop in multi-threading.
Integer grainSize() const
Size of an iteration interval.
Int32 maxThread() const
Maximum number of allowed threads.
void setGrainSize(Integer v)
Sets the approximate size of an iteration interval.
@ Static
Uses static partitioning.
@ Deterministic
Uses static partitioning and scheduling.
static bool hasProfiling()
Indicates if profiling is active.
Deterministic implementation of ParallelFor.
void operator()(tbb::blocked_range< Integer > &range) const
Operator for a given thread.
std::vector< tbb::task_arena * > m_sub_arena_list
Array whose i-th element contains the tbb::task_arena for i thread.
Class used to set TaskThreadInfo::taskIndex().
Int32 currentTaskThreadIndex() const final
Implementation of TaskFactory::currentTaskThreadIndex().
void executeParallelFor(const ComplexForLoopRanges< 1 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 1 > *functor) final
Executes a 1D loop in parallel.
void executeParallelFor(Int32 begin, Int32 size, IRangeFunctor *f) final
Executes the functor f in parallel.
ITask * createRootTask(ITaskFunctor *f) override
Creates a root task. The implementation must copy the value of f, which is either a TaskFunctor or a ...
void printInfos(std::ostream &o) const final
Prints information about the runtime used.
void executeParallelFor(Int32 begin, Int32 size, const ParallelLoopOptions &options, IRangeFunctor *f) final
Executes the functor f in parallel.
void executeParallelFor(const ComplexForLoopRanges< 3 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 3 > *functor) final
Executes a 3D loop in parallel.
void executeParallelFor(const ComplexForLoopRanges< 4 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 4 > *functor) final
Executes a 4D loop in parallel.
TaskThreadInfo * currentTaskThreadInfo() const
Instance of TaskThreadInfo associated with the current thread.
bool isActive() const final
Indicates if the implementation is active.
Int32 currentTaskIndex() const final
Implementation of TaskFactory::currentTaskIndex().
void executeParallelFor(const ComplexForLoopRanges< 2 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 2 > *functor) final
Executes a 2D loop in parallel.
Execution context of a task.
static const ParallelLoopOptions & defaultParallelLoopOptions()
Default parallel loop execution options.
static Integer verboseLevel()
Verbosity level.
static void setDefaultParallelLoopOptions(const ParallelLoopOptions &v)
Sets the default parallel loop execution options.
static Int32 currentTaskThreadIndex()
Index (between 0 and nbAllowedThread()-1) of the thread executing the current task.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Int32 Integer
Type representing an integer.
SimpleForLoopRanges< 1 > makeLoopRanges(Int32 n1)
Creates an iteration range [0,n1[.
std::int32_t Int32
Signed integer type of 32 bits.