14#include "arcane/utils/IThreadImplementation.h"
15#include "arcane/utils/NotImplementedException.h"
16#include "arcane/utils/IFunctor.h"
17#include "arcane/utils/CheckedConvert.h"
18#include "arcane/utils/ForLoopRanges.h"
20#include "arcane/utils/IObservable.h"
21#include "arcane/utils/PlatformUtils.h"
22#include "arcane/utils/Profiling.h"
23#include "arcane/utils/MemoryAllocator.h"
24#include "arcane/utils/FixedArray.h"
25#include "arcane/utils/internal/TaskFactoryInternal.h"
26#include "arcane/utils/internal/DependencyInjection.h"
28#include "arcane/core/FactoryService.h"
35#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
42#ifdef ARCANE_USE_ONETBB
45#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
47#include <oneapi/tbb/concurrent_set.h>
48#include <oneapi/tbb/global_control.h>
55#if __has_include(<tbb/blocked_rangeNd.h>)
56#include <tbb/blocked_rangeNd.h>
70#if (TBB_INTERFACE_VERSION < 10003)
76# define ARCANE_STR_HELPER(x) #x
77# define ARCANE_STR(x) ARCANE_STR_HELPER(x)
78# pragma message "Your version of TBB is : " ARCANE_STR(TBB_VERSION_MAJOR) "." ARCANE_STR(TBB_VERSION_MINOR)
80# error "Your version of TBB is too old. TBB 2018.3+ is required. Please disable TBB in configuration using -DCMAKE_DISABLE_FIND_PACKAGE_TBB=TRUE"
90#if (TBB_VERSION_MAJOR > 2022) || (TBB_VERSION_MAJOR == 2022 && TBB_VERSION_MINOR > 0) || defined __TBB_blocked_nd_range_H
// Alias template naming the N-dimensional blocked range type used by this file.
// This variant maps to tbb::blocked_nd_range<Value, N>; it appears to be the
// branch taken when the preceding preprocessor test (TBB newer than 2022.0, or
// __TBB_blocked_nd_range_H defined) holds -- confirm against the full file.
94template <
typename Value,
unsigned int N>
95using blocked_nd_range = tbb::blocked_nd_range<Value, N>;
// Alias template naming the N-dimensional blocked range type used by this file.
// This variant maps to tbb::blocked_rangeNd<Value, N>; presumably the fallback
// branch of the TBB version test for releases that only ship the preview
// header <tbb/blocked_rangeNd.h> -- confirm against the full file.
99template <
typename Value,
unsigned int N>
100using blocked_nd_range = tbb::blocked_rangeNd<Value, N>;
138 explicit ScopedExecInfo(
const ForLoopRunInfo& run_info)
139 : m_run_info(run_info)
145 ForLoopOneExecStat* ptr = run_info.execStat();
147 m_stat_info_ptr = ptr;
148 m_use_own_run_info =
false;
151 m_stat_info_ptr = isStatActive() ? &m_stat_info :
nullptr;
155#ifdef PRINT_STAT_INFO
156 if (m_stat_info_ptr){
157 bool is_valid = m_run_info.traceInfo().isValid();
159 std::cout <<
"ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
163 std::cout <<
"ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
164 <<
" trace_name=" << m_run_info.traceInfo().traceInfo().name() <<
"\n";
167 if (m_stat_info_ptr && m_use_own_run_info){
// Returns the statistics pointer selected by the constructor. From the visible
// constructor code this is either the pointer obtained from run_info.execStat()
// or the address of the internal m_stat_info when isStatActive() is true;
// it can be nullptr.
174 ForLoopOneExecStat* statInfo()
const {
return m_stat_info_ptr; }
// Returns m_use_own_run_info. The member is initialized to true and the
// constructor sets it to false on the path where it stores the pointer
// obtained from run_info.execStat().
175 bool isOwn()
const {
return m_use_own_run_info; }
178 ForLoopOneExecStat m_stat_info;
179 ForLoopOneExecStat* m_stat_info_ptr =
nullptr;
180 ForLoopRunInfo m_run_info;
182 bool m_use_own_run_info =
true;
188inline int _currentTaskTreadIndex()
194 return tbb::this_task_arena::current_thread_index();
197inline blocked_nd_range<Int32, 1>
200 return {{r.lowerBound<0>(), r.upperBound<0>()}};
203inline blocked_nd_range<Int32, 2>
206 return {{r.lowerBound<0>(), r.upperBound<0>()},
207 {r.lowerBound<1>(), r.upperBound<1>()}};
211inline blocked_nd_range<Int32, 3>
214 return {{r.lowerBound<0>(), r.upperBound<0>()},
215 {r.lowerBound<1>(), r.upperBound<1>()},
216 {r.lowerBound<2>(), r.upperBound<2>()}};
219inline blocked_nd_range<Int32, 4>
222 return {{r.lowerBound<0>(), r.upperBound<0>()},
223 {r.lowerBound<1>(), r.upperBound<1>()},
224 {r.lowerBound<2>(), r.upperBound<2>()},
225 {r.lowerBound<3>(), r.upperBound<3>()}};
231inline blocked_nd_range<Int32, 2>
234 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
235 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]}};
238inline blocked_nd_range<Int32, 3>
241 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
242 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]},
243 {r.dim(2).begin(), r.dim(2).end(), grain_sizes[2]}};
246inline blocked_nd_range<Int32, 4>
249 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
250 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]},
251 {r.dim(2).begin(), r.dim(2).end(), grain_sizes[2]},
252 {r.dim(3).begin(), r.dim(3).end(), grain_sizes[3]}};
259_fromTBBRange(
const blocked_nd_range<Int32, 2>& r)
262 using ArrayExtentType =
typename BoundsType::ArrayExtentType;
264 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin()));
265 auto s0 =
static_cast<Int32>(r.dim(0).size());
266 auto s1 =
static_cast<Int32>(r.dim(1).size());
267 BoundsType sizes(ArrayExtentType(s0,s1));
268 return { lower_bounds, sizes };
272_fromTBBRange(
const blocked_nd_range<Int32, 3>& r)
275 using ArrayExtentType =
typename BoundsType::ArrayExtentType;
277 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin(),r.dim(2).begin()));
278 auto s0 =
static_cast<Int32>(r.dim(0).size());
279 auto s1 =
static_cast<Int32>(r.dim(1).size());
280 auto s2 =
static_cast<Int32>(r.dim(2).size());
281 BoundsType sizes(ArrayExtentType(s0,s1,s2));
282 return { lower_bounds, sizes };
286_fromTBBRange(
const blocked_nd_range<Int32, 4>& r)
289 using ArrayExtentType =
typename BoundsType::ArrayExtentType;
291 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin(),r.dim(2).begin(),r.dim(3).begin()));
292 auto s0 =
static_cast<Int32>(r.dim(0).size());
293 auto s1 =
static_cast<Int32>(r.dim(1).size());
294 auto s2 =
static_cast<Int32>(r.dim(2).size());
295 auto s3 =
static_cast<Int32>(r.dim(3).size());
296 BoundsType sizes(ArrayExtentType(s0,s1,s2,s3));
297 return { lower_bounds, sizes };
305#ifdef ARCANE_USE_ONETBB
310class OneTBBTaskFunctor
// Stores the functor and the task without taking any other action.
// The call operator later wraps m_task in a TaskContext and passes it to
// the functor's executeFunctor().
313 OneTBBTaskFunctor(ITaskFunctor* functor,ITask* task)
314 : m_functor(functor), m_task(task) {}
316 void operator()()
const
319 ITaskFunctor* tf = m_functor;
321 TaskContext task_context(m_task);
323 tf->executeFunctor(task_context);
327 mutable ITaskFunctor* m_functor;
338 static const int FUNCTOR_CLASS_SIZE = 32;
340 OneTBBTask(ITaskFunctor* f)
343 m_functor = f->clone(functor_buf,FUNCTOR_CLASS_SIZE);
// Builds a OneTBBTaskFunctor bound to this task and to m_functor, the functor
// stored by the constructor via f->clone(). The returned object is what gets
// handed to tbb::task_group::run() in launchAndWait().
346 OneTBBTaskFunctor taskFunctor() {
return OneTBBTaskFunctor(m_functor,
this); }
347 void launchAndWait()
override;
348 void launchAndWait(ConstArrayView<ITask*> tasks)
override;
350 virtual ITask* _createChildTask(ITaskFunctor* functor)
override;
352 ITaskFunctor* m_functor;
353 char functor_buf[FUNCTOR_CLASS_SIZE];
355using TBBTask = OneTBBTask;
370 static const int FUNCTOR_CLASS_SIZE = 32;
375 m_functor = f->clone(functor_buf,FUNCTOR_CLASS_SIZE);
378 tbb::task* execute()
override
395 char functor_buf[FUNCTOR_CLASS_SIZE];
411class TBBTaskImplementation
416 template<
int RankValue>
// Initializes the task index to -1 (presumably meaning "no task is currently
// associated with this thread" -- confirm with callers of taskIndex()).
425 TaskThreadInfo() : m_task_index(-1){}
// Sets the task index recorded for this thread to v.
// TaskInfoLockGuard uses this to install a new index and to restore the
// previous one afterwards.
427 void setTaskIndex(
Integer v) { m_task_index = v; }
// Returns the task index currently recorded for this thread
// (-1 after default construction).
428 Integer taskIndex()
const {
return m_task_index; }
439 class TaskInfoLockGuard
443 : m_tti(tti), m_old_task_index(-1)
446 m_old_task_index = tti->taskIndex();
447 tti->setTaskIndex(task_index);
453 m_tti->setTaskIndex(m_old_task_index);
473#ifdef ARCANE_USE_ONETBB
474 OneTBBTask* t =
new OneTBBTask(f);
542 bool m_is_active = false;
547 template<
int RankValue>
void
557class TBBTaskImplementation::Impl
560 :
public tbb::task_scheduler_observer
565#ifdef ARCANE_USE_ONETBB
566 tbb::task_scheduler_observer(p->m_main_arena),
571 void on_scheduler_entry(
bool is_worker)
override
573 m_p->notifyThreadCreated(is_worker);
575 void on_scheduler_exit(
bool is_worker)
override
577 m_p->notifyThreadDestroyed(is_worker);
584 m_task_observer(this),
587#ifdef ARCANE_USE_ONETBB
588 m_nb_allowed_thread = tbb::info::default_concurrency();
590 m_nb_allowed_thread = tbb::task_scheduler_init::default_num_threads();
596#ifndef ARCANE_USE_ONETBB
597 m_scheduler_init(nb_thread),
599 m_main_arena(nb_thread),
600 m_task_observer(this),
603 m_nb_allowed_thread = nb_thread;
// Returns the address of the entry of m_thread_task_infos at position index.
// This accessor performs no range validation of its own; m_thread_task_infos
// is resized to m_nb_allowed_thread during initialization, so index is
// expected to lie in [0, m_nb_allowed_thread).
608 TaskThreadInfo* threadTaskInfo(
Integer index) {
return &m_thread_task_infos[index]; }
610 Int32 m_nb_allowed_thread;
615 for(
auto x : m_sub_arena_list ){
620 m_sub_arena_list.clear();
621 m_main_arena.terminate();
622#ifdef ARCANE_USE_ONETBB
623 m_task_observer.observe(
false);
624 oneapi::tbb::finalize(m_task_scheduler_handle);
626 m_scheduler_init.terminate();
627 m_task_observer.observe(
false);
631 void notifyThreadCreated(
bool is_worker)
633 std::thread::id my_thread_id = std::this_thread::get_id();
635#ifdef ARCANE_USE_ONETBB
642 if (m_constructed_thread_map.contains(my_thread_id))
644 m_constructed_thread_map.insert(my_thread_id);
649 std::ostringstream ostr;
650 ostr <<
"TBB: CREATE THREAD"
651 <<
" nb_allowed=" << m_nb_allowed_thread
652#ifdef ARCANE_USE_ONETBB
653 <<
" tbb_default_allowed=" << tbb::info::default_concurrency()
655 <<
" tbb_default_allowed=" << tbb::task_scheduler_init::default_num_threads()
657 <<
" id=" << my_thread_id
658 <<
" arena_id=" << _currentTaskTreadIndex()
659 <<
" is_worker=" << is_worker
661 std::cout << ostr.str();
667 void notifyThreadDestroyed([[maybe_unused]]
bool is_worker)
669#ifdef ARCANE_USE_ONETBB
679 std::scoped_lock sl(m_thread_created_mutex);
681 std::cout <<
"TBB: DESTROY THREAD"
682 <<
" id=" << std::this_thread::get_id()
683 <<
" arena_id=" << _currentTaskTreadIndex()
684 <<
" is_worker=" << is_worker
692#ifdef ARCANE_USE_ONETBB
693#if TBB_VERSION_MAJOR>2021 || (TBB_VERSION_MAJOR==2021 && TBB_VERSION_MINOR>5)
694 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = oneapi::tbb::attach();
696 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = tbb::task_scheduler_handle::get();
699 tbb::task_scheduler_init m_scheduler_init;
702 tbb::task_arena m_main_arena;
707 std::mutex m_thread_created_mutex;
709#ifdef ARCANE_USE_ONETBB
710 tbb::concurrent_set<std::thread::id> m_constructed_thread_map;
715 std::cout <<
"TBB: TBBTaskImplementationInit nb_allowed_thread=" << m_nb_allowed_thread
716 <<
" id=" << std::this_thread::get_id()
717 <<
" version=" << TBB_VERSION_MAJOR <<
"." << TBB_VERSION_MINOR
720 m_thread_task_infos.
resize(m_nb_allowed_thread);
721 m_task_observer.observe(
true);
722 Integer max_arena_size = m_nb_allowed_thread;
725 if (max_arena_size>512)
726 max_arena_size = 512;
727 if (max_arena_size<2)
731 for(
Integer i=2; i<max_arena_size; ++i )
745 : m_functor(f), m_stat_info(stat_info), m_nb_allowed_thread(nb_allowed_thread){}
748 void operator()(tbb::blocked_range<Integer>& range)
const
752 std::ostringstream o;
754 <<
" id=" << std::this_thread::get_id()
755 <<
" max_allowed=" << m_nb_allowed_thread
756 <<
" range_begin=" << range.begin() <<
" range_size=" << range.size()
758 std::cout << o.str();
762 int tbb_index = _currentTaskTreadIndex();
763 if (tbb_index<0 || tbb_index>=m_nb_allowed_thread)
764 ARCANE_FATAL(
"Invalid index for thread idx={0} valid_interval=[0..{1}[",
765 tbb_index,m_nb_allowed_thread);
769 m_stat_info->incrementNbChunk();
776 Int32 m_nb_allowed_thread;
784template<
int RankValue>
785class TBBMDParallelFor
790 : m_functor(f), m_stat_info(stat_info), m_nb_allowed_thread(nb_allowed_thread){}
794 void operator()(blocked_nd_range<Int32, RankValue>& range)
const
798 std::ostringstream o;
800 <<
" id=" << std::this_thread::get_id()
801 <<
" max_allowed=" << m_nb_allowed_thread
803 for(
Int32 i=0; i<RankValue; ++i ){
804 Int32 r0 =
static_cast<Int32>(range.dim(i).begin());
805 Int32 r1 =
static_cast<Int32>(range.dim(i).size());
806 o <<
" range" << i <<
" (begin=" << r0 <<
" size=" << r1 <<
")";
809 std::cout << o.str();
813 int tbb_index = _currentTaskTreadIndex();
814 if (tbb_index<0 || tbb_index>=m_nb_allowed_thread)
815 ARCANE_FATAL(
"Invalid index for thread idx={0} valid_interval=[0..{1}[",
816 tbb_index,m_nb_allowed_thread);
820 m_stat_info->incrementNbChunk();
821 m_functor->executeFunctor(_fromTBBRange(range));
828 Int32 m_nb_allowed_thread;
850class TBBDeterministicParallelFor
855 : m_impl(impl), m_tbb_for(tbb_for), m_nb_thread(nb_thread), m_begin_index(begin_index), m_size(size),
856 m_grain_size(grain_size), m_nb_block(0), m_block_size(0), m_nb_block_per_thread(0)
862 m_block_size = m_grain_size;
864 m_nb_block = m_size / m_block_size;
865 if ((m_size % m_block_size)!=0)
870 m_nb_block_per_thread = m_nb_block / m_nb_thread;
871 if ((m_nb_block % m_nb_thread) != 0)
872 ++m_nb_block_per_thread;
876 m_nb_block = m_nb_thread;
877 m_block_size = m_size / m_nb_block;
878 m_nb_block_per_thread = 1;
881 std::cout <<
"TBBDeterministicParallelFor: BEGIN=" << m_begin_index <<
" size=" << m_size
882 <<
" grain_size=" << m_grain_size
883 <<
" nb_block=" << m_nb_block <<
" nb_thread=" << m_nb_thread
884 <<
" nb_block_per_thread=" << m_nb_block_per_thread
885 <<
" block_size=" << m_block_size
886 <<
" block_size*nb_block=" << m_block_size*m_nb_block <<
'\n';
900 for(
Integer i=0; i<nb_iter; ++i ){
901 Integer task_id = range.begin() + i;
902 for (
Integer k=0, kn=m_nb_block_per_thread; k<kn; ++k ){
903 Integer block_id = task_id + (k * m_nb_thread);
904 if (block_id<m_nb_block)
905 _doBlock(task_id,block_id);
914 Integer iter_begin = block_id * m_block_size;
915 Integer iter_size = m_block_size;
916 if ((block_id+1)==m_nb_block){
918 iter_size = m_size - iter_begin;
920 iter_begin += m_begin_index;
922 if (TaskFactory::verboseLevel()>=3){
923 std::ostringstream o;
924 o <<
"TBB: DoBlock: BLOCK task_id=" << task_id <<
" block_id=" << block_id
925 <<
" iter_begin=" << iter_begin <<
" iter_size=" << iter_size <<
'\n';
926 std::cout << o.str();
931 auto r = tbb::blocked_range<int>(iter_begin,iter_begin + iter_size);
938 TBBTaskImplementation* m_impl;
939 const TBBParallelFor& m_tbb_for;
958 : m_impl(impl), m_begin(begin), m_size(size), m_functor(f), m_options(options), m_stat_info(stat_info){}
962 void operator()()
const
964 Integer nb_thread = m_options.maxThread();
966 Integer gsize = m_options.grainSize();
967 tbb::blocked_range<Integer> range(m_begin,m_begin+m_size);
969 std::cout <<
"TBB: TBBTaskImplementationInit ParallelForExecute begin=" << m_begin
970 <<
" size=" << m_size <<
" gsize=" << gsize
971 <<
" partitioner=" << (int)m_options.partitioner()
972 <<
" nb_thread=" << nb_thread
973 <<
" has_stat_info=" << (m_stat_info!=
nullptr)
977 range = tbb::blocked_range<Integer>(m_begin,m_begin+m_size,gsize);
980 tbb::parallel_for(range,pf,tbb::static_partitioner());
983 tbb::blocked_range<Integer> range2(0,nb_thread,1);
985 tbb::parallel_for(range2,dpf);
988 tbb::parallel_for(range,pf);
991 TBBTaskImplementation* m_impl =
nullptr;
1002template<
int RankValue>
1007 MDParallelForExecute(TBBTaskImplementation* impl,
1012 , m_tbb_range(_toTBBRange(range))
1014 , m_options(options)
1015 , m_stat_info(stat_info)
1020 Int32 gsize = m_options.grainSize();
1027 constexpr bool is_verbose =
false;
1028 std::array<Int32,RankValue> range_extents = range.extents().asStdArray();
1029 double ratio =
static_cast<double>(gsize) /
static_cast<double>(range.nbElement());
1030 if constexpr (is_verbose){
1031 std::cout <<
"GSIZE=" << gsize <<
" rank=" << RankValue <<
" ratio=" << ratio;
1032 for(
Int32 i=0; i<RankValue; ++i )
1033 std::cout <<
" range" << i <<
"=" << range_extents[i];
1036 Int32 index = RankValue - 1;
1037 Int32 remaining_grain = gsize;
1038 for( ; index>=0; --index ){
1039 Int32 current = range_extents[index];
1040 if constexpr (is_verbose)
1041 std::cout <<
"Check index=" << index <<
" remaining=" << remaining_grain <<
" current=" << current <<
"\n";
1042 if (remaining_grain>current){
1043 all_grain_sizes[index] = current;
1044 remaining_grain /= current;
1047 all_grain_sizes[index] = remaining_grain;
1051 for(
Int32 i=0; i<index; ++i )
1052 all_grain_sizes[i] = 1;
1053 if constexpr (is_verbose){
1054 for(
Int32 i=0; i<RankValue; ++i )
1055 std::cout <<
" grain" << i <<
"=" << all_grain_sizes[i];
1058 m_tbb_range = _toTBBRangeWithGrain(m_tbb_range,all_grain_sizes);
1064 void operator()()
const
1066 Integer nb_thread = m_options.maxThread();
1070 tbb::parallel_for(m_tbb_range,pf,tbb::static_partitioner());
1080 tbb::parallel_for(m_tbb_range,pf);
1084 TBBTaskImplementation* m_impl =
nullptr;
1085 blocked_nd_range<Int32, RankValue> m_tbb_range;
1094TBBTaskImplementation::
1095~TBBTaskImplementation()
1108 m_is_active = (nb_thread!=1);
1110 m_p =
new Impl(nb_thread);
1133 return m_p->nbAllowedThread();
1142#ifdef ARCANE_USE_ONETBB
1143 o <<
"OneTBBTaskImplementation"
1144 <<
" version=" << TBB_VERSION_STRING
1145 <<
" interface=" << TBB_INTERFACE_VERSION
1146 <<
" runtime_interface=" << TBB_runtime_interface_version();
1148 o <<
"TBBTaskImplementation"
1149 <<
" version=" << TBB_VERSION_MAJOR <<
"." << TBB_VERSION_MINOR
1150 <<
" interface=" << TBB_INTERFACE_VERSION;
1157void TBBTaskImplementation::
1160 ScopedExecInfo sei(loop_info.runInfo());
1164 Int32 begin = loop_info.beginIndex();
1165 Int32 size = loop_info.size();
1170 Integer nb_allowed_thread = m_p->nbAllowedThread();
1172 max_thread = nb_allowed_thread;
1175 std::cout <<
"TBB: TBBTaskImplementation executeParallelFor begin=" << begin
1176 <<
" size=" << size <<
" max_thread=" << max_thread
1177 <<
" grain_size=" << options.
grainSize()
1178 <<
" nb_allowed=" << nb_allowed_thread <<
'\n';
1181 if (max_thread==1 || max_thread==0){
1187 ParallelLoopOptions true_options(options);
1189 true_options.setMaxThread(max_thread);
1191 ParallelForExecute pfe(
this,true_options,begin,size,f,stat_info);
1193 tbb::task_arena* used_arena =
nullptr;
1194 if (max_thread<nb_allowed_thread && max_thread<m_p->m_sub_arena_list.size())
1197 used_arena = &(m_p->m_main_arena);
1198 used_arena->execute(pfe);
1207 _executeParallelFor(loop_info);
1224 if (run_info.options().has_value())
1225 options = run_info.options().value();
1227 ScopedExecInfo sei(run_info);
1232 std::cout <<
"TBB: TBBTaskImplementation executeMDParallelFor nb_dim=" << RankValue
1233 <<
" nb_element=" << loop_ranges.nbElement()
1234 <<
" grain_size=" << options.
grainSize()
1235 <<
" name=" << run_info.traceInfo().traceInfo()
1236 <<
" has_stat_info=" << (stat_info!=
nullptr)
1242 if (max_thread==1 || max_thread==0){
1243 functor->executeFunctor(loop_ranges);
1251 Integer nb_allowed_thread = m_p->nbAllowedThread();
1253 max_thread = nb_allowed_thread;
1254 tbb::task_arena* used_arena =
nullptr;
1255 if (max_thread<nb_allowed_thread)
1256 used_arena = m_p->m_sub_arena_list[max_thread];
1258 used_arena = &(m_p->m_main_arena);
1261 if constexpr (RankValue==1){
1262 auto range_1d = _toTBBRange(loop_ranges);
1272 used_arena->execute(pfe);
1276 used_arena->execute(pfe);
1309 return m_p->threadTaskInfo(thread_id);
1320#ifdef ARCANE_USE_ONETBB
1321 if (thread_id<0 || thread_id>=m_p->nbAllowedThread())
1326 Int32 task_index = tti->taskIndex();
1336#ifdef ARCANE_USE_ONETBB
1344 tbb::task_group task_group;
1345 task_group.run(taskFunctor());
1354launchAndWait(ConstArrayView<ITask*> tasks)
1356 tbb::task_group task_group;
1363 OneTBBTask* t =
static_cast<OneTBBTask*
>(tasks[i]);
1364 task_group.run(t->taskFunctor());
1368 OneTBBTask* t =
static_cast<OneTBBTask*
>(tasks[i]);
1377_createChildTask(ITaskFunctor* functor)
1379 OneTBBTask* t =
new OneTBBTask(functor);
1394 task::spawn_root_and_wait(*
this);
1408 for(
Integer i=0; i<n-1; ++i ){
1409 TBBTask* t =
static_cast<TBBTask*
>(tasks[i]);
1412 spawn_and_wait_for_all(*
static_cast<TBBTask*
>(tasks[n-1]));
1418ITask* LegacyTBBTask::
1421 TBBTask* t =
new(allocate_child()) TBBTask(functor);
1435 TBBTaskImplementation);
1437ARCANE_DI_REGISTER_PROVIDER(TBBTaskImplementation,
1438 DependencyInjection::ProviderProperty(
"TBBTaskImplementation"),
1439 ARCANE_DI_INTERFACES(ITaskImplementation),
1440 ARCANE_DI_EMPTY_CONSTRUCTOR());
#define ARCANE_THROW(exception_class,...)
Macro pour envoyer une exception avec formatage.
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
#define ARCANE_ALIGNAS_PACKED(value)
Macro pour garantir le compactage et l'alignement d'une classe sur value octets.
Classes, Types et macros pour gérer la concurrence.
#define ARCANE_REGISTER_APPLICATION_FACTORY(aclass, ainterface, aname)
Enregistre un service de fabrique pour la classe aclass.
Allocateur mémoire avec alignement mémoire spécifique.
void resize(Int64 s)
Change le nombre d'éléments du tableau à s.
Intervalle d'itération complexe.
Vue constante d'un tableau de type T.
constexpr Integer size() const noexcept
Nombre d'éléments du tableau.
Tableau 1D de taille fixe.
Classe pour gérer le profiling d'une seule exécution d'une boucle.
Intervalle d'itération pour une boucle.
Informations d'exécution d'une boucle.
Interface d'un fonctor sur un intervalle d'itération multi-dimensionnel de dimension RankValue.
virtual void notifyAllObservers()=0
Notifie tous les observateurs.
Interface d'un fonctor sur un intervalle d'itération.
virtual void executeFunctor(Integer begin, Integer size)=0
Exécute la méthode associée.
Interface d'un fonctor pour une tâche.
virtual void executeFunctor(const TaskContext &tc)=0
Exécute la méthode associée.
Implémentation d'une fabrique de tâches.
Interface d'une tâche concurrente.
Fonctor sur un intervalle d'itération instancié via une fonction lambda.
void launchAndWait() override
Lance la tâche et bloque jusqu'à ce qu'elle se termine.
Exception lorsqu'une fonction n'est pas implémentée.
Caractéristiques d'une boucle 1D multi-thread.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
void mergeUnsetValues(const ParallelLoopOptions &po)
Fusionne les valeurs non modifiées de l'instance par celles de po.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
void setMaxThread(Integer v)
Positionne le nombre maximal de threads autorisé.
@ Static
Utilise un partitionnement statique.
@ Deterministic
Utilise un partitionnement et un ordonnancement statique.
static impl::ForLoopStatInfoList * _threadLocalForLoopInstance()
static bool hasProfiling()
Indique si le profilage est actif.
Structure contenant les informations pour créer un service.
Implémentation déterministe de ParallelFor.
void operator()(tbb::blocked_range< Integer > &range) const
Opérateur pour un thread donné.
Exécuteur pour une boucle multi-dimension.
Exécuteur pour une boucle 1D.
UniqueArray< tbb::task_arena * > m_sub_arena_list
Tableau dont le i-ème élément contient la tbb::task_arena pour i thread.
Classe pour positionner TaskThreadInfo::taskIndex().
Int32 currentTaskThreadIndex() const final
Implémentation de TaskFactory::currentTaskThreadIndex()
void initialize(Int32 nb_thread) override
void executeParallelFor(const ComplexForLoopRanges< 1 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 1 > *functor) final
Exécute une boucle 1D en concurrence.
void executeParallelFor(Int32 begin, Int32 size, IRangeFunctor *f) final
Exécute le fonctor f en concurrence.
ITask * createRootTask(ITaskFunctor *f) override
Crée une tâche racine. L'implémentation doit recopier la valeur de f qui est soit un TaskFunctor,...
void printInfos(std::ostream &o) const final
Affiche les informations sur le runtime utilisé
void executeParallelFor(Int32 begin, Int32 size, const ParallelLoopOptions &options, IRangeFunctor *f) final
Exécute le fonctor f en concurrence.
void executeParallelFor(const ComplexForLoopRanges< 3 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 3 > *functor) final
Exécute une boucle 3D en concurrence.
void executeParallelFor(const ComplexForLoopRanges< 4 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 4 > *functor) final
Exécute une boucle 4D en concurrence.
TaskThreadInfo * currentTaskThreadInfo() const
Instance de TaskThreadInfo associé au thread courant.
void terminate() override
bool isActive() const final
Indique si l'implémentation est active.
void _executeMDParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, IMDRangeFunctor< RankValue > *functor, const ForLoopRunInfo &run_info)
Exécution d'une boucle N-dimensions.
Int32 currentTaskIndex() const final
Implémentation de TaskFactory::currentTaskIndex()
void executeParallelFor(const ComplexForLoopRanges< 2 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 2 > *functor) final
Exécute une boucle 2D en concurrence.
Int32 nbAllowedThread() const final
Nombre de threads utilisés au maximum pour gérer les tâches.
Contexte d'exécution d'une tâche.
static void notifyThreadCreated()
Notifie tous les observateurs de création de thread.
static IObservable * destroyThreadObservable()
Observable appelé lors de la destruction d'un thread pour une tâche.
static const ParallelLoopOptions & defaultParallelLoopOptions()
Valeurs par défaut d'exécution d'une boucle parallèle.
static Integer verboseLevel()
Niveau de verbosité
static void setDefaultParallelLoopOptions(const ParallelLoopOptions &v)
Positionne les valeurs par défaut d'exécution d'une boucle parallèle.
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
Vecteur 1D de données avec sémantique par valeur (style STL).
Classe permettant de récupérer le temps passé entre l'appel au constructeur et au destructeur.
Integer toInteger(Real r)
Convertit un Real en un Integer.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Int32 Integer
Type représentant un entier.
SimpleForLoopRanges< 1 > makeLoopRanges(Int32 n1)
Crée un intervalle d'itération [0,n1[.
std::int32_t Int32
Type entier signé sur 32 bits.