Arcane  v3.16.9.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
TBBTaskImplementation.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* TBBTaskImplementation.cc (C) 2000-2025 */
9/* */
10/* Implémentation des tâches utilisant TBB (Intel Threads Building Blocks). */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/utils/IThreadImplementation.h"
15#include "arcane/utils/NotImplementedException.h"
16#include "arcane/utils/IFunctor.h"
17#include "arcane/utils/CheckedConvert.h"
18#include "arcane/utils/ForLoopRanges.h"
20#include "arcane/utils/IObservable.h"
21#include "arcane/utils/PlatformUtils.h"
22#include "arcane/utils/Profiling.h"
23#include "arcane/utils/MemoryAllocator.h"
24#include "arcane/utils/FixedArray.h"
25#include "arcane/utils/internal/TaskFactoryInternal.h"
26#include "arcane/utils/internal/DependencyInjection.h"
27
28#include "arcane/core/FactoryService.h"
29
30#include <new>
31#include <stack>
32
33// Il faut définir cette macro pour que la classe 'blocked_rangeNd' soit disponible
34
35#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
36
37// la macro 'ARCANE_USE_ONETBB' est définie dans le CMakeLists.txt
38// si on compile avec la version OneTBB version 2021+
39// (https://github.com/oneapi-src/oneTBB.git)
40// A terme ce sera la seule version supportée par Arcane.
41
42#ifdef ARCANE_USE_ONETBB
43
44// Nécessaire pour avoir accès à task_scheduler_handle
45#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
46#include <tbb/tbb.h>
47#include <oneapi/tbb/concurrent_set.h>
48#include <oneapi/tbb/global_control.h>
49
50#else // ARCANE_USE_ONETBB
51
52// NOTE GG: depuis mars 2019, la version 2018.3+ des TBB est obligatoire.
53// C'est celle qui introduit 'blocked_rangeNd.h'
54#include <tbb/tbb.h>
55#if __has_include(<tbb/blocked_rangeNd.h>)
56#include <tbb/blocked_rangeNd.h>
57#endif
58
59/*
60 * Maintenant vérifie que la version est au moins 2018.3.
61 */
62
63// Pour TBB 2018, la valeur de TBB_VERSION_MINOR vaut toujours 0 même
64// pour TBB 2018 Update 3. On ne peut donc pas utiliser TBB_VERSION_MINO
65// pour savoir exactement quelle version de TBB 2018 on utilise.
66// A la place on utilise TBB_INTERFACE_VERSION qui est incrémenté
67// quand l'interface change. Dans notre cas pour la version 2018.3 elle
68// vaut 10003. On indique donc que la version de TBB est trop ancienne
69// si TBB_INTERFACE_VERSION est plus petit que 10003
70#if (TBB_INTERFACE_VERSION < 10003)
71#define ARCANE_OLD_TBB
72#endif
73
74#ifdef ARCANE_OLD_TBB
75# if defined(__GNUG__)
76# define ARCANE_STR_HELPER(x) #x
77# define ARCANE_STR(x) ARCANE_STR_HELPER(x)
78# pragma message "Your version of TBB is : " ARCANE_STR(TBB_VERSION_MAJOR) "." ARCANE_STR(TBB_VERSION_MINOR)
79# endif
80# error "Your version of TBB is too old. TBB 2018.3+ is required. Please disable TBB in configuration using -DCMAKE_DISABLE_FIND_PACKAGE_TBB=TRUE"
81#endif
82
83#include <thread>
84#include <mutex>
85
86#endif // ARCANE_USE_ONETBB
87
88namespace
89{
90#if (TBB_VERSION_MAJOR > 2022) || (TBB_VERSION_MAJOR == 2022 && TBB_VERSION_MINOR > 0) || defined __TBB_blocked_nd_range_H
91
92// La classe "blocked_rangeNd" a été retirée dans la version
93// 2022.0.0 et remplacée par "blocked_nd_range".
94template <typename Value, unsigned int N>
95using blocked_nd_range = tbb::blocked_nd_range<Value, N>;
96
97#else
98
99template <typename Value, unsigned int N>
100using blocked_nd_range = tbb::blocked_rangeNd<Value, N>;
101
102#endif
103} // namespace
104
105/*---------------------------------------------------------------------------*/
106/*---------------------------------------------------------------------------*/
107
108namespace Arcane
109{
110
112
113// TODO: utiliser un pool mémoire spécifique pour gérer les
114// OneTBBTask pour optimiser les new/delete des instances de cette classe.
115// Auparavant avec les anciennes versions de TBB cela était géré avec
116// la méthode 'tbb::task::allocate_child()'.
117
118/*---------------------------------------------------------------------------*/
119/*---------------------------------------------------------------------------*/
120
121namespace
122{
123
124// Positif si on récupère les statistiques d'exécution
125bool isStatActive()
126{
128}
129
134class ScopedExecInfo
135{
136 public:
137
138 explicit ScopedExecInfo(const ForLoopRunInfo& run_info)
139 : m_run_info(run_info)
140 {
141 // Si run_info.execInfo() n'est pas nul, on l'utilise.
142 // Cela signifie que c'est l'appelant de qui va gérer les statistiques
143 // d'exécution. Sinon, on utilise \a m_stat_info si les statistiques
144 // d'exécution sont demandées.
145 ForLoopOneExecStat* ptr = run_info.execStat();
146 if (ptr){
147 m_stat_info_ptr = ptr;
148 m_use_own_run_info = false;
149 }
150 else
151 m_stat_info_ptr = isStatActive() ? &m_stat_info : nullptr;
152 }
153 ~ScopedExecInfo()
154 {
155#ifdef PRINT_STAT_INFO
156 if (m_stat_info_ptr){
157 bool is_valid = m_run_info.traceInfo().isValid();
158 if (!is_valid)
159 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
160 << " stack=" << platform::getStackTrace()
161 << "\n";
162 else
163 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
164 << " trace_name=" << m_run_info.traceInfo().traceInfo().name() << "\n";
165 }
166#endif
167 if (m_stat_info_ptr && m_use_own_run_info){
168 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*m_stat_info_ptr,m_run_info.traceInfo());
169 }
170 }
171
172 public:
173
174 ForLoopOneExecStat* statInfo() const { return m_stat_info_ptr; }
175 bool isOwn() const { return m_use_own_run_info; }
176 private:
177
178 ForLoopOneExecStat m_stat_info;
179 ForLoopOneExecStat* m_stat_info_ptr = nullptr;
180 ForLoopRunInfo m_run_info;
182 bool m_use_own_run_info = true;
183};
184
185/*---------------------------------------------------------------------------*/
186/*---------------------------------------------------------------------------*/
187
188inline int _currentTaskTreadIndex()
189{
190 // NOTE: Avec OneTBB 2021, la valeur n'est plus '0' si on appelle cette méthode
191 // depuis un thread en dehors d'un task_arena. Avec la version 2021,
192 // la valeur est 65535.
193 // NOTE: Il semble que cela soit un bug de la 2021.3.
194 return tbb::this_task_arena::current_thread_index();
195}
196
197inline blocked_nd_range<Int32, 1>
198_toTBBRange(const ComplexForLoopRanges<1>& r)
199{
200 return {{r.lowerBound<0>(), r.upperBound<0>()}};
201}
202
203inline blocked_nd_range<Int32, 2>
204_toTBBRange(const ComplexForLoopRanges<2>& r)
205{
206 return {{r.lowerBound<0>(), r.upperBound<0>()},
207 {r.lowerBound<1>(), r.upperBound<1>()}};
208
209}
210
211inline blocked_nd_range<Int32, 3>
212_toTBBRange(const ComplexForLoopRanges<3>& r)
213{
214 return {{r.lowerBound<0>(), r.upperBound<0>()},
215 {r.lowerBound<1>(), r.upperBound<1>()},
216 {r.lowerBound<2>(), r.upperBound<2>()}};
217}
218
219inline blocked_nd_range<Int32, 4>
220_toTBBRange(const ComplexForLoopRanges<4>& r)
221{
222 return {{r.lowerBound<0>(), r.upperBound<0>()},
223 {r.lowerBound<1>(), r.upperBound<1>()},
224 {r.lowerBound<2>(), r.upperBound<2>()},
225 {r.lowerBound<3>(), r.upperBound<3>()}};
226}
227
228/*---------------------------------------------------------------------------*/
229/*---------------------------------------------------------------------------*/
230
231inline blocked_nd_range<Int32, 2>
232_toTBBRangeWithGrain(const blocked_nd_range<Int32, 2>& r, FixedArray<size_t, 2> grain_sizes)
233{
234 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
235 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]}};
236}
237
238inline blocked_nd_range<Int32, 3>
239_toTBBRangeWithGrain(const blocked_nd_range<Int32, 3>& r, FixedArray<size_t, 3> grain_sizes)
240{
241 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
242 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]},
243 {r.dim(2).begin(), r.dim(2).end(), grain_sizes[2]}};
244}
245
246inline blocked_nd_range<Int32, 4>
247_toTBBRangeWithGrain(const blocked_nd_range<Int32, 4>& r, FixedArray<size_t, 4> grain_sizes)
248{
249 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
250 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]},
251 {r.dim(2).begin(), r.dim(2).end(), grain_sizes[2]},
252 {r.dim(3).begin(), r.dim(3).end(), grain_sizes[3]}};
253}
254
255/*---------------------------------------------------------------------------*/
256/*---------------------------------------------------------------------------*/
257
259_fromTBBRange(const blocked_nd_range<Int32, 2>& r)
260{
261 using BoundsType = ArrayBounds<MDDim2>;
262 using ArrayExtentType = typename BoundsType::ArrayExtentType;
263
264 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin()));
265 auto s0 = static_cast<Int32>(r.dim(0).size());
266 auto s1 = static_cast<Int32>(r.dim(1).size());
267 BoundsType sizes(ArrayExtentType(s0,s1));
268 return { lower_bounds, sizes };
269}
270
272_fromTBBRange(const blocked_nd_range<Int32, 3>& r)
273{
274 using BoundsType = ArrayBounds<MDDim3>;
275 using ArrayExtentType = typename BoundsType::ArrayExtentType;
276
277 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin(),r.dim(2).begin()));
278 auto s0 = static_cast<Int32>(r.dim(0).size());
279 auto s1 = static_cast<Int32>(r.dim(1).size());
280 auto s2 = static_cast<Int32>(r.dim(2).size());
281 BoundsType sizes(ArrayExtentType(s0,s1,s2));
282 return { lower_bounds, sizes };
283}
284
286_fromTBBRange(const blocked_nd_range<Int32, 4>& r)
287{
288 using BoundsType = ArrayBounds<MDDim4>;
289 using ArrayExtentType = typename BoundsType::ArrayExtentType;
290
291 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin(),r.dim(2).begin(),r.dim(3).begin()));
292 auto s0 = static_cast<Int32>(r.dim(0).size());
293 auto s1 = static_cast<Int32>(r.dim(1).size());
294 auto s2 = static_cast<Int32>(r.dim(2).size());
295 auto s3 = static_cast<Int32>(r.dim(3).size());
296 BoundsType sizes(ArrayExtentType(s0,s1,s2,s3));
297 return { lower_bounds, sizes };
298}
299
300}
301
302/*---------------------------------------------------------------------------*/
303/*---------------------------------------------------------------------------*/
304
305#ifdef ARCANE_USE_ONETBB
306
307/*---------------------------------------------------------------------------*/
308/*---------------------------------------------------------------------------*/
309
310class OneTBBTaskFunctor
311{
312 public:
313 OneTBBTaskFunctor(ITaskFunctor* functor,ITask* task)
314 : m_functor(functor), m_task(task) {}
315 public:
316 void operator()() const
317 {
318 if (m_functor){
319 ITaskFunctor* tf = m_functor;
320 m_functor = 0;
321 TaskContext task_context(m_task);
322 //cerr << "FUNC=" << typeid(*tf).name();
323 tf->executeFunctor(task_context);
324 }
325 }
326 public:
327 mutable ITaskFunctor* m_functor;
328 ITask* m_task;
329};
330
331/*---------------------------------------------------------------------------*/
332/*---------------------------------------------------------------------------*/
333
334class OneTBBTask
335: public ITask
336{
337 public:
338 static const int FUNCTOR_CLASS_SIZE = 32;
339 public:
340 OneTBBTask(ITaskFunctor* f)
341 : m_functor(f)
342 {
343 m_functor = f->clone(functor_buf,FUNCTOR_CLASS_SIZE);
344 }
345 public:
346 OneTBBTaskFunctor taskFunctor() { return OneTBBTaskFunctor(m_functor,this); }
347 void launchAndWait() override;
348 void launchAndWait(ConstArrayView<ITask*> tasks) override;
349 protected:
350 virtual ITask* _createChildTask(ITaskFunctor* functor) override;
351 public:
352 ITaskFunctor* m_functor;
353 char functor_buf[FUNCTOR_CLASS_SIZE];
354};
355using TBBTask = OneTBBTask;
356
357/*---------------------------------------------------------------------------*/
358/*---------------------------------------------------------------------------*/
359
360#else // ARCANE_USE_ONETBB
361
362/*---------------------------------------------------------------------------*/
363/*---------------------------------------------------------------------------*/
364
365class LegacyTBBTask
366: public tbb::task
367, public ITask
368{
369 public:
370 static const int FUNCTOR_CLASS_SIZE = 32;
371 public:
372 LegacyTBBTask(ITaskFunctor* f)
373 : m_functor(f)
374 {
375 m_functor = f->clone(functor_buf,FUNCTOR_CLASS_SIZE);
376 }
377 public:
378 tbb::task* execute() override
379 {
380 if (m_functor){
381 ITaskFunctor* tf = m_functor;
382 m_functor = 0;
383 TaskContext task_context(this);
384 //cerr << "FUNC=" << typeid(*tf).name();
385 tf->executeFunctor(task_context);
386 }
387 return nullptr;
388 }
389 void launchAndWait() override;
390 void launchAndWait(ConstArrayView<ITask*> tasks) override;
391 protected:
392 ITask* _createChildTask(ITaskFunctor* functor) final;
393 public:
394 ITaskFunctor* m_functor;
395 char functor_buf[FUNCTOR_CLASS_SIZE];
396};
397using TBBTask = LegacyTBBTask;
398
399/*---------------------------------------------------------------------------*/
400/*---------------------------------------------------------------------------*/
401
402#endif // ARCANE_USE_ONETBB
403
404/*---------------------------------------------------------------------------*/
405/*---------------------------------------------------------------------------*/
406/*
407 * Ne pas utiliser l'observer locale au task_arena.
408 * Utiliser l'observer global au scheduler.
409 * Pour l'id, utiliser tbb::this_task_arena::current_thread_index().
410 */
411class TBBTaskImplementation
412: public ITaskImplementation
413{
414 class Impl;
415 class ParallelForExecute;
416 template<int RankValue>
418
419 public:
420 // Pour des raisons de performance, s'aligne sur une ligne de cache
421 // et utilise un padding.
422 class ARCANE_ALIGNAS_PACKED(64) TaskThreadInfo
423 {
424 public:
425 TaskThreadInfo() : m_task_index(-1){}
426 public:
427 void setTaskIndex(Integer v) { m_task_index = v; }
428 Integer taskIndex() const { return m_task_index; }
429 private:
430 Integer m_task_index;
431 };
432
439 class TaskInfoLockGuard
440 {
441 public:
442 TaskInfoLockGuard(TaskThreadInfo* tti,Integer task_index)
443 : m_tti(tti), m_old_task_index(-1)
444 {
445 if (tti){
446 m_old_task_index = tti->taskIndex();
447 tti->setTaskIndex(task_index);
448 }
449 }
450 ~TaskInfoLockGuard()
451 {
452 if (m_tti)
453 m_tti->setTaskIndex(m_old_task_index);
454 }
455 private:
456 TaskThreadInfo* m_tti;
457 Integer m_old_task_index;
458 };
459
460 public:
461 TBBTaskImplementation(const ServiceBuildInfo& sbi)
462 {
463 }
464 TBBTaskImplementation() = default;
465 ~TBBTaskImplementation() override;
466 public:
467 void build() {}
468 void initialize(Int32 nb_thread) override;
469 void terminate() override;
470
472 {
473#ifdef ARCANE_USE_ONETBB
474 OneTBBTask* t = new OneTBBTask(f);
475#else
476 LegacyTBBTask* t = new(tbb::task::allocate_root()) LegacyTBBTask(f);
477#endif
478 return t;
479 }
480
481 void executeParallelFor(Int32 begin,Int32 size,const ParallelLoopOptions& options,IRangeFunctor* f) final;
482 void executeParallelFor(Int32 begin,Int32 size,Integer grain_size,IRangeFunctor* f) final;
487 void executeParallelFor(const ParallelFor1DLoopInfo& loop_info) override;
488
490 const ForLoopRunInfo& run_info,
491 IMDRangeFunctor<1>* functor) final
492 {
493 _executeMDParallelFor<1>(loop_ranges,functor,run_info);
494 }
496 const ForLoopRunInfo& run_info,
497 IMDRangeFunctor<2>* functor) final
498 {
499 _executeMDParallelFor<2>(loop_ranges,functor,run_info);
500 }
502 const ForLoopRunInfo& run_info,
503 IMDRangeFunctor<3>* functor) final
504 {
505 _executeMDParallelFor<3>(loop_ranges,functor,run_info);
506 }
508 const ForLoopRunInfo& run_info,
509 IMDRangeFunctor<4>* functor) final
510 {
511 _executeMDParallelFor<4>(loop_ranges,functor,run_info);
512 }
513
514 bool isActive() const final
515 {
516 return m_is_active;
517 }
518
519 Int32 nbAllowedThread() const final;
520
522 {
523 return (nbAllowedThread() <= 1 ) ? 0 : _currentTaskTreadIndex();
524 }
525
526 Int32 currentTaskIndex() const final;
527
528 void printInfos(std::ostream& o) const final;
529
530 public:
531
538 TaskThreadInfo* currentTaskThreadInfo() const;
539
540 private:
541
542 bool m_is_active = false;
543 Impl* m_p = nullptr;
544
545 private:
546
547 template<int RankValue> void
548 _executeMDParallelFor(const ComplexForLoopRanges<RankValue>& loop_ranges,
549 IMDRangeFunctor<RankValue>* functor,
550 const ForLoopRunInfo& run_info);
551 void _executeParallelFor(const ParallelFor1DLoopInfo& loop_info);
552};
553
554/*---------------------------------------------------------------------------*/
555/*---------------------------------------------------------------------------*/
556
557class TBBTaskImplementation::Impl
558{
559 class TaskObserver
560 : public tbb::task_scheduler_observer
561 {
562 public:
563 TaskObserver(TBBTaskImplementation::Impl* p)
564 :
565#ifdef ARCANE_USE_ONETBB
566 tbb::task_scheduler_observer(p->m_main_arena),
567#endif
568 m_p(p)
569 {
570 }
571 void on_scheduler_entry(bool is_worker) override
572 {
573 m_p->notifyThreadCreated(is_worker);
574 }
575 void on_scheduler_exit(bool is_worker) override
576 {
577 m_p->notifyThreadDestroyed(is_worker);
578 }
580 };
581
582 public:
583 Impl() :
584 m_task_observer(this),
585 m_thread_task_infos(AlignedMemoryAllocator::CacheLine())
586 {
587#ifdef ARCANE_USE_ONETBB
588 m_nb_allowed_thread = tbb::info::default_concurrency();
589#else
590 m_nb_allowed_thread = tbb::task_scheduler_init::default_num_threads();
591#endif
592 _init();
593 }
594 Impl(Int32 nb_thread)
595 :
596#ifndef ARCANE_USE_ONETBB
597 m_scheduler_init(nb_thread),
598#endif
599 m_main_arena(nb_thread),
600 m_task_observer(this),
601 m_thread_task_infos(AlignedMemoryAllocator::CacheLine())
602 {
603 m_nb_allowed_thread = nb_thread;
604 _init();
605 }
606 public:
607 Int32 nbAllowedThread() const { return m_nb_allowed_thread; }
608 TaskThreadInfo* threadTaskInfo(Integer index) { return &m_thread_task_infos[index]; }
609 private:
610 Int32 m_nb_allowed_thread;
611
612 public:
613 void terminate()
614 {
615 for( auto x : m_sub_arena_list ){
616 if (x)
617 x->terminate();
618 delete x;
619 }
620 m_sub_arena_list.clear();
621 m_main_arena.terminate();
622#ifdef ARCANE_USE_ONETBB
623 m_task_observer.observe(false);
624 oneapi::tbb::finalize(m_task_scheduler_handle);
625#else
626 m_scheduler_init.terminate();
627 m_task_observer.observe(false);
628#endif
629 }
630 public:
631 void notifyThreadCreated(bool is_worker)
632 {
633 std::thread::id my_thread_id = std::this_thread::get_id();
634
635#ifdef ARCANE_USE_ONETBB
636 // Avec OneTBB, cette méthode est appelée à chaque fois qu'on rentre
637 // dans notre 'task_arena'. Comme il ne faut appeler qu'une seule
638 // fois la méthode de notification on utilise un ensemble pour
639 // conserver la liste des threads déjà créés.
640 // NOTE: On ne peut pas utiliser cette méthode avec la version TBB historique
641 // (2018) car cette méthode 'contains' n'existe pas
642 if (m_constructed_thread_map.contains(my_thread_id))
643 return;
644 m_constructed_thread_map.insert(my_thread_id);
645#endif
646
647 {
649 std::ostringstream ostr;
650 ostr << "TBB: CREATE THREAD"
651 << " nb_allowed=" << m_nb_allowed_thread
652#ifdef ARCANE_USE_ONETBB
653 << " tbb_default_allowed=" << tbb::info::default_concurrency()
654#else
655 << " tbb_default_allowed=" << tbb::task_scheduler_init::default_num_threads()
656#endif
657 << " id=" << my_thread_id
658 << " arena_id=" << _currentTaskTreadIndex()
659 << " is_worker=" << is_worker
660 << "\n";
661 std::cout << ostr.str();
662 }
664 }
665 }
666
667 void notifyThreadDestroyed([[maybe_unused]] bool is_worker)
668 {
669#ifdef ARCANE_USE_ONETBB
670 // Avec OneTBB, cette méthode est appelée à chaque fois qu'on sort
671 // de l'arène principale. Du coup elle ne correspond pas vraiment à une
672 // destruction de thread. On ne fait donc rien pour cette notification
673 // TODO: Regarder comment on peut être notifié de la destruction effective
674 // du thread.
675#else
676 // Il faut toujours un verrou car on n'est pas certain que
677 // les méthodes appelées par l'observable soient thread-safe
678 // (et aussi TaskFactory::createThreadObservable() ne l'est pas)
679 std::scoped_lock sl(m_thread_created_mutex);
681 std::cout << "TBB: DESTROY THREAD"
682 << " id=" << std::this_thread::get_id()
683 << " arena_id=" << _currentTaskTreadIndex()
684 << " is_worker=" << is_worker
685 << '\n';
686 }
687 // TODO: jamais utilisé. Sera supprimé au passage à OneTBB.
689#endif
690 }
691 private:
692#ifdef ARCANE_USE_ONETBB
693#if TBB_VERSION_MAJOR>2021 || (TBB_VERSION_MAJOR==2021 && TBB_VERSION_MINOR>5)
694 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = oneapi::tbb::attach();
695#else
696 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = tbb::task_scheduler_handle::get();
697#endif
698#else
699 tbb::task_scheduler_init m_scheduler_init;
700#endif
701 public:
702 tbb::task_arena m_main_arena;
705 private:
706 TaskObserver m_task_observer;
707 std::mutex m_thread_created_mutex;
708 UniqueArray<TaskThreadInfo> m_thread_task_infos;
709#ifdef ARCANE_USE_ONETBB
710 tbb::concurrent_set<std::thread::id> m_constructed_thread_map;
711#endif
712 void _init()
713 {
715 std::cout << "TBB: TBBTaskImplementationInit nb_allowed_thread=" << m_nb_allowed_thread
716 << " id=" << std::this_thread::get_id()
717 << " version=" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR
718 << "\n";
719 }
720 m_thread_task_infos.resize(m_nb_allowed_thread);
721 m_task_observer.observe(true);
722 Integer max_arena_size = m_nb_allowed_thread;
723 // Limite artificiellement le nombre de tbb::task_arena
724 // pour éviter d'avoir trop d'objets alloués.
725 if (max_arena_size>512)
726 max_arena_size = 512;
727 if (max_arena_size<2)
728 max_arena_size = 2;
729 m_sub_arena_list.resize(max_arena_size);
730 m_sub_arena_list[0] = m_sub_arena_list[1] = nullptr;
731 for( Integer i=2; i<max_arena_size; ++i )
732 m_sub_arena_list[i] = new tbb::task_arena(i);
733 }
734};
735
736/*---------------------------------------------------------------------------*/
737/*---------------------------------------------------------------------------*/
741class TBBParallelFor
742{
743 public:
744 TBBParallelFor(IRangeFunctor* f,Int32 nb_allowed_thread,ForLoopOneExecStat* stat_info)
745 : m_functor(f), m_stat_info(stat_info), m_nb_allowed_thread(nb_allowed_thread){}
746 public:
747
748 void operator()(tbb::blocked_range<Integer>& range) const
749 {
750#ifdef ARCANE_CHECK
752 std::ostringstream o;
753 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
754 << " id=" << std::this_thread::get_id()
755 << " max_allowed=" << m_nb_allowed_thread
756 << " range_begin=" << range.begin() << " range_size=" << range.size()
757 << "\n";
758 std::cout << o.str();
759 std::cout.flush();
760 }
761
762 int tbb_index = _currentTaskTreadIndex();
763 if (tbb_index<0 || tbb_index>=m_nb_allowed_thread)
764 ARCANE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
765 tbb_index,m_nb_allowed_thread);
766#endif
767
768 if (m_stat_info)
769 m_stat_info->incrementNbChunk();
770 m_functor->executeFunctor(range.begin(),CheckedConvert::toInteger(range.size()));
771 }
772
773 private:
774 IRangeFunctor* m_functor;
775 ForLoopOneExecStat* m_stat_info = nullptr;
776 Int32 m_nb_allowed_thread;
777};
778
779/*---------------------------------------------------------------------------*/
780/*---------------------------------------------------------------------------*/
784template<int RankValue>
785class TBBMDParallelFor
786{
787 public:
788
789 TBBMDParallelFor(IMDRangeFunctor<RankValue>* f,Int32 nb_allowed_thread,ForLoopOneExecStat* stat_info)
790 : m_functor(f), m_stat_info(stat_info), m_nb_allowed_thread(nb_allowed_thread){}
791
792 public:
793
794 void operator()(blocked_nd_range<Int32, RankValue>& range) const
795 {
796#ifdef ARCANE_CHECK
798 std::ostringstream o;
799 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
800 << " id=" << std::this_thread::get_id()
801 << " max_allowed=" << m_nb_allowed_thread
802 << " MDFor ";
803 for( Int32 i=0; i<RankValue; ++i ){
804 Int32 r0 = static_cast<Int32>(range.dim(i).begin());
805 Int32 r1 = static_cast<Int32>(range.dim(i).size());
806 o << " range" << i << " (begin=" << r0 << " size=" << r1 << ")";
807 }
808 o << "\n";
809 std::cout << o.str();
810 std::cout.flush();
811 }
812
813 int tbb_index = _currentTaskTreadIndex();
814 if (tbb_index<0 || tbb_index>=m_nb_allowed_thread)
815 ARCANE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
816 tbb_index,m_nb_allowed_thread);
817#endif
818
819 if (m_stat_info)
820 m_stat_info->incrementNbChunk();
821 m_functor->executeFunctor(_fromTBBRange(range));
822 }
823
824 private:
825
826 IMDRangeFunctor<RankValue>* m_functor = nullptr;
827 ForLoopOneExecStat* m_stat_info = nullptr;
828 Int32 m_nb_allowed_thread;
829};
830
831/*---------------------------------------------------------------------------*/
832/*---------------------------------------------------------------------------*/
850class TBBDeterministicParallelFor
851{
852 public:
853 TBBDeterministicParallelFor(TBBTaskImplementation* impl,const TBBParallelFor& tbb_for,
854 Integer begin_index,Integer size,Integer grain_size,Integer nb_thread)
855 : m_impl(impl), m_tbb_for(tbb_for), m_nb_thread(nb_thread), m_begin_index(begin_index), m_size(size),
856 m_grain_size(grain_size), m_nb_block(0), m_block_size(0), m_nb_block_per_thread(0)
857 {
858 if (m_nb_thread<1)
859 m_nb_thread = 1;
860
861 if (m_grain_size>0){
862 m_block_size = m_grain_size;
863 if (m_block_size>0){
864 m_nb_block = m_size / m_block_size;
865 if ((m_size % m_block_size)!=0)
866 ++m_nb_block;
867 }
868 else
869 m_nb_block = 1;
870 m_nb_block_per_thread = m_nb_block / m_nb_thread;
871 if ((m_nb_block % m_nb_thread) != 0)
872 ++m_nb_block_per_thread;
873 }
874 else{
875 if (m_nb_block<1)
876 m_nb_block = m_nb_thread;
877 m_block_size = m_size / m_nb_block;
878 m_nb_block_per_thread = 1;
879 }
881 std::cout << "TBBDeterministicParallelFor: BEGIN=" << m_begin_index << " size=" << m_size
882 << " grain_size=" << m_grain_size
883 << " nb_block=" << m_nb_block << " nb_thread=" << m_nb_thread
884 << " nb_block_per_thread=" << m_nb_block_per_thread
885 << " block_size=" << m_block_size
886 << " block_size*nb_block=" << m_block_size*m_nb_block << '\n';
887 }
888 }
889 public:
890
897 void operator()(tbb::blocked_range<Integer>& range) const
898 {
899 Integer nb_iter = static_cast<Integer>(range.size());
900 for( Integer i=0; i<nb_iter; ++i ){
901 Integer task_id = range.begin() + i;
902 for ( Integer k=0, kn=m_nb_block_per_thread; k<kn; ++k ){
903 Integer block_id = task_id + (k * m_nb_thread);
904 if (block_id<m_nb_block)
905 _doBlock(task_id,block_id);
906 }
907 }
908 }
909
910 void _doBlock(Integer task_id,Integer block_id) const
911 {
912 TBBTaskImplementation::TaskInfoLockGuard guard(m_impl->currentTaskThreadInfo(),task_id);
913
914 Integer iter_begin = block_id * m_block_size;
915 Integer iter_size = m_block_size;
916 if ((block_id+1)==m_nb_block){
917 // Pour le dernier bloc, la taille est le nombre d'éléments restants
918 iter_size = m_size - iter_begin;
919 }
920 iter_begin += m_begin_index;
921#ifdef ARCANE_CHECK
922 if (TaskFactory::verboseLevel()>=3){
923 std::ostringstream o;
924 o << "TBB: DoBlock: BLOCK task_id=" << task_id << " block_id=" << block_id
925 << " iter_begin=" << iter_begin << " iter_size=" << iter_size << '\n';
926 std::cout << o.str();
927 std::cout.flush();
928 }
929#endif
930 if (iter_size>0){
931 auto r = tbb::blocked_range<int>(iter_begin,iter_begin + iter_size);
932 m_tbb_for(r);
933 }
934 }
935
936 private:
937
938 TBBTaskImplementation* m_impl;
939 const TBBParallelFor& m_tbb_for;
940 Integer m_nb_thread;
941 Integer m_begin_index;
942 Integer m_size;
943 Integer m_grain_size;
944 Integer m_nb_block;
945 Integer m_block_size;
946 Integer m_nb_block_per_thread;
947};
948
949/*---------------------------------------------------------------------------*/
950/*---------------------------------------------------------------------------*/
951
953{
954 public:
955
956 ParallelForExecute(TBBTaskImplementation* impl, const ParallelLoopOptions& options,
957 Integer begin,Integer size,IRangeFunctor* f,ForLoopOneExecStat* stat_info)
958 : m_impl(impl), m_begin(begin), m_size(size), m_functor(f), m_options(options), m_stat_info(stat_info){}
959
960 public:
961
962 void operator()() const
963 {
964 Integer nb_thread = m_options.maxThread();
965 TBBParallelFor pf(m_functor,nb_thread,m_stat_info);
966 Integer gsize = m_options.grainSize();
967 tbb::blocked_range<Integer> range(m_begin,m_begin+m_size);
969 std::cout << "TBB: TBBTaskImplementationInit ParallelForExecute begin=" << m_begin
970 << " size=" << m_size << " gsize=" << gsize
971 << " partitioner=" << (int)m_options.partitioner()
972 << " nb_thread=" << nb_thread
973 << " has_stat_info=" << (m_stat_info!=nullptr)
974 << '\n';
975
976 if (gsize>0)
977 range = tbb::blocked_range<Integer>(m_begin,m_begin+m_size,gsize);
978
979 if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Static){
980 tbb::parallel_for(range,pf,tbb::static_partitioner());
981 }
982 else if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Deterministic){
983 tbb::blocked_range<Integer> range2(0,nb_thread,1);
984 TBBDeterministicParallelFor dpf(m_impl,pf,m_begin,m_size,gsize,nb_thread);
985 tbb::parallel_for(range2,dpf);
986 }
987 else
988 tbb::parallel_for(range,pf);
989 }
990 private:
991 TBBTaskImplementation* m_impl = nullptr;
992 Integer m_begin;
993 Integer m_size;
994 IRangeFunctor* m_functor = nullptr;
995 ParallelLoopOptions m_options;
996 ForLoopOneExecStat* m_stat_info = nullptr;
997};
998
999/*---------------------------------------------------------------------------*/
1000/*---------------------------------------------------------------------------*/
1001
1002template<int RankValue>
1004{
1005 public:
1006
1007 MDParallelForExecute(TBBTaskImplementation* impl,
1008 const ParallelLoopOptions& options,
1010 IMDRangeFunctor<RankValue>* f,[[maybe_unused]] ForLoopOneExecStat* stat_info)
1011 : m_impl(impl)
1012 , m_tbb_range(_toTBBRange(range))
1013 , m_functor(f)
1014 , m_options(options)
1015 , m_stat_info(stat_info)
1016 {
1017 // On ne peut pas modifier les valeurs d'une instance de tbb::blocked_rangeNd.
1018 // Il faut donc en reconstruire une complètement.
1019 FixedArray<size_t,RankValue> all_grain_sizes;
1020 Int32 gsize = m_options.grainSize();
1021 if (gsize>0){
1022 // Si la taille du grain est différent zéro, il faut la répartir
1023 // sur l'ensemble des dimensions. On commence par la dernière.
1024 // TODO: regarder pourquoi dans certains cas les performances sont
1025 // inférieures à celles qu'on obtient en utilisant un partitionneur
1026 // statique.
1027 constexpr bool is_verbose = false;
1028 std::array<Int32,RankValue> range_extents = range.extents().asStdArray();
1029 double ratio = static_cast<double>(gsize) / static_cast<double>(range.nbElement());
1030 if constexpr (is_verbose){
1031 std::cout << "GSIZE=" << gsize << " rank=" << RankValue << " ratio=" << ratio;
1032 for(Int32 i=0; i<RankValue; ++i )
1033 std::cout << " range" << i << "=" << range_extents[i];
1034 std::cout << "\n";
1035 }
1036 Int32 index = RankValue - 1;
1037 Int32 remaining_grain = gsize;
1038 for( ; index>=0; --index ){
1039 Int32 current = range_extents[index];
1040 if constexpr (is_verbose)
1041 std::cout << "Check index=" << index << " remaining=" << remaining_grain << " current=" << current << "\n";
1042 if (remaining_grain>current){
1043 all_grain_sizes[index] = current;
1044 remaining_grain /= current;
1045 }
1046 else{
1047 all_grain_sizes[index] = remaining_grain;
1048 break;
1049 }
1050 }
1051 for( Int32 i=0; i<index; ++i )
1052 all_grain_sizes[i] = 1;
1053 if constexpr (is_verbose){
1054 for(Int32 i=0; i<RankValue; ++i )
1055 std::cout << " grain" << i << "=" << all_grain_sizes[i];
1056 std::cout << "\n";
1057 }
1058 m_tbb_range = _toTBBRangeWithGrain(m_tbb_range,all_grain_sizes);
1059 }
1060 }
1061
1062 public:
1063
1064 void operator()() const
1065 {
1066 Integer nb_thread = m_options.maxThread();
1067 TBBMDParallelFor<RankValue> pf(m_functor,nb_thread,m_stat_info);
1068
1069 if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Static){
1070 tbb::parallel_for(m_tbb_range,pf,tbb::static_partitioner());
1071 }
1072 else if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Deterministic){
1073 // TODO: implémenter le mode déterministe
1074 ARCANE_THROW(NotImplementedException,"ParallelLoopOptions::Partitioner::Deterministic for multi-dimensionnal loops");
1075 //tbb::blocked_range<Integer> range2(0,nb_thread,1);
1076 //TBBDeterministicParallelFor dpf(m_impl,pf,m_begin,m_size,gsize,nb_thread);
1077 //tbb::parallel_for(range2,dpf);
1078 }
1079 else{
1080 tbb::parallel_for(m_tbb_range,pf);
1081 }
1082 }
1083 private:
1084 TBBTaskImplementation* m_impl = nullptr;
1085 blocked_nd_range<Int32, RankValue> m_tbb_range;
1086 IMDRangeFunctor<RankValue>* m_functor = nullptr;
1087 ParallelLoopOptions m_options;
1088 ForLoopOneExecStat* m_stat_info = nullptr;
1089};
1090
1091/*---------------------------------------------------------------------------*/
1092/*---------------------------------------------------------------------------*/
1093
1094TBBTaskImplementation::
1095~TBBTaskImplementation()
1096{
1097 delete m_p;
1098}
1099
1100/*---------------------------------------------------------------------------*/
1101/*---------------------------------------------------------------------------*/
1102
1104initialize(Int32 nb_thread)
1105{
1106 if (nb_thread<0)
1107 nb_thread = 0;
1108 m_is_active = (nb_thread!=1);
1109 if (nb_thread!=0)
1110 m_p = new Impl(nb_thread);
1111 else
1112 m_p = new Impl();
1116}
1117
1118/*---------------------------------------------------------------------------*/
1119/*---------------------------------------------------------------------------*/
1120
1122terminate()
1123{
1124 m_p->terminate();
1125}
1126
1127/*---------------------------------------------------------------------------*/
1128/*---------------------------------------------------------------------------*/
1129
1131nbAllowedThread() const
1132{
1133 return m_p->nbAllowedThread();
1134}
1135
1136/*---------------------------------------------------------------------------*/
1137/*---------------------------------------------------------------------------*/
1138
1140printInfos(std::ostream& o) const
1141{
1142#ifdef ARCANE_USE_ONETBB
1143 o << "OneTBBTaskImplementation"
1144 << " version=" << TBB_VERSION_STRING
1145 << " interface=" << TBB_INTERFACE_VERSION
1146 << " runtime_interface=" << TBB_runtime_interface_version();
1147#else
1148 o << "TBBTaskImplementation"
1149 << " version=" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR
1150 << " interface=" << TBB_INTERFACE_VERSION;
1151#endif
1152}
1153
1154/*---------------------------------------------------------------------------*/
1155/*---------------------------------------------------------------------------*/
1156
1157void TBBTaskImplementation::
1158_executeParallelFor(const ParallelFor1DLoopInfo& loop_info)
1159{
1160 ScopedExecInfo sei(loop_info.runInfo());
1161 ForLoopOneExecStat* stat_info = sei.statInfo();
1162 impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1163
1164 Int32 begin = loop_info.beginIndex();
1165 Int32 size = loop_info.size();
1166 ParallelLoopOptions options = loop_info.runInfo().options().value_or(TaskFactory::defaultParallelLoopOptions());
1167 IRangeFunctor* f = loop_info.functor();
1168
1169 Integer max_thread = options.maxThread();
1170 Integer nb_allowed_thread = m_p->nbAllowedThread();
1171 if (max_thread<0)
1172 max_thread = nb_allowed_thread;
1173
1175 std::cout << "TBB: TBBTaskImplementation executeParallelFor begin=" << begin
1176 << " size=" << size << " max_thread=" << max_thread
1177 << " grain_size=" << options.grainSize()
1178 << " nb_allowed=" << nb_allowed_thread << '\n';
1179
1180 // En exécution séquentielle, appelle directement la méthode \a f.
1181 if (max_thread==1 || max_thread==0){
1182 f->executeFunctor(begin,size);
1183 return;
1184 }
1185
1186 // Remplace les valeurs non initialisées de \a options par celles de \a m_default_loop_options
1187 ParallelLoopOptions true_options(options);
1188 true_options.mergeUnsetValues(TaskFactory::defaultParallelLoopOptions());
1189 true_options.setMaxThread(max_thread);
1190
1191 ParallelForExecute pfe(this,true_options,begin,size,f,stat_info);
1192
1193 tbb::task_arena* used_arena = nullptr;
1194 if (max_thread<nb_allowed_thread && max_thread<m_p->m_sub_arena_list.size())
1195 used_arena = m_p->m_sub_arena_list[max_thread];
1196 if (!used_arena)
1197 used_arena = &(m_p->m_main_arena);
1198 used_arena->execute(pfe);
1199}
1200
1201/*---------------------------------------------------------------------------*/
1202/*---------------------------------------------------------------------------*/
1203
1206{
1207 _executeParallelFor(loop_info);
1208}
1209
1210/*---------------------------------------------------------------------------*/
1211/*---------------------------------------------------------------------------*/
1218template<int RankValue> void TBBTaskImplementation::
1221 const ForLoopRunInfo& run_info)
1222{
1223 ParallelLoopOptions options;
1224 if (run_info.options().has_value())
1225 options = run_info.options().value();
1226
1227 ScopedExecInfo sei(run_info);
1228 ForLoopOneExecStat* stat_info = sei.statInfo();
1229 impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1230
1231 if (TaskFactory::verboseLevel()>=1){
1232 std::cout << "TBB: TBBTaskImplementation executeMDParallelFor nb_dim=" << RankValue
1233 << " nb_element=" << loop_ranges.nbElement()
1234 << " grain_size=" << options.grainSize()
1235 << " name=" << run_info.traceInfo().traceInfo()
1236 << " has_stat_info=" << (stat_info!=nullptr)
1237 << '\n';
1238 }
1239
1240 Integer max_thread = options.maxThread();
1241 // En exécution séquentielle, appelle directement la méthode \a f.
1242 if (max_thread==1 || max_thread==0){
1243 functor->executeFunctor(loop_ranges);
1244 return;
1245 }
1246
1247 // Remplace les valeurs non initialisées de \a options par celles de \a m_default_loop_options
1248 ParallelLoopOptions true_options(options);
1250
1251 Integer nb_allowed_thread = m_p->nbAllowedThread();
1252 if (max_thread<0)
1253 max_thread = nb_allowed_thread;
1254 tbb::task_arena* used_arena = nullptr;
1255 if (max_thread<nb_allowed_thread)
1256 used_arena = m_p->m_sub_arena_list[max_thread];
1257 if (!used_arena)
1258 used_arena = &(m_p->m_main_arena);
1259
1260 // Pour l'instant pour la dimension 1, utilise le 'ParallelForExecute' historique
1261 if constexpr (RankValue==1){
1262 auto range_1d = _toTBBRange(loop_ranges);
1263 auto x1 = [&](Integer begin,Integer size)
1264 {
1265 functor->executeFunctor(makeLoopRanges(ForLoopRange(begin,size)));
1266 //functor->executeFunctor(ComplexForLoopRanges<1>(begin,size));
1267 };
1268 LambdaRangeFunctorT<decltype(x1)> functor_1d(x1);
1269 Integer begin1 = CheckedConvert::toInteger(range_1d.dim(0).begin());
1270 Integer size1 = CheckedConvert::toInteger(range_1d.dim(0).size());
1271 ParallelForExecute pfe(this,true_options,begin1,size1,&functor_1d,stat_info);
1272 used_arena->execute(pfe);
1273 }
1274 else{
1275 MDParallelForExecute<RankValue> pfe(this,true_options,loop_ranges,functor,stat_info);
1276 used_arena->execute(pfe);
1277 }
1278}
1279
1280/*---------------------------------------------------------------------------*/
1281/*---------------------------------------------------------------------------*/
1282
1284executeParallelFor(Integer begin,Integer size,Integer grain_size,IRangeFunctor* f)
1285{
1287 opts.setGrainSize(grain_size);
1288 ForLoopRunInfo run_info(opts);
1289 executeParallelFor(ParallelFor1DLoopInfo(begin,size,f,run_info));
1290}
1291
1292/*---------------------------------------------------------------------------*/
1293/*---------------------------------------------------------------------------*/
1294
1300
1301/*---------------------------------------------------------------------------*/
1302/*---------------------------------------------------------------------------*/
1303
1306{
1307 Int32 thread_id = currentTaskThreadIndex();
1308 if (thread_id>=0)
1309 return m_p->threadTaskInfo(thread_id);
1310 return nullptr;
1311}
1312
1313/*---------------------------------------------------------------------------*/
1314/*---------------------------------------------------------------------------*/
1315
1317currentTaskIndex() const
1318{
1319 Int32 thread_id = currentTaskThreadIndex();
1320#ifdef ARCANE_USE_ONETBB
1321 if (thread_id<0 || thread_id>=m_p->nbAllowedThread())
1322 return 0;
1323#endif
1325 if (tti){
1326 Int32 task_index = tti->taskIndex();
1327 if (task_index>=0)
1328 return task_index;
1329 }
1330 return thread_id;
1331}
1332
1333/*---------------------------------------------------------------------------*/
1334/*---------------------------------------------------------------------------*/
1335
1336#ifdef ARCANE_USE_ONETBB
1337
1338/*---------------------------------------------------------------------------*/
1339/*---------------------------------------------------------------------------*/
1340
1341void OneTBBTask::
1342launchAndWait()
1343{
1344 tbb::task_group task_group;
1345 task_group.run(taskFunctor());
1346 task_group.wait();
1347 delete this;
1348}
1349
1350/*---------------------------------------------------------------------------*/
1351/*---------------------------------------------------------------------------*/
1352
1353void OneTBBTask::
1354launchAndWait(ConstArrayView<ITask*> tasks)
1355{
1356 tbb::task_group task_group;
1357 Integer n = tasks.size();
1358 if (n==0)
1359 return;
1360
1361 //set_ref_count(n+1);
1362 for( Integer i=0; i<n; ++i ){
1363 OneTBBTask* t = static_cast<OneTBBTask*>(tasks[i]);
1364 task_group.run(t->taskFunctor());
1365 }
1366 task_group.wait();
1367 for( Integer i=0; i<n; ++i ){
1368 OneTBBTask* t = static_cast<OneTBBTask*>(tasks[i]);
1369 delete t;
1370 }
1371}
1372
1373/*---------------------------------------------------------------------------*/
1374/*---------------------------------------------------------------------------*/
1375
1376ITask* OneTBBTask::
1377_createChildTask(ITaskFunctor* functor)
1378{
1379 OneTBBTask* t = new OneTBBTask(functor);
1380 return t;
1381}
1382
1383/*---------------------------------------------------------------------------*/
1384/*---------------------------------------------------------------------------*/
1385
1386#else // ARCANE_USE_ONETBB
1387
1388/*---------------------------------------------------------------------------*/
1389/*---------------------------------------------------------------------------*/
1390
1393{
1394 task::spawn_root_and_wait(*this);
1395}
1396
1397/*---------------------------------------------------------------------------*/
1398/*---------------------------------------------------------------------------*/
1399
1402{
1403 Integer n = tasks.size();
1404 if (n==0)
1405 return;
1406
1407 set_ref_count(n+1);
1408 for( Integer i=0; i<n-1; ++i ){
1409 TBBTask* t = static_cast<TBBTask*>(tasks[i]);
1410 spawn(*t);
1411 }
1412 spawn_and_wait_for_all(*static_cast<TBBTask*>(tasks[n-1]));
1413}
1414
1415/*---------------------------------------------------------------------------*/
1416/*---------------------------------------------------------------------------*/
1417
1418ITask* LegacyTBBTask::
1419_createChildTask(ITaskFunctor* functor)
1420{
1421 TBBTask* t = new(allocate_child()) TBBTask(functor);
1422 return t;
1423}
1424
1425/*---------------------------------------------------------------------------*/
1426/*---------------------------------------------------------------------------*/
1427
1428#endif // ARCANE_USE_ONETBB
1429
1430/*---------------------------------------------------------------------------*/
1431/*---------------------------------------------------------------------------*/
1432
1433// TODO: a supprimer maintenant qu'on utilise 'DependencyInjection'
1434ARCANE_REGISTER_APPLICATION_FACTORY(TBBTaskImplementation,ITaskImplementation,
1435 TBBTaskImplementation);
1436
1437ARCANE_DI_REGISTER_PROVIDER(TBBTaskImplementation,
1438 DependencyInjection::ProviderProperty("TBBTaskImplementation"),
1439 ARCANE_DI_INTERFACES(ITaskImplementation),
1440 ARCANE_DI_EMPTY_CONSTRUCTOR());
1441
1442/*---------------------------------------------------------------------------*/
1443/*---------------------------------------------------------------------------*/
1444
1445} // End namespace Arcane
1446
1447/*---------------------------------------------------------------------------*/
1448/*---------------------------------------------------------------------------*/
#define ARCANE_THROW(exception_class,...)
Macro pour envoyer une exception avec formattage.
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
#define ARCANE_ALIGNAS_PACKED(value)
Macro pour garantir le compactage et l'alignement d'une classe sur value octets.
Classes, Types et macros pour gérer la concurrence.
#define ARCANE_REGISTER_APPLICATION_FACTORY(aclass, ainterface, aname)
Enregistre un service de fabrique pour la classe aclass.
Allocateur mémoire avec alignement mémoire spécifique.
void resize(Int64 s)
Change le nombre d'éléments du tableau à s.
Interval d'itération complexe.
Vue constante d'un tableau de type T.
constexpr Integer size() const noexcept
Nombre d'éléments du tableau.
Tableau 1D de taille fixe.
Definition FixedArray.h:45
Classe pour gérer le profiling d'une seule exécution d'une boucle.
Definition Profiling.h:93
Intervalle d'itération pour une boucle.
Informations d'exécution d'une boucle.
Interface d'un fonctor sur un interval d'itération multi-dimensionnel de dimension RankValue.
virtual void notifyAllObservers()=0
Notifie tous les observateurs.
Interface d'un fonctor sur un interval d'itération.
virtual void executeFunctor(Integer begin, Integer size)=0
Exécute la méthode associée.
Interface d'un fonctor pour une tâche.
virtual void executeFunctor(const TaskContext &tc)=0
Exécute la méthode associé
Implémentation d'une fabrique de tâches.
Interface d'une tâche concourante.
Fonctor sur un interval d'itération instancié via une lambda fonction.
void launchAndWait() override
Lance la tâche et bloque jusqu'à ce qu'elle se termine.
Exception lorsqu'une fonction n'est pas implémentée.
Caractéristiques d'un boucle 1D multi-thread.
Options d'exécution d'une boucle parallèle en multi-thread.
Integer grainSize() const
Taille d'un intervalle d'itération.
void mergeUnsetValues(const ParallelLoopOptions &po)
Fusionne les valeurs non modifiées de l'instance par celles de po.
Int32 maxThread() const
Nombre maximal de threads autorisés.
void setGrainSize(Integer v)
Positionne la taille (approximative) d'un intervalle d'itération.
void setMaxThread(Integer v)
Positionne le nombre maximal de threads autorisé.
@ Static
Utilise un partitionnement statique.
@ Deterministic
Utilise un partitionnement et un ordonnancement statique.
static impl::ForLoopStatInfoList * _threadLocalForLoopInstance()
Definition Profiling.cc:194
static bool hasProfiling()
Indique si le profilage est actif.
Definition Profiling.h:173
Structure contenant les informations pour créer un service.
Implémentation déterministe de ParallelFor.
void operator()(tbb::blocked_range< Integer > &range) const
Opérateur pour un thread donné.
Exécuteur pour une boucle multi-dimension.
Exécuteur pour une boucle 1D.
UniqueArray< tbb::task_arena * > m_sub_arena_list
Tableau dont le i-ème élément contient la tbb::task_arena pour i thread.
Classe pour positionner TaskThreadInfo::taskIndex().
Int32 currentTaskThreadIndex() const final
Implémentation de TaskFactory::currentTaskThreadIndex()
void initialize(Int32 nb_thread) override
void executeParallelFor(const ComplexForLoopRanges< 1 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 1 > *functor) final
Exécute une boucle 1D en concurrence.
void executeParallelFor(Int32 begin, Int32 size, IRangeFunctor *f) final
Exécute le fonctor f en concurrence.
ITask * createRootTask(ITaskFunctor *f) override
Créé une tâche racine. L'implémentation doit recopier la valeur de f qui est soit un TaskFunctor,...
void printInfos(std::ostream &o) const final
Affiche les informations sur le runtime utilisé
void executeParallelFor(Int32 begin, Int32 size, const ParallelLoopOptions &options, IRangeFunctor *f) final
Exécute le fonctor f en concurrence.
void executeParallelFor(const ComplexForLoopRanges< 3 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 3 > *functor) final
Exécute une boucle 3D en concurrence.
void executeParallelFor(const ComplexForLoopRanges< 4 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 4 > *functor) final
Exécute une boucle 4D en concurrence.
TaskThreadInfo * currentTaskThreadInfo() const
Instance de TaskThreadInfo associé au thread courant.
bool isActive() const final
Indique si l'implémentation est active.
void _executeMDParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, IMDRangeFunctor< RankValue > *functor, const ForLoopRunInfo &run_info)
Exécution d'une boucle N-dimensions.
Int32 currentTaskIndex() const final
Implémentation de TaskFactory::currentTaskIndex()
void executeParallelFor(const ComplexForLoopRanges< 2 > &loop_ranges, const ForLoopRunInfo &run_info, IMDRangeFunctor< 2 > *functor) final
Exécute une boucle 2D en concurrence.
Int32 nbAllowedThread() const final
Nombre de threads utilisés au maximum pour gérer les tâches.
Contexte d'éxecution d'une tâche.
static void notifyThreadCreated()
Notifie tous les observateurs de création de thread.
static IObservable * destroyThreadObservable()
Observable appelé lors de la destruction d'un thread pour une tâche.
static const ParallelLoopOptions & defaultParallelLoopOptions()
Valeurs par défaut d'exécution d'une boucle parallèle.
static Integer verboseLevel()
Niveau de verbosité
static void setDefaultParallelLoopOptions(const ParallelLoopOptions &v)
Positionne les valeurs par défaut d'exécution d'une boucle parallèle.
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
Vecteur 1D de données avec sémantique par valeur (style STL).
Classe permettant de récupérer le temps passé entre l'appel au constructeur et au destructeur.
Definition Profiling.h:38
Integer toInteger(Real r)
Converti un Int64 en un Integer.
ARCCORE_BASE_EXPORT String getStackTrace()
Retourne une chaîne de caractere contenant la pile d'appel.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Int32 Integer
Type représentant un entier.
SimpleForLoopRanges< 1 > makeLoopRanges(Int32 n1)
Créé un intervalle d'itération [0,n1[.
std::int32_t Int32
Type entier signé sur 32 bits.