Arcane v3.16.2.0
Developer documentation
TBBTaskImplementation.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* TBBTaskImplementation.cc (C) 2000-2025 */
9/* */
10/* Task implementation using TBB (Intel Threading Building Blocks). */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/utils/IThreadImplementation.h"
15#include "arcane/utils/NotImplementedException.h"
16#include "arcane/utils/IFunctor.h"
17#include "arcane/utils/CheckedConvert.h"
18#include "arcane/utils/ForLoopRanges.h"
19#include "arcane/utils/ConcurrencyUtils.h"
20#include "arcane/utils/IObservable.h"
21#include "arcane/utils/PlatformUtils.h"
22#include "arcane/utils/Profiling.h"
23#include "arcane/utils/MemoryAllocator.h"
24#include "arcane/utils/FixedArray.h"
25#include "arcane/utils/internal/TaskFactoryInternal.h"
26#include "arcane/utils/internal/DependencyInjection.h"
27
28#include "arcane/core/FactoryService.h"
29
30#include <new>
31#include <stack>
32
33// This macro must be defined so that the 'blocked_rangeNd' class is available
34
35#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
36
37// the 'ARCANE_USE_ONETBB' macro is defined in the CMakeLists.txt
38// when compiling with OneTBB version 2021+
39// (https://github.com/oneapi-src/oneTBB.git)
40// Eventually this will be the only version supported by Arcane.
41
42#ifdef ARCANE_USE_ONETBB
43
44// Required to have access to task_scheduler_handle
45#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
46#include <tbb/tbb.h>
47#include <oneapi/tbb/concurrent_set.h>
48#include <oneapi/tbb/global_control.h>
49
50#else // ARCANE_USE_ONETBB
51
52// NOTE GG: since March 2019, TBB version 2018+ is mandatory.
53#include <tbb/tbb.h>
54#include <tbb/blocked_rangeNd.h>
55/*
56 * Now check that the version is at least 2018.
57 */
58#if (TBB_VERSION_MAJOR < 4) || (TBB_VERSION_MAJOR==4 && TBB_VERSION_MINOR<2)
59#define ARCANE_OLD_TBB
60#endif
61
62#if (TBB_VERSION_MAJOR<2018)
63#define ARCANE_OLD_TBB
64#endif
65
66#ifdef ARCANE_OLD_TBB
67#error "Your version of TBB is too old. TBB 2018+ is required. Please disable TBB in the configuration"
68#endif
69
70#include <thread>
71#include <mutex>
72
73#endif // ARCANE_USE_ONETBB
74
75namespace
76{
77#if (TBB_VERSION_MAJOR > 2022) || (TBB_VERSION_MAJOR == 2022 && TBB_VERSION_MINOR > 0) || defined __TBB_blocked_nd_range_H
78
79// The 'blocked_rangeNd' class was removed in version
80// 2022.0.0 and replaced by 'blocked_nd_range'.
81template <typename Value, unsigned int N>
82using blocked_nd_range = tbb::blocked_nd_range<Value, N>;
83
84#else
85
86template <typename Value, unsigned int N>
87using blocked_nd_range = tbb::blocked_rangeNd<Value, N>;
88
89#endif
90} // namespace
91
92/*---------------------------------------------------------------------------*/
93/*---------------------------------------------------------------------------*/
94
95namespace Arcane
96{
97
99
100// TODO: use a specific memory pool to manage the
101// OneTBBTask instances, to optimize the new/delete of instances of this class.
102// Previously, with the old TBB versions, this was handled with
103// the 'tbb::task::allocate_child()' method.
104
105/*---------------------------------------------------------------------------*/
106/*---------------------------------------------------------------------------*/
107
108namespace
109{
110
111// True if execution statistics are being collected
112bool isStatActive()
113{
114 return ProfilingRegistry::hasProfiling();
115}
116
121class ScopedExecInfo
122{
123 public:
124
125 explicit ScopedExecInfo(const ForLoopRunInfo& run_info)
126 : m_run_info(run_info)
127 {
128 // If run_info.execStat() is not null, use it.
129 // This means that the caller will manage the execution
130 // statistics. Otherwise, use \a m_stat_info if execution
131 // statistics are requested.
132 ForLoopOneExecStat* ptr = run_info.execStat();
133 if (ptr){
134 m_stat_info_ptr = ptr;
135 m_use_own_run_info = false;
136 }
137 else
138 m_stat_info_ptr = isStatActive() ? &m_stat_info : nullptr;
139 }
140 ~ScopedExecInfo()
141 {
142#ifdef PRINT_STAT_INFO
143 if (m_stat_info_ptr){
144 bool is_valid = m_run_info.traceInfo().isValid();
145 if (!is_valid)
146 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
147 << " stack=" << platform::getStackTrace()
148 << "\n";
149 else
150 std::cout << "ADD_OWN_RUN_INFO nb_chunk=" << m_stat_info_ptr->nbChunk()
151 << " trace_name=" << m_run_info.traceInfo().traceInfo().name() << "\n";
152 }
153#endif
154 if (m_stat_info_ptr && m_use_own_run_info){
155 ProfilingRegistry::_threadLocalForLoopInstance()->merge(*m_stat_info_ptr,m_run_info.traceInfo());
156 }
157 }
158
159 public:
160
161 ForLoopOneExecStat* statInfo() const { return m_stat_info_ptr; }
162 bool isOwn() const { return m_use_own_run_info; }
163 private:
164
165 ForLoopOneExecStat m_stat_info;
166 ForLoopOneExecStat* m_stat_info_ptr = nullptr;
167 ForLoopRunInfo m_run_info;
169 bool m_use_own_run_info = true;
170};
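// How ScopedExecInfo is used further below in _executeParallelFor() and
// _executeMDParallelFor() to route per-loop statistics:
//
//   ScopedExecInfo sei(run_info);
//   ForLoopOneExecStat* stat_info = sei.statInfo();
//   impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
//   // ... execute the loop: each chunk increments stat_info ...
//   // When 'sei' owns the statistics, its destructor merges them into the
//   // thread-local ForLoopStatInfoList.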
171
172/*---------------------------------------------------------------------------*/
173/*---------------------------------------------------------------------------*/
174
175inline int _currentTaskTreadIndex()
176{
177 // NOTE: With OneTBB 2021, the value is no longer '0' if this method is called
178 // from a thread outside a task_arena. With the 2021 version,
179 // the value is 65535.
180 // NOTE: This seems to be a bug in 2021.3.
181 return tbb::this_task_arena::current_thread_index();
182}
183
184inline blocked_nd_range<Int32, 1>
185_toTBBRange(const ComplexForLoopRanges<1>& r)
186{
187 return {{r.lowerBound<0>(), r.upperBound<0>()}};
188}
189
190inline blocked_nd_range<Int32, 2>
191_toTBBRange(const ComplexForLoopRanges<2>& r)
192{
193 return {{r.lowerBound<0>(), r.upperBound<0>()},
194 {r.lowerBound<1>(), r.upperBound<1>()}};
195
196}
197
198inline blocked_nd_range<Int32, 3>
199_toTBBRange(const ComplexForLoopRanges<3>& r)
200{
201 return {{r.lowerBound<0>(), r.upperBound<0>()},
202 {r.lowerBound<1>(), r.upperBound<1>()},
203 {r.lowerBound<2>(), r.upperBound<2>()}};
204}
205
206inline blocked_nd_range<Int32, 4>
207_toTBBRange(const ComplexForLoopRanges<4>& r)
208{
209 return {{r.lowerBound<0>(), r.upperBound<0>()},
210 {r.lowerBound<1>(), r.upperBound<1>()},
211 {r.lowerBound<2>(), r.upperBound<2>()},
212 {r.lowerBound<3>(), r.upperBound<3>()}};
213}
214
215/*---------------------------------------------------------------------------*/
216/*---------------------------------------------------------------------------*/
217
218inline blocked_nd_range<Int32, 2>
219_toTBBRangeWithGrain(const blocked_nd_range<Int32, 2>& r, FixedArray<size_t, 2> grain_sizes)
220{
221 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
222 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]}};
223}
224
225inline blocked_nd_range<Int32, 3>
226_toTBBRangeWithGrain(const blocked_nd_range<Int32, 3>& r, FixedArray<size_t, 3> grain_sizes)
227{
228 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
229 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]},
230 {r.dim(2).begin(), r.dim(2).end(), grain_sizes[2]}};
231}
232
233inline blocked_nd_range<Int32, 4>
234_toTBBRangeWithGrain(const blocked_nd_range<Int32, 4>& r, FixedArray<size_t, 4> grain_sizes)
235{
236 return {{r.dim(0).begin(), r.dim(0).end(), grain_sizes[0]},
237 {r.dim(1).begin(), r.dim(1).end(), grain_sizes[1]},
238 {r.dim(2).begin(), r.dim(2).end(), grain_sizes[2]},
239 {r.dim(3).begin(), r.dim(3).end(), grain_sizes[3]}};
240}
241
242/*---------------------------------------------------------------------------*/
243/*---------------------------------------------------------------------------*/
244
245inline ComplexForLoopRanges<2>
246_fromTBBRange(const blocked_nd_range<Int32, 2>& r)
247{
248 using BoundsType = ArrayBounds<MDDim2>;
249 using ArrayExtentType = typename BoundsType::ArrayExtentType;
250
251 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin()));
252 auto s0 = static_cast<Int32>(r.dim(0).size());
253 auto s1 = static_cast<Int32>(r.dim(1).size());
254 BoundsType sizes(ArrayExtentType(s0,s1));
255 return { lower_bounds, sizes };
256}
257
258inline ComplexForLoopRanges<3>
259_fromTBBRange(const blocked_nd_range<Int32, 3>& r)
260{
261 using BoundsType = ArrayBounds<MDDim3>;
262 using ArrayExtentType = typename BoundsType::ArrayExtentType;
263
264 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin(),r.dim(2).begin()));
265 auto s0 = static_cast<Int32>(r.dim(0).size());
266 auto s1 = static_cast<Int32>(r.dim(1).size());
267 auto s2 = static_cast<Int32>(r.dim(2).size());
268 BoundsType sizes(ArrayExtentType(s0,s1,s2));
269 return { lower_bounds, sizes };
270}
271
272inline ComplexForLoopRanges<4>
273_fromTBBRange(const blocked_nd_range<Int32, 4>& r)
274{
275 using BoundsType = ArrayBounds<MDDim4>;
276 using ArrayExtentType = typename BoundsType::ArrayExtentType;
277
278 BoundsType lower_bounds(ArrayExtentType(r.dim(0).begin(),r.dim(1).begin(),r.dim(2).begin(),r.dim(3).begin()));
279 auto s0 = static_cast<Int32>(r.dim(0).size());
280 auto s1 = static_cast<Int32>(r.dim(1).size());
281 auto s2 = static_cast<Int32>(r.dim(2).size());
282 auto s3 = static_cast<Int32>(r.dim(3).size());
283 BoundsType sizes(ArrayExtentType(s0,s1,s2,s3));
284 return { lower_bounds, sizes };
285}
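// Round-trip example for rank 2: a ComplexForLoopRanges<2> with lower bounds
// (2,3) and extents (10,20) becomes, through _toTBBRange(), a blocked_nd_range
// whose dims are [2,12) and [3,23); _fromTBBRange() then rebuilds lower bounds
// (2,3) and sizes (10,20) from dim(i).begin() and dim(i).size(), so the functor
// receives the same iteration space as the caller provided.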
286
287}
288
289/*---------------------------------------------------------------------------*/
290/*---------------------------------------------------------------------------*/
291
292#ifdef ARCANE_USE_ONETBB
293
294/*---------------------------------------------------------------------------*/
295/*---------------------------------------------------------------------------*/
296
297class OneTBBTaskFunctor
298{
299 public:
300 OneTBBTaskFunctor(ITaskFunctor* functor,ITask* task)
301 : m_functor(functor), m_task(task) {}
302 public:
303 void operator()() const
304 {
305 if (m_functor){
306 ITaskFunctor* tf = m_functor;
307 m_functor = 0;
308 TaskContext task_context(m_task);
309 //cerr << "FUNC=" << typeid(*tf).name();
310 tf->executeFunctor(task_context);
311 }
312 }
313 public:
314 mutable ITaskFunctor* m_functor;
315 ITask* m_task;
316};
317
318/*---------------------------------------------------------------------------*/
319/*---------------------------------------------------------------------------*/
320
321class OneTBBTask
322: public ITask
323{
324 public:
325 static const int FUNCTOR_CLASS_SIZE = 32;
326 public:
327 OneTBBTask(ITaskFunctor* f)
328 : m_functor(f)
329 {
330 m_functor = f->clone(functor_buf,FUNCTOR_CLASS_SIZE);
331 }
332 public:
333 OneTBBTaskFunctor taskFunctor() { return OneTBBTaskFunctor(m_functor,this); }
334 void launchAndWait() override;
335 void launchAndWait(ConstArrayView<ITask*> tasks) override;
336 protected:
337 virtual ITask* _createChildTask(ITaskFunctor* functor) override;
338 public:
339 ITaskFunctor* m_functor;
340 char functor_buf[FUNCTOR_CLASS_SIZE];
341};
342using TBBTask = OneTBBTask;
343
344/*---------------------------------------------------------------------------*/
345/*---------------------------------------------------------------------------*/
346
347#else // ARCANE_USE_ONETBB
348
349/*---------------------------------------------------------------------------*/
350/*---------------------------------------------------------------------------*/
351
352class LegacyTBBTask
353: public tbb::task
354, public ITask
355{
356 public:
357 static const int FUNCTOR_CLASS_SIZE = 32;
358 public:
359 LegacyTBBTask(ITaskFunctor* f)
360 : m_functor(f)
361 {
362 m_functor = f->clone(functor_buf,FUNCTOR_CLASS_SIZE);
363 }
364 public:
365 tbb::task* execute() override
366 {
367 if (m_functor){
368 ITaskFunctor* tf = m_functor;
369 m_functor = 0;
370 TaskContext task_context(this);
371 //cerr << "FUNC=" << typeid(*tf).name();
372 tf->executeFunctor(task_context);
373 }
374 return nullptr;
375 }
376 void launchAndWait() override;
377 void launchAndWait(ConstArrayView<ITask*> tasks) override;
378 protected:
379 ITask* _createChildTask(ITaskFunctor* functor) final;
380 public:
381 ITaskFunctor* m_functor;
382 char functor_buf[FUNCTOR_CLASS_SIZE];
383};
384using TBBTask = LegacyTBBTask;
385
386/*---------------------------------------------------------------------------*/
387/*---------------------------------------------------------------------------*/
388
389#endif // ARCANE_USE_ONETBB
390
391/*---------------------------------------------------------------------------*/
392/*---------------------------------------------------------------------------*/
393/*
394 * Do not use the observer local to the task_arena.
395 * Use the observer global to the scheduler.
396 * For the id, use tbb::this_task_arena::current_thread_index().
397 */
398class TBBTaskImplementation
399: public ITaskImplementation
400{
401 class Impl;
402 class ParallelForExecute;
403 template<int RankValue>
404 class MDParallelForExecute;
405
406 public:
407 // For performance reasons, align on a cache line
408 // and use padding.
409 class ARCANE_ALIGNAS_PACKED(64) TaskThreadInfo
410 {
411 public:
412 TaskThreadInfo() : m_task_index(-1){}
413 public:
414 void setTaskIndex(Integer v) { m_task_index = v; }
415 Integer taskIndex() const { return m_task_index; }
416 private:
417 Integer m_task_index;
418 };
419
426 class TaskInfoLockGuard
427 {
428 public:
429 TaskInfoLockGuard(TaskThreadInfo* tti,Integer task_index)
430 : m_tti(tti), m_old_task_index(-1)
431 {
432 if (tti){
433 m_old_task_index = tti->taskIndex();
434 tti->setTaskIndex(task_index);
435 }
436 }
437 ~TaskInfoLockGuard()
438 {
439 if (m_tti)
440 m_tti->setTaskIndex(m_old_task_index);
441 }
442 private:
443 TaskThreadInfo* m_tti;
444 Integer m_old_task_index;
445 };
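// Usage sketch: TBBDeterministicParallelFor::_doBlock() below wraps the execution
// of each block in such a guard, so that currentTaskIndex() reports the logical
// task index while the block runs and the previous value is restored afterwards:
//
//   TBBTaskImplementation::TaskInfoLockGuard guard(m_impl->currentTaskThreadInfo(), task_id);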
446
447 public:
448 TBBTaskImplementation(const ServiceBuildInfo& sbi)
449 {
450 }
451 TBBTaskImplementation() = default;
452 ~TBBTaskImplementation() override;
453 public:
454 void build() {}
455 void initialize(Int32 nb_thread) override;
456 void terminate() override;
457
458 ITask* createRootTask(ITaskFunctor* f) override
459 {
460#ifdef ARCANE_USE_ONETBB
461 OneTBBTask* t = new OneTBBTask(f);
462#else
463 LegacyTBBTask* t = new(tbb::task::allocate_root()) LegacyTBBTask(f);
464#endif
465 return t;
466 }
467
468 void executeParallelFor(Int32 begin,Int32 size,const ParallelLoopOptions& options,IRangeFunctor* f) final;
469 void executeParallelFor(Int32 begin,Int32 size,Integer grain_size,IRangeFunctor* f) final;
474 void executeParallelFor(const ParallelFor1DLoopInfo& loop_info) override;
475
476 void executeParallelFor(const ComplexForLoopRanges<1>& loop_ranges,
477 const ForLoopRunInfo& run_info,
478 IMDRangeFunctor<1>* functor) final
479 {
480 _executeMDParallelFor<1>(loop_ranges,functor,run_info);
481 }
482 void executeParallelFor(const ComplexForLoopRanges<2>& loop_ranges,
483 const ForLoopRunInfo& run_info,
484 IMDRangeFunctor<2>* functor) final
485 {
486 _executeMDParallelFor<2>(loop_ranges,functor,run_info);
487 }
488 void executeParallelFor(const ComplexForLoopRanges<3>& loop_ranges,
489 const ForLoopRunInfo& run_info,
490 IMDRangeFunctor<3>* functor) final
491 {
492 _executeMDParallelFor<3>(loop_ranges,functor,run_info);
493 }
494 void executeParallelFor(const ComplexForLoopRanges<4>& loop_ranges,
495 const ForLoopRunInfo& run_info,
496 IMDRangeFunctor<4>* functor) final
497 {
498 _executeMDParallelFor<4>(loop_ranges,functor,run_info);
499 }
500
501 bool isActive() const final
502 {
503 return m_is_active;
504 }
505
506 Int32 nbAllowedThread() const final;
507
508 Int32 currentTaskThreadIndex() const final
509 {
510 return (nbAllowedThread() <= 1 ) ? 0 : _currentTaskTreadIndex();
511 }
512
513 Int32 currentTaskIndex() const final;
514
515 void printInfos(std::ostream& o) const final;
516
517 public:
518
525 TaskThreadInfo* currentTaskThreadInfo() const;
526
527 private:
528
529 bool m_is_active = false;
530 Impl* m_p = nullptr;
531
532 private:
533
534 template<int RankValue> void
535 _executeMDParallelFor(const ComplexForLoopRanges<RankValue>& loop_ranges,
536 IMDRangeFunctor<RankValue>* functor,
537 const ForLoopRunInfo& run_info);
538 void _executeParallelFor(const ParallelFor1DLoopInfo& loop_info);
539};
540
541/*---------------------------------------------------------------------------*/
542/*---------------------------------------------------------------------------*/
543
544class TBBTaskImplementation::Impl
545{
546 class TaskObserver
547 : public tbb::task_scheduler_observer
548 {
549 public:
550 TaskObserver(TBBTaskImplementation::Impl* p)
551 :
552#ifdef ARCANE_USE_ONETBB
553 tbb::task_scheduler_observer(p->m_main_arena),
554#endif
555 m_p(p)
556 {
557 }
558 void on_scheduler_entry(bool is_worker) override
559 {
560 m_p->notifyThreadCreated(is_worker);
561 }
562 void on_scheduler_exit(bool is_worker) override
563 {
564 m_p->notifyThreadDestroyed(is_worker);
565 }
566 TBBTaskImplementation::Impl* m_p;
567 };
568
569 public:
570 Impl() :
571 m_task_observer(this),
572 m_thread_task_infos(AlignedMemoryAllocator::CacheLine())
573 {
574#ifdef ARCANE_USE_ONETBB
575 m_nb_allowed_thread = tbb::info::default_concurrency();
576#else
577 m_nb_allowed_thread = tbb::task_scheduler_init::default_num_threads();
578#endif
579 _init();
580 }
581 Impl(Int32 nb_thread)
582 :
583#ifndef ARCANE_USE_ONETBB
584 m_scheduler_init(nb_thread),
585#endif
586 m_main_arena(nb_thread),
587 m_task_observer(this),
588 m_thread_task_infos(AlignedMemoryAllocator::CacheLine())
589 {
590 m_nb_allowed_thread = nb_thread;
591 _init();
592 }
593 public:
594 Int32 nbAllowedThread() const { return m_nb_allowed_thread; }
595 TaskThreadInfo* threadTaskInfo(Integer index) { return &m_thread_task_infos[index]; }
596 private:
597 Int32 m_nb_allowed_thread;
598
599 public:
600 void terminate()
601 {
602 for( auto x : m_sub_arena_list ){
603 if (x)
604 x->terminate();
605 delete x;
606 }
607 m_sub_arena_list.clear();
608 m_main_arena.terminate();
609#ifdef ARCANE_USE_ONETBB
610 m_task_observer.observe(false);
611 oneapi::tbb::finalize(m_task_scheduler_handle);
612#else
613 m_scheduler_init.terminate();
614 m_task_observer.observe(false);
615#endif
616 }
617 public:
618 void notifyThreadCreated(bool is_worker)
619 {
620 std::thread::id my_thread_id = std::this_thread::get_id();
621
622#ifdef ARCANE_USE_ONETBB
623 // With OneTBB, this method is called every time we enter
624 // our 'task_arena'. Since the notification method must only be
625 // called once, we use a set to keep track of the threads
626 // that have already been created.
627 // NOTE: This method cannot be used with the historical TBB version
628 // (2018) because the 'contains' method does not exist there
629 if (m_constructed_thread_map.contains(my_thread_id))
630 return;
631 m_constructed_thread_map.insert(my_thread_id);
632#endif
633
634 {
635 if (TaskFactory::verboseLevel()>=1){
636 std::ostringstream ostr;
637 ostr << "TBB: CREATE THREAD"
638 << " nb_allowed=" << m_nb_allowed_thread
639#ifdef ARCANE_USE_ONETBB
640 << " tbb_default_allowed=" << tbb::info::default_concurrency()
641#else
642 << " tbb_default_allowed=" << tbb::task_scheduler_init::default_num_threads()
643#endif
644 << " id=" << my_thread_id
645 << " arena_id=" << _currentTaskTreadIndex()
646 << " is_worker=" << is_worker
647 << "\n";
648 std::cout << ostr.str();
649 }
650 TaskFactoryInternal::notifyThreadCreated();
651 }
652 }
653
654 void notifyThreadDestroyed([[maybe_unused]] bool is_worker)
655 {
656#ifdef ARCANE_USE_ONETBB
657 // With OneTBB, this method is called every time we exit
658 // the main arena. It therefore does not really correspond to a
659 // thread destruction, so we do nothing for this notification.
660 // TODO: Look at how we can be notified of the actual destruction
661 // of the thread.
662#else
663 // A lock is always needed because we are not certain that
664 // the methods called by the observable are thread-safe
665 // (and TaskFactory::createThreadObservable() is not thread-safe either)
666 std::scoped_lock sl(m_thread_created_mutex);
667 if (TaskFactory::verboseLevel()>=1){
668 std::cout << "TBB: DESTROY THREAD"
669 << " id=" << std::this_thread::get_id()
670 << " arena_id=" << _currentTaskTreadIndex()
671 << " is_worker=" << is_worker
672 << '\n';
673 }
674 // TODO: never used. Will be removed when switching to OneTBB.
675 TaskFactory::destroyThreadObservable()->notifyAllObservers();
676#endif
677 }
678 private:
679#ifdef ARCANE_USE_ONETBB
680#if TBB_VERSION_MAJOR>2021 || (TBB_VERSION_MAJOR==2021 && TBB_VERSION_MINOR>5)
681 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = oneapi::tbb::attach();
682#else
683 oneapi::tbb::task_scheduler_handle m_task_scheduler_handle = tbb::task_scheduler_handle::get();
684#endif
685#else
686 tbb::task_scheduler_init m_scheduler_init;
687#endif
688 public:
689 tbb::task_arena m_main_arena;
690 //! Array whose i-th element contains the tbb::task_arena for 'i' threads
691 UniqueArray<tbb::task_arena*> m_sub_arena_list;
692 private:
693 TaskObserver m_task_observer;
694 std::mutex m_thread_created_mutex;
695 UniqueArray<TaskThreadInfo> m_thread_task_infos;
696#ifdef ARCANE_USE_ONETBB
697 tbb::concurrent_set<std::thread::id> m_constructed_thread_map;
698#endif
699 void _init()
700 {
701 if (TaskFactory::verboseLevel()>=1){
702 std::cout << "TBB: TBBTaskImplementationInit nb_allowed_thread=" << m_nb_allowed_thread
703 << " id=" << std::this_thread::get_id()
704 << " version=" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR
705 << "\n";
706 }
707 m_thread_task_infos.resize(m_nb_allowed_thread);
708 m_task_observer.observe(true);
709 Integer max_arena_size = m_nb_allowed_thread;
710 // Artificially limit the number of tbb::task_arena
711 // to avoid having too many allocated objects.
712 if (max_arena_size>512)
713 max_arena_size = 512;
714 if (max_arena_size<2)
715 max_arena_size = 2;
716 m_sub_arena_list.resize(max_arena_size);
717 m_sub_arena_list[0] = m_sub_arena_list[1] = nullptr;
718 for( Integer i=2; i<max_arena_size; ++i )
719 m_sub_arena_list[i] = new tbb::task_arena(i);
720 }
721};
722
723/*---------------------------------------------------------------------------*/
724/*---------------------------------------------------------------------------*/
728class TBBParallelFor
729{
730 public:
731 TBBParallelFor(IRangeFunctor* f,Int32 nb_allowed_thread,ForLoopOneExecStat* stat_info)
732 : m_functor(f), m_stat_info(stat_info), m_nb_allowed_thread(nb_allowed_thread){}
733 public:
734
735 void operator()(tbb::blocked_range<Integer>& range) const
736 {
737#ifdef ARCANE_CHECK
738 if (TaskFactory::verboseLevel()>=3){
739 std::ostringstream o;
740 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
741 << " id=" << std::this_thread::get_id()
742 << " max_allowed=" << m_nb_allowed_thread
743 << " range_begin=" << range.begin() << " range_size=" << range.size()
744 << "\n";
745 std::cout << o.str();
746 std::cout.flush();
747 }
748
749 int tbb_index = _currentTaskTreadIndex();
750 if (tbb_index<0 || tbb_index>=m_nb_allowed_thread)
751 ARCANE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
752 tbb_index,m_nb_allowed_thread);
753#endif
754
755 if (m_stat_info)
756 m_stat_info->incrementNbChunk();
757 m_functor->executeFunctor(range.begin(),CheckedConvert::toInteger(range.size()));
758 }
759
760 private:
761 IRangeFunctor* m_functor;
762 ForLoopOneExecStat* m_stat_info = nullptr;
763 Int32 m_nb_allowed_thread;
764};
765
766/*---------------------------------------------------------------------------*/
767/*---------------------------------------------------------------------------*/
771template<int RankValue>
772class TBBMDParallelFor
773{
774 public:
775
776 TBBMDParallelFor(IMDRangeFunctor<RankValue>* f,Int32 nb_allowed_thread,ForLoopOneExecStat* stat_info)
777 : m_functor(f), m_stat_info(stat_info), m_nb_allowed_thread(nb_allowed_thread){}
778
779 public:
780
781 void operator()(blocked_nd_range<Int32, RankValue>& range) const
782 {
783#ifdef ARCANE_CHECK
784 if (TaskFactory::verboseLevel()>=3){
785 std::ostringstream o;
786 o << "TBB: INDEX=" << TaskFactory::currentTaskThreadIndex()
787 << " id=" << std::this_thread::get_id()
788 << " max_allowed=" << m_nb_allowed_thread
789 << " MDFor ";
790 for( Int32 i=0; i<RankValue; ++i ){
791 Int32 r0 = static_cast<Int32>(range.dim(i).begin());
792 Int32 r1 = static_cast<Int32>(range.dim(i).size());
793 o << " range" << i << " (begin=" << r0 << " size=" << r1 << ")";
794 }
795 o << "\n";
796 std::cout << o.str();
797 std::cout.flush();
798 }
799
800 int tbb_index = _currentTaskTreadIndex();
801 if (tbb_index<0 || tbb_index>=m_nb_allowed_thread)
802 ARCANE_FATAL("Invalid index for thread idx={0} valid_interval=[0..{1}[",
803 tbb_index,m_nb_allowed_thread);
804#endif
805
806 if (m_stat_info)
807 m_stat_info->incrementNbChunk();
808 m_functor->executeFunctor(_fromTBBRange(range));
809 }
810
811 private:
812
813 IMDRangeFunctor<RankValue>* m_functor = nullptr;
814 ForLoopOneExecStat* m_stat_info = nullptr;
815 Int32 m_nb_allowed_thread;
816};
817
818/*---------------------------------------------------------------------------*/
819/*---------------------------------------------------------------------------*/
837class TBBDeterministicParallelFor
838{
839 public:
840 TBBDeterministicParallelFor(TBBTaskImplementation* impl,const TBBParallelFor& tbb_for,
841 Integer begin_index,Integer size,Integer grain_size,Integer nb_thread)
842 : m_impl(impl), m_tbb_for(tbb_for), m_nb_thread(nb_thread), m_begin_index(begin_index), m_size(size),
843 m_grain_size(grain_size), m_nb_block(0), m_block_size(0), m_nb_block_per_thread(0)
844 {
845 if (m_nb_thread<1)
846 m_nb_thread = 1;
847
848 if (m_grain_size>0){
849 m_block_size = m_grain_size;
850 if (m_block_size>0){
851 m_nb_block = m_size / m_block_size;
852 if ((m_size % m_block_size)!=0)
853 ++m_nb_block;
854 }
855 else
856 m_nb_block = 1;
857 m_nb_block_per_thread = m_nb_block / m_nb_thread;
858 if ((m_nb_block % m_nb_thread) != 0)
859 ++m_nb_block_per_thread;
860 }
861 else{
862 if (m_nb_block<1)
863 m_nb_block = m_nb_thread;
864 m_block_size = m_size / m_nb_block;
865 m_nb_block_per_thread = 1;
866 }
867 if (TaskFactory::verboseLevel()>=1){
868 std::cout << "TBBDeterministicParallelFor: BEGIN=" << m_begin_index << " size=" << m_size
869 << " grain_size=" << m_grain_size
870 << " nb_block=" << m_nb_block << " nb_thread=" << m_nb_thread
871 << " nb_block_per_thread=" << m_nb_block_per_thread
872 << " block_size=" << m_block_size
873 << " block_size*nb_block=" << m_block_size*m_nb_block << '\n';
874 }
875 }
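 // Worked example of the decomposition above: with size=1000, grain_size=300
 // and nb_thread=4, block_size=300 and nb_block=4 (three full blocks plus the
 // remainder), hence nb_block_per_thread=1. Logical task 'i' always processes
 // block 'i', i.e. the index ranges [0,300), [300,600), [600,900) and
 // [900,1000) shifted by m_begin_index, whichever TBB worker actually runs it;
 // this fixed task/block association is what makes the scheduling deterministic.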
876 public:
877
884 void operator()(tbb::blocked_range<Integer>& range) const
885 {
886 Integer nb_iter = static_cast<Integer>(range.size());
887 for( Integer i=0; i<nb_iter; ++i ){
888 Integer task_id = range.begin() + i;
889 for ( Integer k=0, kn=m_nb_block_per_thread; k<kn; ++k ){
890 Integer block_id = task_id + (k * m_nb_thread);
891 if (block_id<m_nb_block)
892 _doBlock(task_id,block_id);
893 }
894 }
895 }
896
897 void _doBlock(Integer task_id,Integer block_id) const
898 {
899 TBBTaskImplementation::TaskInfoLockGuard guard(m_impl->currentTaskThreadInfo(),task_id);
900
901 Integer iter_begin = block_id * m_block_size;
902 Integer iter_size = m_block_size;
903 if ((block_id+1)==m_nb_block){
904 // For the last block, the size is the number of remaining elements
905 iter_size = m_size - iter_begin;
906 }
907 iter_begin += m_begin_index;
908#ifdef ARCANE_CHECK
909 if (TaskFactory::verboseLevel()>=3){
910 std::ostringstream o;
911 o << "TBB: DoBlock: BLOCK task_id=" << task_id << " block_id=" << block_id
912 << " iter_begin=" << iter_begin << " iter_size=" << iter_size << '\n';
913 std::cout << o.str();
914 std::cout.flush();
915 }
916#endif
917 if (iter_size>0){
918 auto r = tbb::blocked_range<int>(iter_begin,iter_begin + iter_size);
919 m_tbb_for(r);
920 }
921 }
922
923 private:
924
925 TBBTaskImplementation* m_impl;
926 const TBBParallelFor& m_tbb_for;
927 Integer m_nb_thread;
928 Integer m_begin_index;
929 Integer m_size;
930 Integer m_grain_size;
931 Integer m_nb_block;
932 Integer m_block_size;
933 Integer m_nb_block_per_thread;
934};
935
936/*---------------------------------------------------------------------------*/
937/*---------------------------------------------------------------------------*/
938
939class TBBTaskImplementation::ParallelForExecute
940{
941 public:
942
943 ParallelForExecute(TBBTaskImplementation* impl, const ParallelLoopOptions& options,
944 Integer begin,Integer size,IRangeFunctor* f,ForLoopOneExecStat* stat_info)
945 : m_impl(impl), m_begin(begin), m_size(size), m_functor(f), m_options(options), m_stat_info(stat_info){}
946
947 public:
948
949 void operator()() const
950 {
951 Integer nb_thread = m_options.maxThread();
952 TBBParallelFor pf(m_functor,nb_thread,m_stat_info);
953 Integer gsize = m_options.grainSize();
954 tbb::blocked_range<Integer> range(m_begin,m_begin+m_size);
955 if (TaskFactory::verboseLevel()>=1)
956 std::cout << "TBB: TBBTaskImplementationInit ParallelForExecute begin=" << m_begin
957 << " size=" << m_size << " gsize=" << gsize
958 << " partitioner=" << (int)m_options.partitioner()
959 << " nb_thread=" << nb_thread
960 << " has_stat_info=" << (m_stat_info!=nullptr)
961 << '\n';
962
963 if (gsize>0)
964 range = tbb::blocked_range<Integer>(m_begin,m_begin+m_size,gsize);
965
966 if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Static){
967 tbb::parallel_for(range,pf,tbb::static_partitioner());
968 }
969 else if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Deterministic){
970 tbb::blocked_range<Integer> range2(0,nb_thread,1);
971 TBBDeterministicParallelFor dpf(m_impl,pf,m_begin,m_size,gsize,nb_thread);
972 tbb::parallel_for(range2,dpf);
973 }
974 else
975 tbb::parallel_for(range,pf);
976 }
977 private:
978 TBBTaskImplementation* m_impl = nullptr;
979 Integer m_begin;
980 Integer m_size;
981 IRangeFunctor* m_functor = nullptr;
982 ParallelLoopOptions m_options;
983 ForLoopOneExecStat* m_stat_info = nullptr;
984};
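// Summary of the dispatch performed by ParallelForExecute::operator() above:
//  - Partitioner::Static        -> tbb::parallel_for with tbb::static_partitioner
//                                  (iterations split statically among workers);
//  - Partitioner::Deterministic -> one range entry per logical task, executed
//                                  through TBBDeterministicParallelFor for a
//                                  reproducible task/block mapping;
//  - any other value            -> plain tbb::parallel_for, i.e. TBB's default
//                                  auto partitioner with work stealing.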
985
986/*---------------------------------------------------------------------------*/
987/*---------------------------------------------------------------------------*/
988
989template<int RankValue>
990class TBBTaskImplementation::MDParallelForExecute
991{
992 public:
993
994 MDParallelForExecute(TBBTaskImplementation* impl,
995 const ParallelLoopOptions& options,
996 const ComplexForLoopRanges<RankValue>& range,
997 IMDRangeFunctor<RankValue>* f,[[maybe_unused]] ForLoopOneExecStat* stat_info)
998 : m_impl(impl)
999 , m_tbb_range(_toTBBRange(range))
1000 , m_functor(f)
1001 , m_options(options)
1002 , m_stat_info(stat_info)
1003 {
1004 // The values of a tbb::blocked_rangeNd instance cannot be modified.
1005 // We therefore have to rebuild one entirely.
1006 FixedArray<size_t,RankValue> all_grain_sizes;
1007 Int32 gsize = m_options.grainSize();
1008 if (gsize>0){
1009 // If the grain size is non-zero, it has to be distributed over all the
1010 // dimensions, starting with the last one (see the worked example after this constructor).
1011 // TODO: look at why in some cases the performance is
1012 // lower than what is obtained with a static
1013 // partitioner.
1014 constexpr bool is_verbose = false;
1015 std::array<Int32,RankValue> range_extents = range.extents().asStdArray();
1016 double ratio = static_cast<double>(gsize) / static_cast<double>(range.nbElement());
1017 if constexpr (is_verbose){
1018 std::cout << "GSIZE=" << gsize << " rank=" << RankValue << " ratio=" << ratio;
1019 for(Int32 i=0; i<RankValue; ++i )
1020 std::cout << " range" << i << "=" << range_extents[i];
1021 std::cout << "\n";
1022 }
1023 Int32 index = RankValue - 1;
1024 Int32 remaining_grain = gsize;
1025 for( ; index>=0; --index ){
1026 Int32 current = range_extents[index];
1027 if constexpr (is_verbose)
1028 std::cout << "Check index=" << index << " remaining=" << remaining_grain << " current=" << current << "\n";
1029 if (remaining_grain>current){
1030 all_grain_sizes[index] = current;
1031 remaining_grain /= current;
1032 }
1033 else{
1034 all_grain_sizes[index] = remaining_grain;
1035 break;
1036 }
1037 }
1038 for( Int32 i=0; i<index; ++i )
1039 all_grain_sizes[i] = 1;
1040 if constexpr (is_verbose){
1041 for(Int32 i=0; i<RankValue; ++i )
1042 std::cout << " grain" << i << "=" << all_grain_sizes[i];
1043 std::cout << "\n";
1044 }
1045 m_tbb_range = _toTBBRangeWithGrain(m_tbb_range,all_grain_sizes);
1046 }
1047 }
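 // Worked example of the grain-size distribution above: for a rank-3 loop with
 // extents (40,30,16) and grainSize()==256, the last dimension is capped at its
 // extent (16) and the remaining grain 256/16=16 fits entirely in the second
 // dimension, giving per-dimension grains (1,16,16); their product equals the
 // requested grain size, and every dimension before the last one reached keeps
 // a grain of 1.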
1048
1049 public:
1050
1051 void operator()() const
1052 {
1053 Integer nb_thread = m_options.maxThread();
1054 TBBMDParallelFor<RankValue> pf(m_functor,nb_thread,m_stat_info);
1055
1056 if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Static){
1057 tbb::parallel_for(m_tbb_range,pf,tbb::static_partitioner());
1058 }
1059 else if (m_options.partitioner()==ParallelLoopOptions::Partitioner::Deterministic){
1060 // TODO: implement the deterministic mode
1061 ARCANE_THROW(NotImplementedException,"ParallelLoopOptions::Partitioner::Deterministic for multi-dimensional loops");
1062 //tbb::blocked_range<Integer> range2(0,nb_thread,1);
1063 //TBBDeterministicParallelFor dpf(m_impl,pf,m_begin,m_size,gsize,nb_thread);
1064 //tbb::parallel_for(range2,dpf);
1065 }
1066 else{
1067 tbb::parallel_for(m_tbb_range,pf);
1068 }
1069 }
1070 private:
1071 TBBTaskImplementation* m_impl = nullptr;
1072 blocked_nd_range<Int32, RankValue> m_tbb_range;
1073 IMDRangeFunctor<RankValue>* m_functor = nullptr;
1074 ParallelLoopOptions m_options;
1075 ForLoopOneExecStat* m_stat_info = nullptr;
1076};
1077
1078/*---------------------------------------------------------------------------*/
1079/*---------------------------------------------------------------------------*/
1080
1081TBBTaskImplementation::
1082~TBBTaskImplementation()
1083{
1084 delete m_p;
1085}
1086
1087/*---------------------------------------------------------------------------*/
1088/*---------------------------------------------------------------------------*/
1089
1090void TBBTaskImplementation::
1091initialize(Int32 nb_thread)
1092{
1093 if (nb_thread<0)
1094 nb_thread = 0;
1095 m_is_active = (nb_thread!=1);
1096 if (nb_thread!=0)
1097 m_p = new Impl(nb_thread);
1098 else
1099 m_p = new Impl();
1100 ParallelLoopOptions opts(TaskFactory::defaultParallelLoopOptions());
1101 opts.setMaxThread(nbAllowedThread());
1102 TaskFactory::setDefaultParallelLoopOptions(opts);
1103}
1104
1105/*---------------------------------------------------------------------------*/
1106/*---------------------------------------------------------------------------*/
1107
1108void TBBTaskImplementation::
1109terminate()
1110{
1111 m_p->terminate();
1112}
1113
1114/*---------------------------------------------------------------------------*/
1115/*---------------------------------------------------------------------------*/
1116
1117Int32 TBBTaskImplementation::
1118nbAllowedThread() const
1119{
1120 return m_p->nbAllowedThread();
1121}
1122
1123/*---------------------------------------------------------------------------*/
1124/*---------------------------------------------------------------------------*/
1125
1126void TBBTaskImplementation::
1127printInfos(std::ostream& o) const
1128{
1129#ifdef ARCANE_USE_ONETBB
1130 o << "OneTBBTaskImplementation"
1131 << " version=" << TBB_VERSION_STRING
1132 << " interface=" << TBB_INTERFACE_VERSION
1133 << " runtime_interface=" << TBB_runtime_interface_version();
1134#else
1135 o << "TBBTaskImplementation"
1136 << " version=" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR
1137 << " interface=" << TBB_INTERFACE_VERSION;
1138#endif
1139}
1140
1141/*---------------------------------------------------------------------------*/
1142/*---------------------------------------------------------------------------*/
1143
1144void TBBTaskImplementation::
1145_executeParallelFor(const ParallelFor1DLoopInfo& loop_info)
1146{
1147 ScopedExecInfo sei(loop_info.runInfo());
1148 ForLoopOneExecStat* stat_info = sei.statInfo();
1149 impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1150
1151 Int32 begin = loop_info.beginIndex();
1152 Int32 size = loop_info.size();
1153 ParallelLoopOptions options = loop_info.runInfo().options().value_or(TaskFactory::defaultParallelLoopOptions());
1154 IRangeFunctor* f = loop_info.functor();
1155
1156 Integer max_thread = options.maxThread();
1157 Integer nb_allowed_thread = m_p->nbAllowedThread();
1158 if (max_thread<0)
1159 max_thread = nb_allowed_thread;
1160
1161 if (TaskFactory::verboseLevel()>=1)
1162 std::cout << "TBB: TBBTaskImplementation executeParallelFor begin=" << begin
1163 << " size=" << size << " max_thread=" << max_thread
1164 << " grain_size=" << options.grainSize()
1165 << " nb_allowed=" << nb_allowed_thread << '\n';
1166
1167 // In sequential execution, call the method \a f directly.
1168 if (max_thread==1 || max_thread==0){
1169 f->executeFunctor(begin,size);
1170 return;
1171 }
1172
1173 // Replace the uninitialized values of \a options with those of \a m_default_loop_options
1174 ParallelLoopOptions true_options(options);
1175 true_options.mergeUnsetValues(TaskFactory::defaultParallelLoopOptions());
1176 true_options.setMaxThread(max_thread);
1177
1178 ParallelForExecute pfe(this,true_options,begin,size,f,stat_info);
1179
1180 tbb::task_arena* used_arena = nullptr;
1181 if (max_thread<nb_allowed_thread && max_thread<m_p->m_sub_arena_list.size())
1182 used_arena = m_p->m_sub_arena_list[max_thread];
1183 if (!used_arena)
1184 used_arena = &(m_p->m_main_arena);
1185 used_arena->execute(pfe);
1186}
1187
1188/*---------------------------------------------------------------------------*/
1189/*---------------------------------------------------------------------------*/
1190
1191void TBBTaskImplementation::
1192executeParallelFor(const ParallelFor1DLoopInfo& loop_info)
1193{
1194 _executeParallelFor(loop_info);
1195}
1196
1197/*---------------------------------------------------------------------------*/
1198/*---------------------------------------------------------------------------*/
1205template<int RankValue> void TBBTaskImplementation::
1206_executeMDParallelFor(const ComplexForLoopRanges<RankValue>& loop_ranges,
1207 IMDRangeFunctor<RankValue>* functor,
1208 const ForLoopRunInfo& run_info)
1209{
1210 ParallelLoopOptions options;
1211 if (run_info.options().has_value())
1212 options = run_info.options().value();
1213
1214 ScopedExecInfo sei(run_info);
1215 ForLoopOneExecStat* stat_info = sei.statInfo();
1216 impl::ScopedStatLoop scoped_loop(sei.isOwn() ? stat_info : nullptr);
1217
1218 if (TaskFactory::verboseLevel()>=1){
1219 std::cout << "TBB: TBBTaskImplementation executeMDParallelFor nb_dim=" << RankValue
1220 << " nb_element=" << loop_ranges.nbElement()
1221 << " grain_size=" << options.grainSize()
1222 << " name=" << run_info.traceInfo().traceInfo()
1223 << " has_stat_info=" << (stat_info!=nullptr)
1224 << '\n';
1225 }
1226
1227 Integer max_thread = options.maxThread();
1228 // In sequential execution, call the method \a f directly.
1229 if (max_thread==1 || max_thread==0){
1230 functor->executeFunctor(loop_ranges);
1231 return;
1232 }
1233
1234 // Replace the uninitialized values of \a options with those of \a m_default_loop_options
1235 ParallelLoopOptions true_options(options);
1236 true_options.mergeUnsetValues(TaskFactory::defaultParallelLoopOptions());
1237
1238 Integer nb_allowed_thread = m_p->nbAllowedThread();
1239 if (max_thread<0)
1240 max_thread = nb_allowed_thread;
1241 tbb::task_arena* used_arena = nullptr;
1242 if (max_thread<nb_allowed_thread)
1243 used_arena = m_p->m_sub_arena_list[max_thread];
1244 if (!used_arena)
1245 used_arena = &(m_p->m_main_arena);
1246
1247 // For now, for dimension 1, use the historical 'ParallelForExecute'
1248 if constexpr (RankValue==1){
1249 auto range_1d = _toTBBRange(loop_ranges);
1250 auto x1 = [&](Integer begin,Integer size)
1251 {
1252 functor->executeFunctor(makeLoopRanges(ForLoopRange(begin,size)));
1253 //functor->executeFunctor(ComplexForLoopRanges<1>(begin,size));
1254 };
1255 LambdaRangeFunctorT<decltype(x1)> functor_1d(x1);
1256 Integer begin1 = CheckedConvert::toInteger(range_1d.dim(0).begin());
1257 Integer size1 = CheckedConvert::toInteger(range_1d.dim(0).size());
1258 ParallelForExecute pfe(this,true_options,begin1,size1,&functor_1d,stat_info);
1259 used_arena->execute(pfe);
1260 }
1261 else{
1262 MDParallelForExecute<RankValue> pfe(this,true_options,loop_ranges,functor,stat_info);
1263 used_arena->execute(pfe);
1264 }
1265}
1266
1267/*---------------------------------------------------------------------------*/
1268/*---------------------------------------------------------------------------*/
1269
1270void TBBTaskImplementation::
1271executeParallelFor(Integer begin,Integer size,Integer grain_size,IRangeFunctor* f)
1272{
1273 ParallelLoopOptions opts(TaskFactory::defaultParallelLoopOptions());
1274 opts.setGrainSize(grain_size);
1275 ForLoopRunInfo run_info(opts);
1276 executeParallelFor(ParallelFor1DLoopInfo(begin,size,f,run_info));
1277}
1278
1279/*---------------------------------------------------------------------------*/
1280/*---------------------------------------------------------------------------*/
1281
1282void TBBTaskImplementation::
1283executeParallelFor(Int32 begin,Int32 size,const ParallelLoopOptions& options,IRangeFunctor* f)
1284{
1285 executeParallelFor(ParallelFor1DLoopInfo(begin,size,f,ForLoopRunInfo(options)));
1286}
1287
1288/*---------------------------------------------------------------------------*/
1289/*---------------------------------------------------------------------------*/
1290
1291TBBTaskImplementation::TaskThreadInfo* TBBTaskImplementation::
1292currentTaskThreadInfo() const
1293{
1294 Int32 thread_id = currentTaskThreadIndex();
1295 if (thread_id>=0)
1296 return m_p->threadTaskInfo(thread_id);
1297 return nullptr;
1298}
1299
1300/*---------------------------------------------------------------------------*/
1301/*---------------------------------------------------------------------------*/
1302
1303Int32 TBBTaskImplementation::
1304currentTaskIndex() const
1305{
1306 Int32 thread_id = currentTaskThreadIndex();
1307#ifdef ARCANE_USE_ONETBB
1308 if (thread_id<0 || thread_id>=m_p->nbAllowedThread())
1309 return 0;
1310#endif
1311 TaskThreadInfo* tti = currentTaskThreadInfo();
1312 if (tti){
1313 Int32 task_index = tti->taskIndex();
1314 if (task_index>=0)
1315 return task_index;
1316 }
1317 return thread_id;
1318}
1319
1320/*---------------------------------------------------------------------------*/
1321/*---------------------------------------------------------------------------*/
1322
1323#ifdef ARCANE_USE_ONETBB
1324
1325/*---------------------------------------------------------------------------*/
1326/*---------------------------------------------------------------------------*/
1327
1328void OneTBBTask::
1329launchAndWait()
1330{
1331 tbb::task_group task_group;
1332 task_group.run(taskFunctor());
1333 task_group.wait();
1334 delete this;
1335}
1336
1337/*---------------------------------------------------------------------------*/
1338/*---------------------------------------------------------------------------*/
1339
1340void OneTBBTask::
1341launchAndWait(ConstArrayView<ITask*> tasks)
1342{
1343 tbb::task_group task_group;
1344 Integer n = tasks.size();
1345 if (n==0)
1346 return;
1347
1348 //set_ref_count(n+1);
1349 for( Integer i=0; i<n; ++i ){
1350 OneTBBTask* t = static_cast<OneTBBTask*>(tasks[i]);
1351 task_group.run(t->taskFunctor());
1352 }
1353 task_group.wait();
1354 for( Integer i=0; i<n; ++i ){
1355 OneTBBTask* t = static_cast<OneTBBTask*>(tasks[i]);
1356 delete t;
1357 }
1358}
1359
1360/*---------------------------------------------------------------------------*/
1361/*---------------------------------------------------------------------------*/
1362
1363ITask* OneTBBTask::
1364_createChildTask(ITaskFunctor* functor)
1365{
1366 OneTBBTask* t = new OneTBBTask(functor);
1367 return t;
1368}
1369
1370/*---------------------------------------------------------------------------*/
1371/*---------------------------------------------------------------------------*/
1372
1373#else // ARCANE_USE_ONETBB
1374
1375/*---------------------------------------------------------------------------*/
1376/*---------------------------------------------------------------------------*/
1377
1378void LegacyTBBTask::
1379launchAndWait()
1380{
1381 task::spawn_root_and_wait(*this);
1382}
1383
1384/*---------------------------------------------------------------------------*/
1385/*---------------------------------------------------------------------------*/
1386
1387void LegacyTBBTask::
1388launchAndWait(ConstArrayView<ITask*> tasks)
1389{
1390 Integer n = tasks.size();
1391 if (n==0)
1392 return;
1393
1394 set_ref_count(n+1);
1395 for( Integer i=0; i<n-1; ++i ){
1396 TBBTask* t = static_cast<TBBTask*>(tasks[i]);
1397 spawn(*t);
1398 }
1399 spawn_and_wait_for_all(*static_cast<TBBTask*>(tasks[n-1]));
1400}
1401
1402/*---------------------------------------------------------------------------*/
1403/*---------------------------------------------------------------------------*/
1404
1405ITask* LegacyTBBTask::
1406_createChildTask(ITaskFunctor* functor)
1407{
1408 TBBTask* t = new(allocate_child()) TBBTask(functor);
1409 return t;
1410}
1411
1412/*---------------------------------------------------------------------------*/
1413/*---------------------------------------------------------------------------*/
1414
1415#endif // ARCANE_USE_ONETBB
1416
1417/*---------------------------------------------------------------------------*/
1418/*---------------------------------------------------------------------------*/
1419
1420// TODO: to be removed now that 'DependencyInjection' is used
1421ARCANE_REGISTER_APPLICATION_FACTORY(TBBTaskImplementation,ITaskImplementation,
1422 TBBTaskImplementation);
1423
1424ARCANE_DI_REGISTER_PROVIDER(TBBTaskImplementation,
1425 DependencyInjection::ProviderProperty("TBBTaskImplementation"),
1426 ARCANE_DI_INTERFACES(ITaskImplementation),
1427 ARCANE_DI_EMPTY_CONSTRUCTOR());
1428
1429/*---------------------------------------------------------------------------*/
1430/*---------------------------------------------------------------------------*/
1431
1432} // End namespace Arcane
1433
1434/*---------------------------------------------------------------------------*/
1435/*---------------------------------------------------------------------------*/