Arcane  v4.1.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
CudaAccelerator.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* CudaAccelerator.cc (C) 2000-2025 */
9/* */
10/* Backend 'CUDA' pour les accélérateurs. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/accelerator/cuda/CudaAccelerator.h"
15
16#include "arcane/utils/PlatformUtils.h"
17#include "arcane/utils/Array.h"
18#include "arcane/utils/TraceInfo.h"
19#include "arcane/utils/NotSupportedException.h"
20#include "arcane/utils/FatalErrorException.h"
21#include "arcane/utils/ValueConvert.h"
22#include "arcane/utils/IMemoryAllocator.h"
23#include "arcane/utils/OStringStream.h"
24#include "arcane/utils/ITraceMng.h"
25#include "arcane/utils/internal/MemoryPool.h"
26
27#include "arcane/accelerator/core/internal/MemoryTracer.h"
28
29#include <iostream>
30
31/*---------------------------------------------------------------------------*/
32/*---------------------------------------------------------------------------*/
33
34namespace Arcane::Accelerator::Cuda
35{
36using namespace Arcane::impl;
37using namespace Arccore;
38
39/*---------------------------------------------------------------------------*/
40/*---------------------------------------------------------------------------*/
41
43enum class MemoryPoolFlags
44{
45 UVM = 1,
46 Device = 2,
47 HostPinned = 4
48};
49
50/*---------------------------------------------------------------------------*/
51/*---------------------------------------------------------------------------*/
52
53void arcaneCheckCudaErrors(const TraceInfo& ti, cudaError_t e)
54{
55 if (e != cudaSuccess)
56 ARCANE_FATAL("CUDA Error trace={0} e={1} str={2}", ti, e, cudaGetErrorString(e));
57}
58
59/*---------------------------------------------------------------------------*/
60/*---------------------------------------------------------------------------*/
61
62void arcaneCheckCudaErrorsNoThrow(const TraceInfo& ti, cudaError_t e)
63{
64 if (e == cudaSuccess)
65 return;
66 String str = String::format("CUDA Error trace={0} e={1} str={2}", ti, e, cudaGetErrorString(e));
67 FatalErrorException ex(ti, str);
68 ex.write(std::cerr);
69}
70
71/*---------------------------------------------------------------------------*/
72/*---------------------------------------------------------------------------*/
73
74/*---------------------------------------------------------------------------*/
75/*---------------------------------------------------------------------------*/
88{
89 public:
90
91 void initialize(Int64 block_size, bool do_block_alloc)
92 {
93 m_block_size = block_size;
94 if (m_block_size <= 0)
95 m_block_size = 128;
96 m_do_block_allocate = do_block_alloc;
97 }
98
99 void dumpStats(std::ostream& ostr, const String& name)
100 {
101 ostr << "Allocator '" << name << "' : nb_allocate=" << m_nb_allocate
102 << " nb_unaligned=" << m_nb_unaligned_allocate
103 << "\n";
104 }
105
106 Int64 adjustedCapacity(Int64 wanted_capacity, Int64 element_size) const
107 {
108 const bool do_page = m_do_block_allocate;
109 if (!do_page)
110 return wanted_capacity;
111 // Alloue un multiple de la taille d'un bloc
112 // Pour la mémoire unifiée, la taille de bloc est une page mémoire.
113 // Comme les transfers de la mémoire unifiée se font par page,
114 // cela permet de détecter quelles allocations provoquent le transfert.
115 // On se débrouille aussi pour limiter les différentes taille
116 // de bloc alloué pour éviter d'avoir trop de blocs de taille
117 // différente pour que l'éventuel MemoryPool ne contienne trop
118 // de valeurs.
119 Int64 orig_capacity = wanted_capacity;
120 Int64 new_size = orig_capacity * element_size;
121 Int64 block_size = m_block_size;
122 Int64 nb_iter = 4 + (4096 / block_size);
123 for (Int64 i = 0; i < nb_iter; ++i) {
124 if (new_size >= (4 * block_size))
125 block_size *= 4;
126 else
127 break;
128 }
129 new_size = _computeNextMultiple(new_size, block_size);
130 wanted_capacity = new_size / element_size;
131 if (wanted_capacity < orig_capacity)
132 wanted_capacity = orig_capacity;
133 return wanted_capacity;
134 }
135
136 void doAllocate(void* ptr, [[maybe_unused]] size_t new_size)
137 {
140 uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
141 if ((addr % m_block_size) != 0) {
143 }
144 }
145 }
146
147 private:
148
150 Int64 m_block_size = 128;
154 std::atomic<Int32> m_nb_allocate = 0;
156 std::atomic<Int32> m_nb_unaligned_allocate = 0;
157
158 private:
159
160 // Calcule la plus petite valeur de \a multiple de \a multiple
161 static Int64 _computeNextMultiple(Int64 n, Int64 multiple)
162 {
163 Int64 new_n = n / multiple;
164 if ((n % multiple) != 0)
165 ++new_n;
166 return (new_n * multiple);
167 }
168};
169
170/*---------------------------------------------------------------------------*/
171/*---------------------------------------------------------------------------*/
175class CudaMemoryAllocatorBase
176: public Arccore::AlignedMemoryAllocator3
177{
178 public:
179
180 using BaseClass = Arccore::AlignedMemoryAllocator3;
181
182 public:
183
185 {
186 public:
187
188 virtual ~ConcreteAllocator() = default;
189
190 public:
191
192 virtual cudaError_t _allocate(void** ptr, size_t new_size) = 0;
193 virtual cudaError_t _deallocate(void* ptr) = 0;
194 };
195
196 class UnderlyingAllocator
197 : public IMemoryPoolAllocator
198 {
199 public:
200
201 explicit UnderlyingAllocator(CudaMemoryAllocatorBase* v)
202 : m_base(v)
203 {
204 }
205
206 public:
207
208 void* allocateMemory(size_t size) override
209 {
210 void* out = nullptr;
211 ARCANE_CHECK_CUDA(m_base->m_concrete_allocator->_allocate(&out, size));
212 m_base->m_block_wrapper.doAllocate(out, size);
213 return out;
214 }
215 void freeMemory(void* ptr, [[maybe_unused]] size_t size) override
216 {
217 ARCANE_CHECK_CUDA_NOTHROW(m_base->m_concrete_allocator->_deallocate(ptr));
218 }
219
220 public:
221
222 CudaMemoryAllocatorBase* m_base = nullptr;
223 };
224
225 public:
226
227 CudaMemoryAllocatorBase(const String& allocator_name, ConcreteAllocator* concrete_allocator)
228 : AlignedMemoryAllocator3(128)
229 , m_concrete_allocator(concrete_allocator)
230 , m_direct_sub_allocator(this)
231 , m_memory_pool(&m_direct_sub_allocator, allocator_name)
232 , m_sub_allocator(&m_direct_sub_allocator)
233 , m_allocator_name(allocator_name)
234 {
235 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_PRINT_LEVEL", true))
236 m_print_level = v.value();
237 }
238
239 ~CudaMemoryAllocatorBase()
240 {
241 }
242
243 public:
244
245 void finalize(ITraceMng* tm)
246 {
247 if (m_print_level >= 1) {
248 OStringStream ostr;
249 if (m_use_memory_pool) {
250 m_memory_pool.dumpStats(ostr());
251 m_memory_pool.dumpFreeMap(ostr());
252 }
253 ostr() << "Allocator '" << m_allocator_name << "' nb_realloc=" << m_nb_reallocate
254 << " realloc_copy=" << m_reallocate_size << "\n";
255 m_block_wrapper.dumpStats(ostr(), m_allocator_name);
256 if (tm)
257 tm->info() << ostr.str();
258 else
259 std::cout << ostr.str();
260 }
261
262 m_memory_pool.freeCachedMemory();
263 }
264
265 public:
266
267 bool hasRealloc(MemoryAllocationArgs) const final { return true; }
268 AllocatedMemoryInfo allocate(MemoryAllocationArgs args, Int64 new_size) final
269 {
270 void* out = m_sub_allocator->allocateMemory(new_size);
271 Int64 a = reinterpret_cast<Int64>(out);
272 if ((a % 128) != 0)
273 ARCANE_FATAL("Bad alignment for CUDA allocator: offset={0}", (a % 128));
274 m_tracer.traceAllocate(out, new_size, args);
275 _applyHint(out, new_size, args);
276 return { out, new_size };
277 }
278 AllocatedMemoryInfo reallocate(MemoryAllocationArgs args, AllocatedMemoryInfo current_info, Int64 new_size) final
279 {
280 ++m_nb_reallocate;
281 Int64 current_size = current_info.size();
282 m_reallocate_size += current_size;
283 String array_name = args.arrayName();
284 const bool do_print = (m_print_level >= 2);
285 if (do_print) {
286 std::cout << "Reallocate allocator=" << m_allocator_name
287 << " current_size=" << current_size
288 << " current_capacity=" << current_info.capacity()
289 << " new_capacity=" << new_size
290 << " ptr=" << current_info.baseAddress();
291 if (array_name.null() && m_print_level >= 3) {
292 std::cout << " stack=" << platform::getStackTrace();
293 }
294 else {
295 std::cout << " name=" << array_name;
296 if (m_print_level >= 4)
297 std::cout << " stack=" << platform::getStackTrace();
298 }
299 std::cout << "\n";
300 }
301 if (m_use_memory_pool)
302 _removeHint(current_info.baseAddress(), current_size, args);
303 AllocatedMemoryInfo a = allocate(args, new_size);
304 // TODO: supprimer les Hint après le deallocate car la zone mémoire peut être réutilisée.
305 ARCANE_CHECK_CUDA(cudaMemcpy(a.baseAddress(), current_info.baseAddress(), current_size, cudaMemcpyDefault));
306 deallocate(args, current_info);
307 return a;
308 }
309 void deallocate(MemoryAllocationArgs args, AllocatedMemoryInfo mem_info) final
310 {
311 void* ptr = mem_info.baseAddress();
312 size_t mem_size = mem_info.capacity();
313 if (m_use_memory_pool)
314 _removeHint(ptr, mem_size, args);
315 // Ne lève pas d'exception en cas d'erreurs lors de la désallocation
316 // car elles ont souvent lieu dans les destructeurs et cela provoque
317 // un arrêt du code par std::terminate().
318 m_tracer.traceDeallocate(mem_info, args);
319 m_sub_allocator->freeMemory(ptr, mem_size);
320 }
321
322 Int64 adjustedCapacity(MemoryAllocationArgs args, Int64 wanted_capacity, Int64 element_size) const final
323 {
324 wanted_capacity = AlignedMemoryAllocator3::adjustedCapacity(args, wanted_capacity, element_size);
325 return m_block_wrapper.adjustedCapacity(wanted_capacity, element_size);
326 }
327
328 protected:
329
330 virtual void _applyHint([[maybe_unused]] void* ptr, [[maybe_unused]] size_t new_size,
331 [[maybe_unused]] MemoryAllocationArgs args) {}
332 virtual void _removeHint([[maybe_unused]] void* ptr, [[maybe_unused]] size_t new_size,
333 [[maybe_unused]] MemoryAllocationArgs args) {}
334
335 private:
336
337 impl::MemoryTracerWrapper m_tracer;
338 std::unique_ptr<ConcreteAllocator> m_concrete_allocator;
339 UnderlyingAllocator m_direct_sub_allocator;
340 MemoryPool m_memory_pool;
341 IMemoryPoolAllocator* m_sub_allocator = nullptr;
342 bool m_use_memory_pool = false;
343 String m_allocator_name;
344 std::atomic<Int32> m_nb_reallocate = 0;
345 std::atomic<Int64> m_reallocate_size = 0;
346 Int32 m_print_level = 0;
347
348 protected:
349
350 BlockAllocatorWrapper m_block_wrapper;
351
352 protected:
353
354 void _setTraceLevel(Int32 v) { m_tracer.setTraceLevel(v); }
355 // IMPORTANT: doit être appelé avant toute allocation et ne plus être modifié ensuite.
356 void _setUseMemoryPool(bool is_used)
357 {
358 IMemoryPoolAllocator* mem_pool = &m_memory_pool;
359 IMemoryPoolAllocator* direct = &m_direct_sub_allocator;
360 m_sub_allocator = (is_used) ? mem_pool : direct;
361 m_use_memory_pool = is_used;
362 if (is_used) {
363 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_POOL_MAX_BLOCK_SIZE", true)) {
364 if (v.value() < 0)
365 ARCANE_FATAL("Invalid value '{0}' for memory pool max block size");
366 size_t block_size = static_cast<size_t>(v.value());
367 m_memory_pool.setMaxCachedBlockSize(block_size);
368 }
369 }
370 }
371};
372
373/*---------------------------------------------------------------------------*/
374/*---------------------------------------------------------------------------*/
382class UnifiedMemoryCudaMemoryAllocator
383: public CudaMemoryAllocatorBase
384{
385 public:
386
387 class Allocator
388 : public ConcreteAllocator
389 {
390 public:
391
392 Allocator()
393 {
394 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_USE_ALLOC_ATS", true))
395 m_use_ats = v.value();
396 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_MEMORY_HINT_ON_DEVICE", true))
397 m_use_hint_as_mainly_device = (v.value() != 0);
398 }
399
400 cudaError_t _deallocate(void* ptr) final
401 {
402 if (m_use_ats) {
403 ::free(ptr);
404 return cudaSuccess;
405 }
406 //std::cout << "CUDA_MANAGED_FREE ptr=" << ptr << "\n";
407 return ::cudaFree(ptr);
408 }
409
410 cudaError_t _allocate(void** ptr, size_t new_size) final
411 {
412 if (m_use_ats) {
413 *ptr = ::aligned_alloc(128, new_size);
414 }
415 else {
416 auto r = ::cudaMallocManaged(ptr, new_size, cudaMemAttachGlobal);
417 //std::cout << "CUDA_MANAGED_MALLOC ptr=" << (*ptr) << " size=" << new_size << "\n";
418 //if (new_size < 4000)
419 //std::cout << "STACK=" << platform::getStackTrace() << "\n";
420
421 if (r != cudaSuccess)
422 return r;
423
424 // Si demandé, indique qu'on préfère allouer sur le GPU.
425 // NOTE: Dans ce cas, on récupère le device actuel pour positionner la localisation
426 // préférée. Dans le cas où on utilise MemoryPool, cette allocation ne sera effectuée
427 // qu'une seule fois. Si le device par défaut pour un thread change au cours du calcul
428 // il y aura une incohérence. Pour éviter cela, on pourrait faire un cudaMemAdvise()
429 // pour chaque allocation (via _applyHint()) mais ces opérations sont assez couteuses
430 // et s'il y a beaucoup d'allocation il peut en résulter une perte de performance.
432 int device_id = 0;
433 void* p = *ptr;
434 cudaGetDevice(&device_id);
435 ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetPreferredLocation, _getMemoryLocation(device_id)));
436 ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetAccessedBy, _getMemoryLocation(cudaCpuDeviceId)));
437 }
438 }
439
440 return cudaSuccess;
441 }
442
443 public:
444
445 bool m_use_ats = false;
448 };
449
450 public:
451
452 UnifiedMemoryCudaMemoryAllocator()
453 : CudaMemoryAllocatorBase("UnifiedMemoryCudaMemory", new Allocator())
454 {
455 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_MALLOC_TRACE", true))
456 _setTraceLevel(v.value());
457 }
458
459 void initialize()
460 {
461 bool do_page_allocate = true;
462 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_UM_PAGE_ALLOC", true))
463 do_page_allocate = (v.value() != 0);
464 Int64 page_size = platform::getPageSize();
465 m_block_wrapper.initialize(page_size, do_page_allocate);
466
467 bool use_memory_pool = false;
468 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_POOL", true))
469 use_memory_pool = (v.value() & static_cast<int>(MemoryPoolFlags::UVM)) != 0;
470 _setUseMemoryPool(use_memory_pool);
471 }
472
473 public:
474
475 void notifyMemoryArgsChanged([[maybe_unused]] MemoryAllocationArgs old_args,
476 MemoryAllocationArgs new_args, AllocatedMemoryInfo ptr) final
477 {
478 void* p = ptr.baseAddress();
479 Int64 s = ptr.capacity();
480 if (p && s > 0)
481 _applyHint(ptr.baseAddress(), ptr.size(), new_args);
482 }
484
485 protected:
486
487 void _applyHint(void* p, size_t new_size, MemoryAllocationArgs args)
488 {
489 eMemoryLocationHint hint = args.memoryLocationHint();
490 // Utilise le device actif pour positionner le GPU par défaut
491 // On ne le fait que si le \a hint le nécessite pour éviter d'appeler
492 // cudaGetDevice() à chaque fois.
493 int device_id = 0;
495 cudaGetDevice(&device_id);
496 }
497 auto device_memory_location = _getMemoryLocation(device_id);
498 auto cpu_memory_location = _getMemoryLocation(cudaCpuDeviceId);
499
500 //std::cout << "SET_MEMORY_HINT name=" << args.arrayName() << " size=" << new_size << " hint=" << (int)hint << "\n";
502 ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetPreferredLocation, device_memory_location));
503 ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetAccessedBy, cpu_memory_location));
504 }
506 ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetPreferredLocation, cpu_memory_location));
507 //ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetAccessedBy, 0));
508 }
510 ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetReadMostly, device_memory_location));
511 }
512 }
513 void _removeHint(void* p, size_t size, MemoryAllocationArgs args)
514 {
515 eMemoryLocationHint hint = args.memoryLocationHint();
516 if (hint == eMemoryLocationHint::None)
517 return;
518 int device_id = 0;
519 ARCANE_CHECK_CUDA(cudaMemAdvise(p, size, cudaMemAdviseUnsetReadMostly, _getMemoryLocation(device_id)));
520 }
521
522 private:
523
524 bool m_use_ats = false;
525};
526
527/*---------------------------------------------------------------------------*/
528/*---------------------------------------------------------------------------*/
529
530class HostPinnedCudaMemoryAllocator
531: public CudaMemoryAllocatorBase
532{
533 public:
534
536 : public ConcreteAllocator
537 {
538 public:
539
540 cudaError_t _allocate(void** ptr, size_t new_size) final
541 {
542 return ::cudaMallocHost(ptr, new_size);
543 }
544 cudaError_t _deallocate(void* ptr) final
545 {
546 return ::cudaFreeHost(ptr);
547 }
548 };
549
550 public:
551
552 HostPinnedCudaMemoryAllocator()
553 : CudaMemoryAllocatorBase("HostPinnedCudaMemory", new Allocator())
554 {
555 }
556
557 public:
558
559 void initialize()
560 {
561 bool use_memory_pool = false;
562 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_POOL", true))
563 use_memory_pool = (v.value() & static_cast<int>(MemoryPoolFlags::HostPinned)) != 0;
564 _setUseMemoryPool(use_memory_pool);
565 m_block_wrapper.initialize(128, use_memory_pool);
566 }
568};
569
570/*---------------------------------------------------------------------------*/
571/*---------------------------------------------------------------------------*/
572
573class DeviceCudaMemoryAllocator
574: public CudaMemoryAllocatorBase
575{
576
577 class Allocator
578 : public ConcreteAllocator
579 {
580 public:
581
582 Allocator()
583 {
584 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_USE_ALLOC_ATS", true))
585 m_use_ats = v.value();
586 }
587
588 cudaError_t _allocate(void** ptr, size_t new_size) final
589 {
590 if (m_use_ats) {
591 // FIXME: it does not work on WIN32
592 *ptr = std::aligned_alloc(128, new_size);
593 if (*ptr)
594 return cudaSuccess;
595 return cudaErrorMemoryAllocation;
596 }
597 cudaError_t r = ::cudaMalloc(ptr, new_size);
598 //std::cout << "ALLOCATE_DEVICE ptr=" << (*ptr) << " size=" << new_size << " r=" << (int)r << "\n";
599 return r;
600 }
601 cudaError_t _deallocate(void* ptr) final
602 {
603 if (m_use_ats) {
604 std::free(ptr);
605 return cudaSuccess;
606 }
607 //std::cout << "FREE_DEVICE ptr=" << ptr << "\n";
608 return ::cudaFree(ptr);
609 }
610
611 private:
612
613 bool m_use_ats = false;
614 };
615
616 public:
617
618 DeviceCudaMemoryAllocator()
619 : CudaMemoryAllocatorBase("DeviceCudaMemoryAllocator", new Allocator())
620 {
621 }
622
623 public:
624
625 void initialize()
626 {
627 bool use_memory_pool = false;
628 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_POOL", true))
629 use_memory_pool = (v.value() & static_cast<int>(MemoryPoolFlags::Device)) != 0;
630 _setUseMemoryPool(use_memory_pool);
631 m_block_wrapper.initialize(128, use_memory_pool);
632 }
634};
635
636/*---------------------------------------------------------------------------*/
637/*---------------------------------------------------------------------------*/
638
639namespace
640{
641 UnifiedMemoryCudaMemoryAllocator unified_memory_cuda_memory_allocator;
642 HostPinnedCudaMemoryAllocator host_pinned_cuda_memory_allocator;
643 DeviceCudaMemoryAllocator device_cuda_memory_allocator;
644} // namespace
645
646/*---------------------------------------------------------------------------*/
647/*---------------------------------------------------------------------------*/
648
650getCudaMemoryAllocator()
651{
652 return &unified_memory_cuda_memory_allocator;
653}
654
656getCudaDeviceMemoryAllocator()
657{
658 return &device_cuda_memory_allocator;
659}
660
662getCudaUnifiedMemoryAllocator()
663{
664 return &unified_memory_cuda_memory_allocator;
665}
666
668getCudaHostPinnedMemoryAllocator()
669{
670 return &host_pinned_cuda_memory_allocator;
671}
672
673/*---------------------------------------------------------------------------*/
674/*---------------------------------------------------------------------------*/
675
676void initializeCudaMemoryAllocators()
677{
678 unified_memory_cuda_memory_allocator.initialize();
679 device_cuda_memory_allocator.initialize();
680 host_pinned_cuda_memory_allocator.initialize();
681}
682
683void finalizeCudaMemoryAllocators(ITraceMng* tm)
684{
685 unified_memory_cuda_memory_allocator.finalize(tm);
686 device_cuda_memory_allocator.finalize(tm);
687 host_pinned_cuda_memory_allocator.finalize(tm);
688}
689
690/*---------------------------------------------------------------------------*/
691/*---------------------------------------------------------------------------*/
692
693} // namespace Arcane::Accelerator::Cuda
694
695/*---------------------------------------------------------------------------*/
696/*---------------------------------------------------------------------------*/
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Classe commune pour gérer l'allocation par bloc.
bool m_do_block_allocate
Indique si l'allocation en utilisant m_block_size.
std::atomic< Int32 > m_nb_allocate
Nombre d'allocations.
std::atomic< Int32 > m_nb_unaligned_allocate
Nombre d'allocations non alignées.
Int64 m_block_size
Taille d'un bloc. L'allocation sera un multiple de cette taille.
void freeMemory(void *ptr, size_t size) override
Libère le bloc situé à l'adresse address contenant size octets.
void * allocateMemory(size_t size) override
Alloue un bloc pour size octets.
Classe de base d'un allocateur spécifique pour 'Cuda'.
eMemoryResource memoryResource() const override
Ressource mémoire fournie par l'allocateur.
eMemoryResource memoryResource() const override
Ressource mémoire fournie par l'allocateur.
bool m_use_hint_as_mainly_device
Si vrai, par défaut on considère toutes les allocations comme eMemoryLocationHint::MainlyDevice.
eMemoryResource memoryResource() const override
Ressource mémoire fournie par l'allocateur.
Int64 adjustedCapacity(MemoryAllocationArgs args, Int64 wanted_capacity, Int64 element_size) const override
Ajuste la capacité suivant la taille d'élément.
Informations sur une zone mémoire allouée.
static std::optional< Int32 > tryParseFromEnvironment(StringView s, bool throw_if_invalid)
Definition Convert.cc:122
Exception lorsqu'une erreur fatale est survenue.
Interface du gestionnaire de traces.
virtual TraceMessage info()=0
Flot pour un message d'information.
Classe contenant des informations pour spécialiser les allocations.
Flot de sortie lié à une String.
Chaîne de caractères unicode.
bool null() const
Retourne true si la chaîne est nulle.
Definition String.cc:305
Interface d'un allocateur pour un MemoryPool.
Definition MemoryPool.h:39
ARCCORE_BASE_EXPORT String getStackTrace()
Retourne une chaîne de caractere contenant la pile d'appel.
Int64 getPageSize()
Taille des pages du système hôte en octets.
std::int64_t Int64
Type entier signé sur 64 bits.
eMemoryLocationHint
Indices sur la localisation mémoire attendue.
@ MainlyHost
Indique que la donnée sera plutôt utilisée sur CPU.
@ HostAndDeviceMostlyRead
Indique que la donnée sera utilisée à la fois sur accélérateur et sur CPU et qu'elle ne sera pas souv...
@ MainlyDevice
Indique que la donnée sera plutôt utilisée sur accélérateur.
eMemoryResource
Liste des ressources mémoire disponibles.
@ HostPinned
Alloue sur l'hôte.
@ UnifiedMemory
Alloue en utilisant la mémoire unifiée.
@ Device
Alloue sur le device.