Arcane v3.16.0.0
Developer documentation
CudaAccelerator.cc
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
//-----------------------------------------------------------------------------
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
// See the top-level COPYRIGHT file for details.
// SPDX-License-Identifier: Apache-2.0
//-----------------------------------------------------------------------------
/*---------------------------------------------------------------------------*/
/* CudaAccelerator.cc                                          (C) 2000-2024 */
/*                                                                           */
/* 'CUDA' backend for accelerators.                                          */
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

#include "arcane/accelerator/cuda/CudaAccelerator.h"

#include "arcane/utils/PlatformUtils.h"
#include "arcane/utils/Array.h"
#include "arcane/utils/TraceInfo.h"
#include "arcane/utils/NotSupportedException.h"
#include "arcane/utils/FatalErrorException.h"
#include "arcane/utils/ValueConvert.h"
#include "arcane/utils/IMemoryAllocator.h"
#include "arcane/utils/OStringStream.h"
#include "arcane/utils/ITraceMng.h"
#include "arcane/utils/internal/MemoryPool.h"

#include "arcane/accelerator/core/internal/MemoryTracer.h"

#include <iostream>

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

namespace Arcane::Accelerator::Cuda
{
using namespace Arcane::impl;
using namespace Arccore;

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

//! Bit flags read from ARCANE_ACCELERATOR_MEMORY_POOL to select which
//! allocators use a memory pool.
enum class MemoryPoolFlags
{
  UVM = 1,
  Device = 2,
  HostPinned = 4
};
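
// The flags above are consumed as a bitmask. A minimal usage sketch, assuming
// a bash-like shell (the numeric values are the enumerator values above):
//
//   export ARCANE_ACCELERATOR_MEMORY_POOL=1   # pool for unified memory only
//   export ARCANE_ACCELERATOR_MEMORY_POOL=3   # UVM (1) + Device (2)
//   export ARCANE_ACCELERATOR_MEMORY_POOL=7   # UVM + Device + HostPinned
//
// Each allocator below tests its own bit, for example:
//   use_memory_pool = (v.value() & static_cast<int>(MemoryPoolFlags::UVM)) != 0;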

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

void arcaneCheckCudaErrors(const TraceInfo& ti, cudaError_t e)
{
  if (e != cudaSuccess)
    ARCANE_FATAL("CUDA Error trace={0} e={1} str={2}", ti, e, cudaGetErrorString(e));
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

void arcaneCheckCudaErrorsNoThrow(const TraceInfo& ti, cudaError_t e)
{
  if (e == cudaSuccess)
    return;
  String str = String::format("CUDA Error trace={0} e={1} str={2}", ti, e, cudaGetErrorString(e));
  FatalErrorException ex(ti, str);
  ex.write(std::cerr);
}
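
// These helpers are what the ARCANE_CHECK_CUDA / ARCANE_CHECK_CUDA_NOTHROW
// macros used throughout this file expand to (the macros are assumed to be
// declared in CudaAccelerator.h and to capture the call site). A minimal usage
// sketch, assuming 'ptr' and 'size' describe a valid device buffer:
//
//   ARCANE_CHECK_CUDA(cudaMemset(ptr, 0, size));   // throws FatalErrorException on error
//   ARCANE_CHECK_CUDA_NOTHROW(cudaFree(ptr));      // only prints the error (safe in destructors)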

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/*!
 * \brief Common class to manage block-wise allocation.
 */
class BlockAllocatorWrapper
{
 public:

  void initialize(Int64 block_size, bool do_block_alloc)
  {
    m_block_size = block_size;
    if (m_block_size <= 0)
      m_block_size = 128;
    m_do_block_allocate = do_block_alloc;
  }

  void dumpStats(std::ostream& ostr, const String& name)
  {
    ostr << "Allocator '" << name << "' : nb_allocate=" << m_nb_allocate
         << " nb_unaligned=" << m_nb_unaligned_allocate
         << "\n";
  }

  Int64 adjustedCapacity(Int64 wanted_capacity, Int64 element_size) const
  {
    const bool do_page = m_do_block_allocate;
    if (!do_page)
      return wanted_capacity;
    // Allocate a multiple of the block size.
    // For unified memory, the block size is one memory page.
    // Since unified-memory transfers are done page by page, this makes it
    // possible to detect which allocations trigger a transfer.
    // We also limit the number of distinct block sizes so that a possible
    // MemoryPool does not end up holding too many different values.
    Int64 orig_capacity = wanted_capacity;
    Int64 new_size = orig_capacity * element_size;
    Int64 block_size = m_block_size;
    Int64 nb_iter = 4 + (4096 / block_size);
    for (Int64 i = 0; i < nb_iter; ++i) {
      if (new_size >= (4 * block_size))
        block_size *= 4;
      else
        break;
    }
    new_size = _computeNextMultiple(new_size, block_size);
    wanted_capacity = new_size / element_size;
    if (wanted_capacity < orig_capacity)
      wanted_capacity = orig_capacity;
    return wanted_capacity;
  }
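
  // Worked example (illustrative): with m_block_size = 4096 (one page) and
  // element_size = 8, a request for wanted_capacity = 1000 gives
  // new_size = 8000 bytes; 8000 < 4*4096, so the block size stays at 4096 and
  // _computeNextMultiple(8000, 4096) = 8192, i.e. an adjusted capacity of
  // 1024 elements. Larger requests are rounded to a multiple of 4^k * 4096,
  // so only a few distinct block sizes ever reach the MemoryPool.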

  void doAllocate(void* ptr, [[maybe_unused]] size_t new_size)
  {
    ++m_nb_allocate;
    if (m_do_block_allocate) {
      uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
      if ((addr % m_block_size) != 0) {
        ++m_nb_unaligned_allocate;
      }
    }
  }

 private:

  //! Block size. Allocations are a multiple of this size.
  Int64 m_block_size = 128;
  //! True if allocations are rounded using m_block_size.
  bool m_do_block_allocate = false;
  //! Number of allocations.
  std::atomic<Int32> m_nb_allocate = 0;
  //! Number of unaligned allocations.
  std::atomic<Int32> m_nb_unaligned_allocate = 0;

 private:

  // Computes the smallest multiple of \a multiple greater than or equal to \a n.
  static Int64 _computeNextMultiple(Int64 n, Int64 multiple)
  {
    Int64 new_n = n / multiple;
    if ((n % multiple) != 0)
      ++new_n;
    return (new_n * multiple);
  }
};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/*!
 * \brief Base class of an allocator specific to 'Cuda'.
 */
class CudaMemoryAllocatorBase
: public Arccore::AlignedMemoryAllocator3
{
 public:

  using BaseClass = Arccore::AlignedMemoryAllocator3;

 public:

  class ConcreteAllocator
  {
   public:

    virtual ~ConcreteAllocator() = default;

   public:

    virtual cudaError_t _allocate(void** ptr, size_t new_size) = 0;
    virtual cudaError_t _deallocate(void* ptr) = 0;
  };

  class UnderlyingAllocator
  : public IMemoryPoolAllocator
  {
   public:

    explicit UnderlyingAllocator(CudaMemoryAllocatorBase* v)
    : m_base(v)
    {
    }

   public:

    void* allocateMemory(size_t size) override
    {
      void* out = nullptr;
      ARCANE_CHECK_CUDA(m_base->m_concrete_allocator->_allocate(&out, size));
      m_base->m_block_wrapper.doAllocate(out, size);
      return out;
    }
    void freeMemory(void* ptr, [[maybe_unused]] size_t size) override
    {
      ARCANE_CHECK_CUDA_NOTHROW(m_base->m_concrete_allocator->_deallocate(ptr));
    }

   public:

    CudaMemoryAllocatorBase* m_base = nullptr;
  };

 public:

  CudaMemoryAllocatorBase(const String& allocator_name, ConcreteAllocator* concrete_allocator)
  : AlignedMemoryAllocator3(128)
  , m_concrete_allocator(concrete_allocator)
  , m_direct_sub_allocator(this)
  , m_memory_pool(&m_direct_sub_allocator, allocator_name)
  , m_sub_allocator(&m_direct_sub_allocator)
  , m_allocator_name(allocator_name)
  {
    if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_PRINT_LEVEL", true))
      m_print_level = v.value();
  }

  ~CudaMemoryAllocatorBase()
  {
  }

 public:

  void finalize(ITraceMng* tm)
  {
    if (m_print_level >= 1) {
      OStringStream ostr;
      if (m_use_memory_pool) {
        m_memory_pool.dumpStats(ostr());
        m_memory_pool.dumpFreeMap(ostr());
      }
      ostr() << "Allocator '" << m_allocator_name << "' nb_realloc=" << m_nb_reallocate
             << " realloc_copy=" << m_reallocate_size << "\n";
      m_block_wrapper.dumpStats(ostr(), m_allocator_name);
      if (tm)
        tm->info() << ostr.str();
      else
        std::cout << ostr.str();
    }

    m_memory_pool.freeCachedMemory();
  }

 public:

  bool hasRealloc(MemoryAllocationArgs) const final { return true; }
  AllocatedMemoryInfo allocate(MemoryAllocationArgs args, Int64 new_size) final
  {
    void* out = m_sub_allocator->allocateMemory(new_size);
    Int64 a = reinterpret_cast<Int64>(out);
    if ((a % 128) != 0)
      ARCANE_FATAL("Bad alignment for CUDA allocator: offset={0}", (a % 128));
    m_tracer.traceAllocate(out, new_size, args);
    _applyHint(out, new_size, args);
    return { out, new_size };
  }
  AllocatedMemoryInfo reallocate(MemoryAllocationArgs args, AllocatedMemoryInfo current_info, Int64 new_size) final
  {
    ++m_nb_reallocate;
    Int64 current_size = current_info.size();
    m_reallocate_size += current_size;
    String array_name = args.arrayName();
    const bool do_print = (m_print_level >= 2);
    if (do_print) {
      std::cout << "Reallocate allocator=" << m_allocator_name
                << " current_size=" << current_size
                << " current_capacity=" << current_info.capacity()
                << " new_capacity=" << new_size
                << " ptr=" << current_info.baseAddress();
      if (array_name.null() && m_print_level >= 3) {
        std::cout << " stack=" << platform::getStackTrace();
      }
      else {
        std::cout << " name=" << array_name;
        if (m_print_level >= 4)
          std::cout << " stack=" << platform::getStackTrace();
      }
      std::cout << "\n";
    }
    if (m_use_memory_pool)
      _removeHint(current_info.baseAddress(), current_size, args);
    AllocatedMemoryInfo a = allocate(args, new_size);
    // TODO: remove the hints after the deallocate, because the memory area may be reused.
    ARCANE_CHECK_CUDA(cudaMemcpy(a.baseAddress(), current_info.baseAddress(), current_size, cudaMemcpyDefault));
    deallocate(args, current_info);
    return a;
  }
  void deallocate(MemoryAllocationArgs args, AllocatedMemoryInfo mem_info) final
  {
    void* ptr = mem_info.baseAddress();
    size_t mem_size = mem_info.capacity();
    if (m_use_memory_pool)
      _removeHint(ptr, mem_size, args);
    // Do not throw an exception on deallocation errors: deallocation often
    // happens inside destructors and an exception there would abort the code
    // through std::terminate().
    m_tracer.traceDeallocate(mem_info, args);
    m_sub_allocator->freeMemory(ptr, mem_size);
  }

  Int64 adjustedCapacity(MemoryAllocationArgs args, Int64 wanted_capacity, Int64 element_size) const final
  {
    wanted_capacity = AlignedMemoryAllocator3::adjustedCapacity(args, wanted_capacity, element_size);
    return m_block_wrapper.adjustedCapacity(wanted_capacity, element_size);
  }

 protected:

  virtual void _applyHint([[maybe_unused]] void* ptr, [[maybe_unused]] size_t new_size,
                          [[maybe_unused]] MemoryAllocationArgs args) {}
  virtual void _removeHint([[maybe_unused]] void* ptr, [[maybe_unused]] size_t new_size,
                           [[maybe_unused]] MemoryAllocationArgs args) {}

 private:

  impl::MemoryTracerWrapper m_tracer;
  std::unique_ptr<ConcreteAllocator> m_concrete_allocator;
  UnderlyingAllocator m_direct_sub_allocator;
  MemoryPool m_memory_pool;
  IMemoryPoolAllocator* m_sub_allocator = nullptr;
  bool m_use_memory_pool = false;
  String m_allocator_name;
  std::atomic<Int32> m_nb_reallocate = 0;
  std::atomic<Int64> m_reallocate_size = 0;
  Int32 m_print_level = 0;

 protected:

  BlockAllocatorWrapper m_block_wrapper;

 protected:

  void _setTraceLevel(Int32 v) { m_tracer.setTraceLevel(v); }
  // IMPORTANT: must be called before any allocation and must not be changed afterwards.
  void _setUseMemoryPool(bool is_used)
  {
    IMemoryPoolAllocator* mem_pool = &m_memory_pool;
    IMemoryPoolAllocator* direct = &m_direct_sub_allocator;
    m_sub_allocator = (is_used) ? mem_pool : direct;
    m_use_memory_pool = is_used;
    if (is_used) {
      if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_POOL_MAX_BLOCK_SIZE", true)) {
        if (v.value() < 0)
          ARCANE_FATAL("Invalid value '{0}' for memory pool max block size", v.value());
        size_t block_size = static_cast<size_t>(v.value());
        m_memory_pool.setMaxCachedBlockSize(block_size);
      }
    }
  }
};
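
// Call-flow summary: an Arcane array that uses one of the CUDA allocators ends
// up in CudaMemoryAllocatorBase::allocate(), which forwards to m_sub_allocator.
// Depending on _setUseMemoryPool() this is either the MemoryPool (which may
// hand back a cached block) or the UnderlyingAllocator, which calls the
// ConcreteAllocator (cudaMallocManaged, cudaMalloc or cudaMallocHost in the
// derived classes below) and records block statistics through m_block_wrapper.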

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/*!
 * \brief Allocator for CUDA unified memory (cudaMallocManaged).
 */
class UnifiedMemoryCudaMemoryAllocator
: public CudaMemoryAllocatorBase
{
 public:

  class Allocator
  : public ConcreteAllocator
  {
   public:

    Allocator()
    {
      if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_USE_ALLOC_ATS", true))
        m_use_ats = v.value();
      if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_MEMORY_HINT_ON_DEVICE", true))
        m_use_hint_as_mainly_device = (v.value() != 0);
    }

    cudaError_t _deallocate(void* ptr) final
    {
      if (m_use_ats) {
        ::free(ptr);
        return cudaSuccess;
      }
      //std::cout << "CUDA_MANAGED_FREE ptr=" << ptr << "\n";
      return ::cudaFree(ptr);
    }

    cudaError_t _allocate(void** ptr, size_t new_size) final
    {
      if (m_use_ats) {
        *ptr = ::aligned_alloc(128, new_size);
      }
      else {
        auto r = ::cudaMallocManaged(ptr, new_size, cudaMemAttachGlobal);
        //std::cout << "CUDA_MANAGED_MALLOC ptr=" << (*ptr) << " size=" << new_size << "\n";
        //if (new_size < 4000)
        //std::cout << "STACK=" << platform::getStackTrace() << "\n";

        if (r != cudaSuccess)
          return r;

        // If requested, indicate that we prefer to allocate on the GPU.
        // NOTE: in that case the current device is used to set the preferred
        // location. When a MemoryPool is used, this call is only done once per
        // block. If the default device of a thread changes during the
        // computation this becomes inconsistent. To avoid that we could call
        // cudaMemAdvise() for every allocation (via _applyHint()), but these
        // operations are fairly expensive and, with many allocations, the
        // result could be a loss of performance.
        if (m_use_hint_as_mainly_device) {
          int device_id = 0;
          void* p = *ptr;
          cudaGetDevice(&device_id);
          ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetPreferredLocation, device_id));
          ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetAccessedBy, cudaCpuDeviceId));
        }
      }

      return cudaSuccess;
    }

   public:

    bool m_use_ats = false;
    //! If true, by default all allocations are treated as eMemoryLocationHint::MainlyDevice.
    bool m_use_hint_as_mainly_device = false;
  };

 public:

  UnifiedMemoryCudaMemoryAllocator()
  : CudaMemoryAllocatorBase("UnifiedMemoryCudaMemory", new Allocator())
  {
    if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_MALLOC_TRACE", true))
      _setTraceLevel(v.value());
  }

  void initialize()
  {
    bool do_page_allocate = true;
    if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_UM_PAGE_ALLOC", true))
      do_page_allocate = (v.value() != 0);
    Int64 page_size = platform::getPageSize();
    m_block_wrapper.initialize(page_size, do_page_allocate);

    bool use_memory_pool = false;
    if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_POOL", true))
      use_memory_pool = (v.value() & static_cast<int>(MemoryPoolFlags::UVM)) != 0;
    _setUseMemoryPool(use_memory_pool);
  }

 public:

  void notifyMemoryArgsChanged([[maybe_unused]] MemoryAllocationArgs old_args,
                               MemoryAllocationArgs new_args, AllocatedMemoryInfo ptr) final
  {
    void* p = ptr.baseAddress();
    Int64 s = ptr.capacity();
    if (p && s > 0)
      _applyHint(ptr.baseAddress(), ptr.size(), new_args);
  }
  eMemoryResource memoryResource() const override { return eMemoryResource::UnifiedMemory; }

 protected:

  void _applyHint(void* p, size_t new_size, MemoryAllocationArgs args)
  {
    eMemoryLocationHint hint = args.memoryLocationHint();
    // Use the active device to set the default GPU.
    // This is only done when the \a hint requires it, to avoid calling
    // cudaGetDevice() every time.
    int device_id = 0;
    if (hint == eMemoryLocationHint::MainlyDevice || hint == eMemoryLocationHint::HostAndDeviceMostlyRead) {
      cudaGetDevice(&device_id);
    }
    //std::cout << "SET_MEMORY_HINT name=" << args.arrayName() << " size=" << new_size << " hint=" << (int)hint << "\n";
    if (hint == eMemoryLocationHint::MainlyDevice || hint == eMemoryLocationHint::HostAndDeviceMostlyRead) {
      ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetPreferredLocation, device_id));
      ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetAccessedBy, cudaCpuDeviceId));
    }
    if (hint == eMemoryLocationHint::MainlyHost) {
      ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId));
      //ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetAccessedBy, 0));
    }
    if (hint == eMemoryLocationHint::HostAndDeviceMostlyRead) {
      ARCANE_CHECK_CUDA(cudaMemAdvise(p, new_size, cudaMemAdviseSetReadMostly, device_id));
    }
  }
  void _removeHint(void* p, size_t size, MemoryAllocationArgs args)
  {
    eMemoryLocationHint hint = args.memoryLocationHint();
    if (hint == eMemoryLocationHint::None)
      return;
    int device_id = 0;
    ARCANE_CHECK_CUDA(cudaMemAdvise(p, size, cudaMemAdviseUnsetReadMostly, device_id));
  }

 private:

  bool m_use_ats = false;
};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

class HostPinnedCudaMemoryAllocator
: public CudaMemoryAllocatorBase
{
 public:

  class Allocator
  : public ConcreteAllocator
  {
   public:

    cudaError_t _allocate(void** ptr, size_t new_size) final
    {
      return ::cudaMallocHost(ptr, new_size);
    }
    cudaError_t _deallocate(void* ptr) final
    {
      return ::cudaFreeHost(ptr);
    }
  };

 public:

  HostPinnedCudaMemoryAllocator()
  : CudaMemoryAllocatorBase("HostPinnedCudaMemory", new Allocator())
  {
  }

 public:

  void initialize()
  {
    bool use_memory_pool = false;
    if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_POOL", true))
      use_memory_pool = (v.value() & static_cast<int>(MemoryPoolFlags::HostPinned)) != 0;
    _setUseMemoryPool(use_memory_pool);
    m_block_wrapper.initialize(128, use_memory_pool);
  }
  eMemoryResource memoryResource() const override { return eMemoryResource::HostPinned; }
};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

class DeviceCudaMemoryAllocator
: public CudaMemoryAllocatorBase
{
  class Allocator
  : public ConcreteAllocator
  {
   public:

    Allocator()
    {
      if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUDA_USE_ALLOC_ATS", true))
        m_use_ats = v.value();
    }

    cudaError_t _allocate(void** ptr, size_t new_size) final
    {
      if (m_use_ats) {
        // FIXME: it does not work on WIN32
        *ptr = std::aligned_alloc(128, new_size);
        if (*ptr)
          return cudaSuccess;
        return cudaErrorMemoryAllocation;
      }
      cudaError_t r = ::cudaMalloc(ptr, new_size);
      //std::cout << "ALLOCATE_DEVICE ptr=" << (*ptr) << " size=" << new_size << " r=" << (int)r << "\n";
      return r;
    }
    cudaError_t _deallocate(void* ptr) final
    {
      if (m_use_ats) {
        std::free(ptr);
        return cudaSuccess;
      }
      //std::cout << "FREE_DEVICE ptr=" << ptr << "\n";
      return ::cudaFree(ptr);
    }

   private:

    bool m_use_ats = false;
  };

 public:

  DeviceCudaMemoryAllocator()
  : CudaMemoryAllocatorBase("DeviceCudaMemoryAllocator", new Allocator())
  {
  }

 public:

  void initialize()
  {
    bool use_memory_pool = false;
    if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_MEMORY_POOL", true))
      use_memory_pool = (v.value() & static_cast<int>(MemoryPoolFlags::Device)) != 0;
    _setUseMemoryPool(use_memory_pool);
    m_block_wrapper.initialize(128, use_memory_pool);
  }
  eMemoryResource memoryResource() const override { return eMemoryResource::Device; }
};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

namespace
{
  UnifiedMemoryCudaMemoryAllocator unified_memory_cuda_memory_allocator;
  HostPinnedCudaMemoryAllocator host_pinned_cuda_memory_allocator;
  DeviceCudaMemoryAllocator device_cuda_memory_allocator;
} // namespace

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

Arccore::IMemoryAllocator*
getCudaMemoryAllocator()
{
  return &unified_memory_cuda_memory_allocator;
}

Arccore::IMemoryAllocator*
getCudaDeviceMemoryAllocator()
{
  return &device_cuda_memory_allocator;
}

Arccore::IMemoryAllocator*
getCudaUnifiedMemoryAllocator()
{
  return &unified_memory_cuda_memory_allocator;
}

Arccore::IMemoryAllocator*
getCudaHostPinnedMemoryAllocator()
{
  return &host_pinned_cuda_memory_allocator;
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

void initializeCudaMemoryAllocators()
{
  unified_memory_cuda_memory_allocator.initialize();
  device_cuda_memory_allocator.initialize();
  host_pinned_cuda_memory_allocator.initialize();
}

void finalizeCudaMemoryAllocators(ITraceMng* tm)
{
  unified_memory_cuda_memory_allocator.finalize(tm);
  device_cuda_memory_allocator.finalize(tm);
  host_pinned_cuda_memory_allocator.finalize(tm);
}
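
// Usage sketch: the CUDA runtime support is expected to call
// initializeCudaMemoryAllocators() once before any allocation and
// finalizeCudaMemoryAllocators() at shutdown to release cached pool memory and
// dump statistics. A minimal (illustrative) sequence:
//
//   initializeCudaMemoryAllocators();
//   Arccore::IMemoryAllocator* a = getCudaUnifiedMemoryAllocator();
//   // ... create Arcane arrays with this allocator ...
//   finalizeCudaMemoryAllocators(tm);   // 'tm' may be null: statistics then go to std::cout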

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

} // namespace Arcane::Accelerator::Cuda

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/