Arcane  v3.16.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
CudaAcceleratorRuntime.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* CudaAcceleratorRuntime.cc (C) 2000-2025 */
9/* */
10/* Runtime pour 'Cuda'. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/accelerator/cuda/CudaAccelerator.h"
15
16#include "arcane/utils/PlatformUtils.h"
17#include "arcane/utils/Array.h"
18#include "arcane/utils/TraceInfo.h"
19#include "arcane/utils/NotSupportedException.h"
20#include "arcane/utils/FatalErrorException.h"
21#include "arcane/utils/NotImplementedException.h"
22#include "arcane/utils/IMemoryRessourceMng.h"
23#include "arcane/utils/MemoryView.h"
24#include "arcane/utils/OStringStream.h"
25#include "arcane/utils/ValueConvert.h"
26#include "arcane/utils/internal/IMemoryRessourceMngInternal.h"
27
28#include "arcane/accelerator/core/RunQueueBuildInfo.h"
29#include "arcane/accelerator/core/Memory.h"
30#include "arcane/accelerator/core/DeviceInfoList.h"
31
32#include "arcane/accelerator/core/internal/IRunnerRuntime.h"
33#include "arcane/accelerator/core/internal/RegisterRuntimeInfo.h"
34#include "arcane/accelerator/core/internal/RunCommandImpl.h"
35#include "arcane/accelerator/core/internal/IRunQueueStream.h"
36#include "arcane/accelerator/core/internal/IRunQueueEventImpl.h"
37#include "arcane/accelerator/core/PointerAttribute.h"
38#include "arcane/accelerator/core/RunQueue.h"
39#include "arcane/accelerator/core/DeviceMemoryInfo.h"
40#include "arcane/accelerator/core/NativeStream.h"
41
42#include "arcane/accelerator/cuda/runtime/internal/Cupti.h"
43
44#include <iostream>
45
46#include <cuda.h>
47
48#ifdef ARCANE_HAS_CUDA_NVTOOLSEXT
49#include <nvtx3/nvToolsExt.h>
50#endif
51
52using namespace Arccore;
53
54namespace Arcane::Accelerator::Cuda
55{
56namespace
57{
58 Int32 global_cupti_flush = 0;
59 CuptiInfo global_cupti_info;
60} // namespace
61
62/*---------------------------------------------------------------------------*/
63/*---------------------------------------------------------------------------*/
64
65void arcaneCheckCudaErrors(const TraceInfo& ti, CUresult e)
66{
67 if (e == CUDA_SUCCESS)
68 return;
69 const char* error_name = nullptr;
70 CUresult e2 = cuGetErrorName(e, &error_name);
71 if (e2 != CUDA_SUCCESS)
72 error_name = "Unknown";
73
74 const char* error_message = nullptr;
75 CUresult e3 = cuGetErrorString(e, &error_message);
76 if (e3 != CUDA_SUCCESS)
77 error_message = "Unknown";
78
79 ARCANE_FATAL("CUDA Error trace={0} e={1} name={2} message={3}",
80 ti, e, error_name, error_message);
81}
82
83/*---------------------------------------------------------------------------*/
84/*---------------------------------------------------------------------------*/
85
86class CudaRunQueueStream
88{
89 public:
90
91 CudaRunQueueStream(impl::IRunnerRuntime* runtime, const RunQueueBuildInfo& bi)
92 : m_runtime(runtime)
93 {
94 if (bi.isDefault())
95 ARCANE_CHECK_CUDA(cudaStreamCreate(&m_cuda_stream));
96 else {
97 int priority = bi.priority();
98 ARCANE_CHECK_CUDA(cudaStreamCreateWithPriority(&m_cuda_stream, cudaStreamDefault, priority));
99 }
100 }
101 ~CudaRunQueueStream() override
102 {
103 ARCANE_CHECK_CUDA_NOTHROW(cudaStreamDestroy(m_cuda_stream));
104 }
105
106 public:
107
108 void notifyBeginLaunchKernel([[maybe_unused]] impl::RunCommandImpl& c) override
109 {
110#ifdef ARCANE_HAS_CUDA_NVTOOLSEXT
111 auto kname = c.kernelName();
112 if (kname.empty())
113 nvtxRangePush(c.traceInfo().name());
114 else
115 nvtxRangePush(kname.localstr());
116#endif
117 return m_runtime->notifyBeginLaunchKernel();
118 }
120 {
121#ifdef ARCANE_HAS_CUDA_NVTOOLSEXT
122 nvtxRangePop();
123#endif
124 return m_runtime->notifyEndLaunchKernel();
125 }
126 void barrier() override
127 {
128 ARCANE_CHECK_CUDA(cudaStreamSynchronize(m_cuda_stream));
129 if (global_cupti_flush > 0)
130 global_cupti_info.flush();
131 }
132 bool _barrierNoException() override
133 {
134 return (cudaStreamSynchronize(m_cuda_stream) != cudaSuccess);
135 }
136 void copyMemory(const MemoryCopyArgs& args) override
137 {
138 auto source_bytes = args.source().bytes();
139 auto r = cudaMemcpyAsync(args.destination().data(), source_bytes.data(),
140 source_bytes.size(), cudaMemcpyDefault, m_cuda_stream);
141 ARCANE_CHECK_CUDA(r);
142 if (!args.isAsync())
143 barrier();
144 }
145 void prefetchMemory(const MemoryPrefetchArgs& args) override
146 {
147 auto src = args.source().bytes();
148 if (src.size() == 0)
149 return;
150 DeviceId d = args.deviceId();
151 int device = cudaCpuDeviceId;
152 if (!d.isHost())
153 device = d.asInt32();
154 //std::cout << "PREFETCH device=" << device << " host(id)=" << cudaCpuDeviceId
155 // << " size=" << args.source().size() << " data=" << src.data() << "\n";
156 auto r = cudaMemPrefetchAsync(src.data(), src.size(), device, m_cuda_stream);
157 ARCANE_CHECK_CUDA(r);
158 if (!args.isAsync())
159 barrier();
160 }
162 {
163 return impl::NativeStream(&m_cuda_stream);
164 }
165
166 public:
167
168 cudaStream_t trueStream() const
169 {
170 return m_cuda_stream;
171 }
172
173 private:
174
175 impl::IRunnerRuntime* m_runtime;
176 cudaStream_t m_cuda_stream;
177};
178
179/*---------------------------------------------------------------------------*/
180/*---------------------------------------------------------------------------*/
181
182class CudaRunQueueEvent
184{
185 public:
186
187 explicit CudaRunQueueEvent(bool has_timer)
188 {
189 if (has_timer)
190 ARCANE_CHECK_CUDA(cudaEventCreate(&m_cuda_event));
191 else
192 ARCANE_CHECK_CUDA(cudaEventCreateWithFlags(&m_cuda_event, cudaEventDisableTiming));
193 }
194 ~CudaRunQueueEvent() override
195 {
196 ARCANE_CHECK_CUDA_NOTHROW(cudaEventDestroy(m_cuda_event));
197 }
198
199 public:
200
201 // Enregistre l'événement au sein d'une RunQueue
202 void recordQueue(impl::IRunQueueStream* stream) final
203 {
204 auto* rq = static_cast<CudaRunQueueStream*>(stream);
205 ARCANE_CHECK_CUDA(cudaEventRecord(m_cuda_event, rq->trueStream()));
206 }
207
208 void wait() final
209 {
210 ARCANE_CHECK_CUDA(cudaEventSynchronize(m_cuda_event));
211 }
212
213 void waitForEvent(impl::IRunQueueStream* stream) final
214 {
215 auto* rq = static_cast<CudaRunQueueStream*>(stream);
216 ARCANE_CHECK_CUDA(cudaStreamWaitEvent(rq->trueStream(), m_cuda_event, cudaEventWaitDefault));
217 }
218
219 Int64 elapsedTime(IRunQueueEventImpl* start_event) final
220 {
221 // NOTE: Les évènements doivent avoir été créé avec le timer actif
222 ARCANE_CHECK_POINTER(start_event);
223 auto* true_start_event = static_cast<CudaRunQueueEvent*>(start_event);
224 float time_in_ms = 0.0;
225
226 // TODO: regarder si nécessaire
227 // ARCANE_CHECK_CUDA(cudaEventSynchronize(m_cuda_event));
228
229 ARCANE_CHECK_CUDA(cudaEventElapsedTime(&time_in_ms, true_start_event->m_cuda_event, m_cuda_event));
230 double x = time_in_ms * 1.0e6;
231 Int64 nano_time = static_cast<Int64>(x);
232 return nano_time;
233 }
234
235 bool hasPendingWork() final
236 {
237 cudaError_t v = cudaEventQuery(m_cuda_event);
238 if (v == cudaErrorNotReady)
239 return true;
240 ARCANE_CHECK_CUDA(v);
241 return false;
242 }
243
244 private:
245
246 cudaEvent_t m_cuda_event;
247};
248
249/*---------------------------------------------------------------------------*/
250/*---------------------------------------------------------------------------*/
251
254{
255 public:
256
257 ~CudaRunnerRuntime() override = default;
258
259 public:
260
261 void notifyBeginLaunchKernel() override
262 {
263 ++m_nb_kernel_launched;
264 if (m_is_verbose)
265 std::cout << "BEGIN CUDA KERNEL!\n";
266 }
267 void notifyEndLaunchKernel() override
268 {
269 ARCANE_CHECK_CUDA(cudaGetLastError());
270 if (m_is_verbose)
271 std::cout << "END CUDA KERNEL!\n";
272 }
273 void barrier() override
274 {
275 ARCANE_CHECK_CUDA(cudaDeviceSynchronize());
276 }
277 eExecutionPolicy executionPolicy() const override
278 {
280 }
281 impl::IRunQueueStream* createStream(const RunQueueBuildInfo& bi) override
282 {
283 return new CudaRunQueueStream(this, bi);
284 }
285 impl::IRunQueueEventImpl* createEventImpl() override
286 {
287 return new CudaRunQueueEvent(false);
288 }
289 impl::IRunQueueEventImpl* createEventImplWithTimer() override
290 {
291 return new CudaRunQueueEvent(true);
292 }
293 void setMemoryAdvice(ConstMemoryView buffer, eMemoryAdvice advice, DeviceId device_id) override
294 {
295 auto v = buffer.bytes();
296 const void* ptr = v.data();
297 size_t count = v.size();
298 int device = device_id.asInt32();
299 cudaMemoryAdvise cuda_advise;
300
301 if (advice == eMemoryAdvice::MostlyRead)
302 cuda_advise = cudaMemAdviseSetReadMostly;
304 cuda_advise = cudaMemAdviseSetPreferredLocation;
305 else if (advice == eMemoryAdvice::AccessedByDevice)
306 cuda_advise = cudaMemAdviseSetAccessedBy;
307 else if (advice == eMemoryAdvice::PreferredLocationHost) {
308 cuda_advise = cudaMemAdviseSetPreferredLocation;
309 device = cudaCpuDeviceId;
310 }
311 else if (advice == eMemoryAdvice::AccessedByHost) {
312 cuda_advise = cudaMemAdviseSetAccessedBy;
313 device = cudaCpuDeviceId;
314 }
315 else
316 return;
317 //std::cout << "MEMADVISE p=" << ptr << " size=" << count << " advise = " << cuda_advise << " id = " << device << "\n";
318 ARCANE_CHECK_CUDA(cudaMemAdvise(ptr, count, cuda_advise, device));
319 }
320 void unsetMemoryAdvice(ConstMemoryView buffer, eMemoryAdvice advice, DeviceId device_id) override
321 {
322 auto v = buffer.bytes();
323 const void* ptr = v.data();
324 size_t count = v.size();
325 int device = device_id.asInt32();
326 cudaMemoryAdvise cuda_advise;
327
328 if (advice == eMemoryAdvice::MostlyRead)
329 cuda_advise = cudaMemAdviseUnsetReadMostly;
331 cuda_advise = cudaMemAdviseUnsetPreferredLocation;
332 else if (advice == eMemoryAdvice::AccessedByDevice)
333 cuda_advise = cudaMemAdviseUnsetAccessedBy;
334 else if (advice == eMemoryAdvice::PreferredLocationHost) {
335 cuda_advise = cudaMemAdviseUnsetPreferredLocation;
336 device = cudaCpuDeviceId;
337 }
338 else if (advice == eMemoryAdvice::AccessedByHost) {
339 cuda_advise = cudaMemAdviseUnsetAccessedBy;
340 device = cudaCpuDeviceId;
341 }
342 else
343 return;
344 ARCANE_CHECK_CUDA(cudaMemAdvise(ptr, count, cuda_advise, device));
345 }
346
347 void setCurrentDevice(DeviceId device_id) final
348 {
349 Int32 id = device_id.asInt32();
350 if (!device_id.isAccelerator())
351 ARCANE_FATAL("Device {0} is not an accelerator device", id);
352 ARCANE_CHECK_CUDA(cudaSetDevice(id));
353 }
354
355 const IDeviceInfoList* deviceInfoList() final { return &m_device_info_list; }
356
357 void startProfiling() override
358 {
359 global_cupti_info.start();
360 }
361
362 void stopProfiling() override
363 {
364 global_cupti_info.stop();
365 }
366
367 bool isProfilingActive() override
368 {
369 return global_cupti_info.isActive();
370 }
371
372 void getPointerAttribute(PointerAttribute& attribute, const void* ptr) override
373 {
374 cudaPointerAttributes ca;
375 ARCANE_CHECK_CUDA(cudaPointerGetAttributes(&ca, ptr));
376 // NOTE: le type Arcane 'ePointerMemoryType' a normalememt les mêmes valeurs
377 // que le type CUDA correspondant donc on peut faire un cast simple.
378 auto mem_type = static_cast<ePointerMemoryType>(ca.type);
379 _fillPointerAttribute(attribute, mem_type, ca.device,
380 ptr, ca.devicePointer, ca.hostPointer);
381 }
382
383 DeviceMemoryInfo getDeviceMemoryInfo(DeviceId device_id) override
384 {
385 int d = 0;
386 int wanted_d = device_id.asInt32();
387 ARCANE_CHECK_CUDA(cudaGetDevice(&d));
388 if (d != wanted_d)
389 ARCANE_CHECK_CUDA(cudaSetDevice(wanted_d));
390 size_t free_mem = 0;
391 size_t total_mem = 0;
392 ARCANE_CHECK_CUDA(cudaMemGetInfo(&free_mem, &total_mem));
393 if (d != wanted_d)
394 ARCANE_CHECK_CUDA(cudaSetDevice(d));
396 dmi.setFreeMemory(free_mem);
397 dmi.setTotalMemory(total_mem);
398 return dmi;
399 }
400
401 void pushProfilerRange(const String& name, Int32 color_rgb) override
402 {
403#ifdef ARCANE_HAS_CUDA_NVTOOLSEXT
404 if (color_rgb >= 0) {
405 // NOTE: Il faudrait faire: nvtxEventAttributes_t eventAttrib = { 0 };
406 // mais cela provoque pleins d'avertissement de type 'missing initializer for member'
407 nvtxEventAttributes_t eventAttrib;
408 std::memset(&eventAttrib, 0, sizeof(nvtxEventAttributes_t));
409 eventAttrib.version = NVTX_VERSION;
410 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
411 eventAttrib.colorType = NVTX_COLOR_ARGB;
412 eventAttrib.color = color_rgb;
413 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
414 eventAttrib.message.ascii = name.localstr();
415 nvtxRangePushEx(&eventAttrib);
416 }
417 else
418 nvtxRangePush(name.localstr());
419#endif
420 }
421 void popProfilerRange() override
422 {
423#ifdef ARCANE_HAS_CUDA_NVTOOLSEXT
424 nvtxRangePop();
425#endif
426 }
427
428 void finalize(ITraceMng* tm) override
429 {
430 finalizeCudaMemoryAllocators(tm);
431 }
432
433 public:
434
435 void fillDevices(bool is_verbose);
436
437 private:
438
439 Int64 m_nb_kernel_launched = 0;
440 bool m_is_verbose = false;
441 impl::DeviceInfoList m_device_info_list;
442};
443
444/*---------------------------------------------------------------------------*/
445/*---------------------------------------------------------------------------*/
446
447void CudaRunnerRuntime::
448fillDevices(bool is_verbose)
449{
450 int nb_device = 0;
451 ARCANE_CHECK_CUDA(cudaGetDeviceCount(&nb_device));
452 std::ostream& omain = std::cout;
453 if (is_verbose)
454 omain << "ArcaneCUDA: Initialize Arcane CUDA runtime nb_available_device=" << nb_device << "\n";
455 for (int i = 0; i < nb_device; ++i) {
456 cudaDeviceProp dp;
457 cudaGetDeviceProperties(&dp, i);
458 OStringStream ostr;
459 std::ostream& o = ostr.stream();
460 o << "Device " << i << " name=" << dp.name << "\n";
461 o << " computeCapability = " << dp.major << "." << dp.minor << "\n";
462 o << " totalGlobalMem = " << dp.totalGlobalMem << "\n";
463 o << " sharedMemPerBlock = " << dp.sharedMemPerBlock << "\n";
464 o << " regsPerBlock = " << dp.regsPerBlock << "\n";
465 o << " warpSize = " << dp.warpSize << "\n";
466 o << " memPitch = " << dp.memPitch << "\n";
467 o << " maxThreadsPerBlock = " << dp.maxThreadsPerBlock << "\n";
468 o << " totalConstMem = " << dp.totalConstMem << "\n";
469 o << " clockRate = " << dp.clockRate << "\n";
470 o << " deviceOverlap = " << dp.deviceOverlap << "\n";
471 o << " cooperativeLaunch = " << dp.cooperativeLaunch << "\n";
472 o << " multiProcessorCount = " << dp.multiProcessorCount << "\n";
473 o << " kernelExecTimeoutEnabled = " << dp.kernelExecTimeoutEnabled << "\n";
474 o << " integrated = " << dp.integrated << "\n";
475 o << " canMapHostMemory = " << dp.canMapHostMemory << "\n";
476 o << " computeMode = " << dp.computeMode << "\n";
477 o << " directManagedMemAccessFromHost = " << dp.directManagedMemAccessFromHost << "\n";
478 o << " hostNativeAtomicSupported = " << dp.hostNativeAtomicSupported << "\n";
479 o << " pageableMemoryAccess = " << dp.pageableMemoryAccess << "\n";
480 o << " concurrentManagedAccess = " << dp.concurrentManagedAccess << "\n";
481 o << " pageableMemoryAccessUsesHostPageTables = " << dp.pageableMemoryAccessUsesHostPageTables << "\n";
482 o << " hostNativeAtomicSupported = " << dp.hostNativeAtomicSupported << "\n";
483 o << " maxThreadsDim = " << dp.maxThreadsDim[0] << " " << dp.maxThreadsDim[1]
484 << " " << dp.maxThreadsDim[2] << "\n";
485 o << " maxGridSize = " << dp.maxGridSize[0] << " " << dp.maxGridSize[1]
486 << " " << dp.maxGridSize[2] << "\n";
487 {
488 int least_val = 0;
489 int greatest_val = 0;
490 ARCANE_CHECK_CUDA(cudaDeviceGetStreamPriorityRange(&least_val, &greatest_val));
491 o << " leastPriority = " << least_val << " greatestPriority = " << greatest_val << "\n";
492 }
493 {
494 CUdevice device;
495 ARCANE_CHECK_CUDA(cuDeviceGet(&device, i));
496 CUuuid device_uuid;
497 ARCANE_CHECK_CUDA(cuDeviceGetUuid(&device_uuid, device));
498 o << " deviceUuid=";
499 impl::printUUID(o, device_uuid.bytes);
500 o << "\n";
501 }
502 String description(ostr.str());
503 if (is_verbose)
504 omain << description;
505
506 DeviceInfo device_info;
507 device_info.setDescription(description);
508 device_info.setDeviceId(DeviceId(i));
509 device_info.setName(dp.name);
510 m_device_info_list.addDevice(device_info);
511 }
512
513 Int32 global_cupti_level = 0;
514
515 // Regarde si on active Cupti
516 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUPTI_LEVEL", true))
517 global_cupti_level = v.value();
518 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUPTI_FLUSH", true))
519 global_cupti_flush = v.value();
520 bool do_print_cupti = true;
521 if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_CUPTI_PRINT", true))
522 do_print_cupti = (v.value() != 0);
523
524 if (global_cupti_level > 0) {
525#ifndef ARCANE_HAS_CUDA_CUPTI
526 ARCANE_FATAL("Trying to enable CUPTI but Arcane is not compiled with cupti support");
527#endif
528 global_cupti_info.init(global_cupti_level, do_print_cupti);
529 global_cupti_info.start();
530 }
531}
532
533/*---------------------------------------------------------------------------*/
534/*---------------------------------------------------------------------------*/
535
537: public IMemoryCopier
538{
539 void copy(ConstMemoryView from, [[maybe_unused]] eMemoryRessource from_mem,
540 MutableMemoryView to, [[maybe_unused]] eMemoryRessource to_mem,
541 const RunQueue* queue) override
542 {
543 if (queue) {
544 queue->copyMemory(MemoryCopyArgs(to.bytes(), from.bytes()).addAsync(queue->isAsync()));
545 return;
546 }
547 // 'cudaMemcpyDefault' sait automatiquement ce qu'il faut faire en tenant
548 // uniquement compte de la valeur des pointeurs. Il faudrait voir si
549 // utiliser \a from_mem et \a to_mem peut améliorer les performances.
550 ARCANE_CHECK_CUDA(cudaMemcpy(to.data(), from.data(), from.bytes().size(), cudaMemcpyDefault));
551 }
552};
553
554/*---------------------------------------------------------------------------*/
555/*---------------------------------------------------------------------------*/
556
557} // End namespace Arcane::Accelerator::Cuda
558
559namespace
560{
562Arcane::Accelerator::Cuda::CudaMemoryCopier global_cuda_memory_copier;
563} // namespace
564
565/*---------------------------------------------------------------------------*/
566/*---------------------------------------------------------------------------*/
567
568// Cette fonction est le point d'entrée utilisé lors du chargement
569// dynamique de cette bibliothèque
570extern "C" ARCANE_EXPORT void
571arcaneRegisterAcceleratorRuntimecuda(Arcane::Accelerator::RegisterRuntimeInfo& init_info)
572{
573 using namespace Arcane;
574 using namespace Arcane::Accelerator::Cuda;
575 Arcane::Accelerator::impl::setUsingCUDARuntime(true);
576 Arcane::Accelerator::impl::setCUDARunQueueRuntime(&global_cuda_runtime);
577 initializeCudaMemoryAllocators();
580 mrm->setIsAccelerator(true);
581 mrm->setAllocator(eMemoryRessource::UnifiedMemory, getCudaUnifiedMemoryAllocator());
582 mrm->setAllocator(eMemoryRessource::HostPinned, getCudaHostPinnedMemoryAllocator());
583 mrm->setAllocator(eMemoryRessource::Device, getCudaDeviceMemoryAllocator());
584 mrm->setCopier(&global_cuda_memory_copier);
585 global_cuda_runtime.fillDevices(init_info.isVerbose());
586}
587
588/*---------------------------------------------------------------------------*/
589/*---------------------------------------------------------------------------*/
#define ARCANE_CHECK_POINTER(ptr)
Macro retournant le pointeur ptr s'il est non nul ou lançant une exception s'il est nul.
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
void copy(ConstMemoryView from, eMemoryRessource from_mem, MutableMemoryView to, eMemoryRessource to_mem, const RunQueue *queue) override
Copie les données de from vers to avec la queue queue.
void notifyBeginLaunchKernel(impl::RunCommandImpl &c) override
Notification avant le lancement de la commande.
void barrier() override
Bloque jusqu'à ce que toutes les actions associées à cette file soient terminées.
bool _barrierNoException() override
Barrière sans exception. Retourne true en cas d'erreur.
void prefetchMemory(const MemoryPrefetchArgs &args) override
Effectue un pré-chargement d'une zone mémoire.
void notifyEndLaunchKernel(impl::RunCommandImpl &) override
Notification de fin de lancement de la commande.
impl::NativeStream nativeStream() override
Pointeur sur la structure interne dépendante de l'implémentation.
void copyMemory(const MemoryCopyArgs &args) override
Effectue une copie entre deux zones mémoire.
Classe singleton pour gérer CUPTI.
Definition Cupti.h:38
Identifiant d'un composant du système.
Definition DeviceId.h:33
bool isHost() const
Indique si l'instance est associée à l'hôte.
Definition DeviceId.h:60
Int32 asInt32() const
Valeur numérique du device.
Definition DeviceId.h:69
bool isAccelerator() const
Indique si l'instance est associée à un accélérateur.
Definition DeviceId.h:66
Information sur un device.
Definition DeviceInfo.h:32
Information mémoire d'un accélérateur.
Interface d'une liste de devices.
Arguments pour la copie mémoire.
Definition Memory.h:63
Arguments pour le préfetching mémoire.
Definition Memory.h:125
Informations sur une adresse mémoire.
Informations pour initialiser le runtime accélérateur.
Informations pour créer une RunQueue.
bool isDefault() const
Indique si l'instance a uniquement les valeurs par défaut.
File d'exécution pour un accélérateur.
bool isAsync() const
Indique si la file d'exécution est asynchrone.
Definition RunQueue.cc:320
void copyMemory(const MemoryCopyArgs &args) const
Copie des informations entre deux zones mémoires.
Definition RunQueue.cc:237
Interface d'une liste de devices.
Interface de l'implémentation d'un évènement.
Interface d'un flux d'exécution pour une RunQueue.
Interface du runtime associé à un accélérateur.
Type opaque pour encapsuler une 'stream' native.
Implémentation d'une commande pour accélérateur.
static std::optional< Int32 > tryParseFromEnvironment(StringView s, bool throw_if_invalid)
Definition Convert.cc:122
Interface pour les copies mémoire avec support des accélérateurs.
virtual IMemoryRessourceMngInternal * _internal()=0
Interface interne.
Partie interne à Arcane de 'IMemoryRessourceMng'.
virtual void setAllocator(eMemoryRessource r, IMemoryAllocator *allocator)=0
Positionne l'allocateur pour la ressource r.
virtual void setCopier(IMemoryCopier *copier)=0
Positionne l'instance gérant les copies.
virtual void setIsAccelerator(bool v)=0
Indique si un accélérateur est disponible.
Interface du gestionnaire de traces.
Flot de sortie lié à une String.
Chaîne de caractères unicode.
const char * localstr() const
Retourne la conversion de l'instance dans l'encodage UTF-8.
Definition String.cc:227
eMemoryAdvice
Conseils pour la gestion mémoire.
Definition Memory.h:36
@ AccessedByHost
Indique que la zone mémoire est accédée par l'hôte.
Definition Memory.h:48
@ PreferredLocationDevice
Privilégie le positionnement de la mémoire sur l'accélérateur.
Definition Memory.h:42
@ MostlyRead
Indique que la zone mémoire est principalement en lecture seule.
Definition Memory.h:40
@ PreferredLocationHost
Privilégie le positionnement de la mémoire sur l'hôte.
Definition Memory.h:44
@ AccessedByDevice
Indique que la zone mémoire est accédée par l'accélérateur.
Definition Memory.h:46
ePointerMemoryType
Type de mémoire pour un pointeur.
eExecutionPolicy
Politique d'exécution pour un Runner.
@ CUDA
Politique d'exécution utilisant l'environnement CUDA.
IMemoryRessourceMng * getDataMemoryRessourceMng()
Gestionnaire de ressource mémoire pour les données.
IMemoryAllocator * setAcceleratorHostMemoryAllocator(IMemoryAllocator *a)
Positionne l'allocateur spécifique pour les accélérateurs.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
@ HostPinned
Alloue sur l'hôte.
@ UnifiedMemory
Alloue en utilisant la mémoire unifiée.
@ Device
Alloue sur le device.
Espace de nom de Arccore.
Arcane::eMemoryResource eMemoryRessource
Typedef pour la version Arcane historique (avec 2's')