14#include "arcane/accelerator/hip/HipAccelerator.h"
16#include "arcane/utils/PlatformUtils.h"
17#include "arcane/utils/Array.h"
18#include "arcane/utils/TraceInfo.h"
19#include "arcane/utils/FatalErrorException.h"
20#include "arcane/utils/NotImplementedException.h"
21#include "arcane/utils/IMemoryRessourceMng.h"
22#include "arcane/utils/OStringStream.h"
23#include "arcane/utils/internal/IMemoryRessourceMngInternal.h"
25#include "arcane/accelerator/core/RunQueueBuildInfo.h"
26#include "arcane/accelerator/core/Memory.h"
27#include "arcane/accelerator/core/DeviceInfoList.h"
28#include "arcane/accelerator/core/RunQueue.h"
29#include "arcane/accelerator/core/DeviceMemoryInfo.h"
30#include "arcane/accelerator/core/NativeStream.h"
31#include "arcane/accelerator/core/internal/IRunnerRuntime.h"
32#include "arcane/accelerator/core/internal/RegisterRuntimeInfo.h"
33#include "arcane/accelerator/core/internal/IRunQueueStream.h"
34#include "arcane/accelerator/core/internal/IRunQueueEventImpl.h"
35#include "arcane/accelerator/core/internal/RunCommandImpl.h"
39#ifdef ARCANE_HAS_ROCTX
45namespace Arcane::Accelerator::Hip
51class HipRunQueueStream
60 ARCANE_CHECK_HIP(hipStreamCreate(&m_hip_stream));
62 int priority = bi.priority();
63 ARCANE_CHECK_HIP(hipStreamCreateWithPriority(&m_hip_stream, hipStreamDefault, priority));
66 ~HipRunQueueStream()
override
68 ARCANE_CHECK_HIP_NOTHROW(hipStreamDestroy(m_hip_stream));
75#ifdef ARCANE_HAS_ROCTX
76 auto kname = c.kernelName();
78 roctxRangePush(c.traceInfo().name());
80 roctxRangePush(kname.localstr());
82 return m_runtime->notifyBeginLaunchKernel();
86#ifdef ARCANE_HAS_ROCTX
89 return m_runtime->notifyEndLaunchKernel();
93 ARCANE_CHECK_HIP(hipStreamSynchronize(m_hip_stream));
97 return hipStreamSynchronize(m_hip_stream) != hipSuccess;
101 auto r = hipMemcpyAsync(args.destination().data(), args.source().data(),
102 args.source().bytes().size(), hipMemcpyDefault, m_hip_stream);
109 auto src = args.source().bytes();
113 int device = hipCpuDeviceId;
116 auto r = hipMemPrefetchAsync(src.data(), src.size(), device, m_hip_stream);
128 hipStream_t trueStream()
const
136 hipStream_t m_hip_stream;
51class HipRunQueueStream {
…};
142class HipRunQueueEvent
147 explicit HipRunQueueEvent(
bool has_timer)
150 ARCANE_CHECK_HIP(hipEventCreate(&m_hip_event));
152 ARCANE_CHECK_HIP(hipEventCreateWithFlags(&m_hip_event, hipEventDisableTiming));
154 ~HipRunQueueEvent()
override
156 ARCANE_CHECK_HIP_NOTHROW(hipEventDestroy(m_hip_event));
165 ARCANE_CHECK_HIP(hipEventRecord(m_hip_event, rq->trueStream()));
170 ARCANE_CHECK_HIP(hipEventSynchronize(m_hip_event));
176 ARCANE_CHECK_HIP(hipStreamWaitEvent(rq->trueStream(), m_hip_event, 0));
179 Int64 elapsedTime(IRunQueueEventImpl* from_event)
final
181 auto* true_from_event =
static_cast<HipRunQueueEvent*
>(from_event);
183 float time_in_ms = 0.0;
184 ARCANE_CHECK_HIP(hipEventElapsedTime(&time_in_ms, true_from_event->m_hip_event, m_hip_event));
185 double x = time_in_ms * 1.0e6;
186 Int64 nano_time =
static_cast<Int64
>(x);
190 bool hasPendingWork()
final
192 hipError_t v = hipEventQuery(m_hip_event);
193 if (v == hipErrorNotReady)
201 hipEvent_t m_hip_event;
142class HipRunQueueEvent {
…};
216 void notifyBeginLaunchKernel()
override
218 ++m_nb_kernel_launched;
220 std::cout <<
"BEGIN HIP KERNEL!\n";
222 void notifyEndLaunchKernel()
override
224 ARCANE_CHECK_HIP(hipGetLastError());
226 std::cout <<
"END HIP KERNEL!\n";
228 void barrier()
override
230 ARCANE_CHECK_HIP(hipDeviceSynchronize());
250 auto v = buffer.bytes();
251 const void* ptr = v.data();
252 size_t count = v.size();
253 int device = device_id.
asInt32();
254 hipMemoryAdvise hip_advise;
257 hip_advise = hipMemAdviseSetReadMostly;
259 hip_advise = hipMemAdviseSetPreferredLocation;
261 hip_advise = hipMemAdviseSetAccessedBy;
263 hip_advise = hipMemAdviseSetPreferredLocation;
264 device = hipCpuDeviceId;
267 hip_advise = hipMemAdviseSetAccessedBy;
268 device = hipCpuDeviceId;
273 ARCANE_CHECK_HIP(hipMemAdvise(ptr, count, hip_advise, device));
277 auto v = buffer.bytes();
278 const void* ptr = v.data();
279 size_t count = v.size();
280 int device = device_id.
asInt32();
281 hipMemoryAdvise hip_advise;
284 hip_advise = hipMemAdviseUnsetReadMostly;
286 hip_advise = hipMemAdviseUnsetPreferredLocation;
288 hip_advise = hipMemAdviseUnsetAccessedBy;
290 hip_advise = hipMemAdviseUnsetPreferredLocation;
291 device = hipCpuDeviceId;
294 hip_advise = hipMemAdviseUnsetAccessedBy;
295 device = hipCpuDeviceId;
299 ARCANE_CHECK_HIP(hipMemAdvise(ptr, count, hip_advise, device));
302 void setCurrentDevice(
DeviceId device_id)
final
304 Int32
id = device_id.
asInt32();
306 ARCANE_FATAL(
"Device {0} is not an accelerator device",
id);
307 ARCANE_CHECK_HIP(hipSetDevice(
id));
309 const IDeviceInfoList* deviceInfoList()
override {
return &m_device_info_list; }
311 void getPointerAttribute(
PointerAttribute& attribute,
const void* ptr)
override
313 hipPointerAttribute_t pa;
314 hipError_t ret_value = hipPointerGetAttributes(&pa, ptr);
315 auto mem_type = ePointerMemoryType::Unregistered;
319 if (ret_value==hipSuccess){
320#if HIP_VERSION_MAJOR >= 6
321 auto rocm_memory_type = pa.type;
323 auto rocm_memory_type = pa.memoryType;
326 mem_type = ePointerMemoryType::Managed;
327 else if (rocm_memory_type == hipMemoryTypeHost)
328 mem_type = ePointerMemoryType::Host;
329 else if (rocm_memory_type == hipMemoryTypeDevice)
330 mem_type = ePointerMemoryType::Device;
337 _fillPointerAttribute(attribute, mem_type, pa.device,
338 ptr, pa.devicePointer, pa.hostPointer);
344 int wanted_d = device_id.
asInt32();
345 ARCANE_CHECK_HIP(hipGetDevice(&d));
347 ARCANE_CHECK_HIP(hipSetDevice(wanted_d));
349 size_t total_mem = 0;
350 ARCANE_CHECK_HIP(hipMemGetInfo(&free_mem, &total_mem));
352 ARCANE_CHECK_HIP(hipSetDevice(d));
354 dmi.setFreeMemory(free_mem);
355 dmi.setTotalMemory(total_mem);
359 void pushProfilerRange(
const String& name, [[maybe_unused]] Int32 color)
override
361#ifdef ARCANE_HAS_ROCTX
365 void popProfilerRange()
override
367#ifdef ARCANE_HAS_ROCTX
374 void fillDevices(
bool is_verbose);
378 Int64 m_nb_kernel_launched = 0;
379 bool m_is_verbose =
false;
386void HipRunnerRuntime::
387fillDevices(
bool is_verbose)
390 ARCANE_CHECK_HIP(hipGetDeviceCount(&nb_device));
391 std::ostream& omain = std::cout;
393 omain <<
"ArcaneHIP: Initialize Arcane HIP runtime nb_available_device=" << nb_device <<
"\n";
394 for (
int i = 0; i < nb_device; ++i) {
396 std::ostream& o = ostr.stream();
399 ARCANE_CHECK_HIP(hipGetDeviceProperties(&dp, i));
401 int has_managed_memory = 0;
402 ARCANE_CHECK_HIP(hipDeviceGetAttribute(&has_managed_memory, hipDeviceAttributeManagedMemory, i));
404 o <<
"\nDevice " << i <<
" name=" << dp.name <<
"\n";
405 o <<
" computeCapability = " << dp.major <<
"." << dp.minor <<
"\n";
406 o <<
" totalGlobalMem = " << dp.totalGlobalMem <<
"\n";
407 o <<
" sharedMemPerBlock = " << dp.sharedMemPerBlock <<
"\n";
408 o <<
" regsPerBlock = " << dp.regsPerBlock <<
"\n";
409 o <<
" warpSize = " << dp.warpSize <<
"\n";
410 o <<
" memPitch = " << dp.memPitch <<
"\n";
411 o <<
" maxThreadsPerBlock = " << dp.maxThreadsPerBlock <<
"\n";
412 o <<
" totalConstMem = " << dp.totalConstMem <<
"\n";
413 o <<
" clockRate = " << dp.clockRate <<
"\n";
415 o <<
" multiProcessorCount = " << dp.multiProcessorCount <<
"\n";
416 o <<
" kernelExecTimeoutEnabled = " << dp.kernelExecTimeoutEnabled <<
"\n";
417 o <<
" integrated = " << dp.integrated <<
"\n";
418 o <<
" canMapHostMemory = " << dp.canMapHostMemory <<
"\n";
419 o <<
" computeMode = " << dp.computeMode <<
"\n";
420 o <<
" maxThreadsDim = " << dp.maxThreadsDim[0] <<
" " << dp.maxThreadsDim[1]
421 <<
" " << dp.maxThreadsDim[2] <<
"\n";
422 o <<
" maxGridSize = " << dp.maxGridSize[0] <<
" " << dp.maxGridSize[1]
423 <<
" " << dp.maxGridSize[2] <<
"\n";
424 o <<
" concurrentManagedAccess = " << dp.concurrentManagedAccess <<
"\n";
425 o <<
" directManagedMemAccessFromHost = " << dp.directManagedMemAccessFromHost <<
"\n";
426 o <<
" gcnArchName = " << dp.gcnArchName <<
"\n";
427 o <<
" pageableMemoryAccess = " << dp.pageableMemoryAccess <<
"\n";
428 o <<
" pageableMemoryAccessUsesHostPageTables = " << dp.pageableMemoryAccessUsesHostPageTables <<
"\n";
429 o <<
" hasManagedMemory = " << has_managed_memory <<
"\n";
430#if HIP_VERSION_MAJOR >= 6
431 o <<
" gpuDirectRDMASupported = " << dp.gpuDirectRDMASupported <<
"\n";
432 o <<
" hostNativeAtomicSupported = " << dp.hostNativeAtomicSupported <<
"\n";
433 o <<
" unifiedFunctionPointers = " << dp.unifiedFunctionPointers <<
"\n";
437 ARCANE_CHECK_HIP(hipDeviceGet(&device, i));
439 ARCANE_CHECK_HIP(hipDeviceGetUuid(&device_uuid, device));
441 impl::printUUID(o, device_uuid.bytes);
445 String description(ostr.str());
447 omain << description;
450 device_info.setDescription(description);
451 device_info.setDeviceId(
DeviceId(i));
452 device_info.setName(dp.name);
453 m_device_info_list.addDevice(device_info);
474 ARCANE_CHECK_HIP(hipMemcpy(to.data(), from.data(), from.bytes().size(), hipMemcpyDefault));
494extern "C" ARCANE_EXPORT
void
498 using namespace Arcane::Accelerator::Hip;
499 Arcane::Accelerator::impl::setUsingHIPRuntime(
true);
500 Arcane::Accelerator::impl::setHIPRunQueueRuntime(&global_hip_runtime);
507 mrm->
setCopier(&global_hip_memory_copier);
508 global_hip_runtime.fillDevices(init_info.isVerbose());
#define ARCANE_CHECK_POINTER(ptr)
Macro retournant le pointeur ptr s'il est non nul ou lancant une exception s'il est nul.
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Identifiant d'un composant du système.
bool isHost() const
Indique si l'instance est associée à l'hôte.
Int32 asInt32() const
Valeur numérique du device.
bool isAccelerator() const
Indique si l'instance est associée à un accélérateur.
Information sur un device.
Information mémoire d'un accélérateur.
void copy(ConstMemoryView from, eMemoryRessource from_mem, MutableMemoryView to, eMemoryRessource to_mem, const RunQueue *queue) override
Copie les données de from vers to avec la queue queue.
void notifyBeginLaunchKernel(impl::RunCommandImpl &c) override
Notification avant le lancement de la commande.
bool _barrierNoException() override
Barrière sans exception. Retourne true en cas d'erreur.
void barrier() override
Bloque jusqu'à ce que toutes les actions associées à cette file soient terminées.
void notifyEndLaunchKernel(impl::RunCommandImpl &) override
Notification de fin de lancement de la commande.
void prefetchMemory(const MemoryPrefetchArgs &args) override
Effectue un pré-chargement d'une zone mémoire.
void copyMemory(const MemoryCopyArgs &args) override
Effectue une copie entre deux zones mémoire.
impl::NativeStream nativeStream() override
Pointeur sur la structure interne dépendante de l'implémentation.
Interface d'une liste de devices.
Arguments pour la copie mémoire.
Arguments pour le préfetching mémoire.
Informations sur une adresse mémoire.
Informations pour initialiser le runtime accélérateur.
Informations pour créer une RunQueue.
bool isDefault() const
Indique si l'instance a uniquement les valeurs par défaut.
File d'exécution pour un accélérateur.
bool isAsync() const
Indique si la file d'exécution est asynchrone.
void copyMemory(const MemoryCopyArgs &args) const
Copie des informations entre deux zones mémoires.
Interface d'une liste de devices.
Interface de l'implémentation d'un évènement.
Interface d'un flux d'exécution pour une RunQueue.
Interface du runtime associé à un accélérateur.
Type opaque pour encapsuler une 'stream' native.
Implémentation d'une commande pour accélérateur.
Interface pour les copies mémoire avec support des accélérateurs.
virtual IMemoryRessourceMngInternal * _internal()=0
Interface interne.
Partie interne à Arcane de 'IMemoryRessourceMng'.
virtual void setAllocator(eMemoryRessource r, IMemoryAllocator *allocator)=0
Positionne l'allocateur pour la ressource r.
virtual void setCopier(IMemoryCopier *copier)=0
Positionne l'instance gérant les copies.
virtual void setIsAccelerator(bool v)=0
Indique si un accélérateur est disponible.
Flot de sortie lié à une String.
Chaîne de caractères unicode.
const char * localstr() const
Retourne la conversion de l'instance dans l'encodage UTF-8.
eMemoryAdvice
Conseils pour la gestion mémoire.
@ AccessedByHost
Indique que la zone mémoire est accédée par l'hôte.
@ PreferredLocationDevice
Privilégié le positionnement de la mémoire sur l'accélérateur.
@ MostlyRead
Indique que la zone mémoire est principalement en lecture seule.
@ PreferredLocationHost
Privilégié le positionnement de la mémoire sur l'hôte.
@ AccessedByDevice
Indique que la zone mémoire est accédée par l'accélérateur.
eExecutionPolicy
Politique d'exécution pour un Runner.
@ HIP
Politique d'exécution utilisant l'environnement HIP.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
@ HostPinned
Alloue sur l'hôte.
@ UnifiedMemory
Alloue en utilisant la mémoire unifiée.
@ Device
Alloue sur le device.
Espace de nom de Arccore.
Arcane::eMemoryResource eMemoryRessource
Typedef pour la version Arcane historique (avec 2's')