Arcane  v3.15.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
HipAcceleratorRuntime.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* HipAcceleratorRuntime.cc (C) 2000-2024 */
9/* */
10/* Runtime pour 'HIP'. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/accelerator/hip/HipAccelerator.h"
15
16#include "arcane/utils/PlatformUtils.h"
17#include "arcane/utils/Array.h"
18#include "arcane/utils/TraceInfo.h"
19#include "arcane/utils/FatalErrorException.h"
20#include "arcane/utils/NotImplementedException.h"
21#include "arcane/utils/IMemoryRessourceMng.h"
22#include "arcane/utils/OStringStream.h"
23#include "arcane/utils/internal/IMemoryRessourceMngInternal.h"
24
25#include "arcane/accelerator/core/RunQueueBuildInfo.h"
26#include "arcane/accelerator/core/Memory.h"
27#include "arcane/accelerator/core/DeviceInfoList.h"
28#include "arcane/accelerator/core/RunQueue.h"
29#include "arcane/accelerator/core/DeviceMemoryInfo.h"
30#include "arcane/accelerator/core/NativeStream.h"
31#include "arcane/accelerator/core/internal/IRunnerRuntime.h"
32#include "arcane/accelerator/core/internal/RegisterRuntimeInfo.h"
33#include "arcane/accelerator/core/internal/IRunQueueStream.h"
34#include "arcane/accelerator/core/internal/IRunQueueEventImpl.h"
35#include "arcane/accelerator/core/internal/RunCommandImpl.h"
36
37#include <iostream>
38
39#ifdef ARCANE_HAS_ROCTX
40#include <roctx.h>
41#endif
42
43using namespace Arccore;
44
45namespace Arcane::Accelerator::Hip
46{
47
48/*---------------------------------------------------------------------------*/
49/*---------------------------------------------------------------------------*/
50
53{
54 public:
55
57 : m_runtime(runtime)
58 {
59 if (bi.isDefault())
60 ARCANE_CHECK_HIP(hipStreamCreate(&m_hip_stream));
61 else {
62 int priority = bi.priority();
63 ARCANE_CHECK_HIP(hipStreamCreateWithPriority(&m_hip_stream, hipStreamDefault, priority));
64 }
65 }
66 ~HipRunQueueStream() override
67 {
68 ARCANE_CHECK_HIP_NOTHROW(hipStreamDestroy(m_hip_stream));
69 }
70
71 public:
72
74 {
75#ifdef ARCANE_HAS_ROCTX
76 auto kname = c.kernelName();
77 if (kname.empty())
78 roctxRangePush(c.traceInfo().name());
79 else
80 roctxRangePush(kname.localstr());
81#endif
82 return m_runtime->notifyBeginLaunchKernel();
83 }
85 {
86#ifdef ARCANE_HAS_ROCTX
88#endif
89 return m_runtime->notifyEndLaunchKernel();
90 }
91 void barrier() override
92 {
93 ARCANE_CHECK_HIP(hipStreamSynchronize(m_hip_stream));
94 }
95 bool _barrierNoException() override
96 {
97 return hipStreamSynchronize(m_hip_stream) != hipSuccess;
98 }
99 void copyMemory(const MemoryCopyArgs& args) override
100 {
101 auto r = hipMemcpyAsync(args.destination().data(), args.source().data(),
102 args.source().bytes().size(), hipMemcpyDefault, m_hip_stream);
103 ARCANE_CHECK_HIP(r);
104 if (!args.isAsync())
105 barrier();
106 }
107 void prefetchMemory(const MemoryPrefetchArgs& args) override
108 {
109 auto src = args.source().bytes();
110 if (src.size()==0)
111 return;
112 DeviceId d = args.deviceId();
113 int device = hipCpuDeviceId;
114 if (!d.isHost())
115 device = d.asInt32();
116 auto r = hipMemPrefetchAsync(src.data(), src.size(), device, m_hip_stream);
117 ARCANE_CHECK_HIP(r);
118 if (!args.isAsync())
119 barrier();
120 }
122 {
123 return impl::NativeStream(&m_hip_stream);
124 }
125
126 public:
127
128 hipStream_t trueStream() const
129 {
130 return m_hip_stream;
131 }
132
133 private:
134
135 impl::IRunnerRuntime* m_runtime;
136 hipStream_t m_hip_stream;
137};
138
139/*---------------------------------------------------------------------------*/
140/*---------------------------------------------------------------------------*/
141
144{
145 public:
146
147 explicit HipRunQueueEvent(bool has_timer)
148 {
149 if (has_timer)
150 ARCANE_CHECK_HIP(hipEventCreate(&m_hip_event));
151 else
152 ARCANE_CHECK_HIP(hipEventCreateWithFlags(&m_hip_event, hipEventDisableTiming));
153 }
154 ~HipRunQueueEvent() override
155 {
156 ARCANE_CHECK_HIP_NOTHROW(hipEventDestroy(m_hip_event));
157 }
158
159 public:
160
161 // Enregistre l'événement au sein d'une RunQueue
162 void recordQueue(impl::IRunQueueStream* stream) final
163 {
164 auto* rq = static_cast<HipRunQueueStream*>(stream);
165 ARCANE_CHECK_HIP(hipEventRecord(m_hip_event, rq->trueStream()));
166 }
167
168 void wait() final
169 {
170 ARCANE_CHECK_HIP(hipEventSynchronize(m_hip_event));
171 }
172
173 void waitForEvent(impl::IRunQueueStream* stream) final
174 {
175 auto* rq = static_cast<HipRunQueueStream*>(stream);
176 ARCANE_CHECK_HIP(hipStreamWaitEvent(rq->trueStream(), m_hip_event, 0));
177 }
178
179 Int64 elapsedTime(IRunQueueEventImpl* from_event) final
180 {
181 auto* true_from_event = static_cast<HipRunQueueEvent*>(from_event);
183 float time_in_ms = 0.0;
184 ARCANE_CHECK_HIP(hipEventElapsedTime(&time_in_ms, true_from_event->m_hip_event, m_hip_event));
185 double x = time_in_ms * 1.0e6;
186 Int64 nano_time = static_cast<Int64>(x);
187 return nano_time;
188 }
189
190 private:
191
192 hipEvent_t m_hip_event;
193};
194
195/*---------------------------------------------------------------------------*/
196/*---------------------------------------------------------------------------*/
197
200{
201 public:
202
203 ~HipRunnerRuntime() override = default;
204
205 public:
206
207 void notifyBeginLaunchKernel() override
208 {
209 ++m_nb_kernel_launched;
210 if (m_is_verbose)
211 std::cout << "BEGIN HIP KERNEL!\n";
212 }
213 void notifyEndLaunchKernel() override
214 {
215 ARCANE_CHECK_HIP(hipGetLastError());
216 if (m_is_verbose)
217 std::cout << "END HIP KERNEL!\n";
218 }
219 void barrier() override
220 {
221 ARCANE_CHECK_HIP(hipDeviceSynchronize());
222 }
223 eExecutionPolicy executionPolicy() const override
224 {
226 }
227 impl::IRunQueueStream* createStream(const RunQueueBuildInfo& bi) override
228 {
229 return new HipRunQueueStream(this, bi);
230 }
231 impl::IRunQueueEventImpl* createEventImpl() override
232 {
233 return new HipRunQueueEvent(false);
234 }
235 impl::IRunQueueEventImpl* createEventImplWithTimer() override
236 {
237 return new HipRunQueueEvent(true);
238 }
239 void setMemoryAdvice(ConstMemoryView buffer, eMemoryAdvice advice, DeviceId device_id) override
240 {
241 auto v = buffer.bytes();
242 const void* ptr = v.data();
243 size_t count = v.size();
244 int device = device_id.asInt32();
246
255 device = hipCpuDeviceId;
256 }
259 device = hipCpuDeviceId;
260 }
261 else
262 return;
263 //std::cout << "MEMADVISE p=" << ptr << " size=" << count << " advise = " << hip_advise << " id = " << device << "\n";
264 ARCANE_CHECK_HIP(hipMemAdvise(ptr, count, hip_advise, device));
265 }
266 void unsetMemoryAdvice(ConstMemoryView buffer, eMemoryAdvice advice, DeviceId device_id) override
267 {
268 auto v = buffer.bytes();
269 const void* ptr = v.data();
270 size_t count = v.size();
271 int device = device_id.asInt32();
273
282 device = hipCpuDeviceId;
283 }
286 device = hipCpuDeviceId;
287 }
288 else
289 return;
290 ARCANE_CHECK_HIP(hipMemAdvise(ptr, count, hip_advise, device));
291 }
292
293 void setCurrentDevice(DeviceId device_id) final
294 {
295 Int32 id = device_id.asInt32();
296 if (!device_id.isAccelerator())
297 ARCANE_FATAL("Device {0} is not an accelerator device", id);
298 ARCANE_CHECK_HIP(hipSetDevice(id));
299 }
300 const IDeviceInfoList* deviceInfoList() override { return &m_device_info_list; }
301
302 void getPointerAttribute(PointerAttribute& attribute, const void* ptr) override
303 {
306 auto mem_type = ePointerMemoryType::Unregistered;
307 // Si \a ptr n'a pas été alloué dynamiquement (i.e: il est sur la pile),
308 // hipPointerGetAttribute() retourne une erreur. Dans ce cas on considère
309 // la mémoire comme non enregistrée.
310 if (ret_value==hipSuccess){
311#if HIP_VERSION_MAJOR >= 6
312 auto rocm_memory_type = pa.type;
313#else
314 auto rocm_memory_type = pa.memoryType;
315#endif
316 if (pa.isManaged)
317 mem_type = ePointerMemoryType::Managed;
319 mem_type = ePointerMemoryType::Host;
321 mem_type = ePointerMemoryType::Device;
322 }
323
324 //std::cout << "HIP Info: hip_memory_type=" << (int)pa.memoryType << " is_managed?=" << pa.isManaged
325 // << " flags=" << pa.allocationFlags
326 // << " my_memory_type=" << (int)mem_type
327 // << "\n";
328 _fillPointerAttribute(attribute, mem_type, pa.device,
329 ptr, pa.devicePointer, pa.hostPointer);
330 }
331
332 DeviceMemoryInfo getDeviceMemoryInfo(DeviceId device_id) override
333 {
334 int d = 0;
335 int wanted_d = device_id.asInt32();
336 ARCANE_CHECK_HIP(hipGetDevice(&d));
337 if (d != wanted_d)
338 ARCANE_CHECK_HIP(hipSetDevice(wanted_d));
339 size_t free_mem = 0;
340 size_t total_mem = 0;
341 ARCANE_CHECK_HIP(hipMemGetInfo(&free_mem, &total_mem));
342 if (d != wanted_d)
343 ARCANE_CHECK_HIP(hipSetDevice(d));
345 dmi.setFreeMemory(free_mem);
346 dmi.setTotalMemory(total_mem);
347 return dmi;
348 }
349
350 void pushProfilerRange(const String& name, [[maybe_unused]] Int32 color) override
351 {
352#ifdef ARCANE_HAS_ROCTX
353 roctxRangePush(name.localstr());
354#endif
355 }
356 void popProfilerRange() override
357 {
358#ifdef ARCANE_HAS_ROCTX
360#endif
361 }
362
363 public:
364
365 void fillDevices(bool is_verbose);
366
367 private:
368
369 Int64 m_nb_kernel_launched = 0;
370 bool m_is_verbose = false;
371 impl::DeviceInfoList m_device_info_list;
372};
373
374/*---------------------------------------------------------------------------*/
375/*---------------------------------------------------------------------------*/
376
377void HipRunnerRuntime::
378fillDevices(bool is_verbose)
379{
380 int nb_device = 0;
381 ARCANE_CHECK_HIP(hipGetDeviceCount(&nb_device));
382 std::ostream& omain = std::cout;
383 if (is_verbose)
384 omain << "ArcaneHIP: Initialize Arcane HIP runtime nb_available_device=" << nb_device << "\n";
385 for (int i = 0; i < nb_device; ++i) {
387 std::ostream& o = ostr.stream();
388
390 ARCANE_CHECK_HIP(hipGetDeviceProperties(&dp, i));
391
392 int has_managed_memory = 0;
394
395 o << "\nDevice " << i << " name=" << dp.name << "\n";
396 o << " computeCapability = " << dp.major << "." << dp.minor << "\n";
397 o << " totalGlobalMem = " << dp.totalGlobalMem << "\n";
398 o << " sharedMemPerBlock = " << dp.sharedMemPerBlock << "\n";
399 o << " regsPerBlock = " << dp.regsPerBlock << "\n";
400 o << " warpSize = " << dp.warpSize << "\n";
401 o << " memPitch = " << dp.memPitch << "\n";
402 o << " maxThreadsPerBlock = " << dp.maxThreadsPerBlock << "\n";
403 o << " totalConstMem = " << dp.totalConstMem << "\n";
404 o << " clockRate = " << dp.clockRate << "\n";
405 //o << " deviceOverlap = " << dp.deviceOverlap<< "\n";
406 o << " multiProcessorCount = " << dp.multiProcessorCount << "\n";
407 o << " kernelExecTimeoutEnabled = " << dp.kernelExecTimeoutEnabled << "\n";
408 o << " integrated = " << dp.integrated << "\n";
409 o << " canMapHostMemory = " << dp.canMapHostMemory << "\n";
410 o << " computeMode = " << dp.computeMode << "\n";
411 o << " maxThreadsDim = " << dp.maxThreadsDim[0] << " " << dp.maxThreadsDim[1]
412 << " " << dp.maxThreadsDim[2] << "\n";
413 o << " maxGridSize = " << dp.maxGridSize[0] << " " << dp.maxGridSize[1]
414 << " " << dp.maxGridSize[2] << "\n";
415 o << " concurrentManagedAccess = " << dp.concurrentManagedAccess << "\n";
416 o << " directManagedMemAccessFromHost = " << dp.directManagedMemAccessFromHost << "\n";
417 o << " gcnArchName = " << dp.gcnArchName << "\n";
418 o << " pageableMemoryAccess = " << dp.pageableMemoryAccess << "\n";
419 o << " pageableMemoryAccessUsesHostPageTables = " << dp.pageableMemoryAccessUsesHostPageTables << "\n";
420 o << " hasManagedMemory = " << has_managed_memory << "\n";
421#if HIP_VERSION_MAJOR >= 6
422 o << " gpuDirectRDMASupported = " << dp.gpuDirectRDMASupported << "\n";
423 o << " hostNativeAtomicSupported = " << dp.hostNativeAtomicSupported << "\n";
424 o << " unifiedFunctionPointers = " << dp.unifiedFunctionPointers << "\n";
425#endif
426 {
427 hipDevice_t device;
428 ARCANE_CHECK_HIP(hipDeviceGet(&device, i));
430 ARCANE_CHECK_HIP(hipDeviceGetUuid(&device_uuid, device));
431 o << " deviceUuid=";
432 impl::printUUID(o, device_uuid.bytes);
433 o << "\n";
434 }
435
436 String description(ostr.str());
437 if (is_verbose)
438 omain << description;
439
441 device_info.setDescription(description);
442 device_info.setDeviceId(DeviceId(i));
443 device_info.setName(dp.name);
444 m_device_info_list.addDevice(device_info);
445 }
446}
447
448/*---------------------------------------------------------------------------*/
449/*---------------------------------------------------------------------------*/
450
452: public IMemoryCopier
453{
454 void copy(ConstMemoryView from, [[maybe_unused]] eMemoryRessource from_mem,
456 const RunQueue* queue) override
457 {
458 if (queue) {
459 queue->copyMemory(MemoryCopyArgs(to.bytes(), from.bytes()).addAsync(queue->isAsync()));
460 return;
461 }
462 // 'hipMemcpyDefault' sait automatiquement ce qu'il faut faire en tenant
463 // uniquement compte de la valeur des pointeurs. Il faudrait voir si
464 // utiliser \a from_mem et \a to_mem peut améliorer les performances.
465 ARCANE_CHECK_HIP(hipMemcpy(to.data(), from.data(), from.bytes().size(), hipMemcpyDefault));
466 }
467};
468
469/*---------------------------------------------------------------------------*/
470/*---------------------------------------------------------------------------*/
471
472} // End namespace Arcane::Accelerator::Hip
473
474namespace
475{
478}
479
480/*---------------------------------------------------------------------------*/
481/*---------------------------------------------------------------------------*/
482
483// Cette fonction est le point d'entrée utilisé lors du chargement
484// dynamique de cette bibliothèque
485extern "C" ARCANE_EXPORT void
486arcaneRegisterAcceleratorRuntimehip(Arcane::Accelerator::RegisterRuntimeInfo& init_info)
487{
488 using namespace Arcane;
489 using namespace Arcane::Accelerator::Hip;
490 Arcane::Accelerator::impl::setUsingHIPRuntime(true);
491 Arcane::Accelerator::impl::setHIPRunQueueRuntime(&global_hip_runtime);
492 Arcane::platform::setAcceleratorHostMemoryAllocator(getHipMemoryAllocator());
493 IMemoryRessourceMngInternal* mrm = platform::getDataMemoryRessourceMng()->_internal();
494 mrm->setIsAccelerator(true);
495 mrm->setAllocator(eMemoryRessource::UnifiedMemory, getHipUnifiedMemoryAllocator());
496 mrm->setAllocator(eMemoryRessource::HostPinned, getHipHostPinnedMemoryAllocator());
497 mrm->setAllocator(eMemoryRessource::Device, getHipDeviceMemoryAllocator());
498 mrm->setCopier(&global_hip_memory_copier);
499 global_hip_runtime.fillDevices(init_info.isVerbose());
500}
501
502/*---------------------------------------------------------------------------*/
503/*---------------------------------------------------------------------------*/
#define ARCANE_CHECK_POINTER(ptr)
Macro retournant le pointeur ptr s'il est non nul ou lancant une exception s'il est nul.
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
Identifiant d'un composant du système.
Definition DeviceId.h:33
bool isHost() const
Indique si l'instance est associée à l'hôte.
Definition DeviceId.h:60
Int32 asInt32() const
Valeur numérique du device.
Definition DeviceId.h:69
Information sur un device.
Definition DeviceInfo.h:32
Information mémoire d'un accélérateur.
void copy(ConstMemoryView from, eMemoryRessource from_mem, MutableMemoryView to, eMemoryRessource to_mem, const RunQueue *queue) override
Copie les données de from vers to avec la queue queue.
void notifyBeginLaunchKernel(impl::RunCommandImpl &c) override
Notification avant le lancement de la commande.
bool _barrierNoException() override
Barrière sans exception. Retourne true en cas d'erreur.
void barrier() override
Bloque jusqu'à ce que toutes les actions associées à cette file soient terminées.
void notifyEndLaunchKernel(impl::RunCommandImpl &) override
Notification de fin de lancement de la commande.
void prefetchMemory(const MemoryPrefetchArgs &args) override
Effectue un pré-chargement d'une zone mémoire.
void copyMemory(const MemoryCopyArgs &args) override
Effectue une copie entre deux zones mémoire.
impl::NativeStream nativeStream() override
Pointeur sur la structure interne dépendante de l'implémentation.
Interface d'une liste de devices.
Arguments pour la copie mémoire.
Definition Memory.h:63
Arguments pour le préfetching mémoire.
Definition Memory.h:125
Informations sur une adresse mémoire.
Informations pour initialiser le runtime accélérateur.
Informations pour créer une RunQueue.
File d'exécution pour un accélérateur.
bool isAsync() const
Indique si la file d'exécution est asynchrone.
Definition RunQueue.cc:320
void copyMemory(const MemoryCopyArgs &args) const
Copie des informations entre deux zones mémoires.
Definition RunQueue.cc:237
Interface d'une liste de devices.
Interface de l'implémentation d'un évènement.
Interface d'un flux d'exécution pour une RunQueue.
Interface du runtime associé à un accélérateur.
Type opaque pour encapsuler une 'stream' native.
Implémentation d'une commande pour accélérateur.
Interface pour les copies mémoire avec support des accélérateurs.
Partie interne à Arcane de 'IMemoryRessourceMng'.
Lecteur des fichiers de maillage via la bibliothèque LIMA.
Definition Lima.cc:149
Flot de sortie lié à une String.
Chaîne de caractères unicode.
eMemoryAdvice
Conseils pour la gestion mémoire.
Definition Memory.h:36
@ AccessedByHost
Indique que la zone mémoire est accédée par l'hôte.
@ PreferredLocationDevice
Privilégié le positionnement de la mémoire sur l'accélérateur.
@ MostlyRead
Indique que la zone mémoire est principalement en lecture seule.
@ PreferredLocationHost
Privilégié le positionnement de la mémoire sur l'hôte.
@ AccessedByDevice
Indique que la zone mémoire est accédée par l'accélérateur.
eExecutionPolicy
Politique d'exécution pour un Runner.
@ HIP
Politique d'exécution utilisant l'environnement HIP.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Espace de nom de Arccore.
Definition ArcaneTypes.h:24
std::int64_t Int64
Type entier signé sur 64 bits.
eMemoryResource
Liste des ressources mémoire disponibles.
std::int32_t Int32
Type entier signé sur 32 bits.