Arcane  v4.1.1.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
HipAcceleratorRuntime.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* HipAcceleratorRuntime.cc (C) 2000-2025 */
9/* */
10/* Runtime pour 'HIP'. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/accelerator_native/HipAccelerator.h"
15
16#include "arccore/base/FatalErrorException.h"
17
18#include "arccore/common/internal/MemoryUtilsInternal.h"
19#include "arccore/common/internal/IMemoryResourceMngInternal.h"
20
21#include "arccore/common/accelerator/RunQueueBuildInfo.h"
22#include "arccore/common/accelerator/Memory.h"
23#include "arccore/common/accelerator/DeviceInfoList.h"
24#include "arccore/common/accelerator/KernelLaunchArgs.h"
25#include "arccore/common/accelerator/RunQueue.h"
26#include "arccore/common/accelerator/DeviceMemoryInfo.h"
27#include "arccore/common/accelerator/NativeStream.h"
28#include "arccore/common/accelerator/internal/IRunnerRuntime.h"
29#include "arccore/common/accelerator/internal/RegisterRuntimeInfo.h"
30#include "arccore/common/accelerator/internal/RunCommandImpl.h"
31#include "arccore/common/accelerator/internal/IRunQueueStream.h"
32#include "arccore/common/accelerator/internal/IRunQueueEventImpl.h"
33#include "arccore/common/accelerator/internal/AcceleratorMemoryAllocatorBase.h"
34
35#include <sstream>
36
37#ifdef ARCCORE_HAS_ROCTX
38#include <roctx.h>
39#endif
40
41using namespace Arccore;
42
43namespace Arcane::Accelerator::Hip
44{
45
46/*---------------------------------------------------------------------------*/
47/*---------------------------------------------------------------------------*/
48
50{
51 public:
52
53 virtual ~ConcreteAllocator() = default;
54
55 public:
56
57 virtual hipError_t _allocate(void** ptr, size_t new_size) = 0;
58 virtual hipError_t _deallocate(void* ptr) = 0;
59};
60
61/*---------------------------------------------------------------------------*/
62/*---------------------------------------------------------------------------*/
63
64template <typename ConcreteAllocatorType>
65class UnderlyingAllocator
67{
68 public:
69
70 UnderlyingAllocator() = default;
71
72 public:
73
74 void* allocateMemory(size_t size) final
75 {
76 void* out = nullptr;
77 ARCCORE_CHECK_HIP(m_concrete_allocator._allocate(&out, size));
78 return out;
79 }
80 void freeMemory(void* ptr, [[maybe_unused]] size_t size) final
81 {
82 ARCCORE_CHECK_HIP_NOTHROW(m_concrete_allocator._deallocate(ptr));
83 }
84
85 void doMemoryCopy(void* destination, const void* source, Int64 size) final
86 {
87 ARCCORE_CHECK_HIP(hipMemcpy(destination, source, size, hipMemcpyDefault));
88 }
89
90 eMemoryResource memoryResource() const final
91 {
92 return m_concrete_allocator.memoryResource();
93 }
94
95 public:
96
97 ConcreteAllocatorType m_concrete_allocator;
98};
99
100/*---------------------------------------------------------------------------*/
101/*---------------------------------------------------------------------------*/
102
104: public ConcreteAllocator
105{
106 public:
107
108 hipError_t _deallocate(void* ptr) final
109 {
110 return ::hipFree(ptr);
111 }
112
113 hipError_t _allocate(void** ptr, size_t new_size) final
114 {
115 auto r = ::hipMallocManaged(ptr, new_size, hipMemAttachGlobal);
116 return r;
117 }
118
119 constexpr eMemoryResource memoryResource() const { return eMemoryResource::UnifiedMemory; }
120};
121
122/*---------------------------------------------------------------------------*/
123/*---------------------------------------------------------------------------*/
124
125class UnifiedMemoryHipMemoryAllocator
126: public AcceleratorMemoryAllocatorBase
127{
128 public:
129
130 UnifiedMemoryHipMemoryAllocator()
131 : AcceleratorMemoryAllocatorBase("UnifiedMemoryHipMemory", new UnderlyingAllocator<UnifiedMemoryConcreteAllocator>())
132 {
133 }
134
135 public:
136
137 void initialize()
138 {
139 _doInitializeUVM(true);
140 }
141};
142
143/*---------------------------------------------------------------------------*/
144/*---------------------------------------------------------------------------*/
145
147: public ConcreteAllocator
148{
149 public:
150
151 hipError_t _allocate(void** ptr, size_t new_size) final
152 {
153 return ::hipHostMalloc(ptr, new_size);
154 }
155 hipError_t _deallocate(void* ptr) final
156 {
157 return ::hipHostFree(ptr);
158 }
159 constexpr eMemoryResource memoryResource() const { return eMemoryResource::HostPinned; }
160};
161
162/*---------------------------------------------------------------------------*/
163/*---------------------------------------------------------------------------*/
164
165class HostPinnedHipMemoryAllocator
166: public AcceleratorMemoryAllocatorBase
167{
168 public:
169 public:
170
171 HostPinnedHipMemoryAllocator()
172 : AcceleratorMemoryAllocatorBase("HostPinnedHipMemory", new UnderlyingAllocator<HostPinnedConcreteAllocator>())
173 {
174 }
175
176 public:
177
178 void initialize()
179 {
181 }
182};
183
184/*---------------------------------------------------------------------------*/
185/*---------------------------------------------------------------------------*/
186
187class DeviceConcreteAllocator
188: public ConcreteAllocator
189{
190 public:
191
192 DeviceConcreteAllocator()
193 {
194 }
195
196 hipError_t _allocate(void** ptr, size_t new_size) final
197 {
198 hipError_t r = ::hipMalloc(ptr, new_size);
199 return r;
200 }
201 hipError_t _deallocate(void* ptr) final
202 {
203 return ::hipFree(ptr);
204 }
205
206 constexpr eMemoryResource memoryResource() const { return eMemoryResource::Device; }
207};
208
209/*---------------------------------------------------------------------------*/
210/*---------------------------------------------------------------------------*/
211
212class DeviceHipMemoryAllocator
213: public AcceleratorMemoryAllocatorBase
214{
215
216 public:
217
218 DeviceHipMemoryAllocator()
219 : AcceleratorMemoryAllocatorBase("DeviceHipMemoryAllocator", new UnderlyingAllocator<DeviceConcreteAllocator>())
220 {
221 }
222
223 public:
224
225 void initialize()
226 {
228 }
229};
230
231/*---------------------------------------------------------------------------*/
232/*---------------------------------------------------------------------------*/
233
namespace
{
  // File-local singleton allocator instances. They are initialized by
  // initializeHipMemoryAllocators() and registered with the memory-resource
  // manager when the runtime library is loaded.
  UnifiedMemoryHipMemoryAllocator unified_memory_hip_memory_allocator;
  HostPinnedHipMemoryAllocator host_pinned_hip_memory_allocator;
  DeviceHipMemoryAllocator device_hip_memory_allocator;
} // namespace
240
241/*---------------------------------------------------------------------------*/
242/*---------------------------------------------------------------------------*/
243
//! Initializes the three file-local HIP allocators (UVM, Device, HostPinned).
void initializeHipMemoryAllocators()
{
  unified_memory_hip_memory_allocator.initialize();
  device_hip_memory_allocator.initialize();
  host_pinned_hip_memory_allocator.initialize();
}
250
//! Finalizes the three HIP allocators; \a tm is forwarded to each finalize().
void finalizeHipMemoryAllocators(ITraceMng* tm)
{
  unified_memory_hip_memory_allocator.finalize(tm);
  device_hip_memory_allocator.finalize(tm);
  host_pinned_hip_memory_allocator.finalize(tm);
}
257
258/*---------------------------------------------------------------------------*/
259/*---------------------------------------------------------------------------*/
260
261class HipRunQueueStream
263{
264 public:
265
266 HipRunQueueStream(impl::IRunnerRuntime* runtime, const RunQueueBuildInfo& bi)
267 : m_runtime(runtime)
268 {
269 if (bi.isDefault())
270 ARCCORE_CHECK_HIP(hipStreamCreate(&m_hip_stream));
271 else {
272 int priority = bi.priority();
273 ARCCORE_CHECK_HIP(hipStreamCreateWithPriority(&m_hip_stream, hipStreamDefault, priority));
274 }
275 }
276 ~HipRunQueueStream() override
277 {
278 ARCCORE_CHECK_HIP_NOTHROW(hipStreamDestroy(m_hip_stream));
279 }
280
281 public:
282
283 void notifyBeginLaunchKernel([[maybe_unused]] impl::RunCommandImpl& c) override
284 {
285#ifdef ARCCORE_HAS_ROCTX
286 auto kname = c.kernelName();
287 if (kname.empty())
288 roctxRangePush(c.traceInfo().name());
289 else
290 roctxRangePush(kname.localstr());
291#endif
292 return m_runtime->notifyBeginLaunchKernel();
293 }
295 {
296#ifdef ARCCORE_HAS_ROCTX
297 roctxRangePop();
298#endif
299 return m_runtime->notifyEndLaunchKernel();
300 }
301 void barrier() override
302 {
303 ARCCORE_CHECK_HIP(hipStreamSynchronize(m_hip_stream));
304 }
305 bool _barrierNoException() override
306 {
307 return hipStreamSynchronize(m_hip_stream) != hipSuccess;
308 }
309 void copyMemory(const MemoryCopyArgs& args) override
310 {
311 auto r = hipMemcpyAsync(args.destination().data(), args.source().data(),
312 args.source().bytes().size(), hipMemcpyDefault, m_hip_stream);
313 ARCCORE_CHECK_HIP(r);
314 if (!args.isAsync())
315 barrier();
316 }
317 void prefetchMemory(const MemoryPrefetchArgs& args) override
318 {
319 auto src = args.source().bytes();
320 if (src.size()==0)
321 return;
322 DeviceId d = args.deviceId();
323 int device = hipCpuDeviceId;
324 if (!d.isHost())
325 device = d.asInt32();
326 auto r = hipMemPrefetchAsync(src.data(), src.size(), device, m_hip_stream);
327 ARCCORE_CHECK_HIP(r);
328 if (!args.isAsync())
329 barrier();
330 }
332 {
333 return Impl::NativeStream(&m_hip_stream);
334 }
335
336 public:
337
338 hipStream_t trueStream() const
339 {
340 return m_hip_stream;
341 }
342
343 private:
344
345 impl::IRunnerRuntime* m_runtime;
346 hipStream_t m_hip_stream;
347};
348
349/*---------------------------------------------------------------------------*/
350/*---------------------------------------------------------------------------*/
351
352class HipRunQueueEvent
354{
355 public:
356
357 explicit HipRunQueueEvent(bool has_timer)
358 {
359 if (has_timer)
360 ARCCORE_CHECK_HIP(hipEventCreate(&m_hip_event));
361 else
362 ARCCORE_CHECK_HIP(hipEventCreateWithFlags(&m_hip_event, hipEventDisableTiming));
363 }
364 ~HipRunQueueEvent() override
365 {
366 ARCCORE_CHECK_HIP_NOTHROW(hipEventDestroy(m_hip_event));
367 }
368
369 public:
370
371 // Enregistre l'événement au sein d'une RunQueue
372 void recordQueue(impl::IRunQueueStream* stream) final
373 {
374 auto* rq = static_cast<HipRunQueueStream*>(stream);
375 ARCCORE_CHECK_HIP(hipEventRecord(m_hip_event, rq->trueStream()));
376 }
377
378 void wait() final
379 {
380 ARCCORE_CHECK_HIP(hipEventSynchronize(m_hip_event));
381 }
382
383 void waitForEvent(impl::IRunQueueStream* stream) final
384 {
385 auto* rq = static_cast<HipRunQueueStream*>(stream);
386 ARCCORE_CHECK_HIP(hipStreamWaitEvent(rq->trueStream(), m_hip_event, 0));
387 }
388
389 Int64 elapsedTime(IRunQueueEventImpl* from_event) final
390 {
391 auto* true_from_event = static_cast<HipRunQueueEvent*>(from_event);
392 ARCCORE_CHECK_POINTER(true_from_event);
393 float time_in_ms = 0.0;
394 ARCCORE_CHECK_HIP(hipEventElapsedTime(&time_in_ms, true_from_event->m_hip_event, m_hip_event));
395 double x = time_in_ms * 1.0e6;
396 Int64 nano_time = static_cast<Int64>(x);
397 return nano_time;
398 }
399
400 bool hasPendingWork() final
401 {
402 hipError_t v = hipEventQuery(m_hip_event);
403 if (v == hipErrorNotReady)
404 return true;
405 ARCCORE_CHECK_HIP(v);
406 return false;
407 }
408
409 private:
410
411 hipEvent_t m_hip_event;
412};
413
414/*---------------------------------------------------------------------------*/
415/*---------------------------------------------------------------------------*/
416
419{
420 public:
421
422 ~HipRunnerRuntime() override = default;
423
424 public:
425
426 void notifyBeginLaunchKernel() override
427 {
428 ++m_nb_kernel_launched;
429 if (m_is_verbose)
430 std::cout << "BEGIN HIP KERNEL!\n";
431 }
432 void notifyEndLaunchKernel() override
433 {
434 ARCCORE_CHECK_HIP(hipGetLastError());
435 if (m_is_verbose)
436 std::cout << "END HIP KERNEL!\n";
437 }
438 void barrier() override
439 {
440 ARCCORE_CHECK_HIP(hipDeviceSynchronize());
441 }
442 eExecutionPolicy executionPolicy() const override
443 {
445 }
446 impl::IRunQueueStream* createStream(const RunQueueBuildInfo& bi) override
447 {
448 return new HipRunQueueStream(this, bi);
449 }
450 impl::IRunQueueEventImpl* createEventImpl() override
451 {
452 return new HipRunQueueEvent(false);
453 }
454 impl::IRunQueueEventImpl* createEventImplWithTimer() override
455 {
456 return new HipRunQueueEvent(true);
457 }
458 void setMemoryAdvice(ConstMemoryView buffer, eMemoryAdvice advice, DeviceId device_id) override
459 {
460 auto v = buffer.bytes();
461 const void* ptr = v.data();
462 size_t count = v.size();
463 int device = device_id.asInt32();
464 hipMemoryAdvise hip_advise;
465
466 if (advice == eMemoryAdvice::MostlyRead)
467 hip_advise = hipMemAdviseSetReadMostly;
469 hip_advise = hipMemAdviseSetPreferredLocation;
470 else if (advice == eMemoryAdvice::AccessedByDevice)
471 hip_advise = hipMemAdviseSetAccessedBy;
472 else if (advice == eMemoryAdvice::PreferredLocationHost) {
473 hip_advise = hipMemAdviseSetPreferredLocation;
474 device = hipCpuDeviceId;
475 }
476 else if (advice == eMemoryAdvice::AccessedByHost) {
477 hip_advise = hipMemAdviseSetAccessedBy;
478 device = hipCpuDeviceId;
479 }
480 else
481 return;
482 //std::cout << "MEMADVISE p=" << ptr << " size=" << count << " advise = " << hip_advise << " id = " << device << "\n";
483 ARCCORE_CHECK_HIP(hipMemAdvise(ptr, count, hip_advise, device));
484 }
485 void unsetMemoryAdvice(ConstMemoryView buffer, eMemoryAdvice advice, DeviceId device_id) override
486 {
487 auto v = buffer.bytes();
488 const void* ptr = v.data();
489 size_t count = v.size();
490 int device = device_id.asInt32();
491 hipMemoryAdvise hip_advise;
492
493 if (advice == eMemoryAdvice::MostlyRead)
494 hip_advise = hipMemAdviseUnsetReadMostly;
496 hip_advise = hipMemAdviseUnsetPreferredLocation;
497 else if (advice == eMemoryAdvice::AccessedByDevice)
498 hip_advise = hipMemAdviseUnsetAccessedBy;
499 else if (advice == eMemoryAdvice::PreferredLocationHost) {
500 hip_advise = hipMemAdviseUnsetPreferredLocation;
501 device = hipCpuDeviceId;
502 }
503 else if (advice == eMemoryAdvice::AccessedByHost) {
504 hip_advise = hipMemAdviseUnsetAccessedBy;
505 device = hipCpuDeviceId;
506 }
507 else
508 return;
509 ARCCORE_CHECK_HIP(hipMemAdvise(ptr, count, hip_advise, device));
510 }
511
512 void setCurrentDevice(DeviceId device_id) final
513 {
514 Int32 id = device_id.asInt32();
515 ARCCORE_FATAL_IF(!device_id.isAccelerator(), "Device {0} is not an accelerator device", id);
516 ARCCORE_CHECK_HIP(hipSetDevice(id));
517 }
518 const IDeviceInfoList* deviceInfoList() override { return &m_device_info_list; }
519
520 void getPointerAttribute(PointerAttribute& attribute, const void* ptr) override
521 {
522 hipPointerAttribute_t pa;
523 hipError_t ret_value = hipPointerGetAttributes(&pa, ptr);
524 auto mem_type = ePointerMemoryType::Unregistered;
525 // Si \a ptr n'a pas été alloué dynamiquement (i.e: il est sur la pile),
526 // hipPointerGetAttribute() retourne une erreur. Dans ce cas on considère
527 // la mémoire comme non enregistrée.
528 if (ret_value==hipSuccess){
529#if HIP_VERSION_MAJOR >= 6
530 auto rocm_memory_type = pa.type;
531#else
532 auto rocm_memory_type = pa.memoryType;
533#endif
534 if (pa.isManaged)
535 mem_type = ePointerMemoryType::Managed;
536 else if (rocm_memory_type == hipMemoryTypeHost)
537 mem_type = ePointerMemoryType::Host;
538 else if (rocm_memory_type == hipMemoryTypeDevice)
539 mem_type = ePointerMemoryType::Device;
540 }
541
542 //std::cout << "HIP Info: hip_memory_type=" << (int)pa.memoryType << " is_managed?=" << pa.isManaged
543 // << " flags=" << pa.allocationFlags
544 // << " my_memory_type=" << (int)mem_type
545 // << "\n";
546 _fillPointerAttribute(attribute, mem_type, pa.device,
547 ptr, pa.devicePointer, pa.hostPointer);
548 }
549
550 DeviceMemoryInfo getDeviceMemoryInfo(DeviceId device_id) override
551 {
552 int d = 0;
553 int wanted_d = device_id.asInt32();
554 ARCCORE_CHECK_HIP(hipGetDevice(&d));
555 if (d != wanted_d)
556 ARCCORE_CHECK_HIP(hipSetDevice(wanted_d));
557 size_t free_mem = 0;
558 size_t total_mem = 0;
559 ARCCORE_CHECK_HIP(hipMemGetInfo(&free_mem, &total_mem));
560 if (d != wanted_d)
561 ARCCORE_CHECK_HIP(hipSetDevice(d));
563 dmi.setFreeMemory(free_mem);
564 dmi.setTotalMemory(total_mem);
565 return dmi;
566 }
567
568 void pushProfilerRange(const String& name, [[maybe_unused]] Int32 color) override
569 {
570#ifdef ARCCORE_HAS_ROCTX
571 roctxRangePush(name.localstr());
572#endif
573 }
574 void popProfilerRange() override
575 {
576#ifdef ARCCORE_HAS_ROCTX
577 roctxRangePop();
578#endif
579 }
580
581 void finalize(ITraceMng* tm) override
582 {
583 finalizeHipMemoryAllocators(tm);
584 }
585
586 public:
587
588 void fillDevices(bool is_verbose);
589
590 private:
591
592 Int64 m_nb_kernel_launched = 0;
593 bool m_is_verbose = false;
594 impl::DeviceInfoList m_device_info_list;
595};
596
597/*---------------------------------------------------------------------------*/
598/*---------------------------------------------------------------------------*/
599
/*!
 * \brief Queries every available HIP device, builds a textual description,
 * prints it when \a is_verbose is true, and registers each device in
 * \a m_device_info_list.
 */
void HipRunnerRuntime::
fillDevices(bool is_verbose)
{
  int nb_device = 0;
  ARCCORE_CHECK_HIP(hipGetDeviceCount(&nb_device));
  std::ostream& omain = std::cout;
  if (is_verbose)
    omain << "ArcaneHIP: Initialize Arcane HIP runtime nb_available_device=" << nb_device << "\n";
  for (int i = 0; i < nb_device; ++i) {
    // The description is built in a string stream so it can be both printed
    // and stored in the DeviceInfo.
    std::ostringstream ostr;
    std::ostream& o = ostr;

    hipDeviceProp_t dp;
    ARCCORE_CHECK_HIP(hipGetDeviceProperties(&dp, i));

    int has_managed_memory = 0;
    ARCCORE_CHECK_HIP(hipDeviceGetAttribute(&has_managed_memory, hipDeviceAttributeManagedMemory, i));

    // HIP encodes versions as:
    // HIP_VERSION = (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)

    int runtime_version = 0;
    ARCCORE_CHECK_HIP(hipRuntimeGetVersion(&runtime_version));
    //runtime_version /= 10000;
    int runtime_major = runtime_version / 10000000;
    int runtime_minor = (runtime_version / 100000) % 100;

    int driver_version = 0;
    ARCCORE_CHECK_HIP(hipDriverGetVersion(&driver_version));
    //driver_version /= 10000;
    int driver_major = driver_version / 10000000;
    int driver_minor = (driver_version / 100000) % 100;

    o << "\nDevice " << i << " name=" << dp.name << "\n";
    o << " Driver version = " << driver_major << "." << (driver_minor) << "." << (driver_version % 100000) << "\n";
    o << " Runtime version = " << runtime_major << "." << (runtime_minor) << "." << (runtime_version % 100000) << "\n";
    o << " computeCapability = " << dp.major << "." << dp.minor << "\n";
    o << " totalGlobalMem = " << dp.totalGlobalMem << "\n";
    o << " sharedMemPerBlock = " << dp.sharedMemPerBlock << "\n";
    o << " regsPerBlock = " << dp.regsPerBlock << "\n";
    o << " warpSize = " << dp.warpSize << "\n";
    o << " memPitch = " << dp.memPitch << "\n";
    o << " maxThreadsPerBlock = " << dp.maxThreadsPerBlock << "\n";
    o << " totalConstMem = " << dp.totalConstMem << "\n";
    o << " clockRate = " << dp.clockRate << "\n";
    //o << " deviceOverlap = " << dp.deviceOverlap<< "\n";
    o << " multiProcessorCount = " << dp.multiProcessorCount << "\n";
    o << " kernelExecTimeoutEnabled = " << dp.kernelExecTimeoutEnabled << "\n";
    o << " integrated = " << dp.integrated << "\n";
    o << " canMapHostMemory = " << dp.canMapHostMemory << "\n";
    o << " computeMode = " << dp.computeMode << "\n";
    o << " maxThreadsDim = " << dp.maxThreadsDim[0] << " " << dp.maxThreadsDim[1]
      << " " << dp.maxThreadsDim[2] << "\n";
    o << " maxGridSize = " << dp.maxGridSize[0] << " " << dp.maxGridSize[1]
      << " " << dp.maxGridSize[2] << "\n";
    o << " concurrentManagedAccess = " << dp.concurrentManagedAccess << "\n";
    o << " directManagedMemAccessFromHost = " << dp.directManagedMemAccessFromHost << "\n";
    o << " gcnArchName = " << dp.gcnArchName << "\n";
    o << " pageableMemoryAccess = " << dp.pageableMemoryAccess << "\n";
    o << " pageableMemoryAccessUsesHostPageTables = " << dp.pageableMemoryAccessUsesHostPageTables << "\n";
    o << " hasManagedMemory = " << has_managed_memory << "\n";
#if HIP_VERSION_MAJOR >= 6
    // Fields only available from ROCm/HIP 6 onwards.
    o << " gpuDirectRDMASupported = " << dp.gpuDirectRDMASupported << "\n";
    o << " hostNativeAtomicSupported = " << dp.hostNativeAtomicSupported << "\n";
    o << " unifiedFunctionPointers = " << dp.unifiedFunctionPointers << "\n";
#endif
    {
      hipDevice_t device;
      ARCCORE_CHECK_HIP(hipDeviceGet(&device, i));
      hipUUID device_uuid;
      ARCCORE_CHECK_HIP(hipDeviceGetUuid(&device_uuid, device));
      o << " deviceUuid=";
      impl::printUUID(o, device_uuid.bytes);
      o << "\n";
    }

    String description(ostr.str());
    if (is_verbose)
      omain << description;

    DeviceInfo device_info;
    device_info.setDescription(description);
    device_info.setDeviceId(DeviceId(i));
    device_info.setName(dp.name);
    device_info.setWarpSize(dp.warpSize);
    m_device_info_list.addDevice(device_info);
  }
}
688
689/*---------------------------------------------------------------------------*/
690/*---------------------------------------------------------------------------*/
691
693: public IMemoryCopier
694{
695 void copy(ConstMemoryView from, [[maybe_unused]] eMemoryResource from_mem,
696 MutableMemoryView to, [[maybe_unused]] eMemoryResource to_mem,
697 const RunQueue* queue) override
698 {
699 if (queue) {
700 queue->copyMemory(MemoryCopyArgs(to.bytes(), from.bytes()).addAsync(queue->isAsync()));
701 return;
702 }
703 // 'hipMemcpyDefault' sait automatiquement ce qu'il faut faire en tenant
704 // uniquement compte de la valeur des pointeurs. Il faudrait voir si
705 // utiliser \a from_mem et \a to_mem peut améliorer les performances.
706 ARCCORE_CHECK_HIP(hipMemcpy(to.data(), from.data(), from.bytes().size(), hipMemcpyDefault));
707 }
708};
709
710/*---------------------------------------------------------------------------*/
711/*---------------------------------------------------------------------------*/
712
713} // End namespace Arcane::Accelerator::Hip
714
715namespace
716{
718Arcane::Accelerator::Hip::HipMemoryCopier global_hip_memory_copier;
719}
720
721/*---------------------------------------------------------------------------*/
722/*---------------------------------------------------------------------------*/
723
724// Cette fonction est le point d'entrée utilisé lors du chargement
725// dynamique de cette bibliothèque
726extern "C" ARCCORE_EXPORT void
727arcaneRegisterAcceleratorRuntimehip(Arcane::Accelerator::RegisterRuntimeInfo& init_info)
728{
729 using namespace Arcane;
730 using namespace Arcane::Accelerator::Hip;
731 Arcane::Accelerator::impl::setUsingHIPRuntime(true);
732 Arcane::Accelerator::impl::setHIPRunQueueRuntime(&global_hip_runtime);
733 initializeHipMemoryAllocators();
735 MemoryUtils::setAcceleratorHostMemoryAllocator(&unified_memory_hip_memory_allocator);
737 mrm->setIsAccelerator(true);
738 mrm->setAllocator(eMemoryResource::UnifiedMemory, &unified_memory_hip_memory_allocator);
739 mrm->setAllocator(eMemoryResource::HostPinned, &host_pinned_hip_memory_allocator);
740 mrm->setAllocator(eMemoryResource::Device, &device_hip_memory_allocator);
741 mrm->setCopier(&global_hip_memory_copier);
742 global_hip_runtime.fillDevices(init_info.isVerbose());
743}
744
745/*---------------------------------------------------------------------------*/
746/*---------------------------------------------------------------------------*/
#define ARCCORE_CHECK_POINTER(ptr)
Macro retournant le pointeur ptr s'il est non nul ou lançant une exception s'il est nul.
#define ARCCORE_FATAL_IF(cond,...)
Macro envoyant une exception FatalErrorException si cond est vrai.
void _doInitializeDevice(bool default_use_memory_pool=false)
Initialisation pour la mémoire Device.
void _doInitializeHostPinned(bool default_use_memory_pool=false)
Initialisation pour la mémoire HostPinned.
void _doInitializeUVM(bool default_use_memory_pool=false)
Initialisation pour la mémoire UVM.
bool isHost() const
Indique si l'instance est associée à l'hôte.
bool isAccelerator() const
Indique si l'instance est associée à un accélérateur.
void copy(ConstMemoryView from, eMemoryResource from_mem, MutableMemoryView to, eMemoryResource to_mem, const RunQueue *queue) override
Copie les données de from vers to avec la queue queue.
void notifyBeginLaunchKernel(impl::RunCommandImpl &c) override
Notification avant le lancement de la commande.
bool _barrierNoException() override
Barrière sans exception. Retourne true en cas d'erreur.
void barrier() override
Bloque jusqu'à ce que toutes les actions associées à cette file soient terminées.
void notifyEndLaunchKernel(impl::RunCommandImpl &) override
Notification de fin de lancement de la commande.
void prefetchMemory(const MemoryPrefetchArgs &args) override
Effectue un pré-chargement d'une zone mémoire.
void copyMemory(const MemoryCopyArgs &args) override
Effectue une copie entre deux zones mémoire.
Impl::NativeStream nativeStream() override
Pointeur sur la structure interne dépendante de l'implémentation.
void * allocateMemory(size_t size) final
Alloue un bloc pour size octets.
void freeMemory(void *ptr, size_t size) final
Libère le bloc situé à l'adresse address contenant size octets.
Informations pour initialiser le runtime accélérateur.
bool isDefault() const
Indique si l'instance a uniquement les valeurs par défaut.
bool isAsync() const
Indique si la file d'exécution est asynchrone.
Definition RunQueue.cc:320
void copyMemory(const MemoryCopyArgs &args) const
Copie des informations entre deux zones mémoires.
Definition RunQueue.cc:237
Interface de l'implémentation d'un évènement.
Interface d'un flux d'exécution pour une RunQueue.
Interface du runtime associé à un accélérateur.
Vue constante sur une zone mémoire contigue contenant des éléments de taille fixe.
constexpr SpanType bytes() const
Vue sous forme d'octets.
constexpr const std::byte * data() const
Pointeur sur la zone mémoire.
Interface pour les copies mémoire avec support des accélérateurs.
Partie interne à Arcane de 'IMemoryRessourceMng'.
virtual void setAllocator(eMemoryResource r, IMemoryAllocator *allocator)=0
Positionne l'allocateur pour la ressource r.
virtual void setIsAccelerator(bool v)=0
Indique si un accélérateur est disponible.
virtual void setCopier(IMemoryCopier *copier)=0
Positionne l'instance gérant les copies.
virtual IMemoryResourceMngInternal * _internal()=0
Interface interne.
Interface du gestionnaire de traces.
Vue modifiable sur une zone mémoire contigue contenant des éléments de taille fixe.
constexpr std::byte * data() const
Pointeur sur la zone mémoire.
constexpr SpanType bytes() const
Vue sous forme d'octets.
constexpr __host__ __device__ pointer data() const noexcept
Pointeur sur le début de la vue.
Definition Span.h:537
constexpr __host__ __device__ SizeType size() const noexcept
Retourne la taille du tableau.
Definition Span.h:325
Chaîne de caractères unicode.
const char * localstr() const
Retourne la conversion de l'instance dans l'encodage UTF-8.
Definition String.cc:228
@ AccessedByHost
Indique que la zone mémoire est accédée par l'hôte.
@ PreferredLocationDevice
Privilégie le positionnement de la mémoire sur l'accélérateur.
@ MostlyRead
Indique que la zone mémoire est principalement en lecture seule.
@ PreferredLocationHost
Privilégie le positionnement de la mémoire sur l'hôte.
@ AccessedByDevice
Indique que la zone mémoire est accédée par l'accélérateur.
eExecutionPolicy
Politique d'exécution pour un Runner.
@ HIP
Politique d'exécution utilisant l'environnement HIP.
ARCCORE_COMMON_EXPORT IMemoryRessourceMng * getDataMemoryResourceMng()
Gestionnaire de ressource mémoire pour les données.
ARCCORE_COMMON_EXPORT IMemoryAllocator * setAcceleratorHostMemoryAllocator(IMemoryAllocator *a)
Positionne l'allocateur spécifique pour les accélérateurs.
ARCCORE_COMMON_EXPORT void setDefaultDataMemoryResource(eMemoryResource mem_resource)
Positionne la ressource mémoire utilisée pour l'allocateur mémoire des données.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
std::int64_t Int64
Type entier signé sur 64 bits.
eMemoryResource
Liste des ressources mémoire disponibles.
@ HostPinned
Alloue sur l'hôte.
@ UnifiedMemory
Alloue en utilisant la mémoire unifiée.
@ Device
Alloue sur le device.
std::int32_t Int32
Type entier signé sur 32 bits.
Espace de nom de Arccore.