Arcane  v4.1.1.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
LocalMemory.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* LocalMemory.h (C) 2000-2025 */
9/* */
10/* Mémoire locale à une RunCommand. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCANE_ACCELERATOR_LOCALMEMORY_H
13#define ARCANE_ACCELERATOR_LOCALMEMORY_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16
17#include "arcane/accelerator/core/RunCommand.h"
18
19#include "arccore/base/Span.h"
20
21/*---------------------------------------------------------------------------*/
22/*---------------------------------------------------------------------------*/
23
24namespace Arcane::Accelerator::Impl
25{
26#if defined(ARCANE_COMPILING_CUDA_OR_HIP)
27inline __device__ std::byte* _getAcceleratorSharedMemory()
28{
29 extern __shared__ Int64 shared_memory_ptr[];
30 return reinterpret_cast<std::byte*>(shared_memory_ptr);
31}
32#endif
33
35
36} // namespace Arcane::Accelerator::Impl
37
38/*---------------------------------------------------------------------------*/
39/*---------------------------------------------------------------------------*/
40
41namespace Arcane::Accelerator
42{
43
44/*---------------------------------------------------------------------------*/
45/*---------------------------------------------------------------------------*/
//! Block of shared memory holding elements of type \a T, registered on a RunCommand.
/*!
 * Construction asks the RunCommand for `sizeof(T) * number_of_elements` bytes
 * through `_addSharedMemory()` and keeps the value it returns in `m_offset`.
 * The data pointer `m_ptr` starts as nullptr and is assigned later by the
 * kernel begin handlers (`execWorkItemAtBeginFor...`), so span() is only
 * meaningful after one of them has run.
 *
 * \tparam T element type; must be trivially copyable.
 * \tparam Extent number of elements, or DynExtent when it is given at runtime.
 */
52template <typename T, Int32 Extent>
53class LocalMemory
54{
56
57 public:
58
// Compile-time guard: only trivially copyable element types are accepted.
59 static_assert(std::is_trivially_copyable_v<T>, "type T is not trivially copiable");
60
61 public:
62
//! Type of the view returned by span().
63 using SpanType = SmallSpan<T, Extent>;
//! Handler type whose static methods run at the beginning and end of kernel execution.
64 using RemainingArgHandlerType = Impl::LocalMemoryKernelRemainingArg;
65
66 public:
67
//! Registers \a size elements of type T on \a command.
68 LocalMemory(RunCommand& command, Int32 size)
69 : m_size(size)
70 {
71 _addShareMemory(command);
72 }
73
//! Registers the memory on \a command; only available when Extent is not DynExtent.
// m_size is not set explicitly here — presumably it then carries the
// compile-time Extent; TODO confirm against ExtentStorage.
74 explicit LocalMemory(RunCommand& command) requires(Extent != DynExtent)
75 {
76 _addShareMemory(command);
77 }
78
//! Returns a view on `m_size.size()` elements starting at `m_ptr`.
79 constexpr ARCCORE_HOST_DEVICE SmallSpan<T, Extent> span()
80 {
81 return { m_ptr, m_size.size() };
82 }
83
84 private:
85
// Start of the element storage; nullptr until a kernel begin handler assigns it.
86 T* m_ptr = nullptr;
87 // TODO: the offset is not used, the offset could be removed by passing it (the original note is cut off at this point in the listing)
// NOTE(review): the declarations of m_offset and m_size, which the methods of
// this class use, are not visible in this listing — confirm against the real header.
92
93 protected:
94
//! Requests `sizeof(T) * m_size.size()` bytes from \a command and stores the returned offset in m_offset.
95 void _addShareMemory(RunCommand& command)
96 {
97 m_offset = command._addSharedMemory(static_cast<Int32>(sizeof(T) * m_size.size()));
98 }
99};
100
101} // namespace Arcane::Accelerator
102
103/*---------------------------------------------------------------------------*/
104/*---------------------------------------------------------------------------*/
105
106namespace Arcane::Accelerator::Impl
107{
108
109/*---------------------------------------------------------------------------*/
110/*---------------------------------------------------------------------------*/
115{
116 public:
117
118 template <typename T, Int32 Extent> static void
119 execWorkItemAtBeginForHost(LocalMemory<T, Extent>& local_memory)
120 {
121 local_memory.m_ptr = new T[local_memory.m_size.size()];
122 }
123 template <typename T, Int32 Extent> static void
124 execWorkItemAtEndForHost(LocalMemory<T, Extent>& local_memory)
125 {
126 delete[] local_memory.m_ptr;
127 }
128
129#if defined(ARCANE_COMPILING_CUDA_OR_HIP)
130 template <typename T, Int32 Extent> static ARCCORE_DEVICE void
131 execWorkItemAtBeginForCudaHip(LocalMemory<T, Extent>& local_memory, Int32)
132 {
133 std::byte* begin = Impl::_getAcceleratorSharedMemory() + local_memory.m_offset;
134 local_memory.m_ptr = reinterpret_cast<T*>(begin);
135 }
136 template <typename T, Int32 Extent> static ARCCORE_DEVICE void
137 execWorkItemAtEndForCudaHip(LocalMemory<T, Extent>&, Int32)
138 {
139 }
140#endif
141
142#if defined(ARCANE_COMPILING_SYCL)
143 template <typename T, Int32 Extent> static void
144 execWorkItemAtBeginForSycl(LocalMemory<T, Extent>& local_memory,
145 sycl::nd_item<1>,
146 SmallSpan<std::byte> shm_view)
147 {
148 std::byte* begin = shm_view.ptrAt(local_memory.m_offset);
149 local_memory.m_ptr = reinterpret_cast<T*>(begin);
150 }
151 template <typename T, Int32 Extent> static void
152 execWorkItemAtEndForSycl(LocalMemory<T, Extent>&,
153 sycl::nd_item<1>,
155 {
156 }
157#endif
158};
159
160/*---------------------------------------------------------------------------*/
161/*---------------------------------------------------------------------------*/
162
163} // namespace Arcane::Accelerator::Impl
164
165/*---------------------------------------------------------------------------*/
166/*---------------------------------------------------------------------------*/
167
168#endif
Handler pour LocalMemory appelés en début et fin d'exécution de noyau.
Mémoire locale (shared) à une RunCommand.
Definition LocalMemory.h:54
Int32 m_offset
Offset depuis le début de la mémoire shared
Definition LocalMemory.h:89
::Arcane::Impl::ExtentStorage< Int32, Extent > m_size
Nombre d'éléments du tableau.
Definition LocalMemory.h:91
Spécialisation pour le nombre d'éléments connu à la compilation.
Definition Span.h:107
Vue d'un tableau d'éléments de type T.
Definition Span.h:801
constexpr __host__ __device__ pointer ptrAt(SizeType index) const
Adresse du index-ème élément.
Definition Span.h:360
Espace de nom pour l'utilisation des accélérateurs.
std::int64_t Int64
Type entier signé sur 64 bits.
constexpr Int32 DynExtent
Constante pour indiquer que la dimension d'un tableau est dynamique.
Definition BaseTypes.h:54
std::int32_t Int32
Type entier signé sur 32 bits.