Arcane v3.15.0.0
Developer documentation
CommonCudaHipAtomicImpl.h
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
//-----------------------------------------------------------------------------
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
// See the top-level COPYRIGHT file for details.
// SPDX-License-Identifier: Apache-2.0
//-----------------------------------------------------------------------------
/*---------------------------------------------------------------------------*/
/* CommonCudaHipAtomicImpl.h                                   (C) 2000-2024 */
/*                                                                           */
/* CUDA and HIP implementation of atomic operations.                         */
/*---------------------------------------------------------------------------*/
#ifndef ARCANE_ACCELERATOR_COMMONCUDHIPATOMICIMPL_H
#define ARCANE_ACCELERATOR_COMMONCUDHIPATOMICIMPL_H
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

// This file must only be included by 'arcane/accelerator/Reduce.h'
// and is only valid when compiled with the CUDA or HIP compiler.

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

// Warning: with ROCm and a GPU on a PCI Express bus, most of the atomic
// methods do not work when the pointer is allocated in unified memory.
// The problem seems to affect atomicMin, atomicMax and atomicInc.
// atomicAdd, however, appears to work as long as concurrent accesses
// are not too numerous.

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

namespace Arcane::Accelerator::impl
{

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

template <typename DataType, enum eAtomicOperation>
class CommonCudaHipAtomic;

template <typename DataType>
class CommonCudaHipAtomic<DataType, eAtomicOperation::Add>;
template <typename DataType>
class CommonCudaHipAtomic<DataType, eAtomicOperation::Max>;
template <typename DataType>
class CommonCudaHipAtomic<DataType, eAtomicOperation::Min>;

template <>
class CommonCudaHipAtomic<int, eAtomicOperation::Add>
{
 public:

  static ARCCORE_DEVICE int apply(int* ptr, int v)
  {
    return ::atomicAdd(ptr, v);
  }
};

template <>
class CommonCudaHipAtomic<int, eAtomicOperation::Max>
{
 public:

  static ARCCORE_DEVICE int apply(int* ptr, int v)
  {
    return ::atomicMax(ptr, v);
  }
};

template <>
class CommonCudaHipAtomic<int, eAtomicOperation::Min>
{
 public:

  static ARCCORE_DEVICE int apply(int* ptr, int v)
  {
    return ::atomicMin(ptr, v);
  }
};

template <>
class CommonCudaHipAtomic<Int64, eAtomicOperation::Add>
{
 public:

  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
  {
    static_assert(sizeof(Int64) == sizeof(long long int), "Bad pointer size");
    return static_cast<Int64>(::atomicAdd((unsigned long long int*)ptr, v));
  }
};

template <>
class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>
{
 public:

#if defined(__HIP__)
  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
  {
    unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);
    unsigned long long int old = *address_as_ull, assumed;

    do {
      assumed = old;
      Int64 assumed_as_int64 = static_cast<Int64>(assumed);
      old = atomicCAS(address_as_ull, assumed,
                      static_cast<unsigned long long int>(v > assumed_as_int64 ? v : assumed_as_int64));
    } while (assumed != old);
    return static_cast<Int64>(old);
  }
#else
  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
  {
    return static_cast<Int64>(::atomicMax((long long int*)ptr, v));
  }
#endif
};

template <>
class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>
{
 public:

#if defined(__HIP__)
  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
  {
    unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);
    unsigned long long int old = *address_as_ull, assumed;

    do {
      assumed = old;
      Int64 assumed_as_int64 = static_cast<Int64>(assumed);
      old = atomicCAS(address_as_ull, assumed,
                      static_cast<unsigned long long int>(v < assumed_as_int64 ? v : assumed_as_int64));
    } while (assumed != old);
    return static_cast<Int64>(old);
  }
#else
  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
  {
    return static_cast<Int64>(::atomicMin((long long int*)ptr, v));
  }
#endif
};

// Devices with an architecture below 6.0 do not support
// atomicAdd on 'double'.
// This code comes from the NVIDIA documentation (programming guide).
__device__ inline double
preArch60atomicAdd(double* address, double val)
{
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;

  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(val + __longlong_as_double(assumed)));
    // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
  } while (assumed != old);

  return __longlong_as_double(old);
}
__device__ inline double
atomicMaxDouble(double* address, double val)
{
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;

  do {
    assumed = old;
    double assumed_as_double = __longlong_as_double(assumed);
    old = atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(val > assumed_as_double ? val : assumed_as_double));
    // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
  } while (assumed != old);

  return __longlong_as_double(old);
}

__device__ inline double
atomicMinDouble(double* address, double val)
{
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;

  do {
    assumed = old;
    double assumed_as_double = __longlong_as_double(assumed);
    old = atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(val < assumed_as_double ? val : assumed_as_double));
    // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
  } while (assumed != old);

  return __longlong_as_double(old);
}

template <>
class CommonCudaHipAtomic<double, eAtomicOperation::Add>
{
 public:

  static ARCCORE_DEVICE double apply(double* ptr, double v)
  {
#if __CUDA_ARCH__ >= 600
    return ::atomicAdd(ptr, v);
#else
    return preArch60atomicAdd(ptr, v);
#endif
  }
};

template <>
class CommonCudaHipAtomic<double, eAtomicOperation::Max>
{
 public:

  static ARCCORE_DEVICE double apply(double* ptr, double v)
  {
    return atomicMaxDouble(ptr, v);
  }
};

template <>
class CommonCudaHipAtomic<double, eAtomicOperation::Min>
{
 public:

  static ARCCORE_DEVICE double apply(double* ptr, double v)
  {
    return atomicMinDouble(ptr, v);
  }
};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

} // namespace Arcane::Accelerator::impl

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

#endif
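
Usage note: the wrapper is selected at compile time from the (DataType, eAtomicOperation) pair and forwards to either a hardware atomic primitive or a compare-and-swap loop, as shown in the listing above. Below is a minimal sketch of how a device kernel could call it, assuming the eAtomicOperation::Add and eAtomicOperation::Max enumerators used by the specializations above; the kernel and its parameter names are hypothetical, not part of Arcane:

using namespace Arcane::Accelerator::impl;

// Hypothetical example kernel: each thread accumulates its value into a
// global sum and updates a global maximum through the wrappers above.
__global__ void hypotheticalSumMaxKernel(const int* values, int n,
                                         int* sum, int* max_value)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    CommonCudaHipAtomic<int, eAtomicOperation::Add>::apply(sum, values[i]);
    CommonCudaHipAtomic<int, eAtomicOperation::Max>::apply(max_value, values[i]);
  }
}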