Arcane  v3.15.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
SimdSSE.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2022 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* SimdSSE.h (C) 2000-2016 */
9/* */
10/* Vectorisation pour le SSE. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCANE_UTILS_SIMDSSE_H
13#define ARCANE_UTILS_SIMDSSE_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * Ce fichier ne doit pas être inclus directement.
18 * Utiliser 'Simd.h' à la place.
19 */
20/*---------------------------------------------------------------------------*/
21/*---------------------------------------------------------------------------*/
22
23ARCANE_BEGIN_NAMESPACE
24
25/*---------------------------------------------------------------------------*/
26/*---------------------------------------------------------------------------*/
35{
36 public:
37 static const int BLOCK_SIZE = 4;
38 enum
39 {
40 Length = 4,
41 Alignment = 16
42 };
43 public:
44 __m128i v0;
47 explicit SSESimdX4Int32(Int32 a) : v0(_mm_set1_epi32(a)){}
48 private:
49 SSESimdX4Int32(Int32 a3,Int32 a2,Int32 a1,Int32 a0)
50 : v0(_mm_set_epi32(a3,a2,a1,a0)){}
51 public:
52 SSESimdX4Int32(const Int32* base,const Int32* idx)
53 : v0(_mm_set_epi32(base[idx[3]],base[idx[2]],base[idx[1]],base[idx[0]])) {}
54 // TODO: faire la version non alignée
55 explicit SSESimdX4Int32(const Int32* base)
56 : v0(_mm_load_si128((const __m128i*)base)){}
57
58 Int32 operator[](Integer i) const { return ((const Int32*)&v0)[i]; }
59 Int32& operator[](Integer i) { return ((Int32*)&v0)[i]; }
60
61 void set(ARCANE_RESTRICT Int32* base,const ARCANE_RESTRICT Int32* idx) const
62 {
63 const Int32* x = (const Int32*)(this);
64 base[idx[0]] = x[0];
65 base[idx[1]] = x[1];
66 base[idx[2]] = x[2];
67 base[idx[3]] = x[3];
68 }
69
70 void set(ARCANE_RESTRICT Int32* base) const
71 {
72 // TODO: faire la version non alignée
73 _mm_store_si128((__m128i*)base,v0);
74 }
75
76 static SSESimdX4Int32 fromScalar(Int32 a0,Int32 a1,Int32 a2,Int32 a3)
77 {
78 return SSESimdX4Int32(a3,a2,a1,a0);
79 }
80
81 private:
82 void operator=(Int32 _v);
83};
84
85/*---------------------------------------------------------------------------*/
86/*---------------------------------------------------------------------------*/
95{
96 public:
97 static const int BLOCK_SIZE = 2;
98 enum
99 {
100 Length = 2
101 };
103 public:
104 __m128d v0;
105 SSESimdX2Real(){}
107 : v0(_v0){}
108 explicit SSESimdX2Real(Real r)
109 : v0(_mm_set1_pd(r)){}
110 private:
111 SSESimdX2Real(Real a1,Real a0)
112 : v0(_mm_set_pd(a1,a0)){}
113 public:
114 SSESimdX2Real(const Real* base,const Int32* idx)
115 : v0(_mm_set_pd(base[idx[1]],base[idx[0]])) { }
116 SSESimdX2Real(const Real* base,const Int32IndexType* simd_idx)
117 {
118 const Int32* idx = (const Int32*)simd_idx;
119 v0 = _mm_set_pd(base[idx[1]],base[idx[0]]);
120 }
121 SSESimdX2Real(const Real* base,const Int32IndexType& simd_idx)
122 {
123 const Int32* idx = (const Int32*)&simd_idx;
124 v0 = _mm_set_pd(base[idx[1]],base[idx[0]]);
125 }
126 SSESimdX2Real(const Real* base)
127 {
128 v0 = _mm_load_pd(base);
129 }
130
131 Real operator[](Integer i) const { return ((const Real*)&v0)[i]; }
132 Real& operator[](Integer i) { return ((Real*)&v0)[i]; }
133
134 void set(ARCANE_RESTRICT Real* base,const ARCANE_RESTRICT Int32* idx) const
135 {
136 const Real* x = (const Real*)(this);
137 base[idx[0]] = x[0];
138 base[idx[1]] = x[1];
139 }
140
141 void set(ARCANE_RESTRICT Real* base,const ARCANE_RESTRICT Int32IndexType& simd_idx) const
142 {
143 this->set(base,&simd_idx);
144 }
145
146 void set(ARCANE_RESTRICT Real* base,const ARCANE_RESTRICT Int32IndexType* simd_idx) const
147 {
148 const Int32* idx = (const ARCANE_RESTRICT Int32*)simd_idx;
149 const Real* x = (const Real*)(this);
150 base[idx[0]] = x[0];
151 base[idx[1]] = x[1];
152 }
153
154 void set(ARCANE_RESTRICT Real* base) const
155 {
156 _mm_store_pd(base,v0);
157 }
158
159 static SSESimdX2Real fromScalar(Real a0,Real a1)
160 {
161 return SSESimdX2Real(a1,a0);
162 }
163
164 // Unary operation operator-
165 inline SSESimdX2Real operator- () const
166 {
168 }
169
170 private:
171 void operator=(Real _v);
172};
173
174/*---------------------------------------------------------------------------*/
175/*---------------------------------------------------------------------------*/
184{
185 public:
186 static const int BLOCK_SIZE = 4;
187 enum
188 {
189 Length = 4
190 };
191 // NOTE: utiliser EMULSimd au lieu de SSE est beaucoup plus performant
192 // avec gcc 4.9 et gcc 6.1. Avec Intel 16, c'est le contraire mais la
193 // différence n'est pas énorme.
194 // typedef SSESimdX4Int32 Int32IndexType;
196 public:
197 __m128d v0;
198 __m128d v1;
199 SSESimdX4Real(){}
201 : v0(_v0), v1(_v1) {}
202 explicit SSESimdX4Real(Real r)
203 : v0(_mm_set1_pd(r)), v1(_mm_set1_pd(r)){}
204 private:
205 SSESimdX4Real(Real a3,Real a2,Real a1,Real a0)
206 : v0(_mm_set_pd(a1,a0)), v1(_mm_set_pd(a3,a2)){}
207 public:
208 SSESimdX4Real(const Real* base,const Int32* idx)
209 : v0(_mm_set_pd(base[idx[1]],base[idx[0]]))
210 , v1(_mm_set_pd(base[idx[3]],base[idx[2]])){}
211
212 SSESimdX4Real(const Real* base,const Int32IndexType* simd_idx)
213 : SSESimdX4Real(base,(const Int32*)simd_idx) {}
214
215 SSESimdX4Real(const Real* base,const Int32IndexType& simd_idx)
216 : SSESimdX4Real(base,(const Int32*)&simd_idx) {}
217
218 SSESimdX4Real(const Real* base)
219 : v0(_mm_load_pd(base)), v1(_mm_load_pd(base+2)) {}
220
221 Real operator[](Integer i) const { return ((const Real*)&v0)[i]; }
222 Real& operator[](Integer i) { return ((Real*)&v0)[i]; }
223
224 void set(ARCANE_RESTRICT Real* base,const ARCANE_RESTRICT Int32* idx) const
225 {
226 const Real* x = (const Real*)(this);
227 base[idx[0]] = x[0];
228 base[idx[1]] = x[1];
229 base[idx[2]] = x[2];
230 base[idx[3]] = x[3];
231 }
232
233 void set(ARCANE_RESTRICT Real* base,const ARCANE_RESTRICT Int32IndexType& simd_idx) const
234 {
235 this->set(base,(const Int32*)&simd_idx);
236 }
237
238 void set(ARCANE_RESTRICT Real* base,const ARCANE_RESTRICT Int32IndexType* simd_idx) const
239 {
240 this->set(base,(const Int32*)simd_idx);
241 }
242
243 void set(ARCANE_RESTRICT Real* base) const
244 {
245 _mm_store_pd(base,v0);
246 _mm_store_pd(base+2,v1);
247 }
248
249 static SSESimdX4Real fromScalar(Real a0,Real a1,Real a2,Real a3)
250 {
251 return SSESimdX4Real(a3,a2,a1,a0);
252 }
253
254 // Unary operation operator-
255 inline SSESimdX4Real operator- () const
256 {
259 }
260 private:
261 void operator=(Real _v);
262};
263
264/*---------------------------------------------------------------------------*/
265/*---------------------------------------------------------------------------*/
271{
272 public:
273 static const int BLOCK_SIZE = 8;
274 enum
275 {
276 Length = 8
277 };
278 public:
279 __m128d v0;
280 __m128d v1;
281 __m128d v2;
282 __m128d v3;
283 SSESimdX8Real(){}
285 : v0(_v0), v1(_v1), v2(_v2), v3(_v3) {}
286 explicit SSESimdX8Real(Real r)
287 : v0(_mm_set1_pd(r)), v1(_mm_set1_pd(r)), v2(_mm_set1_pd(r)), v3(_mm_set1_pd(r)){}
288 private:
289 SSESimdX8Real(Real a7,Real a6,Real a5,Real a4,Real a3,Real a2,Real a1,Real a0)
290 : v0(_mm_set_pd(a1,a0)), v1(_mm_set_pd(a3,a2)),
291 v2(_mm_set_pd(a5,a4)), v3(_mm_set_pd(a7,a6)){}
292 public:
293 SSESimdX8Real(const Real* base,const Int32* idx)
294 {
295 v0 = _mm_set_pd(base[idx[1]],base[idx[0]]);
296 v1 = _mm_set_pd(base[idx[3]],base[idx[2]]);
297 v2 = _mm_set_pd(base[idx[5]],base[idx[4]]);
298 v3 = _mm_set_pd(base[idx[7]],base[idx[6]]);
299 }
300
301 Real operator[](Integer i) const { return ((const Real*)&v0)[i]; }
302 Real& operator[](Integer i) { return ((Real*)&v0)[i]; }
303
304 void set(ARCANE_RESTRICT Real* base,const ARCANE_RESTRICT Int32* idx) const
305 {
306 const Real* x = (const Real*)(this);
307 base[idx[0]] = x[0];
308 base[idx[1]] = x[1];
309 base[idx[2]] = x[2];
310 base[idx[3]] = x[3];
311 base[idx[4]] = x[4];
312 base[idx[5]] = x[5];
313 base[idx[6]] = x[6];
314 base[idx[7]] = x[7];
315 }
316
317 static SSESimdX8Real fromScalar(Real a0,Real a1,Real a2,Real a3,Real a4,Real a5,Real a6,Real a7)
318 {
319 return SSESimdX8Real(a7,a6,a5,a4,a3,a2,a1,a0);
320 }
321
322 // Unary operation operator-
323 inline SSESimdX8Real operator- () const
324 {
329 }
330 private:
331 void operator=(Real _v);
332};
333
334/*---------------------------------------------------------------------------*/
335/*---------------------------------------------------------------------------*/
336
347
348/*---------------------------------------------------------------------------*/
349/*---------------------------------------------------------------------------*/
350
352{
353 public:
// Returns the name identifying this vectorisation mechanism.
static const char* name()
{
  return "SSE";
}
355 enum
356 {
357 Int32IndexSize = SSESimdReal::Length
358 };
359 typedef SSESimdReal SimdReal;
361};
362
363/*---------------------------------------------------------------------------*/
364/*---------------------------------------------------------------------------*/
365
366ARCANE_UTILS_EXPORT std::ostream&
367operator<<(std::ostream& o,const SSESimdReal& s);
368
369/*---------------------------------------------------------------------------*/
370/*---------------------------------------------------------------------------*/
371
372ARCANE_END_NAMESPACE
373
374/*---------------------------------------------------------------------------*/
375/*---------------------------------------------------------------------------*/
376
377#endif
#define ARCANE_ALIGNAS_PACKED(value)
Macro pour garantir le compactage et l'alignement d'une classe sur value octets.
Vectorisation des entiers en utilisant une émulation.
Definition SimdEMUL.h:35
Vectorisation des entiers en utilisant une émulation.
Definition SimdEMUL.h:88
Lecteur des fichiers de maillage via la bibliothèque LIMA.
Definition Lima.cc:149
Vectorisation des réels en utilisant SSE.
Definition SimdSSE.h:95
Vectorisation des entiers en utilisant SSE.
Definition SimdSSE.h:35
Vectorisation des réels en utilisant SSE.
Definition SimdSSE.h:184
Vecteur de 8 doubles avec implémentation SSE.
Definition SimdSSE.h:271