// Compile-time constant BLOCK_SIZE, fixed at 4.
// NOTE(review): presumably the number of lanes of the enclosing SIMD type
// (its class header is not visible in this excerpt) - confirm.
37 static const int BLOCK_SIZE = 4;
46 SSESimdX4Int32(__m128i _v0) : v0(_v0) {}
47 explicit SSESimdX4Int32(
Int32 a) : v0(_mm_set1_epi32(a)){}
// Member-initializer list of a constructor whose signature is not visible in
// this excerpt. _mm_set_epi32 takes its arguments from the highest lane down,
// so a0 ends up in lane 0 and a3 in lane 3.
50 : v0(_mm_set_epi32(a3,a2,a1,a0)){}
52 SSESimdX4Int32(
const Int32* base,
const Int32* idx)
53 : v0(_mm_set_epi32(base[idx[3]],base[idx[2]],base[idx[1]],base[idx[0]])) {}
55 explicit SSESimdX4Int32(
const Int32* base)
56 : v0(_mm_load_si128((
const __m128i*)base)){}
// Const overload of set() taking a destination pointer and an index array.
// NOTE(review): the body is not visible in this excerpt; presumably it stores
// lane i to base[idx[i]] (scatter) - confirm against the full file.
61 void set(ARCANE_RESTRICT
Int32* base,
const ARCANE_RESTRICT
Int32* idx)
const
// Stores the whole 128-bit register to memory starting at base.
// _mm_store_si128 requires base to be aligned on a 16-byte boundary.
70 void set(ARCANE_RESTRICT
Int32* base)
const
73 _mm_store_si128((__m128i*)base,v0);
// Return statement of a function whose signature is not visible in this
// excerpt. The four values are passed to the constructor highest index first
// (a3 down to a0).
78 return SSESimdX4Int32(a3,a2,a1,a0);
// Assignment from a scalar Int32: declaration only, no definition is visible
// in this excerpt.
// NOTE(review): presumably declared without a definition to forbid scalar
// assignment - confirm.
82 void operator=(
Int32 _v);
// Compile-time constant BLOCK_SIZE, fixed at 2.
// NOTE(review): presumably the number of lanes of the enclosing SIMD type
// (its class header is not visible in this excerpt) - confirm.
97 static const int BLOCK_SIZE = 2;
// Constructor taking an SSE double-precision register.
// NOTE(review): its initializer list / body is not visible in this excerpt;
// presumably it copies _v0 into v0 - confirm.
106 SSESimdX2Real(__m128d _v0)
108 explicit SSESimdX2Real(
Real r)
109 : v0(_mm_set1_pd(r)){}
// Member-initializer list of a constructor whose signature is not visible in
// this excerpt. _mm_set_pd takes the high lane first, so a0 ends up in lane 0
// and a1 in lane 1.
112 : v0(_mm_set_pd(a1,a0)){}
114 SSESimdX2Real(
const Real* base,
const Int32* idx)
115 : v0(_mm_set_pd(base[idx[1]],base[idx[0]])) { }
// Gather constructor taking the indices as a pointer to Int32IndexType.
// Lane 0 receives base[idx[0]] and lane 1 receives base[idx[1]].
// NOTE(review): the statement defining idx is not visible in this excerpt;
// presumably it reinterprets simd_idx as an array of Int32 - confirm.
116 SSESimdX2Real(
const Real* base,
const Int32IndexType* simd_idx)
119 v0 = _mm_set_pd(base[idx[1]],base[idx[0]]);
// Gather constructor taking the indices as a reference to Int32IndexType.
// Lane 0 receives base[idx[0]] and lane 1 receives base[idx[1]].
// NOTE(review): the statement defining idx is not visible in this excerpt;
// presumably it reinterprets the address of simd_idx as an array of Int32 - confirm.
121 SSESimdX2Real(
const Real* base,
const Int32IndexType& simd_idx)
124 v0 = _mm_set_pd(base[idx[1]],base[idx[0]]);
126 SSESimdX2Real(
const Real* base)
128 v0 = _mm_load_pd(base);
// Const overload of set() taking a destination pointer and an index array.
// NOTE(review): only the first statement of the body is visible in this
// excerpt; presumably the following lines store x[i] to base[idx[i]] - confirm.
134 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT
Int32* idx)
const
// Views the object itself as an array of Real to read individual lanes.
// NOTE(review): this relies on the lane storage being located at the start of
// the object - confirm against the class layout.
136 const Real* x = (
const Real*)(
this);
141 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT Int32IndexType& simd_idx)
const
143 this->set(base,&simd_idx);
// Const overload of set() taking the indices as a pointer to Int32IndexType.
// NOTE(review): only the first two statements of the body are visible in this
// excerpt; presumably the following lines store x[i] to base[idx[i]] - confirm.
146 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT Int32IndexType* simd_idx)
const
// Reinterprets the SIMD index object as a plain array of Int32.
148 const Int32* idx = (
const ARCANE_RESTRICT
Int32*)simd_idx;
// Views the object itself as an array of Real to read individual lanes.
149 const Real* x = (
const Real*)(
this);
154 void set(ARCANE_RESTRICT
Real* base)
const
156 _mm_store_pd(base,v0);
159 static SSESimdX2Real fromScalar(
Real a0,
Real a1)
161 return SSESimdX2Real(a1,a0);
165 inline SSESimdX2Real operator- ()
const
167 return SSESimdX2Real(_mm_sub_pd(_mm_setzero_pd(),v0));
// Assignment from a scalar Real: declaration only, no definition is visible
// in this excerpt.
// NOTE(review): presumably declared without a definition to forbid scalar
// assignment - confirm.
171 void operator=(
Real _v);
// Compile-time constant BLOCK_SIZE, fixed at 4.
// NOTE(review): presumably the number of lanes of the enclosing SIMD type
// (its class header is not visible in this excerpt) - confirm.
186 static const int BLOCK_SIZE = 4;
200 SSESimdX4Real(__m128d _v0,__m128d _v1)
201 : v0(_v0), v1(_v1) {}
202 explicit SSESimdX4Real(
Real r)
203 : v0(_mm_set1_pd(r)), v1(_mm_set1_pd(r)){}
// Member-initializer list of a constructor whose signature is not visible in
// this excerpt. _mm_set_pd takes the high lane first, so v0 holds (a0, a1) and
// v1 holds (a2, a3) in lane order.
206 : v0(_mm_set_pd(a1,a0)), v1(_mm_set_pd(a3,a2)){}
208 SSESimdX4Real(
const Real* base,
const Int32* idx)
209 : v0(_mm_set_pd(base[idx[1]],base[idx[0]]))
210 , v1(_mm_set_pd(base[idx[3]],base[idx[2]])){}
212 SSESimdX4Real(
const Real* base,
const Int32IndexType* simd_idx)
213 : SSESimdX4Real(base,(
const Int32*)simd_idx) {}
215 SSESimdX4Real(
const Real* base,
const Int32IndexType& simd_idx)
216 : SSESimdX4Real(base,(
const Int32*)&simd_idx) {}
218 SSESimdX4Real(
const Real* base)
219 : v0(_mm_load_pd(base)), v1(_mm_load_pd(base+2)) {}
// Const overload of set() taking a destination pointer and an index array.
// NOTE(review): only the first statement of the body is visible in this
// excerpt; presumably the following lines store x[i] to base[idx[i]] - confirm.
224 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT
Int32* idx)
const
// Views the object itself as an array of Real to read individual lanes.
// NOTE(review): this relies on the lane storage being located at the start of
// the object - confirm against the class layout.
226 const Real* x = (
const Real*)(
this);
233 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT Int32IndexType& simd_idx)
const
235 this->set(base,(
const Int32*)&simd_idx);
238 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT Int32IndexType* simd_idx)
const
240 this->set(base,(
const Int32*)simd_idx);
243 void set(ARCANE_RESTRICT
Real* base)
const
245 _mm_store_pd(base,v0);
246 _mm_store_pd(base+2,v1);
// Return statement of a function whose signature is not visible in this
// excerpt. The four values are passed to the constructor highest index first
// (a3 down to a0).
251 return SSESimdX4Real(a3,a2,a1,a0);
255 inline SSESimdX4Real operator- ()
const
257 return SSESimdX4Real(_mm_sub_pd(_mm_setzero_pd(),v0),
258 _mm_sub_pd(_mm_setzero_pd(),v1));
// Assignment from a scalar Real: declaration only, no definition is visible
// in this excerpt.
// NOTE(review): presumably declared without a definition to forbid scalar
// assignment - confirm.
261 void operator=(
Real _v);
// Compile-time constant BLOCK_SIZE, fixed at 8.
// NOTE(review): presumably the number of lanes of the enclosing SIMD type
// (its class header is not visible in this excerpt) - confirm.
273 static const int BLOCK_SIZE = 8;
284 SSESimdX8Real(__m128d _v0,__m128d _v1,__m128d _v2,__m128d _v3)
285 : v0(_v0), v1(_v1), v2(_v2), v3(_v3) {}
286 explicit SSESimdX8Real(
Real r)
287 : v0(_mm_set1_pd(r)), v1(_mm_set1_pd(r)), v2(_mm_set1_pd(r)), v3(_mm_set1_pd(r)){}
// Member-initializer list of a constructor whose signature is not visible in
// this excerpt. _mm_set_pd takes the high lane first, so in lane order v0 holds
// (a0, a1), v1 holds (a2, a3), v2 holds (a4, a5) and v3 holds (a6, a7).
290 : v0(_mm_set_pd(a1,a0)), v1(_mm_set_pd(a3,a2)),
291 v2(_mm_set_pd(a5,a4)), v3(_mm_set_pd(a7,a6)){}
293 SSESimdX8Real(
const Real* base,
const Int32* idx)
295 v0 = _mm_set_pd(base[idx[1]],base[idx[0]]);
296 v1 = _mm_set_pd(base[idx[3]],base[idx[2]]);
297 v2 = _mm_set_pd(base[idx[5]],base[idx[4]]);
298 v3 = _mm_set_pd(base[idx[7]],base[idx[6]]);
// Const overload of set() taking a destination pointer and an index array.
// NOTE(review): only the first statement of the body is visible in this
// excerpt; presumably the following lines store x[i] to base[idx[i]] - confirm.
304 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT
Int32* idx)
const
// Views the object itself as an array of Real to read individual lanes.
// NOTE(review): this relies on the lane storage being located at the start of
// the object - confirm against the class layout.
306 const Real* x = (
const Real*)(
this);
// Return statement of a function whose signature is not visible in this
// excerpt. The eight values are passed to the constructor highest index first
// (a7 down to a0).
319 return SSESimdX8Real(a7,a6,a5,a4,a3,a2,a1,a0);
323 inline SSESimdX8Real operator- ()
const
325 return SSESimdX8Real(_mm_sub_pd(_mm_setzero_pd(),v0),
326 _mm_sub_pd(_mm_setzero_pd(),v1),
327 _mm_sub_pd(_mm_setzero_pd(),v2),
328 _mm_sub_pd(_mm_setzero_pd(),v3));
// Assignment from a scalar Real: declaration only, no definition is visible
// in this excerpt.
// NOTE(review): presumably declared without a definition to forbid scalar
// assignment - confirm.
331 void operator=(
Real _v);