// Excerpt of the 4-lane Int32 SSE wrapper (SSESimdX4Int32). Only selected lines
// are present here (the leading numbers appear to be the original line numbers),
// so bodies that are not shown are deliberately not described.
// Block size of this type; presumably the number of Int32 lanes held in v0.
42 static const int BLOCK_SIZE = 4;
// Constructor from a raw __m128i value (its initializer is not shown here).
53 SSESimdX4Int32(__m128i _v0)
// Broadcast constructor: _mm_set1_epi32 copies `a` into every 32-bit lane of v0.
56 explicit SSESimdX4Int32(
Int32 a)
57 : v0(_mm_set1_epi32(a))
// Initializer of a four-scalar constructor whose signature is not visible here.
// _mm_set_epi32 takes its arguments from the highest lane down to the lowest,
// so the last argument (a0) ends up in lane 0.
63 : v0(_mm_set_epi32(a3, a2, a1, a0))
// Gather constructor: lane i receives base[idx[i]] for i = 0..3
// (arguments are listed high lane first, hence idx[3] comes first).
68 SSESimdX4Int32(
const Int32* base,
const Int32* idx)
69 : v0(_mm_set_epi32(base[idx[3]], base[idx[2]], base[idx[1]], base[idx[0]]))
// Contiguous load constructor. _mm_load_si128 is the aligned 128-bit load,
// so `base` must be 16-byte aligned and 16 bytes must be readable from it.
72 explicit SSESimdX4Int32(
const Int32* base)
73 : v0(_mm_load_si128((
const __m128i*)base))
// Indexed store overload; the body is not shown in this excerpt.
// NOTE(review): presumably the scatter counterpart of the gather constructor
// (base[idx[i]] = lane i) - confirm against the full file.
79 void set(ARCANE_RESTRICT
Int32* base,
const ARCANE_RESTRICT
Int32* idx)
const
// Contiguous store: writes the four lanes of v0 to `base` with _mm_store_si128,
// the aligned 128-bit store, so `base` must be 16-byte aligned.
88 void set(ARCANE_RESTRICT
Int32* base)
const
91 _mm_store_si128((__m128i*)base, v0);
// Return statement of a function whose signature is not shown here. The scalars
// are passed with a3 first; presumably this targets the four-scalar constructor
// above so that a0 lands in lane 0 - confirm against the full file.
96 return SSESimdX4Int32(a3, a2, a1, a0);
// Assignment from a scalar is only declared here, no definition is visible.
// NOTE(review): presumably declared (and kept inaccessible) to forbid
// `simd = scalar`; check the access specifier in the full file.
101 void operator=(
Int32 _v);
// Excerpt of the 2-lane Real SSE wrapper (SSESimdX2Real). Only selected lines
// are present here; bodies that are not shown are deliberately not described.
// Real values are handed to the *_pd intrinsics, i.e. they are used as doubles.
// Block size of this type; presumably the number of Real lanes held in v0.
118 static const int BLOCK_SIZE = 2;
// Constructor from a raw __m128d value (its initializer is not shown here).
129 SSESimdX2Real(__m128d _v0)
// Constructor from a single scalar; the initializer is not shown here.
// NOTE(review): presumably broadcasts `r` to both lanes - confirm.
132 explicit SSESimdX2Real(
Real r)
// Initializer of a two-scalar constructor whose signature is not visible here.
// _mm_set_pd takes the high lane first, so a0 ends up in lane 0.
139 : v0(_mm_set_pd(a1, a0))
// Gather constructor: lane 0 = base[idx[0]], lane 1 = base[idx[1]].
144 SSESimdX2Real(
const Real* base,
const Int32* idx)
145 : v0(_mm_set_pd(base[idx[1]], base[idx[0]]))
// Gather through a pointer to an index vector. `idx` is declared on a line not
// shown here - presumably simd_idx viewed as const Int32*; confirm.
147 SSESimdX2Real(
const Real* base,
const Int32IndexType* simd_idx)
150 v0 = _mm_set_pd(base[idx[1]], base[idx[0]]);
// Same gather with the index vector passed by reference. `idx` is again
// declared on a line not shown here.
152 SSESimdX2Real(
const Real* base,
const Int32IndexType& simd_idx)
155 v0 = _mm_set_pd(base[idx[1]], base[idx[0]]);
// Contiguous load of two Reals. _mm_load_pd is the aligned load, so `base`
// must be 16-byte aligned.
// NOTE(review): no `explicit` is visible on this constructor, unlike the
// pointer constructor of the Int32 wrapper - confirm this is intended.
157 SSESimdX2Real(
const Real* base)
159 v0 = _mm_load_pd(base);
// Indexed store overload. Only its first statement is shown: the object is
// reinterpreted as an array of Real so that lanes can be read as scalars.
// NOTE(review): this relies on v0 sitting at offset 0 of the object - confirm.
165 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT
Int32* idx)
const
167 const Real* x = (
const Real*)(
this);
// Index vector by reference: forwards to the overload taking its address.
172 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT Int32IndexType& simd_idx)
const
174 this->set(base, &simd_idx);
// Index vector by pointer: views simd_idx as an array of Int32 and the object
// as an array of Real; the stores themselves are on lines not shown here.
177 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT Int32IndexType* simd_idx)
const
179 const Int32* idx = (
const ARCANE_RESTRICT
Int32*)simd_idx;
180 const Real* x = (
const Real*)(
this);
// Contiguous store: writes both lanes to `base` with _mm_store_pd, the aligned
// store, so `base` must be 16-byte aligned.
185 void set(ARCANE_RESTRICT
Real* base)
const
187 _mm_store_pd(base, v0);
// Builds a vector from two scalars. They are passed with a1 first; presumably
// this targets the two-scalar constructor above so that a0 lands in lane 0.
190 static SSESimdX2Real fromScalar(
Real a0,
Real a1)
192 return SSESimdX2Real(a1, a0);
// Unary minus, computed as (0 - v0) per lane. Because it is a subtraction and
// not a sign-bit flip, a zero lane yields +0.0 under the default rounding mode.
196 inline SSESimdX2Real operator-()
const
198 return SSESimdX2Real(_mm_sub_pd(_mm_setzero_pd(), v0));
// Assignment from a scalar is only declared here, no definition is visible.
// NOTE(review): presumably declared to forbid `simd = scalar`; check the
// access specifier in the full file.
203 void operator=(
Real _v);
// Excerpt of the 4-lane Real SSE wrapper (SSESimdX4Real), stored as two __m128d
// registers: v0 carries lanes 0-1 and v1 carries lanes 2-3 (see the gather
// constructor below). Only selected lines are present here; bodies that are not
// shown are deliberately not described.
// Block size of this type; presumably the number of Real lanes (2 per register).
220 static const int BLOCK_SIZE = 4;
// Constructor from two raw __m128d values (its initializers are not shown here).
236 SSESimdX4Real(__m128d _v0, __m128d _v1)
// Constructor from a single scalar; the initializers are not shown here.
// NOTE(review): presumably broadcasts `r` to all lanes - confirm.
240 explicit SSESimdX4Real(
Real r)
// Initializers of a four-scalar constructor whose signature is not visible here.
// _mm_set_pd takes the high lane first, so a0/a1 fill v0 and a2/a3 fill v1.
248 : v0(_mm_set_pd(a1, a0))
249 , v1(_mm_set_pd(a3, a2))
// Gather constructor: lane i receives base[idx[i]] for i = 0..3.
254 SSESimdX4Real(
const Real* base,
const Int32* idx)
255 : v0(_mm_set_pd(base[idx[1]], base[idx[0]]))
256 , v1(_mm_set_pd(base[idx[3]], base[idx[2]]))
// Gather through a pointer to an index vector: delegates to the Int32* gather
// constructor after viewing simd_idx as an array of Int32.
259 SSESimdX4Real(
const Real* base,
const Int32IndexType* simd_idx)
260 : SSESimdX4Real(base, (
const Int32*)simd_idx)
// Same delegation with the index vector passed by reference.
263 SSESimdX4Real(
const Real* base,
const Int32IndexType& simd_idx)
264 : SSESimdX4Real(base, (
const Int32*)&simd_idx)
// Contiguous load of four Reals from base[0..3]. _mm_load_pd is the aligned
// load, so `base` must be 16-byte aligned (base + 2 is then aligned as well).
267 SSESimdX4Real(
const Real* base)
268 : v0(_mm_load_pd(base))
269 , v1(_mm_load_pd(base + 2))
// Indexed store overload. Only its first statement is shown: the object is
// reinterpreted as an array of Real so that lanes can be read as scalars.
// NOTE(review): this relies on v0 then v1 being laid out contiguously from
// offset 0 of the object - confirm.
275 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT
Int32* idx)
const
277 const Real* x = (
const Real*)(
this);
// Index vector by reference: forwards to the Int32* overload.
284 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT Int32IndexType& simd_idx)
const
286 this->set(base, (
const Int32*)&simd_idx);
// Index vector by pointer: forwards to the Int32* overload.
289 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT Int32IndexType* simd_idx)
const
291 this->set(base, (
const Int32*)simd_idx);
// Contiguous store of the four lanes to base[0..3] with _mm_store_pd, the
// aligned store, so `base` must be 16-byte aligned.
294 void set(ARCANE_RESTRICT
Real* base)
const
296 _mm_store_pd(base, v0);
297 _mm_store_pd(base + 2, v1);
// Return statement of a function whose signature is not shown here. The scalars
// are passed with a3 first; presumably this targets the four-scalar constructor
// above so that a0 lands in lane 0 - confirm against the full file.
302 return SSESimdX4Real(a3, a2, a1, a0);
// Unary minus, computed as (0 - v) on each register. Because it is a
// subtraction and not a sign-bit flip, a zero lane yields +0.0 under the
// default rounding mode.
306 inline SSESimdX4Real operator-()
const
308 return SSESimdX4Real(_mm_sub_pd(_mm_setzero_pd(), v0),
309 _mm_sub_pd(_mm_setzero_pd(), v1));
// Assignment from a scalar is only declared here, no definition is visible.
// NOTE(review): presumably declared to forbid `simd = scalar`; check the
// access specifier in the full file.
314 void operator=(
Real _v);
// Excerpt of the 8-lane Real SSE wrapper (SSESimdX8Real), stored as four
// __m128d registers: v0 = lanes 0-1, v1 = lanes 2-3, v2 = lanes 4-5,
// v3 = lanes 6-7 (see the gather constructor below). Only selected lines are
// present here; bodies that are not shown are deliberately not described.
// Block size of this type; presumably the number of Real lanes (2 per register).
328 static const int BLOCK_SIZE = 8;
// Constructor from four raw __m128d values (its initializers are not shown here).
341 SSESimdX8Real(__m128d _v0, __m128d _v1, __m128d _v2, __m128d _v3)
// Constructor from a single scalar; the initializers are not shown here.
// NOTE(review): presumably broadcasts `r` to all lanes - confirm.
347 explicit SSESimdX8Real(
Real r)
// Initializers of an eight-scalar constructor whose signature is not visible
// here. _mm_set_pd takes the high lane first, so each register receives the
// pair (a[2k+1], a[2k]) with a[2k] in its low lane.
357 : v0(_mm_set_pd(a1, a0))
358 , v1(_mm_set_pd(a3, a2))
359 , v2(_mm_set_pd(a5, a4))
360 , v3(_mm_set_pd(a7, a6))
// Gather constructor: lane i receives base[idx[i]] for i = 0..7, so `idx`
// must provide at least eight indices.
365 SSESimdX8Real(
const Real* base,
const Int32* idx)
367 v0 = _mm_set_pd(base[idx[1]], base[idx[0]]);
368 v1 = _mm_set_pd(base[idx[3]], base[idx[2]]);
369 v2 = _mm_set_pd(base[idx[5]], base[idx[4]]);
370 v3 = _mm_set_pd(base[idx[7]], base[idx[6]]);
// Indexed store overload. Only its first statement is shown: the object is
// reinterpreted as an array of Real so that lanes can be read as scalars.
// NOTE(review): this relies on v0..v3 being laid out contiguously from
// offset 0 of the object - confirm.
376 void set(ARCANE_RESTRICT
Real* base,
const ARCANE_RESTRICT
Int32* idx)
const
378 const Real* x = (
const Real*)(
this);
// Return statement of a function whose signature is not shown here. The scalars
// are passed with a7 first; presumably this targets the eight-scalar
// constructor above so that a0 lands in lane 0 - confirm against the full file.
391 return SSESimdX8Real(a7, a6, a5, a4, a3, a2, a1, a0);
// Unary minus, computed as (0 - v) on each of the four registers. Because it
// is a subtraction and not a sign-bit flip, a zero lane yields +0.0 under the
// default rounding mode.
395 inline SSESimdX8Real operator-()
const
397 return SSESimdX8Real(_mm_sub_pd(_mm_setzero_pd(), v0),
398 _mm_sub_pd(_mm_setzero_pd(), v1),
399 _mm_sub_pd(_mm_setzero_pd(), v2),
400 _mm_sub_pd(_mm_setzero_pd(), v3));
// Assignment from a scalar is only declared here, no definition is visible.
// NOTE(review): presumably declared to forbid `simd = scalar`; check the
// access specifier in the full file.
405 void operator=(
Real _v);