// Compile-time constant fixed at 8.
// Presumably the number of Int32 values held by the enclosing vector type
// (the constructors that follow pack eight values into one __m256i) --
// confirm against the class declaration, which is not visible here.
50 static const int BLOCK_SIZE = 8;
61 AVXSimdX8Int32(__m256i _v0)
64 explicit AVXSimdX8Int32(
Int32 a)
65 : v0(_mm256_set1_epi32(a))
71 : v0(_mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0))
76 AVXSimdX8Int32(
const Int32* base,
const Int32* idx)
77 : v0(_mm256_set_epi32(base[idx[7]], base[idx[6]], base[idx[5]], base[idx[4]],
78 base[idx[3]], base[idx[2]], base[idx[1]], base[idx[0]]))
80 explicit AVXSimdX8Int32(
const Int32* base)
81 : v0(_mm256_load_si256((
const __m256i*)base))
87 void set(ARCANE_RESTRICT
Int32* base,
const ARCANE_RESTRICT
Int32* idx)
const
100 void set(ARCANE_RESTRICT
Int32* base)
const
102 _mm256_store_si256((__m256i*)base, v0);
105 void load(
const AVXSimdX8Int32* base)
107 v0 = _mm256_load_si256((
const __m256i*)base);
113 return AVXSimdX8Int32(a7, a6, a5, a4, a3, a2, a1, a0);
// Assignment from a single Int32 scalar: declaration only, no body here.
// NOTE(review): possibly left undefined on purpose to forbid assigning a
// scalar to the vector -- confirm the access specifier and whether a
// definition exists elsewhere.
118 void operator=(
Int32 _v);
// Compile-time constant fixed at 4.
// Presumably the number of Real values held by the enclosing vector type
// (the constructors that follow pack four values into one __m256d) --
// confirm against the class declaration, which is not visible here.
133 static const int BLOCK_SIZE = 4;
144 AVXSimdX4Real(__m256d _v0)
147 explicit AVXSimdX4Real(
Real r)
148 : v0(_mm256_set1_pd(r))
154 : v0(_mm256_set_pd(a3, a2, a1, a0))
159 AVXSimdX4Real(
const Real* base,
const Int32* idx)
160 : v0(_mm256_set_pd(base[idx[3]], base[idx[2]], base[idx[1]], base[idx[0]]))
163 AVXSimdX4Real(
const Real* base,
const Int32IndexType& simd_idx)
164#ifdef ARCANE_USE_AVX2_GATHER
165 : v0(_mm256_i32gather_pd(base, simd_idx.v0, 8)){}
167 : AVXSimdX4Real(base, (
const Int32*)&simd_idx)
172 AVXSimdX4Real(
const Real* base,
const Int32IndexType* simd_idx)
173#ifdef ARCANE_USE_AVX2_GATHER
174 : v0(_mm256_i32gather_pd((
Real*)base, simd_idx->v0, 8)){}
176 : AVXSimdX4Real(base, (
const Int32*)simd_idx)
183 : v0(_mm256_load_pd(base))
// Returns a mutable reference to the i-th Real, obtained by viewing the
// storage of v0 as a plain array of Real. No bounds check is performed on i.
187 Real& operator[](Integer i) {
return ((Real*)&v0)[i]; }
189 void set(ARCANE_RESTRICT Real* base,
const Int32* idx)
const
192 const Real* x = (
const Real*)(
this);
// Loads four 32-bit indices from idx, then scatters the four values of v0
// into base at those indices (last argument 8 is the byte scale per index).
// NOTE(review): _mm_load_si128 is the aligned load and requires idx to be
// 16-byte aligned, but idx is a plain const Int32* here, and the analogous
// eight-wide code in this file uses _mm_loadu_si128. Confirm that callers
// guarantee alignment, or switch to the unaligned load.
200 __m128i idx0 = _mm_load_si128((__m128i*)idx);
201 _mm256_i32scatter_pd(base, idx0, v0, 8);
205 void set(ARCANE_RESTRICT Real* base,
const Int32IndexType& simd_idx)
const
207 this->set(base, &simd_idx);
210 void set(ARCANE_RESTRICT Real* base,
const Int32IndexType* simd_idx)
const
212 this->set(base, (
const Int32*)simd_idx);
218 _mm256_store_pd(base, v0);
227 inline AVXSimdX4Real operator-()
const
229 return AVXSimdX4Real(_mm256_sub_pd(_mm256_setzero_pd(), v0));
// Assignment from a single Real scalar: declaration only, no body here.
// NOTE(review): possibly left undefined on purpose to forbid assigning a
// scalar to the vector -- confirm the access specifier and whether a
// definition exists elsewhere.
234 void operator=(Real _v);
// Compile-time constant fixed at 8.
// Presumably the number of Real values held by the enclosing vector type
// (the constructors that follow fill two __m256d members, v0 and v1, with
// four values each) -- confirm against the class declaration, which is not
// visible here.
249 static const int BLOCK_SIZE = 8;
260 AVXSimdX8Real(__m256d _v0, __m256d _v1)
264 explicit AVXSimdX8Real(
Real r)
266 v0 = _mm256_set1_pd(r);
267 v1 = _mm256_set1_pd(r);
274 v0 = _mm256_set_pd(a3, a2, a1, a0);
275 v1 = _mm256_set_pd(a7, a6, a5, a4);
280 AVXSimdX8Real(
const Real* base,
const Int32* idx)
286 v0 = _mm256_set_pd(base[idx[3]], base[idx[2]], base[idx[1]], base[idx[0]]);
287 v1 = _mm256_set_pd(base[idx[7]], base[idx[6]], base[idx[5]], base[idx[4]]);
289 __m128i idx0 = _mm_loadu_si128((__m128i*)idx);
290 __m128i idx1 = _mm_loadu_si128((__m128i*)(idx + 4));
291 v0 = _mm256_i32gather_pd((
Real*)base, idx0, 8);
292 v1 = _mm256_i32gather_pd((
Real*)base, idx1, 8);
299 v0 = _mm256_load_pd(base);
300 v1 = _mm256_load_pd(base + 4);
// Returns a mutable reference to the i-th Real, obtained by viewing the
// storage starting at v0 as a plain array of Real. No bounds check on i.
// NOTE(review): v0 alone holds four Real values; for i in 4..7 this
// presumably relies on v1 being laid out immediately after v0 in the
// enclosing class -- confirm the member order and absence of padding.
304 Real& operator[](Integer i) {
return ((Real*)&v0)[i]; }
306 void set(ARCANE_RESTRICT Real* base,
const ARCANE_RESTRICT Int32* idx)
const
309 const Real* x = (
const Real*)(
this);
321 __m128i idx0 = _mm_loadu_si128((__m128i*)idx);
322 __m128i idx1 = _mm_loadu_si128((__m128i*)(idx + 4));
323 _mm256_i32scatter_pd(base, idx0, v0, 8);
324 _mm256_i32scatter_pd(base, idx1, v1, 8);
331 _mm256_store_pd(base, v0);
332 _mm256_store_pd(base + 4, v1);
342 inline AVXSimdX8Real operator-()
const
344 return AVXSimdX8Real(_mm256_sub_pd(_mm256_setzero_pd(), v0),
345 _mm256_sub_pd(_mm256_setzero_pd(), v1));
// Assignment from a single Real scalar: declaration only, no body here.
// NOTE(review): possibly left undefined on purpose to forbid assigning a
// scalar to the vector -- confirm the access specifier and whether a
// definition exists elsewhere.
350 void operator=(Real _v);