static const int BLOCK_SIZE = 8;

AVXSimdX8Int32(__m256i _v0) : v0(_v0) {}

explicit AVXSimdX8Int32(Int32 a) : v0(_mm256_set1_epi32(a)) {}
AVXSimdX8Int32(Int32 a7,Int32 a6,Int32 a5,Int32 a4,Int32 a3,Int32 a2,Int32 a1,Int32 a0)
: v0(_mm256_set_epi32(a7,a6,a5,a4,a3,a2,a1,a0)) {}
// Indexed load (gather): lane i reads base[idx[i]].
AVXSimdX8Int32(const Int32* base,const Int32* idx)
: v0(_mm256_set_epi32(base[idx[7]],base[idx[6]],base[idx[5]],base[idx[4]],
                      base[idx[3]],base[idx[2]],base[idx[1]],base[idx[0]])) {}
// Contiguous load of 8 Int32 (base must be 32-byte aligned).
explicit AVXSimdX8Int32(const Int32* base)
: v0(_mm256_load_si256((const __m256i*)base)) {}
// Indexed store (scatter): writes lane i to base[idx[i]].
void set(ARCANE_RESTRICT Int32* base,const ARCANE_RESTRICT Int32* idx) const
{
  const Int32* x = (const Int32*)(this);
  for (int i=0; i<8; ++i)
    base[idx[i]] = x[i];
}
// Contiguous store of the 8 lanes (base must be 32-byte aligned).
void set(ARCANE_RESTRICT Int32* base) const
{
  _mm256_store_si256((__m256i*)base,v0);
}
void load(const AVXSimdX8Int32* base)
{
  v0 = _mm256_load_si256((const __m256i*)base);
}
return AVXSimdX8Int32(a7,a6,a5,a4,a3,a2,a1,a0);
void operator=(Int32 _v);
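
// Usage sketch (illustrative, not part of the header): gathers eight Int32
// values through an index array and stores them contiguously. The function and
// buffer names are assumptions for the example; 'out' must be 32-byte aligned
// because the set(base) overload above uses _mm256_store_si256.
inline void gather_to_contiguous_x8i(const Int32* values,const Int32* indices,Int32* out)
{
  AVXSimdX8Int32 v(values,indices); // per-lane gather: values[indices[0..7]]
  v.set(out);                       // contiguous aligned store of the 8 lanes
}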
static const int BLOCK_SIZE = 4;

AVXSimdX4Real(__m256d _v0) : v0(_v0) {}

// Broadcast a single Real to all 4 lanes.
explicit AVXSimdX4Real(Real r)
: v0(_mm256_set1_pd(r)) {}

AVXSimdX4Real(Real a3,Real a2,Real a1,Real a0)
: v0(_mm256_set_pd(a3,a2,a1,a0)) {}
// Indexed load (gather): lane i reads base[idx[i]].
AVXSimdX4Real(const Real* base,const Int32* idx)
: v0(_mm256_set_pd(base[idx[3]],base[idx[2]],base[idx[1]],base[idx[0]])) {}
// Indexed load from a SIMD index vector; uses the AVX2 gather intrinsic when enabled.
AVXSimdX4Real(const Real* base,const Int32IndexType& simd_idx)
#ifdef ARCANE_USE_AVX2_GATHER
: v0(_mm256_i32gather_pd(base,simd_idx.v0,8)) {}
#else
: AVXSimdX4Real(base,(const Int32*)&simd_idx) {}
#endif
AVXSimdX4Real(const Real* base,const Int32IndexType* simd_idx)
#ifdef ARCANE_USE_AVX2_GATHER
: v0(_mm256_i32gather_pd((Real*)base,simd_idx->v0,8)) {}
#else
: AVXSimdX4Real(base,(const Int32*)simd_idx) {}
#endif
explicit AVXSimdX4Real(const Real* base)
: v0(_mm256_load_pd(base)) {}
Real& operator[](Integer i) { return ((Real*)&v0)[i]; }
// Indexed store (scatter): writes lane i to base[idx[i]].
void set(ARCANE_RESTRICT Real* base,const Int32* idx) const
{
#if defined(__AVX512F__) && defined(__AVX512VL__)
  // Assumed guard: _mm256_i32scatter_pd requires AVX-512F + AVX-512VL.
  __m128i idx0 = _mm_load_si128((__m128i*)idx);
  _mm256_i32scatter_pd(base,idx0,v0,8);
#else
  const Real* x = (const Real*)(this);
  base[idx[0]] = x[0];
  base[idx[1]] = x[1];
  base[idx[2]] = x[2];
  base[idx[3]] = x[3];
#endif
}
void set(ARCANE_RESTRICT Real* base,const Int32IndexType& simd_idx) const
{
  this->set(base,&simd_idx);
}

void set(ARCANE_RESTRICT Real* base,const Int32IndexType* simd_idx) const
{
  this->set(base,(const Int32*)simd_idx);
}

// Contiguous store of the 4 lanes (base must be 32-byte aligned).
void set(ARCANE_RESTRICT Real* base) const
{
  _mm256_store_pd(base,v0);
}
// Unary minus: each lane becomes 0.0 - lane.
inline AVXSimdX4Real operator-() const
{
  return AVXSimdX4Real(_mm256_sub_pd(_mm256_setzero_pd(),v0));
}

void operator=(Real _v);
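
// Usage sketch (illustrative, not part of the header): gathers x[idx[0..3]],
// negates the lanes with the unary minus above, and scatters the results back
// through the same indices. The function name is an assumption for the example.
inline void negate_indirect_x4(Real* x,const Int32* idx)
{
  AVXSimdX4Real v(x,idx);  // indexed load
  AVXSimdX4Real r = -v;    // lane-wise negation
  r.set(x,idx);            // indexed store
}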
static const int BLOCK_SIZE = 8;

AVXSimdX8Real(__m256d _v0,__m256d _v1) : v0(_v0), v1(_v1) {}

explicit AVXSimdX8Real(Real r)
{
  v0 = _mm256_set1_pd(r);
  v1 = _mm256_set1_pd(r);
}

AVXSimdX8Real(Real a7,Real a6,Real a5,Real a4,Real a3,Real a2,Real a1,Real a0)
{
  v0 = _mm256_set_pd(a3,a2,a1,a0);
  v1 = _mm256_set_pd(a7,a6,a5,a4);
}
// Indexed load (gather): uses AVX2 gather intrinsics when enabled, otherwise per-lane loads.
AVXSimdX8Real(const Real* base,const Int32* idx)
{
#ifndef ARCANE_USE_AVX2_GATHER
  v0 = _mm256_set_pd(base[idx[3]],base[idx[2]],base[idx[1]],base[idx[0]]);
  v1 = _mm256_set_pd(base[idx[7]],base[idx[6]],base[idx[5]],base[idx[4]]);
#else
  __m128i idx0 = _mm_loadu_si128((__m128i*)idx);
  __m128i idx1 = _mm_loadu_si128((__m128i*)(idx+4));
  v0 = _mm256_i32gather_pd((Real*)base,idx0,8);
  v1 = _mm256_i32gather_pd((Real*)base,idx1,8);
#endif
}

explicit AVXSimdX8Real(const Real* base)
{
  v0 = _mm256_load_pd(base);
  v1 = _mm256_load_pd(base+4);
}
Real& operator[](Integer i) { return ((Real*)&v0)[i]; }
// Indexed store (scatter): writes lane i to base[idx[i]].
void set(ARCANE_RESTRICT Real* base,const ARCANE_RESTRICT Int32* idx) const
{
#if defined(__AVX512F__) && defined(__AVX512VL__)
  // Assumed guard: _mm256_i32scatter_pd requires AVX-512F + AVX-512VL.
  __m128i idx0 = _mm_loadu_si128((__m128i*)idx);
  __m128i idx1 = _mm_loadu_si128((__m128i*)(idx+4));
  _mm256_i32scatter_pd(base,idx0,v0,8);
  _mm256_i32scatter_pd(base,idx1,v1,8);
#else
  const Real* x = (const Real*)(this);
  for (int i=0; i<8; ++i)
    base[idx[i]] = x[i];
#endif
}

// Contiguous store of the 8 lanes (base must be 32-byte aligned).
void set(ARCANE_RESTRICT Real* base) const
{
  _mm256_store_pd(base,v0);
  _mm256_store_pd(base+4,v1);
}
// Unary minus: each lane becomes 0.0 - lane.
inline AVXSimdX8Real operator-() const
{
  return AVXSimdX8Real(_mm256_sub_pd(_mm256_setzero_pd(),v0),
                       _mm256_sub_pd(_mm256_setzero_pd(),v1));
}

void operator=(Real _v);
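
// Usage sketch (illustrative, not part of the header): negates a contiguous
// array in blocks of AVXSimdX8Real::BLOCK_SIZE (8 Reals per iteration). It
// assumes 'n' is a multiple of 8 and 'data' is 32-byte aligned, since the
// pointer constructor and set(base) above use aligned load/store intrinsics.
inline void negate_array_x8(Real* data,Integer n)
{
  for (Integer i=0; i<n; i+=AVXSimdX8Real::BLOCK_SIZE) {
    AVXSimdX8Real v(data+i);  // aligned load of 8 Reals (two __m256d)
    AVXSimdX8Real r = -v;     // lane-wise negation
    r.set(data+i);            // aligned store back
  }
}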