8#ifndef SRC_PARTICLECONTAINER_ADAPTER_VECTORIZATION_MASKVECDOUBLE_H_
9#define SRC_PARTICLECONTAINER_ADAPTER_VECTORIZATION_MASKVECDOUBLE_H_
// Compile-time selection of the mask types used by the double-precision
// vectorization layer, keyed on the configured SIMD register width:
//   mask_vec    - the packed mask register type for the chosen vector width
//   mask_single - the scalar element type used when a mask is kept in memory
//                 (uint64_t matches the width of one double lane for SSE/AVX)
// For AVX-512 the hardware mask register (__mmask8) already holds one bit per
// lane, so both types collapse to __mmask8.
// NOTE(review): this chunk appears truncated (the leading numbers on each line
// and the missing #endif lines look like extraction artifacts) - verify
// against the full file.
21 #if VCP_VEC_WIDTH == VCP_VEC_W__64
22 typedef uint8_t mask_vec;
23 typedef uint8_t mask_single;
24 #elif VCP_VEC_WIDTH == VCP_VEC_W_128
25 typedef __m128i mask_vec;
26 typedef uint64_t mask_single;
27 #elif VCP_VEC_WIDTH == VCP_VEC_W_256
28 typedef __m256i mask_vec;
29 typedef uint64_t mask_single;
30 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
33 typedef __mmask8 mask_vec;
34 typedef __mmask8 mask_single;
// Gather-based KNL/AVX512F kernels additionally need a full 512-bit vector of
// indices for lookups ("or" is the C++ alternative token for ||).
36 #if VCP_VEC_TYPE == VCP_VEC_KNL_GATHER or VCP_VEC_TYPE == VCP_VEC_AVX512F_GATHER
37 typedef __m512i lookupOrMask_vec;
38 typedef countertype32 lookupOrMask_single;
// Implicit conversion to the raw packed mask register, so a MaskVec can be
// passed directly to intrinsics expecting mask_vec.
// NOTE(review): the operator body and the signature of the factory below are
// missing from the visible lines of this chunk - confirm in the full file.
49 operator mask_vec()
const {
// Presumably zero(): an all-false mask (every lane cleared); the W__64 and
// W_512 branch bodies are not visible here.
60 #if VCP_VEC_WIDTH == VCP_VEC_W__64
62 #elif VCP_VEC_WIDTH == VCP_VEC_W_128
63 return _mm_setzero_si128();
64 #elif VCP_VEC_WIDTH == VCP_VEC_W_256
65 return _mm256_setzero_si256();
66 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
// Presumably the all-ones factory (every mask bit set, i.e. all lanes true):
// ~0 replicated across every 32-bit element. The function signature and the
// W__64 / W_512 branch bodies fall outside the visible lines - verify.
73 #if VCP_VEC_WIDTH == VCP_VEC_W__64
75 #elif VCP_VEC_WIDTH == VCP_VEC_W_128
76 return _mm_set_epi32(~0, ~0, ~0, ~0);
77 #elif VCP_VEC_WIDTH == VCP_VEC_W_256
78 return _mm256_set_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0);
79 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
// Bitwise AND of this mask with rhs (fragment; the operator signature and the
// W__64 / W_512 branches are not visible in this chunk).
86 #if VCP_VEC_WIDTH == VCP_VEC_W__64
88 #elif VCP_VEC_WIDTH == VCP_VEC_W_128
89 return _mm_and_si128(_m, rhs);
90 #elif VCP_VEC_WIDTH == VCP_VEC_W_256
// AVX1 lacks 256-bit integer logic ops, so the AND is routed through the
// floating-point domain via bit-preserving casts (castsi256_pd / castpd_si256).
91 return _mm256_castpd_si256(_mm256_and_pd(_mm256_castsi256_pd(_m), _mm256_castsi256_pd(rhs)));
92 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
// Bitwise OR of this mask with rhs (fragment; signature and W__64 / W_512
// branches not visible in this chunk).
99 #if VCP_VEC_WIDTH == VCP_VEC_W__64
101 #elif VCP_VEC_WIDTH == VCP_VEC_W_128
102 return _mm_or_si128(_m, rhs);
103 #elif VCP_VEC_WIDTH == VCP_VEC_W_256
// AVX1 has no 256-bit integer OR; use the bit-identical _pd variant via casts.
104 return _mm256_castpd_si256(_mm256_or_pd(_mm256_castsi256_pd(_m), _mm256_castsi256_pd(rhs)));
105 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
// Bitwise XOR of this mask with rhs (fragment; signature and W__64 / W_512
// branches not visible in this chunk).
112 #if VCP_VEC_WIDTH == VCP_VEC_W__64
114 #elif VCP_VEC_WIDTH == VCP_VEC_W_128
115 return _mm_xor_si128(_m, rhs);
116 #elif VCP_VEC_WIDTH == VCP_VEC_W_256
// AVX1 has no 256-bit integer XOR; use the bit-identical _pd variant via casts.
117 return _mm256_castpd_si256(_mm256_xor_pd(_mm256_castsi256_pd(_m), _mm256_castsi256_pd(rhs)));
118 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
// Load a full mask register from memory.
// Precondition: 'a' must be aligned to the vector width (16 bytes for SSE,
// 32 bytes for AVX) - the _load_ intrinsics fault on misaligned addresses.
// NOTE(review): the W__64 and W_512 branch bodies and the closing #endif/brace
// fall outside the visible lines of this chunk.
124 static MaskVec aligned_load(
const mask_single *
const a) {
125 #if VCP_VEC_WIDTH == VCP_VEC_W__64
127 #elif VCP_VEC_WIDTH == VCP_VEC_W_128
128 return _mm_load_si128((
const __m128i*)a);
129 #elif VCP_VEC_WIDTH == VCP_VEC_W_256
130 return _mm256_load_si256((
const __m256i*)a);
131 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
// Gather-kernel variant: load a 512-bit lookup/index vector from memory.
// Precondition: 'a' must be 64-byte aligned (_mm512_load_epi64 requirement).
// NOTE(review): closing brace and #endif are outside the visible lines.
137#if VCP_VEC_TYPE == VCP_VEC_KNL_GATHER or VCP_VEC_TYPE == VCP_VEC_AVX512F_GATHER
139 static lookupOrMask_vec aligned_load(
const lookupOrMask_single *
const a) {
140 return _mm512_load_epi64(a);
// Store the full mask register to memory.
// Precondition: 'location' must be aligned to the vector width (16 bytes for
// SSE, 32 bytes for AVX) - the _store_ intrinsics fault otherwise.
// NOTE(review): the W__64 and W_512 branch bodies are not visible in this chunk.
145 void aligned_store(mask_single * location)
const {
146 #if VCP_VEC_WIDTH == VCP_VEC_W__64
148 #elif VCP_VEC_WIDTH == VCP_VEC_W_128
149 _mm_store_si128((__m128i*)location, _m);
150 #elif VCP_VEC_WIDTH == VCP_VEC_W_256
151 _mm256_store_si256((__m256i*)location, _m);
152 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
// Collapse the mask to an int: the SSE/AVX branches produce one bit per double
// lane (sign bit of each 64-bit element, via movemask_pd on the bit-cast mask).
158 int movemask()
const {
159 #if VCP_VEC_WIDTH == VCP_VEC_W__64
// Single lane: 0 or 1 is already the complete lane bitmap.
160 return _m != MaskVec::zero();
161 #elif VCP_VEC_WIDTH == VCP_VEC_W_128
162 return _mm_movemask_pd(_mm_castsi128_pd(_m));
163 #elif VCP_VEC_WIDTH == VCP_VEC_W_256
164 return _mm256_movemask_pd(_mm256_castsi256_pd(_m));
165 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
// NOTE(review): __mmask8 already IS a per-lane bitmask, but this comparison
// collapses it to 0/1 - unlike the other branches, per-lane information is
// lost here. Verify that W_512 callers only test movemask() for truthiness
// (countUnmasked() below notably bypasses it and popcounts _m directly).
166 return _m != MaskVec::zero();
// Number of active (unmasked) lanes: population count of the lane bitmap.
// NOTE(review): the W__64 branch body and the closing #endif/brace are outside
// the visible lines of this chunk.
171 int countUnmasked()
const {
172 #if VCP_VEC_WIDTH == VCP_VEC_W__64
174 #elif VCP_VEC_WIDTH == VCP_VEC_W_128 or VCP_VEC_WIDTH == VCP_VEC_W_256
175 return __builtin_popcount(movemask());
176 #elif VCP_VEC_WIDTH == VCP_VEC_W_512
// __mmask8 is already one bit per lane, so popcount it directly instead of
// going through movemask() (which collapses to 0/1 on this path).
177 return __builtin_popcount(_m);