// ls1-MarDyn molecular dynamics code — MaskVecDouble.h
/*
 * MaskVecDouble.h
 *
 *  Created on: 6 Feb 2018
 *      Author: tchipevn
 */

#ifndef SRC_PARTICLECONTAINER_ADAPTER_VECTORIZATION_MASKVECDOUBLE_H_
#define SRC_PARTICLECONTAINER_ADAPTER_VECTORIZATION_MASKVECDOUBLE_H_

#include "MaskVec.h"

// keep this file and MaskVecFloat as close as possible, so that they can be examined via diff!

namespace vcp {

/**
 * Mask vector for the double-precision vectorized kernels:
 * specialization of MaskVec<T> for T = double.
 *
 * The storage type of the mask depends on the compile-time vector width
 * (VCP_VEC_WIDTH):
 *  - VCP_VEC_W__64 (scalar):  uint8_t (a single 0/1 flag)
 *  - VCP_VEC_W_128 (SSE):     __m128i (one 64-bit lane per double)
 *  - VCP_VEC_W_256 (AVX):     __m256i (one 64-bit lane per double)
 *  - VCP_VEC_W_512 (AVX-512): __mmask8 (one bit per double lane)
 *
 * Note: "and", "or" and "xor" in the operator definitions below are the
 * standard C++ alternative tokens, i.e. they define operator&&, operator||
 * and operator^. They perform element-wise (bitwise) mask combination,
 * not short-circuit logic.
 */
template<>
class MaskVec<double> {
private:
	// own typedefs necessary
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		typedef uint8_t mask_vec;
		typedef uint8_t mask_single;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128
		typedef __m128i mask_vec;
		typedef uint64_t mask_single;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_256
		typedef __m256i mask_vec;
		typedef uint64_t mask_single;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512

		// these can't be put in std::conditional, because it would just be too nice. warnings.
		typedef __mmask8 mask_vec;
		typedef __mmask8 mask_single;

		#if VCP_VEC_TYPE == VCP_VEC_KNL_GATHER or VCP_VEC_TYPE == VCP_VEC_AVX512F_GATHER
			// the gather variants additionally carry a full 512-bit vector of
			// 32-bit lookup indices (countertype32 is project-defined)
			typedef __m512i lookupOrMask_vec;
			typedef countertype32 lookupOrMask_single;
		#endif
	#endif

	// the raw mask value; every member function below operates on this
	mask_vec _m;

public:
	// default-constructed masks are deliberately left uninitialized
	vcp_inline
	MaskVec() {}

	// implicit conversion to the raw mask type, so a MaskVec can be passed
	// directly to the intrinsics used in the member functions
	vcp_inline
	operator mask_vec() const {
		return _m;
	}

	// implicit construction from the raw mask type (e.g. the result of an
	// intrinsic); intentionally non-explicit to keep call sites terse
	vcp_inline
	MaskVec(const mask_vec & m) {
		_m = m;
	}

	/// Mask with all lanes false (all bits zero).
	vcp_inline
	static MaskVec zero() {
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		return 0;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128
		return _mm_setzero_si128();
	#elif VCP_VEC_WIDTH == VCP_VEC_W_256
		return _mm256_setzero_si256();
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512
		return 0x00;
	#endif
	}

	/// Mask with all lanes true (all bits set).
	vcp_inline
	static MaskVec ones() {
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		return ~0;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128
		return _mm_set_epi32(~0, ~0, ~0, ~0);
	#elif VCP_VEC_WIDTH == VCP_VEC_W_256
		return _mm256_set_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0);
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512
		return 0xFF;
	#endif
	}

	/// Element-wise AND (this is operator&&; "and" is the alternative token).
	vcp_inline
	MaskVec operator and (const MaskVec& rhs) const {
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		return _m & rhs;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128
		return _mm_and_si128(_m, rhs);
	#elif VCP_VEC_WIDTH == VCP_VEC_W_256
		// cast to pd and back to use the floating-point AND on AVX
		return _mm256_castpd_si256(_mm256_and_pd(_mm256_castsi256_pd(_m), _mm256_castsi256_pd(rhs)));
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512
		return _m & rhs;
	#endif
	}

	/// Element-wise OR (this is operator||; "or" is the alternative token).
	vcp_inline
	MaskVec operator or (const MaskVec& rhs) const {
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		return _m | rhs;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128
		return _mm_or_si128(_m, rhs);
	#elif VCP_VEC_WIDTH == VCP_VEC_W_256
		// cast to pd and back to use the floating-point OR on AVX
		return _mm256_castpd_si256(_mm256_or_pd(_mm256_castsi256_pd(_m), _mm256_castsi256_pd(rhs)));
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512
		return _m | rhs;
	#endif
	}

	/// Element-wise XOR (this is operator^; "xor" is the alternative token).
	vcp_inline
	MaskVec operator xor (const MaskVec & rhs) const {
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		return _m xor rhs;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128
		return _mm_xor_si128(_m, rhs);
	#elif VCP_VEC_WIDTH == VCP_VEC_W_256
		// cast to pd and back to use the floating-point XOR on AVX
		return _mm256_castpd_si256(_mm256_xor_pd(_mm256_castsi256_pd(_m), _mm256_castsi256_pd(rhs)));
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512
		return _m ^ rhs;
	#endif
	}

	/// Load a mask from memory; `a` must be aligned for the vector width
	/// (16 bytes for SSE, 32 bytes for AVX).
	vcp_inline
	static MaskVec aligned_load(const mask_single * const a) {
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		return *a;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128
		return _mm_load_si128((const __m128i*)a);
	#elif VCP_VEC_WIDTH == VCP_VEC_W_256
		return _mm256_load_si256((const __m256i*)a);
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512
		return *a; // is this used ?
	#endif
	}


#if VCP_VEC_TYPE == VCP_VEC_KNL_GATHER or VCP_VEC_TYPE == VCP_VEC_AVX512F_GATHER
	/// Load a full 512-bit vector of lookup indices (gather variants only);
	/// `a` must be 64-byte aligned as required by _mm512_load_epi64.
	vcp_inline
	static lookupOrMask_vec aligned_load(const lookupOrMask_single * const a) {
		return _mm512_load_epi64(a);
	}
#endif

	/// Store the mask to memory; `location` must be aligned for the vector
	/// width (16 bytes for SSE, 32 bytes for AVX).
	vcp_inline
	void aligned_store(mask_single * location) const {
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		*location = _m;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128
		_mm_store_si128((__m128i*)location, _m);
	#elif VCP_VEC_WIDTH == VCP_VEC_W_256
		_mm256_store_si256((__m256i*)location, _m);
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512
		*location = _m; // is this used ?
	#endif
	}

	/// Nonzero iff at least one lane is set.
	/// NOTE(review): the SSE/AVX paths return the per-lane sign-bit bitmask
	/// (movemask_pd), while the scalar and AVX-512 paths collapse to 0/1 —
	/// callers should therefore only rely on zero vs. non-zero.
	vcp_inline
	int movemask() const {
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		return _m != MaskVec::zero();
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128
		return _mm_movemask_pd(_mm_castsi128_pd(_m));
	#elif VCP_VEC_WIDTH == VCP_VEC_W_256
		return _mm256_movemask_pd(_mm256_castsi256_pd(_m));
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512
		return _m != MaskVec::zero();
	#endif
	}

	/// Number of lanes whose mask is set (popcount of the lane bitmask).
	/// The scalar path returns _m directly — presumably _m is only ever
	/// 0 or 1 there; TODO confirm against the scalar kernel.
	vcp_inline
	int countUnmasked() const {
	#if VCP_VEC_WIDTH == VCP_VEC_W__64
		return _m;
	#elif VCP_VEC_WIDTH == VCP_VEC_W_128 or VCP_VEC_WIDTH == VCP_VEC_W_256
		return __builtin_popcount(movemask());
	#elif VCP_VEC_WIDTH == VCP_VEC_W_512
		return __builtin_popcount(_m);
	#endif
	}
};

} /* namespace vcp */

#endif /* SRC_PARTICLECONTAINER_ADAPTER_VECTORIZATION_MASKVECDOUBLE_H_ */