ls1-MarDyn
ls1-MarDyn molecular dynamics code
SIMD_DEFINITIONS.h
Go to the documentation of this file.
1/***********************************************************************************/
10#pragma once
11
12#ifndef SIMD_DEFINITIONS_H
13#define SIMD_DEFINITIONS_H
14
15#if defined(__GNUC__)
16#define vcp_inline inline __attribute__((always_inline))
17#else
18#define vcp_inline inline
19#endif
20
21#ifdef IN_IDE_PARSER //just for the ide parser include the simd_types.h -- normally this is not done.
22 #include "./SIMD_TYPES.h"
23#endif
24
25#include "RealVec.h"
26
27namespace vcp {
30}
31
32#if VCP_PREC == VCP_SPDP
33#include "RealAccumVecSPDP.h"
34namespace vcp {
36}
37#else
38namespace vcp {
40}
41#endif
42
43// use constexpr instead of conditional compilation to death:
44
45constexpr size_t VCP_VEC_SIZE = sizeof(vcp::RealCalcVec) / sizeof(vcp_real_calc);
46constexpr size_t VCP_VEC_SIZE_M1 = VCP_VEC_SIZE - 1u;
47
48constexpr size_t VCP_INDICES_PER_LOOKUP_SINGLE = (VCP_VEC_TYPE != VCP_VEC_KNL) and (VCP_VEC_TYPE != VCP_VEC_AVX512F) ? 1u : VCP_VEC_SIZE;
49constexpr size_t VCP_INDICES_PER_LOOKUP_SINGLE_M1 = (VCP_VEC_TYPE != VCP_VEC_KNL) and (VCP_VEC_TYPE != VCP_VEC_AVX512F) ? 0u : VCP_VEC_SIZE_M1;
50
51constexpr size_t VCP_ALIGNMENT = (VCP_VEC_TYPE != VCP_NOVEC) ? sizeof(vcp::RealCalcVec) : 8u;
52
53#include <cmath>
54#include "sys/types.h"
55
56
57using namespace vcp;
58
59/*
60 * Check whether the file SIMD_TYPES.h has been included.
61 * This file (SIMD_DEFINITIONS.h) relies on macros that must be set beforehand by SIMD_TYPES.h.
62 */
63#ifndef SIMD_TYPES_H
64 #error "SIMD_DEFINITIONS included without SIMD_TYPES! Never include this file directly! Include it only via SIMD_TYPES!"
65#endif /* defined SIMD_TYPES_H */
66
67#if VCP_VEC_TYPE==VCP_NOVEC
68 static vcp_inline MaskCalcVec vcp_simd_getInitMask(const size_t& /*i*/){
69 return true;
70 }
71 static vcp_inline MaskCalcVec vcp_simd_getRemainderMask(const size_t& /*size*/){
72 return false;
73 }
74
75#elif VCP_VEC_TYPE==VCP_VEC_SSE3
76 #if VCP_PREC == VCP_SPSP or VCP_PREC == VCP_SPDP
77 static vcp_inline MaskCalcVec vcp_simd_getInitMask(const size_t& i){
78 switch (i & static_cast<size_t>(VCP_VEC_SIZE_M1)) {
79 case 0: return _mm_set_epi32(~0, ~0, ~0, ~0);
80 case 1: return _mm_set_epi32(~0, ~0, ~0, 0);
81 case 2: return _mm_set_epi32(~0, ~0, 0, 0);
82 default: return _mm_set_epi32(~0, 0, 0, 0);
83 }
84 }
85 static vcp_inline MaskCalcVec vcp_simd_getRemainderMask(const size_t& size) {
86 switch (size & static_cast<size_t>(VCP_VEC_SIZE_M1)) {
87 case 0: return _mm_set_epi32(0, 0, 0, 0);
88 case 1: return _mm_set_epi32(0, 0, 0, ~0);
89 case 2: return _mm_set_epi32(0, 0, ~0, ~0);
90 default: return _mm_set_epi32(0, ~0, ~0, ~0);
91 }
92 }
93 #else /*VCP_DPDP*/
94 static vcp_inline MaskCalcVec vcp_simd_getInitMask(const size_t& i){
95 switch (i & static_cast<size_t>(VCP_VEC_SIZE_M1)) {
96 case 0: return _mm_set_epi32(~0, ~0, ~0, ~0);
97 default: return _mm_set_epi32(~0, ~0, 0, 0);
98 }
99 }
100 static vcp_inline MaskCalcVec vcp_simd_getRemainderMask(const size_t& size) {
101 switch (size & static_cast<size_t>(VCP_VEC_SIZE_M1)) {
102 case 0: return _mm_set_epi32(0, 0, 0, 0);
103 default: return _mm_set_epi32(0, 0, ~0, ~0);
104 }
105 }
106 #endif
107#elif VCP_VEC_TYPE==VCP_VEC_AVX or VCP_VEC_TYPE==VCP_VEC_AVX2
108 #if VCP_PREC == VCP_SPSP or VCP_PREC == VCP_SPDP
109 static vcp_inline MaskCalcVec vcp_simd_getInitMask(const size_t& i){
110 switch (i & static_cast<size_t>(VCP_VEC_SIZE_M1)) {
111 case 0: return _mm256_set_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0);
112 case 1: return _mm256_set_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, 0);
113 case 2: return _mm256_set_epi32(~0, ~0, ~0, ~0, ~0, ~0, 0, 0);
114 case 3: return _mm256_set_epi32(~0, ~0, ~0, ~0, ~0, 0, 0, 0);
115 case 4: return _mm256_set_epi32(~0, ~0, ~0, ~0, 0, 0, 0, 0);
116 case 5: return _mm256_set_epi32(~0, ~0, ~0, 0, 0, 0, 0, 0);
117 case 6: return _mm256_set_epi32(~0, ~0, 0, 0, 0, 0, 0, 0);
118 default: return _mm256_set_epi32(~0, 0, 0, 0, 0, 0, 0, 0);
119 }
120 }
121 static vcp_inline MaskCalcVec vcp_simd_getRemainderMask(const size_t& size) {
122 switch (size & static_cast<size_t>(VCP_VEC_SIZE_M1)) {
123 case 0: return _mm256_set_epi32( 0, 0, 0, 0, 0, 0, 0, 0);
124 case 1: return _mm256_set_epi32( 0, 0, 0, 0, 0, 0, 0, ~0);
125 case 2: return _mm256_set_epi32( 0, 0, 0, 0, 0, 0, ~0, ~0);
126 case 3: return _mm256_set_epi32( 0, 0, 0, 0, 0, ~0, ~0, ~0);
127 case 4: return _mm256_set_epi32( 0, 0, 0, 0, ~0, ~0, ~0, ~0);
128 case 5: return _mm256_set_epi32( 0, 0, 0, ~0, ~0, ~0, ~0, ~0);
129 case 6: return _mm256_set_epi32( 0, 0, ~0, ~0, ~0, ~0, ~0, ~0);
130 default: return _mm256_set_epi32( 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0);
131 }
132 }
133 #else /* VCP_DPDP */
134 static vcp_inline MaskCalcVec vcp_simd_getInitMask(const size_t& i){
135 switch (i & static_cast<size_t>(VCP_VEC_SIZE_M1)) {
136 case 0: return _mm256_set_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0);
137 case 1: return _mm256_set_epi32(~0, ~0, ~0, ~0, ~0, ~0, 0, 0);
138 case 2: return _mm256_set_epi32(~0, ~0, ~0, ~0, 0, 0, 0, 0);
139 default: return _mm256_set_epi32(~0, ~0, 0, 0, 0, 0, 0, 0);
140 }
141 }
142 static vcp_inline MaskCalcVec vcp_simd_getRemainderMask(const size_t& size) {
143 switch (size & static_cast<size_t>(VCP_VEC_SIZE_M1)) {
144 case 0: return MaskCalcVec::zero();
145 case 1: return _mm256_set_epi32(0, 0, 0, 0, 0, 0, ~0, ~0);
146 case 2: return _mm256_set_epi32(0, 0, 0, 0, ~0, ~0, ~0, ~0);
147 default: return _mm256_set_epi32(0, 0, ~0, ~0, ~0, ~0, ~0, ~0);
148 }
149 }
150 #endif
151#elif VCP_VEC_WIDTH==VCP_VEC_W_512
152
153 #if VCP_PREC == VCP_SPSP or VCP_PREC == VCP_SPDP
154 static vcp_inline MaskCalcVec vcp_simd_getInitMask(const size_t& i){
155 static const MaskCalcVec possibleInitJMasks[VCP_VEC_SIZE] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0xFF80,
156 0xFF00, 0xFE00, 0xFC00, 0xF800, 0xF000, 0xE000, 0xC000, 0x8000 };
157 return possibleInitJMasks[i & static_cast<size_t>(VCP_VEC_SIZE_M1)];
158 }
159
160 static vcp_inline MaskCalcVec vcp_simd_getRemainderMask(const size_t& size) {
161 static const MaskCalcVec possibleRemainderJMasks[VCP_VEC_SIZE] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F,
162 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF };
163 return possibleRemainderJMasks[size & static_cast<size_t>(VCP_VEC_SIZE_M1)];
164 }
165 #else /* VCP_DPDP */
166 static vcp_inline MaskCalcVec vcp_simd_getInitMask(const size_t& i){
167 static const MaskCalcVec possibleInitJMasks[VCP_VEC_SIZE] = { 0xFF, 0xFE, 0xFC, 0xF8, 0xF0, 0xE0, 0xC0, 0x80 };
168 return possibleInitJMasks[i & static_cast<size_t>(VCP_VEC_SIZE_M1)];
169 }
170
171 static vcp_inline MaskCalcVec vcp_simd_getRemainderMask(const size_t& size) {
172 static const MaskCalcVec possibleRemainderJMasks[VCP_VEC_SIZE] = { 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F };
173 return possibleRemainderJMasks[size & static_cast<size_t>(VCP_VEC_SIZE_M1)];
174 }
175 #endif
176#endif
177
183template<class T>
184static vcp_inline T vcp_ceil_to_vec_size(const T& num){
185 return (num + static_cast<T>(VCP_VEC_SIZE_M1)) & (~~static_cast<T>(VCP_VEC_SIZE_M1));
186}
187
193template<class T>
194static vcp_inline T vcp_floor_to_vec_size(const T& num){
195 return num & (~~static_cast<T>(VCP_VEC_SIZE_M1));
196}
197
198#endif /* SIMD_DEFINITIONS_H */
Defines the length of the vectors and the corresponding functions.
Definition: MaskVec.h:16
Definition: RealAccumVecSPDP.h:294
Definition: RealVec.h:22