ls1-MarDyn
ls1-MarDyn molecular dynamics code
VelocityCellProcessorRMM.h
1/*
2 * VelocityCellProcessorRMM.h
3 *
4 * Created on: 5 Oct 2017
5 * Author: tchipevn
6 */
7
8#ifndef SRC_INTEGRATORS_VELOCITYCELLPROCESSORRMM_H_
9#define SRC_INTEGRATORS_VELOCITYCELLPROCESSORRMM_H_
10
11#include "WrapOpenMP.h"
12#include "particleContainer/ParticleCell.h"
13#include "particleContainer/adapter/CellProcessor.h"
14#include "particleContainer/adapter/vectorization/SIMD_VectorizedCellProcessorHelpers.h"
15
16// just compute summv2 in a vectorized fashion in RMM
17
19public:
// Constructor fragment (its signature line is not part of this listing):
// passes (0.0, 0.0) to the CellProcessor base, zero-initializes _N and
// _summv2, and sets up the per-thread ThreadData objects.
21 CellProcessor(0.0, 0.0), _N(0), _summv2(0.0) {
22
// One pointer slot per thread, sized by mardyn_get_max_threads().
23 _threadData.resize(mardyn_get_max_threads());
24
// Every thread of the parallel region allocates its own ThreadData and stores
// the pointer at its own thread index.
25 #if defined(_OPENMP)
26 #pragma omp parallel
27 #endif
28 {
29 ThreadData * myown = new ThreadData();
30 const int myid = mardyn_get_thread_num();
31 _threadData[myid] = myown;
32 } // end pragma omp parallel
33 }
// Presumably the destructor body (its signature line is not part of this
// listing): every thread of the parallel region deletes the ThreadData stored
// at its own thread index.
// NOTE(review): only slots whose thread takes part in this region are freed -
// confirm the region always runs with as many threads as the allocation did.
35 #if defined(_OPENMP)
36 #pragma omp parallel
37 #endif
38 {
39 const int myid = mardyn_get_thread_num();
40 delete _threadData[myid];
41 }
42 }
43
45
46 void processCellPair(ParticleCell& cell1, ParticleCell& cell2, bool sumAll = false) {}
47
48 double processSingleMolecule(Molecule* m1, ParticleCell& cell2) { return 0.0; }
49
51
53 #if defined(_OPENMP)
54 #pragma omp master
55 #endif
56 {
57 _N = 0;
58 _summv2 = 0.0;
59 } // end pragma omp master
60
61 global_log->debug() << "VelocityCellProcessorRMM::initTraversal()." << std::endl;
62 }
63
64 void endTraversal() {
65 vcp_real_accum glob_summv2 = 0.0;
66 unsigned long glob_N = 0;
67
68 #if defined(_OPENMP)
69 #pragma omp parallel reduction(+:glob_summv2, glob_N)
70 #endif
71 {
72 const int tid = mardyn_get_thread_num();
73
74 // reduce vectors and clear local variable
75 vcp_real_accum thread_summv2 = 0.0;
76
77 load_hSum_Store_Clear(&thread_summv2, _threadData[tid]->_thread_summv2V);
78
79 // add to global sum
80 glob_summv2 += thread_summv2;
81 glob_N += _threadData[tid]->_thread_N;
82 _threadData[tid]->_thread_N = 0;
83 } // end pragma omp parallel reduction
84
85 _summv2 = glob_summv2;
86 _N = glob_N;
87 }
88
90 // just compute the velocity sums
91
92 ParticleCellRMM & c = downcastCellReferenceRMM(cell);
93 CellDataSoARMM & soa = c.getCellDataSoA();
94
95 const size_t molNum = soa.getMolNum();
96 const size_t end_i = vcp_floor_to_vec_size(molNum);
97
98 const int tid = mardyn_get_thread_num();
99 ThreadData &my_threadData = *_threadData[tid];
100 my_threadData._thread_N += static_cast<unsigned long>(molNum);
101
102 const vcp_real_accum * const soa_v_x = soa.v_xBegin();
103 const vcp_real_accum * const soa_v_y = soa.v_yBegin();
104 const vcp_real_accum * const soa_v_z = soa.v_zBegin();
105
106 RealAccumVec sum_summv2 = RealAccumVec::zero();
107
108 size_t i = 0;
109 for (; i < end_i; i += VCP_VEC_SIZE) {
110 const RealAccumVec v_x = RealAccumVec::aligned_load(soa_v_x + i);
111 const RealAccumVec v_y = RealAccumVec::aligned_load(soa_v_y + i);
112 const RealAccumVec v_z = RealAccumVec::aligned_load(soa_v_z + i);
113
114 const RealAccumVec v2 = RealAccumVec::scal_prod(v_x, v_y, v_z, v_x, v_y, v_z);
115 sum_summv2 = sum_summv2 + v2;
116
117 }
118 const MaskCalcVec remainderMask = vcp_simd_getRemainderMask(soa.getMolNum());
119 if (remainderMask.movemask()) {
120 const RealAccumVec v_x = RealAccumVec::aligned_load_mask(soa_v_x + i, remainderMask);
121 const RealAccumVec v_y = RealAccumVec::aligned_load_mask(soa_v_y + i, remainderMask);
122 const RealAccumVec v_z = RealAccumVec::aligned_load_mask(soa_v_z + i, remainderMask);
123
124 const RealAccumVec v2 = RealAccumVec::scal_prod(v_x, v_y, v_z, v_x, v_y, v_z);
125 sum_summv2 = sum_summv2 + v2;
126 }
127 sum_summv2.aligned_load_add_store(&(my_threadData._thread_summv2V[0]));
128 }
129
131 public:
132 ThreadData() {
133 _thread_summv2V.resize(VCP_VEC_SIZE);
134
135 for (size_t j = 0; j < VCP_VEC_SIZE; ++j) {
136 _thread_summv2V[j] = 0.0;
137 }
138 _thread_N = 0;
139 }
140
141 AlignedArray<vcp_real_accum> _thread_summv2V;
142 unsigned long _thread_N;
143 };
144
145 unsigned long getN() const {
146 return _N;
147 }
148
149 double getSummv2() const {
150 return _summv2;
151 }
152private:
153 unsigned long _N;
154 double _summv2;
155 std::vector<ThreadData *> _threadData;
156};
157
158#endif /* SRC_INTEGRATORS_VELOCITYCELLPROCESSORRMM_H_ */
An aligned array.
Definition: AlignedArray.h:75
Structure of Arrays for single-center lennard-Jones molecules for the RMM run.
Definition: CellDataSoARMM.h:16
Definition: CellProcessor.h:29
FullMolecule modeled as LJ sphere with point polarities.
Definition: FullMolecule.h:18
FullParticleCell data structure. Renamed from ParticleCell.
Definition: FullParticleCell.h:49
Definition: ParticleCellRMM.h:8
Definition: VelocityCellProcessorRMM.h:130
Definition: VelocityCellProcessorRMM.h:18
void processCellPair(ParticleCell &cell1, ParticleCell &cell2, bool sumAll=false)
Definition: VelocityCellProcessorRMM.h:46
void processCell(ParticleCell &cell)
Definition: VelocityCellProcessorRMM.h:89
void preprocessCell(ParticleCell &cell)
Definition: VelocityCellProcessorRMM.h:44
void endTraversal()
Definition: VelocityCellProcessorRMM.h:64
void postprocessCell(ParticleCell &cell)
Definition: VelocityCellProcessorRMM.h:50
void initTraversal()
Definition: VelocityCellProcessorRMM.h:52
Definition: MaskVec.h:16
Definition: RealVec.h:22