8#ifndef SRC_INTEGRATORS_VELOCITYCELLPROCESSORRMM_H_
9#define SRC_INTEGRATORS_VELOCITYCELLPROCESSORRMM_H_
11#include "WrapOpenMP.h"
12#include "particleContainer/ParticleCell.h"
13#include "particleContainer/adapter/CellProcessor.h"
14#include "particleContainer/adapter/vectorization/SIMD_VectorizedCellProcessorHelpers.h"
23 _threadData.resize(mardyn_get_max_threads());
30 const int myid = mardyn_get_thread_num();
31 _threadData[myid] = myown;
39 const int myid = mardyn_get_thread_num();
40 delete _threadData[myid];
61 global_log->debug() <<
"VelocityCellProcessorRMM::initTraversal()." << std::endl;
65 vcp_real_accum glob_summv2 = 0.0;
66 unsigned long glob_N = 0;
69 #pragma omp parallel reduction(+:glob_summv2, glob_N)
72 const int tid = mardyn_get_thread_num();
75 vcp_real_accum thread_summv2 = 0.0;
77 load_hSum_Store_Clear(&thread_summv2, _threadData[tid]->_thread_summv2V);
80 glob_summv2 += thread_summv2;
81 glob_N += _threadData[tid]->_thread_N;
82 _threadData[tid]->_thread_N = 0;
85 _summv2 = glob_summv2;
95 const size_t molNum = soa.getMolNum();
96 const size_t end_i = vcp_floor_to_vec_size(molNum);
98 const int tid = mardyn_get_thread_num();
100 my_threadData._thread_N +=
static_cast<unsigned long>(molNum);
102 const vcp_real_accum *
const soa_v_x = soa.v_xBegin();
103 const vcp_real_accum *
const soa_v_y = soa.v_yBegin();
104 const vcp_real_accum *
const soa_v_z = soa.v_zBegin();
109 for (; i < end_i; i += VCP_VEC_SIZE) {
110 const RealAccumVec v_x = RealAccumVec::aligned_load(soa_v_x + i);
111 const RealAccumVec v_y = RealAccumVec::aligned_load(soa_v_y + i);
112 const RealAccumVec v_z = RealAccumVec::aligned_load(soa_v_z + i);
114 const RealAccumVec v2 = RealAccumVec::scal_prod(v_x, v_y, v_z, v_x, v_y, v_z);
115 sum_summv2 = sum_summv2 + v2;
118 const MaskCalcVec remainderMask = vcp_simd_getRemainderMask(soa.getMolNum());
119 if (remainderMask.movemask()) {
120 const RealAccumVec v_x = RealAccumVec::aligned_load_mask(soa_v_x + i, remainderMask);
121 const RealAccumVec v_y = RealAccumVec::aligned_load_mask(soa_v_y + i, remainderMask);
122 const RealAccumVec v_z = RealAccumVec::aligned_load_mask(soa_v_z + i, remainderMask);
124 const RealAccumVec v2 = RealAccumVec::scal_prod(v_x, v_y, v_z, v_x, v_y, v_z);
125 sum_summv2 = sum_summv2 + v2;
127 sum_summv2.aligned_load_add_store(&(my_threadData._thread_summv2V[0]));
133 _thread_summv2V.resize(VCP_VEC_SIZE);
135 for (
size_t j = 0; j < VCP_VEC_SIZE; ++j) {
136 _thread_summv2V[j] = 0.0;
142 unsigned long _thread_N;
145 unsigned long getN()
const {
149 double getSummv2()
const {
155 std::vector<ThreadData *> _threadData;
An aligned array.
Definition: AlignedArray.h:75
Structure of Arrays for single-center Lennard-Jones molecules for the RMM run.
Definition: CellDataSoARMM.h:16
Definition: CellProcessor.h:29
FullMolecule modeled as LJ sphere with point polarities.
Definition: FullMolecule.h:18
FullParticleCell data structure. Renamed from ParticleCell.
Definition: FullParticleCell.h:49
Definition: ParticleCellRMM.h:8
Definition: VelocityCellProcessorRMM.h:130
Definition: VelocityCellProcessorRMM.h:18
void processCellPair(ParticleCell &cell1, ParticleCell &cell2, bool sumAll=false)
Definition: VelocityCellProcessorRMM.h:46
void processCell(ParticleCell &cell)
Definition: VelocityCellProcessorRMM.h:89
void preprocessCell(ParticleCell &cell)
Definition: VelocityCellProcessorRMM.h:44
void endTraversal()
Definition: VelocityCellProcessorRMM.h:64
void postprocessCell(ParticleCell &cell)
Definition: VelocityCellProcessorRMM.h:50
void initTraversal()
Definition: VelocityCellProcessorRMM.h:52