UniformPseudoParticleContainer.h
/*
 * UniformPseudoParticleContainer.h
 *
 * Created on: Feb 5, 2015
 *     Author: tchipevn
 */

#ifndef UNIFORMPSEUDOPARTICLECONTAINER_H_
#define UNIFORMPSEUDOPARTICLECONTAINER_H_

#include "PseudoParticleContainer.h"
#include "LeafNodesContainer.h"
#include "parallel/DomainDecompBase.h"
#include "bhfmm/utils/WignerMatrix.h"
#include "bhfmm/utils/RotationParameter.h"

#ifdef FMM_FFT
#include "bhfmm/fft/FFTAccelerationAPI.h"
#include "bhfmm/fft/FFTAccelerationAPI_extensions.h"
#include "bhfmm/fft/FFTSettings.h"
#include "bhfmm/fft/FFTFactory.h"
#include "bhfmm/fft/FFTOrderReduction.h"
#include "bhfmm/fft/TransferFunctionManagerAPI.h"
#include <bhfmm/FastMultipoleMethod.h>
#endif /* FMM_FFT */

#include <vector>
#include <map>
#include "bhfmm/HaloBufferNoOverlap.h"
#include "bhfmm/HaloBufferOverlap.h"
class Domain;

namespace bhfmm {

class UniformPseudoParticleContainer : public PseudoParticleContainer {
public:
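    /*
     * Constructs the uniform pseudo-particle (FMM) octree over the given domain.
     * Summary inferred from the parameter names: bBoxMin/bBoxMax give the local
     * bounding box of this rank, LJCellLength and LJSubdivisionFactor couple the
     * FMM leaf cells to the linked-cell grid of ljContainer, orderOfExpansions is
     * the order of the multipole/local expansions, and periodic selects periodic
     * boundary conditions.
     */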
    UniformPseudoParticleContainer(double domainLength[3],
            double bBoxMin[3],
            double bBoxMax[3],
            double LJCellLength[3],
            unsigned LJSubdivisionFactor,
            int orderOfExpansions,
            ParticleContainer* ljContainer,
            bool periodic = true
#ifdef QUICKSCHED
            , qsched *scheduler = nullptr
#endif
            );

    void clear();
    void build(ParticleContainer* pc);
    void upwardPass(P2MCellProcessor * cp);
    void horizontalPass(VectorizedChargeP2PCellProcessor * cp);
    void downwardPass(L2PCellProcessor *cp);

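    // Sketch of one FMM force evaluation, inferred from the pass names (the
    // driver is FastMultipoleMethod): build() fills the leaf cells from the
    // particle container, upwardPass() performs P2M/M2M, horizontalPass()
    // performs M2L and the near-field P2P interactions, downwardPass()
    // performs L2L/L2P, and clear() resets the expansions for the next step.
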
    // P2M
    void processMultipole(ParticleCellPointers& cell);

    // L2P
    void processFarField(ParticleCellPointers& cell);

    // M2M, M2L, L2L
    void processTree();

    // prints timer values to the standard output
    void printTimers();

    std::vector<std::vector<MpCell>> &getMpCellGlobalTop();

#ifdef FMM_FFT
    FFTAccelerationAPI *getFFTAcceleration();
#endif

    template<bool UseVectorization, bool UseTFMemoization, bool UseM2L_2way, bool UseOrderReduction>
    void M2LTowerPlateStep(int m1Loop, int mpCells, int curLevel);

    // methods used by the Quicksched task scheduler
    void M2MCompleteCell(int targetId, int level, int cellsPerDim);
    void P2MCompleteCell(int sourceId);
    void M2LCompleteCell(int targetId, int level, int cellsPerDimension);
    void M2LPair2Way(int cellA, int cellB, int level, int cellsPerDimension);
    void L2LCompleteCell(int sourceId, int level, int cellsPerDimension);
    void L2PCompleteCell(int targetId);
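
    // Task granularities for the M2L phase under Quicksched: CompleteTarget
    // creates one task per target cell (cf. M2LCompleteCell), Pair2Way creates
    // one task per interacting cell pair that updates both partners
    // (cf. M2LPair2Way).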
    enum taskModelTypesM2L {
        CompleteTarget,
        Pair2Way
    };

private:
    LeafNodesContainer* _leafContainer;
    int _wellSep;
    int _maxLevel;    //number of tree levels
    int _globalLevel; //number of levels in global tree
    //StopLevel (only valid if _avoidAllReduce == true):
    //level in the global tree that decides where to switch between global and local allreduce
    //stopLevel = 1 -> only local reduces
    //stopLevel = globalLevel + 1 -> only global reduce
    //otherwise: local reduces from globalLevel up to stopLevel and a global reduce above
    int _stopLevel;
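    // Illustrative example (not part of the original source): with
    // _globalLevel = 3 and _stopLevel = 2, the moments of levels 3 and 2 are
    // combined by local reduces among the ranks sharing a parent cell, and a
    // single global allreduce covers levels 1 and 0.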
    //In the parallel version the octree is divided into two trees:
    //- A local subtree starting at _globalLevel + 1 which contains only the
    //  descendants of the node that was assigned to this MPI process on
    //  _globalLevel
    //- A global tree which contains all octree elements of the FMM tree
    //  from level 0 to _globalLevel
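    // _mpCellGlobalTop presumably holds the global tree (levels 0 .. _globalLevel),
    // _mpCellLocal the local subtree (levels _globalLevel + 1 .. _maxLevel).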
    std::vector<std::vector<MpCell>> _mpCellGlobalTop;
    std::vector<std::vector<MpCell>> _mpCellLocal;
    double _cellLength[3];
    int _globalNumCellsPerDim;
    Domain* _domain;
    int _globalNumCells; //total number of cells
    int* _occVector;     // array for MPI allgather

    int _coeffVectorLength; //size of the MPI buffer for multipole coefficients
    int _expansionSize;     //size of one local or multipole expansion in doubles
    double* _coeffVector;   // array for MPI allgather
    double* _coeffVector_me;
#ifdef ENABLE_MPI
    HaloBufferNoOverlap<double>* _multipoleRecBuffer, *_multipoleBuffer; //Buffers used for non-overlapping communication
    HaloBufferOverlap<double>* _multipoleRecBufferOverlap, *_multipoleBufferOverlap, *_multipoleBufferOverlapGlobal, *_multipoleRecBufferOverlapGlobal; //Buffers for sending and receiving the global and local tree halos
    MPI_Request _allReduceRequest; //request used to check whether the Iallreduce of the global reduce has finished
#endif
    bool _periodicBC;
    bool _avoidAllReduce; //if true, local reduces are performed according to the stop level; if false, a global allreduce is performed
    bool _importWholeGlobalRegion; //indicates whether the whole parent region should be imported (currently used when the number of processes is not a power of 2)
    bool _fuseGlobalCommunication; //indicates whether the fuse algorithm should be applied to reduce the number of communication partners -> collect all values of the parent region
    Vector3<int> _numProcessorsPerDim; //number of processors in every dimension
    Vector3<int> _numCellsOnGlobalLevel; //number of cells each processor owns on the global level in each dimension (rectangular region)
    Vector3<int> _processorPositionGlobalLevel; //position of the processor on the global level
    Vector3<double> _bBoxMin; //minimum coordinates of the bounding box
    std::vector<int> _neighbours; //MPI ranks of the neighbouring processes
    std::vector<std::vector<std::vector<int>>> _allRanks; //3-dimensional vector storing the whole 3-dimensional grid of MPI ranks
    int _globalLevelNumCells; //number of cells on the complete global level

#ifdef FMM_FFT
    FFTAccelerationAPI* _FFTAcceleration;
#endif /* FMM_FFT */

    // M2M
    void CombineMpCell_Global(double *cellWid, int mpCells, int curLevel);

    // M2M
    void CombineMpCell_Local(double *cellWid, Vector3<int> localMpCells, int curLevel, Vector3<int> offset);

    // M2L
    void GatherWellSepLo_Global(double *cellWid, int mpCells, int curLevel);

    // M2L
    void GatherWellSepLo_Local(double *cellWid, Vector3<int> localMpCells, int curLevel, int doHalos);

#ifdef FMM_FFT
    // M2L
    void GatherWellSepLo_FFT_Global(double *cellWid, int mpCells, int curLevel);
    // M2L
    void GatherWellSepLo_FFT_Local(double *cellWid, Vector3<int> localMpCells, int curLevel, int doHalos);

    template<bool UseVectorization, bool UseTFMemoization, bool UseM2L_2way, bool UseOrderReduction>
    void GatherWellSepLo_FFT_Global_template(double *cellWid, int mpCells, int curLevel);

    template<bool UseVectorization, bool UseTFMemoization, bool UseM2L_2way, bool UseOrderReduction>
    void GatherWellSepLo_FFT_Local_template(double *cellWid, Vector3<int> localMpCells, int curLevel, int doHalos);
#endif /* FMM_FFT */

    // L2L
    void PropagateCellLo_Global(double *cellWid, int mpCells, int curLevel);

    // L2L
    void PropagateCellLo_Local(double *cellWid, Vector3<int> localMpCells, int curLevel, Vector3<int> offset);

    // for parallelization
    void AllReduceMultipoleMoments();
    void AllReduceLocalMoments(int mpCells, int _curLevel);
    void AllReduceMultipoleMomentsLevelToTop(int mpCells, int _curLevel);
    void AllReduceMultipoleMomentsSetValues(int mpCells, int _curLevel);

    void getHaloValues(Vector3<int> localMpCellsBottom, int bottomLevel, double *buffer,
            int xLow, int xHigh, int yLow, int yHigh, int zLow, int zHigh, bool doLocalExpansion);
    void setHaloValues(Vector3<int> localMpCellsBottom, int bottomLevel, double *bufferRec,
            int xLow, int xHigh, int yLow, int yHigh, int zLow, int zHigh, bool doLocalExpansion);

    //for parallelization
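    // Overlapping halo exchange (sketch, inferred from the method names and
    // comments below): the *Start() methods post the non-blocking sends and
    // receives, busyWaiting() drives progress, and the *SetHalos() methods
    // unpack the received values once the transfers have completed; the
    // PostProcessing variants handle the backward communication needed by the
    // NT scheme.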
    void communicateHalosNoOverlap();
    void communicateHalosOverlapStart(); //start communication with overlap for the local tree
    void communicateHalosOverlapPostProcessingStart(); //backward communication start (asynchronous)
    void communicateHalosOverlapPostProcessingSetHalos(); //backward communication finish (set values)
    //has to be called after the receives have finished
    void communicateHalosOverlapSetHalos(); //finish communication with overlap (set values)
    void communicateHalos();
    void communicateOwnGlobalValue(int stopLevel = 1, bool receive = false);
    void communicateHaloGlobalValues(int stopLevel = 1, bool send = false);

    void communicateHalosX();
    void communicateHalosY();
    void communicateHalosZ();
    void communicateHalosAlongAxis(double * lowerNeighbourBuffer, double * higherNeighbourBuffer,
            double * lowerNeighbourBufferRec, double * higherNeighbourBufferRec,
            int lowerNeighbour, int higherNeighbour, int haloSize);
    bool _doNTLocal, _doNTGlobal; //indicate whether the NT method should be applied to the local tree part and/or the global tree part

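    // NT here presumably refers to the neutral-territory style import scheme
    // controlled by _doNTLocal/_doNTGlobal above, which shifts part of the M2L
    // work to neighbouring ranks in order to reduce the communication volume.
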
    int busyWaiting();

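    // Initializes the *Processed flags below; communication steps that are not
    // required under the current configuration (e.g. no allreduce to wait for,
    // no NT backward communication) are marked as already processed, so that
    // busyWaiting() only has to wait for the outstanding ones.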
    void initBusyWaiting() {
        if ((_avoidAllReduce && _stopLevel == 1) or _globalLevel == 0) {
            _allReduceProcessed = 1;
        }
        else {
            _allReduceProcessed = 0;
        }
        _halosProcessed = 0;
        _sendLocalProcessed = 0;
        if (_globalLevel < 1 or !_avoidAllReduce or (_globalLevel == 1 and _fuseGlobalCommunication)) { // in this case only allreduce or nothing
            _sendGlobalProcessed = 1;
        }
        else {
            _sendGlobalProcessed = 0;
        }
        if (_doNTLocal) {
            _backCommunicationLocalProcessed = 0;
        }
        else {
            _backCommunicationLocalProcessed = 1;
        }
        if (_doNTGlobal and _avoidAllReduce and not (_globalLevel == 1 and _fuseGlobalCommunication)) {
            _backCommunicationGlobalProcessed = 0;
        }
        else {
            _backCommunicationGlobalProcessed = 1;
        }
        _backCommunicationLocalStarted = 0;
        _backCommunicationGlobalStarted = 0;
        if (_globalLevel < 1 or !_avoidAllReduce) {
            _globalHalosProcessed = 1;
        }
        else {
            _globalHalosProcessed = 0;
        }
    }
    //filter methods that detect whether certain combinations of cells are not allowed with the current strategy (NT or not) for the local and global tree parts
    bool filterM1Local(bool doHalos, int m1, int m1x, int m1y, int m1z, Vector3<int> localMpCells, int curLevel);
    bool filterM2Local(bool doHalos, int m1, int m1x, int m1y, int m1z, int m2, int m2x, int m2y, int m2z, Vector3<int> localMpCells, int curLevel, bool inHaloz, bool inHaloy, bool inHalox);
    bool filterM2Global(int curLevel, int *m2v, int *m1v, int m2x, int m2y, int m2z, int m2, int yOffset);

    //returns the optimal value for the stop level of the local-to-global allreduce
    int optimizeAllReduce(/*ParticleContainer* ljContainer*/);
    //flags indicating which communications have been started and which have already finished
    int _allReduceProcessed;
    int _globalHalosProcessed;
    int _halosProcessed; //local halos
    int _sendLocalProcessed;
    int _sendGlobalProcessed;
    int _backCommunicationLocalProcessed;
    int _backCommunicationLocalStarted;
    int _backCommunicationGlobalProcessed;
    int _backCommunicationGlobalStarted;

#ifdef ENABLE_MPI

    MPI_Comm _comm; //MPI communicator from the domain decomposition
    MPI_Comm * _neighbourhoodComms; //one communicator per level, containing the 8 MPI ranks that need to communicate for the local reduces
    MPI_Comm * _allReduceComms; //one communicator per possible stop level, containing all MPI ranks that need to communicate in the global reduce
#endif
    int _overlapComm; //indicates whether overlapping of communication is desired; must currently be true!

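    // Construction of the Quicksched task graph (sketch, inferred from the
    // method names): generateResources() registers the cells as Quicksched
    // resources, and the generate*Tasks() methods create the tasks of the
    // corresponding FMM phases; the M2L granularity is selected via
    // taskModelTypesM2L.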
#ifdef QUICKSCHED
    void generateResources(qsched *scheduler);
    void generateP2MTasks(qsched *scheduler);
    void generateM2MTasks(qsched *scheduler);
    void generateP2PTasks(qsched *scheduler);
    void generateM2LTasks(qsched *scheduler, taskModelTypesM2L taskModelM2L);
    void generateL2LTasks(qsched *scheduler);
    void generateL2PTasks(qsched *scheduler);
#endif

};

} /* namespace bhfmm */

#endif /* UNIFORMPSEUDOPARTICLECONTAINER_H_ */