8#ifndef UNIFORMPSEUDOPARTICLECONTAINER_H_
9#define UNIFORMPSEUDOPARTICLECONTAINER_H_
11#include "PseudoParticleContainer.h"
12#include "LeafNodesContainer.h"
13#include "parallel/DomainDecompBase.h"
14#include "bhfmm/utils/WignerMatrix.h"
15#include "bhfmm/utils/RotationParameter.h"
18#include "bhfmm/fft/FFTAccelerationAPI.h"
19#include "bhfmm/fft/FFTAccelerationAPI_extensions.h"
20#include "bhfmm/fft/FFTSettings.h"
21#include "bhfmm/fft/FFTFactory.h"
22#include "bhfmm/fft/FFTOrderReduction.h"
23#include "bhfmm/fft/TransferFunctionManagerAPI.h"
24#include <bhfmm/FastMultipoleMethod.h>
29#include "bhfmm/HaloBufferNoOverlap.h"
30#include "bhfmm/HaloBufferOverlap.h"
41 double LJCellLength[3],
42 unsigned LJSubdivisionFactor,
43 int orderOfExpansions,
47 , qsched *scheduler =
nullptr
70 std::vector<std::vector<MpCell>> &getMpCellGlobalTop() ;
/**
 * Declaration of one M2L "tower/plate" step.
 *
 * The four boolean template parameters select compile-time variants; judging
 * by their names these toggle vectorization, transfer-function memoization,
 * the 2-way M2L formulation and order reduction.
 * NOTE(review): exact semantics live in the definition, which is not visible
 * in this excerpt — confirm before relying on this description.
 *
 * @param m1Loop   loop index of the first cell (presumably; confirm)
 * @param mpCells  number of multipole cells (presumably per dimension; confirm)
 * @param curLevel tree level the step operates on
 */
template<bool UseVectorization, bool UseTFMemoization, bool UseM2L_2way, bool UseOrderReduction>
void M2LTowerPlateStep(int m1Loop, int mpCells, int curLevel);
/**
 * Per-cell entry points for the individual FMM operators.
 *
 * NOTE(review): P2M / M2M / M2L / L2L / L2P are presumably the usual
 * fast-multipole operators (particle-to-multipole, multipole-to-multipole,
 * multipole-to-local, local-to-local, local-to-particle), each applied to a
 * single cell identified by a linear id. The definitions are not visible in
 * this excerpt — confirm.
 */
void M2MCompleteCell(int targetId, int level, int cellsPerDim);
void P2MCompleteCell(int sourceId);
void M2LCompleteCell(int targetId, int level, int cellsPerDimension);
/** Variant taking a pair of cells (`cellA`, `cellB`); presumably the 2-way M2L — confirm. */
void M2LPair2Way(int cellA, int cellB, int level, int cellsPerDimension);
void L2LCompleteCell(int sourceId, int level, int cellsPerDimension);
void L2PCompleteCell(int targetId);
86 enum taskModelTypesM2L {
108 std::vector<std::vector<MpCell> > _mpCellGlobalTop;
109 std::vector<std::vector<MpCell> > _mpCellLocal;
110 double _cellLength[3];
111 int _globalNumCellsPerDim;
116 int _coeffVectorLength;
118 double* _coeffVector;
119 double* _coeffVector_me;
122 HaloBufferOverlap<double> * _multipoleRecBufferOverlap, * _multipoleBufferOverlap, * _multipoleBufferOverlapGlobal, * _multipoleRecBufferOverlapGlobal;
123 MPI_Request _allReduceRequest;
126 bool _avoidAllReduce;
127 bool _importWholeGlobalRegion;
128 bool _fuseGlobalCommunication;
133 std::vector<int> _neighbours;
134 std::vector<std::vector<std::vector<int>>> _allRanks;
135 int _globalLevelNumCells;
/**
 * Global-tree passes, each taking the cell width, the cell count and the
 * level to work on.
 *
 * NOTE(review): by name, CombineMpCell_Global is the upward (M2M) combine and
 * GatherWellSepLo_Global the well-separated (M2L) gather; the definitions are
 * not visible in this excerpt — confirm.
 *
 * @param cellWid  pointer to the cell width(s) (presumably 3 doubles; confirm)
 * @param mpCells  number of multipole cells (presumably per dimension; confirm)
 * @param curLevel tree level being processed
 */
void CombineMpCell_Global(double *cellWid, int mpCells, int curLevel);
void GatherWellSepLo_Global(double *cellWid, int mpCells, int curLevel);
153 void GatherWellSepLo_Local(
double *cellWid,
Vector3<int> localMpCells,
int curLevel,
int doHalos);
158 void GatherWellSepLo_FFT_Global(
double *cellWid,
int mpCells,
int curLevel);
160 void GatherWellSepLo_FFT_Local(
double *cellWid,
Vector3<int> localMpCells,
int curLevel,
int doHalos);
162 template<
bool UseVectorization,
bool UseTFMemoization,
bool UseM2L_2way,
bool UseOrderReduction>
163 void GatherWellSepLo_FFT_Global_template(
double *cellWid,
int mpCells,
int curLevel);
165 template<
bool UseVectorization,
bool UseTFMemoization,
bool UseM2L_2way,
bool UseOrderReduction>
166 void GatherWellSepLo_FFT_Local_template(
double *cellWid,
Vector3<int> localMpCells,
int curLevel,
int doHalos);
/**
 * Downward pass on the global tree for one level.
 * NOTE(review): presumably the L2L propagation — definition not visible here.
 */
void PropagateCellLo_Global(double *cellWid, int mpCells, int curLevel);

/**
 * All-reduce style exchanges of expansion coefficients.
 * NOTE(review): which MPI collective and communicator are used is not visible
 * in this excerpt — confirm against the implementation.
 *
 * @param mpCells   number of multipole cells on the level
 * @param _curLevel tree level the exchange applies to
 */
void AllReduceMultipoleMoments();
void AllReduceLocalMoments(int mpCells, int _curLevel);
void AllReduceMultipoleMomentsLevelToTop(int mpCells, int _curLevel);
void AllReduceMultipoleMomentsSetValues(int mpCells, int _curLevel);
203 int xLow,
int xHigh,
int yLow,
int yHigh,
int zLow,
int zHigh,
bool doLocalExpansion);
214 void setHaloValues(
Vector3<int> localMpCellsBottom,
int bottomLevel,
double *bufferRec,
215 int xLow,
int xHigh,
int yLow,
int yHigh,
int zLow,
int zHigh,
bool doLocalExpansion);
/**
 * Halo communication entry points.
 *
 * NOTE(review): by name, the "NoOverlap" variant is the blocking exchange and
 * the "Overlap*" functions are the start / post-processing stages of a
 * non-blocking exchange; the definitions are not visible here — confirm.
 */
void communicateHalosNoOverlap();
void communicateHalosOverlapStart();
void communicateHalosOverlapPostProcessingStart();
void communicateHalosOverlapPostProcessingSetHalos();
void communicateHalosOverlapSetHalos();
void communicateHalos();

/**
 * Exchange of values on the global tree levels.
 * @param stopLevel level at which the exchange stops (default 1)
 * @param receive   / @param send  direction switch, default false (exact
 *                  meaning not visible in this excerpt)
 */
void communicateOwnGlobalValue(int stopLevel = 1, bool receive = false);
void communicateHaloGlobalValues(int stopLevel = 1, bool send = false);

/** Per-axis halo exchanges. */
void communicateHalosX();
void communicateHalosY();
void communicateHalosZ();
238 void communicateHalosAlongAxis(
double * lowerNeighbourBuffer,
double * higherNeighbourBuffer,
239 double * lowerNeighbourBufferRec,
double * higherNeighbourBufferRec,
240 int lowerNeighbour,
int higherNeighbour,
int haloSize
// Switches for the local and the global part of the tree; `_doNTGlobal` is
// read by initBusyWaiting().
// NOTE(review): "NT" presumably stands for the neutral-territory scheme — confirm.
bool _doNTLocal;
bool _doNTGlobal;
// Resets the progress flags (_allReduceProcessed, _send*Processed,
// _backCommunication*Processed/Started, _globalHalosProcessed) to 0 or 1
// depending on _avoidAllReduce, _stopLevel, _globalLevel,
// _fuseGlobalCommunication and _doNTGlobal.
// NOTE(review): this listing skips source lines (e.g. 257-258, 260-261,
// 268-269, 271-272), so closing braces, `else` keywords and the condition
// guarding _backCommunicationLocalProcessed are not visible here. The branch
// structure described below is inferred from the pairs of assignments.
254 void initBusyWaiting(){
// Presumably if/else: flag is 1 when the condition holds, 0 otherwise.
255 if((_avoidAllReduce && _stopLevel == 1) or _globalLevel == 0){
256 _allReduceProcessed = 1;
259 _allReduceProcessed = 0;
262 _sendLocalProcessed = 0;
// Presumably if/else: flag is 1 when the condition holds, 0 otherwise.
263 if(_globalLevel < 1 or !_avoidAllReduce or (_globalLevel == 1 and _fuseGlobalCommunication)){
264 _sendGlobalProcessed = 1;
267 _sendGlobalProcessed = 0;
// The condition selecting between the next two assignments is on a line that
// is missing from this listing.
270 _backCommunicationLocalProcessed = 0;
273 _backCommunicationLocalProcessed = 1;
// Presumably if/else: flag is 0 when the condition holds, 1 otherwise.
275 if(_doNTGlobal and _avoidAllReduce and not (_globalLevel == 1 and _fuseGlobalCommunication)){
276 _backCommunicationGlobalProcessed = 0;
279 _backCommunicationGlobalProcessed = 1;
281 _backCommunicationLocalStarted = 0;
282 _backCommunicationGlobalStarted = 0;
// Presumably if/else: flag is 1 when the condition holds, 0 otherwise.
283 if(_globalLevel < 1 or !_avoidAllReduce){
284 _globalHalosProcessed = 1;
287 _globalHalosProcessed = 0;
291 bool filterM1Local(
bool doHalos,
int m1,
int m1x,
int m1y,
int m1z,
Vector3<int> localMpCells,
int curLevel);
292 bool filterM2Local(
bool doHalos,
int m1,
int m1x,
int m1y,
int m1z,
int m2,
int m2x,
int m2y,
int m2z,
Vector3<int> localMpCells,
int curLevel,
bool inHaloz,
bool inHaloy,
bool inHalox);
/**
 * Predicate for source cells on the global tree.
 *
 * NOTE(review): what `true` means (process vs. skip) and the layout of the
 * `m1v` / `m2v` arrays (presumably 3-component index vectors) are not visible
 * in this excerpt — confirm.
 *
 * @param curLevel     tree level being processed
 * @param m2v,m1v      index vectors of the source and target cell
 * @param m2x,m2y,m2z  3-D index of the source cell
 * @param m2           linear index of the source cell
 * @param yOffset      offset applied in y (meaning not visible here)
 */
bool filterM2Global(int curLevel, int *m2v, int *m1v, int m2x, int m2y, int m2z, int m2, int yOffset);
/**
 * NOTE(review): presumably chooses the all-reduce strategy / stop level and
 * returns it as an int; the definition is not visible in this excerpt.
 */
int optimizeAllReduce();

// Progress flags; initBusyWaiting() resets each of them to 0 or 1.
// NOTE(review): presumably polled by the busy-waiting loop — confirm.
int _allReduceProcessed;
int _globalHalosProcessed;
int _sendLocalProcessed;
int _sendGlobalProcessed;
int _backCommunicationLocalProcessed;
int _backCommunicationLocalStarted;
int _backCommunicationGlobalProcessed;
int _backCommunicationGlobalStarted;
311 MPI_Comm * _neighbourhoodComms;
312 MPI_Comm * _allReduceComms;
317 void generateResources(qsched *scheduler);
321 void generateP2MTasks(qsched* scheduler);
325 void generateM2MTasks(qsched* scheduler);
329 void generateP2PTasks(qsched *scheduler);
333 void generateM2LTasks(qsched *scheduler, taskModelTypesM2L taskModelM2L);
337 void generateL2LTasks(qsched *scheduler);
341 void generateL2PTasks(qsched *scheduler);
Handles the boundary region and multiple processes.
Definition: DomainDecompBase.h:51
This class is used to read in the phasespace and to handle macroscopic values.
Definition: Domain.h:47
Definition: FFTAccelerationAPI.h:22
Definition: HaloBufferNoOverlap.h:11
This Interface is used to get access to particles and pairs of particles.
Definition: ParticleContainer.h:69
Definition: TransferFunctionManagerAPI.h:11
Definition: HaloBufferOverlap.h:19
Definition: L2PCellProcessor.h:17
Definition: LeafNodesContainer.h:25
Definition: P2MCellProcessor.h:18
Definition: ParticleCellPointers.h:44
Definition: PseudoParticleContainer.h:46
Definition: UniformPseudoParticleContainer.h:36
Vectorized calculation of the force.
Definition: VectorizedChargeP2PCellProcessor.h:32
Definition: L2PCellProcessor.cpp:15
::xsd::cxx::tree::buffer< char > buffer
Binary buffer type.
Definition: vtk-punstructured.h:363