ls1-MarDyn
ls1-MarDyn molecular dynamics code
KDDecomposition.h
1#ifndef KDDECOMPOSITION2_H_
2#define KDDECOMPOSITION2_H_
3
4#include <mpi.h>
5#include <algorithm>
6#include <list>
7#include <vector>
8#include <limits>
9
10#define KDDIM 3
11
12#include "DomainDecompMPIBase.h"
13#include "parallel/CollectiveCommunication.h"
14#include "ParticleDataForwardDeclaration.h"
15#include "particleContainer/adapter/CellProcessor.h"
16#include "parallel/LoadCalc.h"
17
18class KDNode;
19
20
38
39 friend class KDDecompositionTest;
40
41 public:
55 KDDecomposition(double cutoffRadius, int numParticleTypes, int updateFrequency = 100, int fullSearchThreshold = 2);
56
57 void init(Domain* domain);
58
59 ~~KDDecomposition() override;
60
145 virtual void readXML(XMLfileUnits& xmlconfig) override;
146
147 // documentation in base class
148 virtual void prepareNonBlockingStage(bool forceRebalancing,
149 ParticleContainer* moleculeContainer, Domain* domain,
150 unsigned int stageNumber) override;
151
152 // documentation in base class
153 virtual void finishNonBlockingStage(bool forceRebalancing,
154 ParticleContainer* moleculeContainer, Domain* domain,
155 unsigned int stageNumber) override;
156
157 // documentation in base class
158 bool queryBalanceAndExchangeNonBlocking(bool forceRebalancing, ParticleContainer* moleculeContainer, Domain* domain, double etime) override;
159
160 void balanceAndExchange(double lastTraversalTime, bool forceRebalancing, ParticleContainer* moleculeContainer, Domain* domain) override;
161
163 double getBoundingBoxMin(int dimension, Domain* domain) override;
165 double getBoundingBoxMax(int dimension, Domain* domain) override;
166
167 int getUpdateFrequency() const { return _frequency; }
168 void setUpdateFrequency(int frequency) { _frequency = frequency; }
169 std::vector<int> getNeighbourRanks() override;
170 std::vector<int> getNeighbourRanksFullShell() override;
171
172 std::vector<CommunicationPartner> getNeighboursFromHaloRegion(Domain* domain, const HaloRegion& haloRegion, double cutoff) override;
173
174 /*
175 * Create a TunerTimes object with measured values from the tuner
176 *
177 * Has to be an extra function since the CellProcessor is only created after the KDDecomposition was already constructed
178 */
179 void fillTimeVecs(CellProcessor **cellProc);
180
185 void printTree(std::ostream& ostream);
186
187private:
188 void constructNewTree(KDNode *& newRoot, KDNode *& newOwnLeaf, ParticleContainer* moleculeContainer);
196 bool migrateParticles(const KDNode& newRoot, const KDNode& newOwnLeaf, ParticleContainer* moleculeContainer, Domain* domain);
197 void initCommunicationPartners(double cutoffRadius, Domain * domain, ParticleContainer* moleculeContainer);
198
199
200 //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
201 //$ sonstige Methoden
202 //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
203
204 void calculateCostsPar(KDNode* area, std::vector<std::vector<double> >& costsLeft, std::vector<std::vector<double> >& costsRight, MPI_Comm commGroup);
205
206
220 unsigned int getGlobalIndex(int divDim, int dim1, int dim2, int index_dim, int index_dim1, int index_dim2, KDNode* localArea);
221
228 int ownMod(int number, int modulo) const;
229
230 void getCellBorderFromIntCoords(double * lC, double * hC, int lo[3], int hi[3]) const;
231
232 void getCellIntCoordsFromRegionPeriodic(int * lo, int * hi, const double lC[3], const double hC[3], const Domain* domain) const;
233
242 void completeTreeInfo(KDNode*& root, KDNode*& ownArea);
243
244
263 void getOwningProcs(int low[KDDIM], int high[KDDIM], KDNode* decompTree, KDNode* testNode, std::vector<int>* procIDs, std::vector<int>* neighbHaloAreas) const;
264
265
268 void calcNumParticlesPerCell(ParticleContainer* moleculeContainer);
269
270 bool decompose(KDNode* fatherNode, KDNode*& ownArea, MPI_Comm commGroup);
271
272 bool decompose(KDNode* fatherNode, KDNode*& ownArea, MPI_Comm commGroup, double globalMinimalDeviation);
273
283 bool heteroDecompose(KDNode* fatherNode, KDNode*& ownArea, MPI_Comm commGroup);
284
291 bool calculateAllPossibleSubdivisions(KDNode* node, std::list<KDNode*>& subdividedNodes, MPI_Comm commGroup);
292
299 bool calculateHeteroSubdivision(KDNode* node, KDNode*& subdivededNode, MPI_Comm commGroup);
300
301 void updateMeanProcessorSpeeds(std::vector<double>& processorSpeeds,
302 std::vector<double>& accumulatedProcessorSpeeds, ParticleContainer* moleculeContainer);
303
309 void collectMoleculesInRegion(ParticleContainer* moleculeContainer, const double startRegion[3], const double endRegion[3], std::vector<Molecule*>& mols) const;
310
311 /*
312 * Determines the partition rank that is needed for the "cluster" heterogeneous decomposition
313 */
314 int calculatePartitionRank();
315 bool checkNeedRebalance(double lastTraversalTime) const;
316
317 //######################################
318 //### private member variables ###
319 //######################################
322 double _cellSize[KDDIM]{};
323
326 int _globalCellsPerDim[KDDIM]{};
327
329 int _globalNumCells{};
330
331 // Pointer to the root element of the decomposition tree
332 KDNode* _decompTree{nullptr};
333
334 // each process owns an area in the decomposition
335 KDNode* _ownArea{nullptr};
336
338 std::vector<unsigned int> _numParticlesPerCell;
339
340 /* TODO: This may not be equal to the number simulation steps if balanceAndExchange
341 * is not called exactly once in every simulation step! */
343 size_t _steps{0ul};
344
346 int _frequency;
347
348 double _cutoffRadius;
349 int _cellsInCutoffRadius{1};
350
351 /*
352 * Threshold for full tree search. If a node has more than _fullSearchThreshold processors,
353 * it is for each dimension divided in the middle only. Otherwise, all possible subdivisions
354 * are created. // TODO hetero: change description
355 */
356 int _fullSearchThreshold;
357
358
359 std::vector<double> _processorSpeeds;
360 std::vector<double> _accumulatedProcessorSpeeds;//length nprocs+1, first element is 0.
361 double _totalMeanProcessorSpeed{1.};
362 double _totalProcessorSpeed{1.};
363 int _processorSpeedUpdateCount{0};
364 bool _heterogeneousSystems{false};
365 bool _clusteredHeterogeneouseSystems{false};
366 bool _splitBiggest{true}; // indicates, whether a subdomain is to be split along its biggest size
367 bool _forceRatio{false}; // if you want to enable forcing the above ratio, enable this.
368
369 bool _doMeasureLoadCalc {false}; // specifies if measureLoad should be used.
370 int _measureLoadInterpolationStartsAt{1}; // specifies at which number of particles per cell measureLoad should start using interpolation.
371 bool _measureLoadIncreasingTimeValues{true}; // specifies if the time values should be increasing if the number of particles increases.
372
377 int _splitThreshold{std::numeric_limits<int>::max()};
378 int _numParticleTypes;
379
380 /*
381 * Stores the maximum number of particles encountered in a cell by the processor (per particle type).
382 * This information is max-reduced in the destructor to print the overall maximum number of particles (per type).
383 *
384 * This is can be useful for the vectorization tuner to know how many values should be measured
385 * and comes at the cost of only one additional conditional move per cell per load balancing.
386 */
387 int _maxPars{std::numeric_limits<int>::min()};
388 int _maxPars2{std::numeric_limits<int>::min()};
389
390 /*
391 * The partition rank that is used in the "clustered" heterogeneous decomposition to determine
392 * the mpi ranks associated with each cluster.
393 *
394 * Cluster 1: (0; partitionRank(
395 * Cluster 2: (partitionRank; maxRank)
396 */
397 const int _partitionRank;
398 LoadCalc* _loadCalc; // stores the times (and constants) measured by the vectorization tuner
399 MeasureLoad* _measureLoadCalc; // stores the measured times of the real-world simulations
400
401 /*
402 * The following Variables are only used for as parameters for the Vectorization tuner constructor.
403 * They are stored here since the tuner is still partly a optional OutputPlugin which doesn't
404 * really fit to its use now as essential part of the load balancing of the k-d-tree.
405 *
406 * For documentation see public tune function of the vectorization tuner
407 *
408 * TODO These should probably be transferred into the tuner at some point,
409 * when a decision is made whether the tuner should still be a Outputplugin or simply be a part of the
410 * KDDecomposition (or both)
411 */
412 std::vector<int> _vecTunParticleNums;
413 bool _generateNewFiles{true};
414 bool _useExistingFiles{true};
415 bool _vecTunerAllowMPIReduce{true};
416
417
418 double _rebalanceLimit{0.};
419
425 MPI_Op _deviationReductionOperation{MPI_SUM};
426};
427
428
429#endif /* KDDECOMPOSITION2_H_ */
Definition: CellProcessor.h:29
Definition: DomainDecompMPIBase.h:30
This class is used to read in the phasespace and to handle macroscopic values.
Definition: Domain.h:47
KD tree based domain decomposition for better load balancing.
Definition: KDDecomposition.h:37
double getBoundingBoxMin(int dimension, Domain *domain) override
Definition: KDDecomposition.cpp:674
bool queryBalanceAndExchangeNonBlocking(bool forceRebalancing, ParticleContainer *moleculeContainer, Domain *domain, double etime) override
Checks whether the balance and exchange step can be performed non-blocking.
Definition: KDDecomposition.cpp:235
virtual void readXML(XMLfileUnits &xmlconfig) override
Read in XML configuration for KDDecomposition and all its included objects.
Definition: KDDecomposition.cpp:101
void balanceAndExchange(double lastTraversalTime, bool forceRebalancing, ParticleContainer *moleculeContainer, Domain *domain) override
balance the load (and optimize communication) and exchange boundary particles
Definition: KDDecomposition.cpp:261
void printTree(std::ostream &ostream)
Definition: KDDecomposition.cpp:1856
double getBoundingBoxMax(int dimension, Domain *domain) override
Definition: KDDecomposition.cpp:688
virtual void prepareNonBlockingStage(bool forceRebalancing, ParticleContainer *moleculeContainer, Domain *domain, unsigned int stageNumber) override
Definition: KDDecomposition.cpp:206
KDDecomposition(double cutoffRadius, int numParticleTypes, int updateFrequency=100, int fullSearchThreshold=2)
create an initial decomposition tree
Definition: KDDecomposition.cpp:34
virtual void finishNonBlockingStage(bool forceRebalancing, ParticleContainer *moleculeContainer, Domain *domain, unsigned int stageNumber) override
Definition: KDDecomposition.cpp:218
represents a node in the decomposition tree when using KDDecomposition
Definition: KDNode.h:28
Definition: LoadCalc.h:24
Definition: LoadCalc.h:227
This Interface is used to get access to particles and pairs of particles.
Definition: ParticleContainer.h:69
XML file with unit attributes abstraction.
Definition: xmlfileUnits.h:25
Definition: HaloRegion.h:10