ls1-MarDyn
ls1-MarDyn molecular dynamics code
FlopCounter.h
Go to the documentation of this file.
1
40#ifndef FLOPCOUNTER_H_
41#define FLOPCOUNTER_H_
42
43#include "CellProcessor.h"
44#include <memory>
45#include <vector>
46#include <stddef.h>
47#include <string>
48#include <sstream>
49
50#include "molecules/MoleculeForwardDeclaration.h"
51
52class CellDataSoA;
53class CellDataSoARMM;
54
61public:
// Set up the counter. Takes the general cutoff radius and the Lennard-Jones
// cutoff radius; how they are used is decided in the out-of-line definition.
70 FlopCounter(double cutoffRadius, double LJcutoffRadius);
71
76
// Initializes the internal counters (definition is not part of this header).
80 void initTraversal();
81
86
87
88 double processSingleMolecule(Molecule* /*m1*/, ParticleCell& /*cell2*/) { return 0.0; } // why 0.0 flops???
89
// Count flops for this cell (definition is not part of this header).
93 void processCell(ParticleCell& cell);
94
// Two-cell variant; presumably counts flops for interactions between c1 and c2.
// NOTE(review): the effect of sumAll (default false) is not visible here - confirm in the definition.
95 void processCellPair(ParticleCell& c1, ParticleCell& c2, bool sumAll = false);
96
101
// Print results (definition is not part of this header).
105 void endTraversal();
106
107 double getTotalFlopCount() const {
108 return _totalFlopCount;
109 }
110
111 double getTotalMoleculeDistanceFlopCount() const {
112 return _currentCounts.getMoleculeDistanceFlops();
113 }
114
115 void resetCounters() {
116 _currentCounts.clear();
117// _totalCounts.clear();
118 _totalFlopCount = 0.;
119 _myFlopCount = 0.;
120 }
121
122 double getMyFlopCount() const {
123 return _myFlopCount;
124 }
125
// Presumably prints the collected statistics; const, so it cannot modify the
// counter's non-mutable state. Definition is not part of this header.
126 void printStats() const;
127
128private:
// Pair routine over two read-only SoA cell buffers, parameterised at compile
// time by a force policy type and a flag for macroscopic values.
// NOTE(review): body is defined elsewhere - confirm what exactly is counted.
129 template<class ForcePolicy, bool CalculateMacroscopic>
130 void _calculatePairs(const CellDataSoA & soa1, const CellDataSoA & soa2);
// Same interface for the CellDataSoARMM buffer type.
131 template<class ForcePolicy, bool CalculateMacroscopic>
132 void _calculatePairs(const CellDataSoARMM & soa1, const CellDataSoARMM & soa2);
133
// Handles one molecule pair (Mi, Mj), both read-only. addMacro defaults to true;
// presumably it decides whether macro calls are counted too - TODO confirm in the definition.
134 void handlePair(const Molecule& Mi, const Molecule& Mj,
135 bool addMacro = true);
136
137 // used for indices within an array!
138 enum PotentialIndices {
139 I_LJ = 0,
140 I_CHARGE,
141 I_CHARGE_DIPOLE,
142 I_DIPOLE,
143 I_CHARGE_QUADRUPOLE,
144 I_DIPOLE_QUADRUPOLE,
145 I_QUADRUPOLE,
146 NUM_POTENTIALS
147 };
148
/**
 * Call counters and flop multipliers for one potential type.
 *
 * Two counters are kept (kernel calls and macro calls). Flop figures are
 * obtained by multiplying those counters with the integer multipliers that
 * init() stores.
 */
class _PotentialCounts {
public:
    /**
     * Starts with all counters and multipliers at zero and an empty name,
     * so an entry that was never init()-ed reports zero flops instead of
     * reading indeterminate memory.
     */
    _PotentialCounts() :
            _numKernelCalls(0.0), _numMacroCalls(0.0),
            _kernelMultiplier(0), _macroMultiplier(0),
            _sumForceTorqueMultiplier(0), _sumMacroMultiplier(0),
            _name() {
    }

    /**
     * Names the potential, resets both counters and stores the multipliers.
     * @param n    name used by printNameKernelAndMacroCalls()
     * @param kM   multiplier applied to kernel calls
     * @param mM   multiplier applied to macro calls
     * @param sFTM multiplier applied to kernel calls in getForceTorqueSums()
     * @param sMM  multiplier applied to macro calls in getMacroValueSums()
     */
    void init(const std::string& n, int kM, int mM, int sFTM, int sMM) {
        _name = n;
        clear();
        _kernelMultiplier = kM;
        _macroMultiplier = mM;
        _sumForceTorqueMultiplier = sFTM;
        _sumMacroMultiplier = sMM;
    }

    /** Resets both call counters; multipliers and name are left untouched. */
    void clear() {
        _numKernelCalls = 0.0;
        _numMacroCalls = 0.0;
    }

    /** Adds the call counters of @p pc to this object's counters. */
    void addPotentialCounts(const _PotentialCounts& pc) {
        _numKernelCalls += pc._numKernelCalls;
        _numMacroCalls += pc._numMacroCalls;
    }

    /**
     * Adds @p valueBoth to the kernel-call counter and, only when
     * @p addMacro is true, to the macro-call counter as well.
     */
    void addKernelAndMacro(double valueBoth, bool addMacro) {
        _numKernelCalls += valueBoth;
        if (addMacro)
            _numMacroCalls += valueBoth;
    }

    // Declared here, defined elsewhere.
    void collCommAppend();
    void collCommGet();

    /** @return kernel calls * kernel multiplier + macro calls * macro multiplier. */
    double getKernelAndMacroFlops() const {
        return _numKernelCalls * _kernelMultiplier + _numMacroCalls * _macroMultiplier;
    }

    /** @return kernel calls * force/torque-sum multiplier. */
    double getForceTorqueSums() const {
        return _numKernelCalls * _sumForceTorqueMultiplier;
    }

    /** @return macro calls * macro-sum multiplier. */
    double getMacroValueSums() const {
        return _numMacroCalls * _sumMacroMultiplier;
    }

    /**
     * @return one text line with the name and both call counters, or an
     *         empty string when no kernel call was recorded.
     */
    std::string printNameKernelAndMacroCalls() const {
        std::ostringstream ostr;

        if (_numKernelCalls == 0) { return ostr.str(); } // potential is very likely not present

        ostr << " " << _name
             << ": kernel calls: " << _numKernelCalls
             << " macro calls: " << _numMacroCalls
             << '\n';
        return ostr.str();
    }

    /* Fields */
    double _numKernelCalls;
    double _numMacroCalls;

    /* Multipliers */
    int _kernelMultiplier;
    int _macroMultiplier;
    int _sumForceTorqueMultiplier;
    int _sumMacroMultiplier;

    /* name */
    std::string _name;
};
208
209 class _Counts {
210 public:
211 _Counts();
212 void clear() {
213 _moleculeDistances = 0;
214
215 for (int i = 0; i < NUM_POTENTIALS; ++i) {
216 _potCounts[i].clear();
217 }
218 }
219 void addCounts(const _Counts& c) {
220 _moleculeDistances += c._moleculeDistances;
221
222 for (int i = 0; i < NUM_POTENTIALS; ++i) {
223 _potCounts[i].addPotentialCounts(c._potCounts[i]);
224 }
225 }
226 void allReduce();
227 double sumKernelCalls() const {
228 double ret = 0.;
229 for (int i = 0; i < NUM_POTENTIALS; ++i) {
230 ret += _potCounts[i]._numKernelCalls;
231 }
232 return ret;
233 }
234 double sumMacros() const {
235 double ret = 0.;
236 for (int i = 0; i < NUM_POTENTIALS; ++i) {
237 ret += _potCounts[i]._numMacroCalls;
238 }
239 return ret;
240 }
241 void print() const;
242
243 double getMoleculeDistanceFlops() const {
244 return _moleculeDistances * _distanceMultiplier;
245 }
246 double getCenterDistanceFlops() const {
247 return sumKernelCalls() * _distanceMultiplier;
248 }
249 double getForceTorqueSumFlops() const {
250 double ret = 0.;
251 for (int i = 0; i < NUM_POTENTIALS; ++i) {
252 ret += _potCounts[i].getForceTorqueSums();
253 }
254 return ret;
255 }
256 double getMacroValueSumFlops() const {
257 double ret = 0.;
258 for (int i = 0; i < NUM_POTENTIALS; ++i) {
259 ret += _potCounts[i].getMacroValueSums();
260 }
261 return ret;
262 }
263 double getTotalFlops() const {
264 double ret = getMoleculeDistanceFlops() + getCenterDistanceFlops();
265 for (int i = 0; i < NUM_POTENTIALS; ++i) {
266 ret += _potCounts[i].getKernelAndMacroFlops();
267 }
268 ret += getForceTorqueSumFlops() + getMacroValueSumFlops();
269 return ret;
270 }
271 void addKernelAndMacro(PotentialIndices i, double valueBoth, bool addMacro) {
272 _potCounts[i].addKernelAndMacro(valueBoth, addMacro);
273 }
274 void initPotCounter(PotentialIndices i, const std::string& n, int kM, int mM, int sFTM, int sMM) {
275 _potCounts[i].init(n, kM, mM, sFTM, sMM);
276 }
277
278 /* Fields which are summed explicitly */
279 double _moleculeDistances;
280 int _distanceMultiplier;
281
282 _PotentialCounts _potCounts[NUM_POTENTIALS];
283 };
284
// Counter objects held by pointer, presumably one per thread (judging by the name);
// who allocates and frees them is not visible in this header.
285 std::vector<_Counts *> _threadData;
286
// Counts read by getTotalMoleculeDistanceFlopCount() and cleared by resetCounters().
287 _Counts _currentCounts;
288// _Counts _totalCounts; TODO: is this needed?
// Value returned by getTotalFlopCount(); zeroed by resetCounters().
289 double _totalFlopCount;
// Value returned by getMyFlopCount(); zeroed by resetCounters().
290 double _myFlopCount;
// Not read or written anywhere in this header.
// NOTE(review): confirm its meaning against FlopCounter.cpp.
291 bool _synchronized;
292};
293
294#endif /* FLOPCOUNTER_H_ */
Structure of Arrays for single-center Lennard-Jones molecules for the RMM run.
Definition: CellDataSoARMM.h:16
Structure of Arrays for vectorized force calculation.
Definition: CellDataSoA.h:22
Definition: CellProcessor.h:29
Definition: FlopCounter.h:60
~FlopCounter()
Definition: FlopCounter.cpp:143
void initTraversal()
Initializes the internal counters.
Definition: FlopCounter.cpp:154
void processCell(ParticleCell &cell)
Count flops for this cell.
Definition: FlopCounter.cpp:207
FlopCounter(double cutoffRadius, double LJcutoffRadius)
Set up the counter.
Definition: FlopCounter.cpp:124
void endTraversal()
Print results.
Definition: FlopCounter.cpp:163
void processCellPair(ParticleCell &c1, ParticleCell &c2, bool sumAll=false)
Definition: FlopCounter.cpp:227
void preprocessCell(ParticleCell &cell)
Only pass through to child.
Definition: FlopCounter.h:85
void postprocessCell(ParticleCell &cell)
Only pass through to child.
Definition: FlopCounter.h:100
FullMolecule modeled as LJ sphere with point polarities.
Definition: FullMolecule.h:18
FullParticleCell data structure. Renamed from ParticleCell.
Definition: FullParticleCell.h:49
::xsd::cxx::tree::string< char, simple_type > string
C++ type corresponding to the string XML Schema built-in type.
Definition: vtk-punstructured.h:270