/*
 * HaloBufferOverlap.h
 *
 *  Created on: Apr 26, 2016
 *      Author: obi
 */

#ifndef HALOBUFFEROVERLAP_H_
#define HALOBUFFEROVERLAP_H_
#include "bhfmm/utils/Vector3.h"
#include <math.h>
#include <vector>
#ifdef ENABLE_MPI

#include "mpi.h"

namespace bhfmm{
template <class T>
class HaloBufferOverlap {
public:
    HaloBufferOverlap(Vector3<int> areaHaloSize, Vector3<int> edgeHaloSize,
            int cornerHaloSize, MPI_Comm comm, std::vector<int>& areaNeighbours, std::vector<int>& edgeNeighbours, std::vector<int>& cornerNeighbours, bool isSend, bool doNT,
            int areaNumber = 6, int edgeNumber = 12, int cornerNumber = 8, std::vector<std::vector<std::vector<int>>> allRanks = std::vector<std::vector<std::vector<int>>>(0), Vector3<int> numCellsOnGlobalLevel = Vector3<int>(1), bool fuseGlobalCommunication = false);
    virtual ~HaloBufferOverlap();
    void startCommunication();
    // communicate without persistent sends and receives
    void communicate(bool postProcessing);
    void communicateGlobalLevels(int globalLevel, int stopLevel = 1, bool backCommunication = false);

    void wait();
    int testIfFinished();
    std::vector<T *>& getAreaBuffers(){
        return _areaBuffers;
    }
    std::vector<T *>& getEdgeBuffers(){
        return _edgeBuffers;
    }
    std::vector<T *>& getCornerBuffers(){
        return _cornerBuffers;
    }

    void setNumberOfGlobalLevelsInBuffer(int number){
        _globalLevelsInBuffer = number;
    }

    void clear();
private:
    void communicateLevelGlobal(int level, int globalLevel, int offset, bool backCommunication);
    void initCommunicationDouble();
    void fillArraySizes(Vector3<int> areaSizes, Vector3<int> edgeSizes);
    std::vector<T *> _areaBuffers, _edgeBuffers, _cornerBuffers; // arrays for the MPI halo transfer (send)
    Vector3<int> _areaHaloSize, _edgeHaloSize;
    int _cornerHaloSize;
    // these arrays store the specific size of every halo element; if the neighbour rank order is changed, they have to be adapted as well (but nothing else)
    std::vector<int> _areaHaloSizes, _edgeHaloSizes; // corner buffers are always of the same size!
    std::vector<int> _areaNeighbours, _edgeNeighbours, _cornerNeighbours;
    MPI_Request *_areaRequests, *_edgeRequests, *_cornerRequests;
    MPI_Comm _comm;
    bool _isSend;
    bool _doNT;
    bool _isGlobal;
    bool _importWholeGlobalRegion;
    std::vector<std::vector<std::vector<int>>> _allRanks;
    Vector3<int> _numCellsOnGlobalLevel;
    int _globalLevelsInBuffer;
    int _offsetFactor;
    // if this flag is set, only 26 communications are performed instead of 216, and every processor sends 8 cell values instead of 1
    bool _fuseGlobalCommunication;
};
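
/*
 * Minimal usage sketch (illustrative only; it is not taken from ls1-MarDyn, and
 * "comm", the buffer sizes and the neighbour rank lists below are placeholder
 * assumptions -- the real call order in the FMM code may differ):
 *
 *   std::vector<int> areaNb(6), edgeNb(12), cornerNb(8);     // ranks of the halo neighbours
 *   Vector3<int> areaSize(100), edgeSize(10);                // per-direction buffer sizes
 *   HaloBufferOverlap<double> recvBuf(areaSize, edgeSize, 1, comm,
 *           areaNb, edgeNb, cornerNb, false, false);         // receive side
 *   HaloBufferOverlap<double> sendBuf(areaSize, edgeSize, 1, comm,
 *           areaNb, edgeNb, cornerNb, true,  false);         // send side
 *
 *   recvBuf.communicate(false);          // post the non-blocking receives first
 *   // ... fill sendBuf.getAreaBuffers() / getEdgeBuffers() / getCornerBuffers() ...
 *   sendBuf.communicate(false);          // ready-mode sends assume the matching receives are posted
 *   while (!sendBuf.testIfFinished() or !recvBuf.testIfFinished()) {
 *       // overlap the communication with other work here
 *   }
 *   sendBuf.clear(); recvBuf.clear();    // reset the buffers for the next iteration
 */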

#include <algorithm>


template <class T>
HaloBufferOverlap<T>::HaloBufferOverlap(Vector3<int> areaHaloSize, Vector3<int> edgeHaloSize,
        int cornerHaloSize, MPI_Comm comm, std::vector<int>& areaNeighbours, std::vector<int>& edgeNeighbours, std::vector<int>& cornerNeighbours, bool isSend, bool doNT, int areaNumber, int edgeNumber, int cornerNumber, std::vector<std::vector<std::vector<int>>> allRanks, Vector3<int> numCellsOnGlobalLevel, bool fuseGlobalCommunication):
    _areaBuffers(areaNumber), _edgeBuffers(edgeNumber), _cornerBuffers(cornerNumber), _areaHaloSize(areaHaloSize), _edgeHaloSize(edgeHaloSize), _areaNeighbours(areaNeighbours), _edgeNeighbours(edgeNeighbours), _cornerNeighbours(cornerNeighbours), _doNT(doNT), _allRanks(allRanks), _numCellsOnGlobalLevel(numCellsOnGlobalLevel), _fuseGlobalCommunication(fuseGlobalCommunication) {

    _cornerHaloSize = cornerHaloSize;
    if(edgeNumber == 0){
        _isGlobal = true;
        if(numCellsOnGlobalLevel[0] > 1 or numCellsOnGlobalLevel[1] > 1 or numCellsOnGlobalLevel[2] > 1){
            _importWholeGlobalRegion = true;
        }
        else{
            _importWholeGlobalRegion = false;
        }
    }
    else{
        _isGlobal = false;
    }

    if(areaNumber != 0){
        _areaRequests = new MPI_Request[_areaBuffers.size()];
//      std::cout << "areaBufferSize: " << _areaBuffers.size() << "\n";
    }
    if(edgeNumber != 0){
        _edgeRequests = new MPI_Request[_edgeBuffers.size()];
    }
    if(cornerNumber != 0){
        _cornerRequests = new MPI_Request[_cornerBuffers.size()];
    }

    fillArraySizes(areaHaloSize, edgeHaloSize);

//  _cornerHaloSizes = new int[_cornerBuffers.size()];
    _comm = comm;
    if(areaNumber != 0){
        for(unsigned int i = 0; i < _areaBuffers.size(); i++){
            if(!_isGlobal){
                _areaBuffers[i] = new T[_areaHaloSizes[i]];
            }
            else{
                _areaBuffers[i] = new T[_areaHaloSizes[0]];
            }
        }
    }

    if(edgeNumber != 0){
        for(unsigned int i = 0; i < _edgeBuffers.size(); i++){
            _edgeBuffers[i] = new T[_edgeHaloSizes[i]];
        }
    }
    if(cornerNumber != 0){
        for(unsigned int i = 0; i < _cornerBuffers.size(); i++){
            _cornerBuffers[i] = new T[cornerHaloSize];
        }
    }
    _isSend = isSend;
    clear();
    //initCommunicationDouble();
    if(_isGlobal){
        if(_importWholeGlobalRegion and not _fuseGlobalCommunication){
            if(_doNT){
                _offsetFactor = 25;
                if(_numCellsOnGlobalLevel[1] == 2){
                    _offsetFactor += 19;
                }
                if(_numCellsOnGlobalLevel[0] == 2){
                    _offsetFactor += 5 - _numCellsOnGlobalLevel[1] + 1;
                }
                if(_numCellsOnGlobalLevel[2] == 2){
                    _offsetFactor += 5 - _numCellsOnGlobalLevel[1] + 1;
                }
            }
            else{
                _offsetFactor = (_fuseGlobalCommunication)? 26 : 216;
            }
        }
        else{
            if(_doNT){
                _offsetFactor = (_fuseGlobalCommunication)? 7 : 25;
            }
            else{
                _offsetFactor = (_fuseGlobalCommunication)? 26 : 189;
            }
        }
    }
    else{
        _offsetFactor = 0;
    }
}
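
// Note on the constants above (derived from the loops in communicateLevelGlobal
// below, i.e. an interpretation rather than an authoritative specification):
// 216 = 6^3 is the full 6x6x6 global-level region imported when a rank owns more
// than one cell on the global level, 189 = 216 - 27 is the classic FMM
// interaction-list size (the own 3x3x3 block is excluded), 26 = 3^3 - 1 is the
// fused variant with one message per neighbour, and 25 (plus corrections when a
// rank owns two cells per dimension) respectively 7 are the counts of the _doNT
// variants. _offsetFactor is the number of requests posted per level; see the
// consistency check at the end of communicateLevelGlobal.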

template <class T>
void HaloBufferOverlap<T>::fillArraySizes(Vector3<int> areaSizes, Vector3<int> edgeSizes){
    if(!_isGlobal){
        _areaHaloSizes.resize(_areaBuffers.size());
        for(unsigned int i = 0; i < _areaBuffers.size(); i++){
            _areaHaloSizes[i] = areaSizes[i/2];
        }
    }
    else{
        _areaHaloSizes.resize(1);
        _areaHaloSizes[0] = areaSizes[0];
    }
    if(!_isGlobal){
        _edgeHaloSizes.resize(_edgeBuffers.size());
        for(unsigned int i = 0; i < _edgeBuffers.size(); i++){
            _edgeHaloSizes[i] = edgeSizes[2 - i/4];
        }
    }
    else{
//      _edgeHaloSizes = new int[_edgeBuffers.size()];
//      for(unsigned int i = 0; i < _edgeBuffers.size(); i++){
//          _edgeHaloSizes[i] = edgeSizes[0];
//      }
    }
}
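
/*
 * Size lookup performed above in the local (non-global) case, spelled out for
 * reference; the pairing of buffers with coordinate directions follows from the
 * index arithmetic only and is an assumption, not an authoritative mapping:
 *   area buffers 0,1 -> areaSizes[0],   2,3 -> areaSizes[1],   4,5  -> areaSizes[2]
 *   edge buffers 0..3 -> edgeSizes[2],  4..7 -> edgeSizes[1],  8..11 -> edgeSizes[0]
 * In the global case only a single area size is stored, since all global-level
 * buffers have the same size.
 */
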
template <class T>
HaloBufferOverlap<T>::~HaloBufferOverlap() {

    for(unsigned int i = 0; i < _areaBuffers.size(); i++){
//      MPI_Request_free(&_areaRequests[i]);
        delete[] (_areaBuffers[i]);
    }
    if(!_isGlobal){

        for(unsigned int i = 0; i < _edgeBuffers.size(); i++){
//          MPI_Request_free(&_edgeRequests[i]);
            delete[] _edgeBuffers[i];
        }
        for(unsigned int i = 0; i < _cornerBuffers.size(); i++){
//          MPI_Request_free(&_cornerRequests[i]);
            delete[] _cornerBuffers[i];
        }
    }
}

template <class T>
void HaloBufferOverlap<T>::clear(){
    for(unsigned int i = 0; i < _areaBuffers.size(); i++){
        if(!_isGlobal){
            std::fill(_areaBuffers[i], _areaBuffers[i] + _areaHaloSizes[i], 0.0);
        }
        else{
            std::fill(_areaBuffers[i], _areaBuffers[i] + _areaHaloSizes[0], 0.0);
        }
    }
    for(unsigned int i = 0; i < _edgeBuffers.size(); i++){
        std::fill(_edgeBuffers[i], _edgeBuffers[i] + _edgeHaloSizes[i], 0.0);
    }
    for(unsigned int i = 0; i < _cornerBuffers.size(); i++){
        std::fill(_cornerBuffers[i], _cornerBuffers[i] + _cornerHaloSize, 0.0);
    }
}

// we assume here that the neighbour arrays are sorted such that a site and its opposite site always alternate in the array
template <class T>
void HaloBufferOverlap<T>::initCommunicationDouble(){
    for (unsigned int i = 0; i < _areaBuffers.size(); i++){
        if(_isSend){
            MPI_Rsend_init(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42, _comm, &_areaRequests[i]);
        }
        else{
            // adjust the receive tag so that it matches the corresponding send
            int indexShift = (i%2 == 0)? +1 : -1;
            MPI_Recv_init(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42 + indexShift, _comm, &_areaRequests[i]);
        }
    }
    for (unsigned int i = 0; i < _edgeBuffers.size(); i++){
        if(_isSend){
            MPI_Rsend_init(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42, _comm, &_edgeRequests[i]);
        }
        else{
            int indexShift = (i%2 == 0)? +1 : -1;
            MPI_Recv_init(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42 + indexShift, _comm, &_edgeRequests[i]);
        }
    }
    for (unsigned int i = 0; i < _cornerBuffers.size(); i++){
        if(_isSend){
            MPI_Rsend_init(_cornerBuffers[i], _cornerHaloSize, MPI_DOUBLE, _cornerNeighbours[i], i + 42, _comm, &_cornerRequests[i]);
        }
        else{
            int indexShift = (i%2 == 0)? +1 : -1;
            MPI_Recv_init(_cornerBuffers[i], _cornerHaloSize, MPI_DOUBLE, _cornerNeighbours[i], i + 42 + indexShift, _comm, &_cornerRequests[i]);
        }
    }
}
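
/*
 * Example of the tag matching above (illustrative; the "low x"/"high x" naming is
 * only an assumption, the code merely relies on opposite sites alternating):
 * if buffer 0 is the "low x" halo and buffer 1 the "high x" halo, a send from
 * buffer 0 uses tag 0 + 42 = 42, and on the neighbouring rank the data arrives in
 * buffer 1, whose receive was posted with tag 1 + 42 - 1 = 42, so the tags match.
 * Likewise a send from buffer 1 (tag 43) is matched by the receive for buffer 0
 * (tag 0 + 42 + 1 = 43).
 */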

template <class T>
void HaloBufferOverlap<T>::communicate(bool postProcessing){
    int requestIndex = 0;
    for (unsigned int i = 0; i < _areaBuffers.size(); i++){
        if(_isSend){
            if(_doNT){
                if(postProcessing){
                    if(i == 0 or i == 4){
                        continue;
                    }
                }
                else{
                    if(i == 1 or i == 5){
                        continue;
                    }
                }
            }
            if(postProcessing)
                MPI_Isend(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42, _comm, &_areaRequests[requestIndex]);
            else
                MPI_Irsend(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42, _comm, &_areaRequests[requestIndex]);

            requestIndex++;
        }
        else{
            if(_doNT){
                if(postProcessing){
                    if(i == 1 or i == 5){
                        continue;
                    }
                }
                else{
                    if(i == 0 or i == 4){
                        continue;
                    }
                }
            }
            // adjust the receive tag so that it matches the corresponding send
            int indexShift = (i%2 == 0)? +1 : -1;
            MPI_Irecv(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42 + indexShift, _comm, &_areaRequests[requestIndex]);
            requestIndex++;
        }
    }
    requestIndex = 0;
    for (unsigned int i = 0; i < _edgeBuffers.size(); i++){
        if(_isSend){
            if(_doNT){
                if(not(postProcessing)){
                    if(not(i == 4 or i == 6)){
                        continue;
                    }
                }
                else{
                    if(not(i == 5 or i == 7)){
                        continue;
                    }
                }
            }
            if(postProcessing)
                MPI_Isend(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42, _comm, &_edgeRequests[requestIndex]);
            else
                MPI_Irsend(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42, _comm, &_edgeRequests[requestIndex]);
            requestIndex++;
        }
        else{
            if(_doNT){
                if(not(postProcessing)){
                    if(not(i == 5 or i == 7)){
                        continue;
                    }
                }
                else{
                    if(not(i == 4 or i == 6)){
                        continue;
                    }
                }
            }
            int indexShift = (i%2 == 0)? +1 : -1;
            MPI_Irecv(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42 + indexShift, _comm, &_edgeRequests[requestIndex]);
            requestIndex++;
        }
    }
    requestIndex = 0;
    if(not(_doNT)){
        for (unsigned int i = 0; i < _cornerBuffers.size(); i++){
            if(_isSend){
                MPI_Irsend(_cornerBuffers[i], _cornerHaloSize, MPI_DOUBLE, _cornerNeighbours[i], i + 42, _comm, &_cornerRequests[requestIndex]);
                requestIndex++;
            }
            else{
                int indexShift = (i%2 == 0)? +1 : -1;
                MPI_Irecv(_cornerBuffers[i], _cornerHaloSize, MPI_DOUBLE, _cornerNeighbours[i], i + 42 + indexShift, _comm, &_cornerRequests[requestIndex]);
                requestIndex++;
            }
        }
    }
}

template <class T>
void HaloBufferOverlap<T>::communicateGlobalLevels(int globalLevel, int stopLevel, bool backCommunication){
    int minimumLevel = (_fuseGlobalCommunication)? 2 : 1;
    stopLevel = (stopLevel < minimumLevel)? minimumLevel : stopLevel;
    for(int l = globalLevel; l >= stopLevel; l--){
        int offset;
        if(_doNT and not _fuseGlobalCommunication){
            offset = (globalLevel == l) ? 0 : _offsetFactor + (globalLevel - l - 1) * 25;
        }
        else{
            offset = _offsetFactor * (globalLevel - l);
        }
        communicateLevelGlobal(l, globalLevel, offset, backCommunication);
    }
}
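
/*
 * Offset bookkeeping (illustrative example): without _doNT or with fused
 * communication, every level contributes _offsetFactor requests, so level l
 * uses the request range starting at _offsetFactor * (globalLevel - l). With
 * _doNT and no fusing, the topmost level needs up to _offsetFactor requests
 * while every further level needs 25, hence the special case above. E.g. for
 * globalLevel = 3, stopLevel = 1 and _offsetFactor = 25 the offsets are
 * 0, 25 and 50 for levels 3, 2 and 1.
 */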

template <class T>
void HaloBufferOverlap<T>::communicateLevelGlobal(int level, int globalLevel, int offset, bool backCommunication){

    int stride = pow(2, globalLevel - level);
    int myRank;
    int coords[3];
    int coordsFloored[3];
    int coordsLevel[3];
    int coordsRemainder[3];
    MPI_Comm_rank(_comm, &myRank);
    int indexPosition = 0;
    int rank;
    MPI_Cart_coords(_comm, myRank, 3, coords);
    for(int d = 0; d < 3; d++){
        coordsFloored[d] = ((coords[d] * _numCellsOnGlobalLevel[d]) / (2 * stride)) * 2 * stride;
        coordsRemainder[d] = (coords[d] * _numCellsOnGlobalLevel[d]) % (stride);
        coordsLevel[d] = ((coords[d] * _numCellsOnGlobalLevel[d]) / (stride));
    }
    int start, end;
    if(_fuseGlobalCommunication){
        start = -1;
        end = 1;
    }
    else{
        start = -2;
        end = 3;
    }
    for(int x = start; x <= end; x++){
        for(int y = start; y <= end; y++){
            for(int z = start; z <= end; z++){
                bool condition;
                if(_doNT){ // allow only cells in the plate or the tower
                    int cellsX = _numCellsOnGlobalLevel[0];
                    int cellsY = _numCellsOnGlobalLevel[1];
                    int cellsZ = _numCellsOnGlobalLevel[2];

                    if((_isSend and !backCommunication) or (backCommunication and !_isSend)){ // reversed plate in this case
                        if(!_fuseGlobalCommunication){
                            condition = (floor((x * stride + coordsFloored[0] + coordsRemainder[0])/(1.0 * cellsX)) == coords[0] and floor((z * stride + coordsFloored[2] + coordsRemainder[2])/(1.0 * cellsZ)) == coords[2]) // tower
                                or (((y < 2 and y >= 0 and cellsY == 2 and level == globalLevel) or floor((y * stride + coordsFloored[1] + coordsRemainder[1])/(1.0 * cellsY)) == coords[1]) and x < 2 and not (x >= 0 and z >= 2)); // plate (reversed for send)
                        }
                        else{
                            condition = (x == 0 and z == 0) // tower
                                or (y == 0 and x < 1 and not (x == 0 and z == 1)); // plate (reversed for send)
                        }
                    }
                    else{
                        if(!_fuseGlobalCommunication){
                            condition = (floor((x * stride + coordsFloored[0] + coordsRemainder[0])/(1.0 * cellsX)) == coords[0] and floor((z * stride + coordsFloored[2] + coordsRemainder[2])/(1.0 * cellsZ)) == coords[2]) // tower
                                or (((y < 2 and y >= 0 and cellsY == 2 and level == globalLevel) or floor((y * stride + coordsFloored[1] + coordsRemainder[1])/(1.0 * cellsY)) == coords[1]) and x >= 0 and not (x < 2 and z < 0)); // plate
                        }
                        else{
                            condition = (x == 0 and z == 0) // tower
                                or (y == 0 and x >= 0 and not (x < 1 and z == -1)); // plate
                        }
                    }
//                  condition = condition and (x >= 2 or x < 0 or z >= 2 or z < 0 or y >= 2 or y < 0); // do not send within parent cell
                }
                else{
                    if(!_fuseGlobalCommunication){ // import all 189 (if the number of ranks is a power of 8) or 216 (if there are multiple cells on the global level) values
                        condition = _importWholeGlobalRegion or abs(x + (coordsFloored[0] - coords[0])/stride) >= 2 or abs(y + (coordsFloored[1] - coords[1])/stride) >= 2 or abs(z + (coordsFloored[2] - coords[2])/stride) >= 2;
                    }
                    else{ // import all cells from the 26 neighbours (each one sends 8 cells)
                        condition = x != 0 or y != 0 or z != 0;
                    }
                }
                if(condition){
                    int xIndex, yIndex, zIndex;
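                    // Reading of the index arithmetic below (added comment, an
                    // interpretation rather than an authoritative description):
                    // coordsFloored + x*stride + coordsRemainder is the target
                    // cell's coordinate in global-level cells; dividing by
                    // _numCellsOnGlobalLevel yields the Cartesian coordinate of
                    // the owning rank, and adding the grid extent before the
                    // modulo wraps negative coordinates periodically.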
                    if(!_fuseGlobalCommunication){
                        xIndex = (int) floor(((coordsFloored[0] + (x * stride + coordsRemainder[0]) * 1.0) / _numCellsOnGlobalLevel[0]) + _allRanks.size()) % _allRanks.size();
                        yIndex = (int) floor(((coordsFloored[1] + (y * stride + coordsRemainder[1]) * 1.0) / _numCellsOnGlobalLevel[1]) + _allRanks[0].size()) % _allRanks[0].size();
                        zIndex = (int) floor(((coordsFloored[2] + (z * stride + coordsRemainder[2]) * 1.0) / _numCellsOnGlobalLevel[2]) + _allRanks[0][0].size()) % _allRanks[0][0].size();
                    }
                    else{
                        int xLocal = coordsLevel[0] % 2;
                        int yLocal = coordsLevel[1] % 2;
                        int zLocal = coordsLevel[2] % 2;
                        xIndex = (int) floor(((coordsFloored[0] + ((2 * x + xLocal) * stride + coordsRemainder[0]) * 1.0) / _numCellsOnGlobalLevel[0]) + _allRanks.size()) % _allRanks.size();
                        yIndex = (int) floor(((coordsFloored[1] + ((2 * y + yLocal) * stride + coordsRemainder[1]) * 1.0) / _numCellsOnGlobalLevel[1]) + _allRanks[0].size()) % _allRanks[0].size();
                        zIndex = (int) floor(((coordsFloored[2] + ((2 * z + zLocal) * stride + coordsRemainder[2]) * 1.0) / _numCellsOnGlobalLevel[2]) + _allRanks[0][0].size()) % _allRanks[0][0].size();
                    }
                    rank = _allRanks[xIndex][yIndex][zIndex];

                    const int xOffset = (_fuseGlobalCommunication)? 0 : abs(x * stride + coordsRemainder[0]) % _numCellsOnGlobalLevel[0];
                    const int yOffset = (_fuseGlobalCommunication)? 0 : abs(y * stride + coordsRemainder[1]) % _numCellsOnGlobalLevel[1];
                    const int zOffset = (_fuseGlobalCommunication)? 0 : abs(z * stride + coordsRemainder[2]) % _numCellsOnGlobalLevel[2];
//                  if(myRank == 1){
//                  }
                    // number of cells every processor is responsible for on the current level
                    int numCellsOnLevel = (level == globalLevel)? _numCellsOnGlobalLevel[0] * _numCellsOnGlobalLevel[1] * _numCellsOnGlobalLevel[2] : 1;
                    if(_isSend){
                        if(!backCommunication){
                            MPI_Irsend(_areaBuffers[8*(globalLevel-level) + 4*zOffset + 2*yOffset + xOffset], _areaHaloSizes[0], MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
                        }
                        else{
                            if(_fuseGlobalCommunication){ // in the back communication only 1 cell (or as many cells as one owns on the global level) is sent instead of 8
                                MPI_Isend(_areaBuffers[indexPosition + offset], _areaHaloSizes[0] / 8 * numCellsOnLevel, MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
                            }
                            else{
                                MPI_Isend(_areaBuffers[indexPosition + offset], _areaHaloSizes[0], MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
                            }
                        }
                        indexPosition++;
                    }
                    else{
//                      std::cout << indexPosition << "\n";
//                      MPI_Barrier(_comm);
                        if(!backCommunication){
                            MPI_Irecv(_areaBuffers[indexPosition + offset], _areaHaloSizes[0], MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
                        }
                        else{
                            if(_fuseGlobalCommunication){ // in the back communication only 1 cell (or as many cells as one owns on the global level) is received instead of 8
                                MPI_Irecv(_areaBuffers[indexPosition + offset], _areaHaloSizes[0] / 8 * numCellsOnLevel, MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
                            }
                            else{
                                MPI_Irecv(_areaBuffers[indexPosition + offset], _areaHaloSizes[0], MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
                            }
                        }
                        indexPosition++;
                    }
                }
            }
        }
    }
//  if(_doNT)
//      std::cout << " indexposition: " << indexPosition << " " << _isSend << " " << backCommunication << " offset " << offset << " offsetFactor: " << _offsetFactor << " rank: " << myRank << "\n";
    if((indexPosition != _offsetFactor and (not(_doNT) or _fuseGlobalCommunication or level == globalLevel)) or (indexPosition != 25 and _doNT and level != globalLevel and not _fuseGlobalCommunication)){
        std::cout << "Error: offsetFactor was calculated wrongly or too few sends were issued -> synchronization possibly broken!\n";
        std::cout << indexPosition << " " << _offsetFactor << " \n";
    }
//  MPI_Barrier(_comm);
//  std::cout << indexPosition << "\n";
}

template <class T>
void HaloBufferOverlap<T>::startCommunication(){
    // outdated!!!
    if(not(_doNT)){
        MPI_Startall(_areaBuffers.size(), _areaRequests);
        MPI_Startall(_edgeBuffers.size(), _edgeRequests);
        MPI_Startall(_cornerBuffers.size(), _cornerRequests);
    }
    else{
        MPI_Startall(4, _areaRequests);
        MPI_Startall(2, _edgeRequests);
    }
//  std::cout << _areaBuffers.size() << _edgeBuffers.size() << _cornerBuffers.size() << "\n";
}

template <class T>
void HaloBufferOverlap<T>::wait(){
    // outdated!!!
    if(not(_doNT)){
        std::vector<MPI_Status> areaStatusArray(_areaBuffers.size());
        MPI_Waitall(_areaBuffers.size(), _areaRequests, areaStatusArray.data());

        std::vector<MPI_Status> edgeStatusArray(_edgeBuffers.size());
        MPI_Waitall(_edgeBuffers.size(), _edgeRequests, edgeStatusArray.data());

        std::vector<MPI_Status> cornerStatusArray(_cornerBuffers.size());
        MPI_Waitall(_cornerBuffers.size(), _cornerRequests, cornerStatusArray.data());
    }
    else{
        MPI_Status areaStatusArray[4];
        MPI_Waitall(4, _areaRequests, areaStatusArray);

        MPI_Status edgeStatusArray[2];
        MPI_Waitall(2, _edgeRequests, edgeStatusArray);
    }
}

template <class T>
int HaloBufferOverlap<T>::testIfFinished(){
    int areaFlag, edgeFlag, cornerFlag;
    if(not(_doNT)){
        if(!_isGlobal){

            std::vector<MPI_Status> areaStatusArray(_areaBuffers.size());
            MPI_Testall(_areaBuffers.size(), _areaRequests, &areaFlag, areaStatusArray.data());

            std::vector<MPI_Status> edgeStatusArray(_edgeBuffers.size());
            MPI_Testall(_edgeBuffers.size(), _edgeRequests, &edgeFlag, edgeStatusArray.data());

            std::vector<MPI_Status> cornerStatusArray(_cornerBuffers.size());
            MPI_Testall(_cornerBuffers.size(), _cornerRequests, &cornerFlag, cornerStatusArray.data());

            return areaFlag * edgeFlag * cornerFlag;
        }
        else{
//          std::cout << _areaBuffers.size() << "\n";
            if(_areaBuffers.size() == 0) return true;
            std::vector<MPI_Status> areaStatusArray(_areaBuffers.size());
            MPI_Testall(_areaBuffers.size(), _areaRequests, &areaFlag, areaStatusArray.data());
            return areaFlag;
        }
    }
    else{
        if(!_isGlobal){
            MPI_Status areaStatusArray[4];
            MPI_Testall(4, _areaRequests, &areaFlag, areaStatusArray);

            MPI_Status edgeStatusArray[2];
            MPI_Testall(2, _edgeRequests, &edgeFlag, edgeStatusArray);
            return areaFlag * edgeFlag;
        }
        else{
            int numRequests;
//          std::cout << "Testing! \n";
            if(!_fuseGlobalCommunication){
                numRequests = (_globalLevelsInBuffer == 1) ? _offsetFactor : _offsetFactor + (_globalLevelsInBuffer - 1) * 25;
            }
            else{
                numRequests = _globalLevelsInBuffer * _offsetFactor;
            }
            if(numRequests == 0) return true;
            std::vector<MPI_Status> areaStatusArray(numRequests);
            MPI_Testall(numRequests, _areaRequests, &areaFlag, areaStatusArray.data());
//          MPI_Status status;
//
//          for(int i = 0; i < numberOfSends; i++){
//              MPI_Test(&_areaRequests[i], &areaFlag, &status);
//              if(!areaFlag){
//                  return areaFlag;
//              }
//          }

            return areaFlag;
        }
    }
}
}
#endif
#endif /* HALOBUFFEROVERLAP_H_ */