8#ifndef HALOBUFFEROVERLAP_H_
9#define HALOBUFFEROVERLAP_H_
10#include "bhfmm/utils/Vector3.h"
// NOTE(review): this header is an extraction fragment -- the class header,
// template declaration and the leading constructor parameters are not visible
// here. Below is the tail of the constructor's parameter list.
// The buffer counts default to 6/12/8, presumably the faces/edges/corners of a
// 3D (sub)domain -- TODO confirm against the full source.
// allRanks: 3D table mapping cartesian grid coordinates to MPI ranks
//           (defaults to empty).
// numCellsOnGlobalLevel: cells owned per dimension on the global FMM level
//           (defaults to 1 in each dimension).
// fuseGlobalCommunication: if true, global-level messages are fused into
//           fewer, larger transfers (see the _offsetFactor values below).
22 int cornerHaloSize, MPI_Comm comm, std::vector<int>& areaNeighbours,std::vector<int>& edgeNeighbours,std::vector<int>& cornerNeighbours,
bool isSend,
bool doNT,
23 int areaNumber = 6,
int edgeNumber = 12,
int cornerNumber = 8 , std::vector<std::vector<std::vector<int>>> allRanks = std::vector<std::vector<std::vector<int>>>(0),
Vector3<int> numCellsOnGlobalLevel =
Vector3<int>(1),
bool fuseGlobalCommunication =
false);
// Starts all persistent MPI requests previously created by
// initCommunicationDouble() (see MPI_Startall usage in startCommunication()).
25 void startCommunication();
// Posts the non-blocking sends/receives for area, edge and corner buffers.
// postProcessing selects which direction subsets are skipped (see the index
// checks inside communicate()).
27 void communicate(
bool postProcessing);
// Drives the per-level global communication from globalLevel down to
// stopLevel, computing a request offset per level (see implementation below).
28 void communicateGlobalLevels(
int globalLevel,
int stopLevel = 1,
bool backCommunication =
false);
// Accessors exposing the raw halo buffer arrays. NOTE(review): the return
// statements of the first two getters and all closing braces are missing from
// this extraction fragment; presumably they return _areaBuffers/_edgeBuffers.
32 std::vector<T *>& getAreaBuffers(){
35 std::vector<T *>& getEdgeBuffers(){
38 std::vector<T *>& getCornerBuffers(){
39 return _cornerBuffers;
// Records how many global levels are stored in this buffer; the value is read
// in testIfFinished() to compute the number of outstanding requests.
42 void setNumberOfGlobalLevelsInBuffer(
int number){
43 _globalLevelsInBuffer = number;
// Issues the sends/receives for one global level; offset indexes into the
// shared _areaRequests array (see implementation below).
48 void communicateLevelGlobal(
int level,
int globalLevel,
int offset,
bool backCommunication);
// Creates persistent MPI requests (MPI_Rsend_init / MPI_Recv_init) for every
// buffer; the element type is transferred as MPI_DOUBLE.
49 void initCommunicationDouble();
// Raw halo storage, one heap array (T[]) per face/edge/corner buffer;
// allocated in the constructor, released in the destructor.
51 std::vector<T *> _areaBuffers, _edgeBuffers, _cornerBuffers;
// Per-buffer element counts, filled by fillArraySizes().
55 std::vector<int> _areaHaloSizes, _edgeHaloSizes;
// Neighbour ranks, index-aligned with the corresponding buffer vectors.
56 std::vector<int> _areaNeighbours, _edgeNeighbours,_cornerNeighbours;
// Request arrays sized to the matching buffer vectors (see constructor).
57 MPI_Request * _areaRequests, *_edgeRequests, *_cornerRequests;
// True when any dimension owns >1 cell on the global level (set in ctor).
62 bool _importWholeGlobalRegion;
// allRanks[x][y][z] -> MPI rank, used by communicateLevelGlobal().
63 std::vector<std::vector<std::vector<int>>> _allRanks;
// Number of global levels held in this buffer (see testIfFinished()).
65 int _globalLevelsInBuffer;
// Fuse global-level messages into fewer transfers (affects _offsetFactor).
68 bool _fuseGlobalCommunication;
// ---- Constructor definition (extraction fragment: the template header, the
// ---- leading parameters, and several if/else braces are not visible).
76 int cornerHaloSize, MPI_Comm comm, std::vector<int>& areaNeighbours,std::vector<int>& edgeNeighbours,std::vector<int>& cornerNeighbours,
bool isSend,
bool doNT,
int areaNumber,
int edgeNumber,
int cornerNumber, std::vector<std::vector<std::vector<int>>> allRanks,
Vector3<int> numCellsOnGlobalLevel,
bool fuseGlobalCommunication):
77_areaBuffers(areaNumber), _edgeBuffers(edgeNumber), _cornerBuffers(cornerNumber), _areaHaloSize(areaHaloSize), _edgeHaloSize(edgeHaloSize), _areaNeighbours(areaNeighbours), _edgeNeighbours(edgeNeighbours), _cornerNeighbours(cornerNeighbours), _doNT(doNT), _allRanks(allRanks), _numCellsOnGlobalLevel(numCellsOnGlobalLevel), _fuseGlobalCommunication(fuseGlobalCommunication) {
79 _cornerHaloSize = cornerHaloSize;
// The whole global region must be imported when any dimension owns more than
// one cell on the global level.
82 if(numCellsOnGlobalLevel[0] > 1 or numCellsOnGlobalLevel[1] > 1 or numCellsOnGlobalLevel[2] > 1){
83 _importWholeGlobalRegion = 1;
86 _importWholeGlobalRegion = 0;
// One MPI request slot per buffer of each category (corner requests only when
// corner buffers exist).
96 _areaRequests =
new MPI_Request[_areaBuffers.size()];
100 _edgeRequests =
new MPI_Request[_edgeBuffers.size()];
102 if(cornerNumber != 0){
103 _cornerRequests =
new MPI_Request[_cornerBuffers.size()];
// Compute per-buffer element counts before allocating the storage below.
106 fillArraySizes(areaHaloSize,edgeHaloSize);
// Area buffers: one path sizes each buffer individually, the other (visible
// alternative branch) reuses the single common size _areaHaloSizes[0].
111 for(
unsigned int i=0; i<_areaBuffers.size();i++){
113 _areaBuffers[i] =
new T[_areaHaloSizes[i]];
116 _areaBuffers[i] =
new T[_areaHaloSizes[0]];
122 for(
unsigned int i=0; i<_edgeBuffers.size();i++){
123 _edgeBuffers[i] =
new T[_edgeHaloSizes[i]];
126 if(cornerNumber != 0){
127 for(
unsigned int i=0; i<_cornerBuffers.size();i++){
128 _cornerBuffers[i] =
new T[cornerHaloSize];
// _offsetFactor = number of request slots consumed per global level.
// NOTE(review): the magic constants (19, 5, 216, 26, 25, 189, 7) are
// presumably counts of remote cells addressed per level in
// communicateLevelGlobal() -- the consistency check there compares
// indexPosition against _offsetFactor. TODO confirm against full source.
135 if(_importWholeGlobalRegion and not _fuseGlobalCommunication){
138 if(_numCellsOnGlobalLevel[1] == 2){
139 _offsetFactor += 19 ;
141 if(_numCellsOnGlobalLevel[0] == 2){
142 _offsetFactor += 5 - _numCellsOnGlobalLevel[1] + 1;
144 if(_numCellsOnGlobalLevel[2] == 2){
145 _offsetFactor += 5 - _numCellsOnGlobalLevel[1] + 1 ;
149 _offsetFactor = (_fuseGlobalCommunication)? 26 : 216;
154 _offsetFactor = (_fuseGlobalCommunication)? 7 : 25;
157 _offsetFactor = (_fuseGlobalCommunication)? 26 : 189;
// Computes the per-buffer halo element counts from per-dimension sizes.
// (Fragment: the branch conditions/braces separating the two area paths are
// not visible here.)
167void HaloBufferOverlap<T>::fillArraySizes(Vector3<int> areaSizes, Vector3<int> edgeSizes){
// Individually sized areas: opposite faces i and i+1 share areaSizes[i/2].
169 _areaHaloSizes.resize(_areaBuffers.size());
170 for(
unsigned int i = 0; i < _areaBuffers.size(); i++){
171 _areaHaloSizes[i] = areaSizes[i/2];
// Alternative (visible else-path) body: one common area size for all buffers.
175 _areaHaloSizes.resize(1);
176 _areaHaloSizes[0] = areaSizes[0];
// Edges: groups of four buffers share a size, indexed 2 - i/4.
180 _edgeHaloSizes.resize(_edgeBuffers.size());
181 for(
unsigned int i = 0; i < _edgeBuffers.size(); i++){
182 _edgeHaloSizes[i] = edgeSizes[2-i/4];
// Releases every heap-allocated halo buffer (allocated with new T[] in the
// constructor). NOTE(review): no delete[] of the MPI_Request arrays is
// visible in this fragment -- confirm in the full source.
193HaloBufferOverlap<T>::~HaloBufferOverlap() {
195 for(
unsigned int i=0; i<_areaBuffers.size();i++){
197 delete[] (_areaBuffers[i]);
201 for(
unsigned int i=0; i<_edgeBuffers.size();i++){
203 delete[] _edgeBuffers[i];
205 for(
unsigned int i=0; i<_cornerBuffers.size();i++){
207 delete[] _cornerBuffers[i];
// Zero-fills all halo buffers. The 0.0 fill value implies T must be
// assignable from double -- presumably T is double or a complex-like scalar;
// TODO confirm against instantiations.
// (Fragment: the branch selecting per-buffer vs. common area size is not
// visible.)
213void HaloBufferOverlap<T>::clear(){
214 for(
unsigned int i = 0; i < _areaBuffers.size(); i++){
216 std::fill(_areaBuffers[i], _areaBuffers[i] + _areaHaloSizes[i] , 0.0);
// Alternative (visible else-path) body: all buffers share _areaHaloSizes[0].
219 std::fill(_areaBuffers[i], _areaBuffers[i] + _areaHaloSizes[0] , 0.0);
222 for(
unsigned int i = 0; i < _edgeBuffers.size(); i++){
223 std::fill(_edgeBuffers[i], _edgeBuffers[i] + _edgeHaloSizes[i] , 0.0);
225 for(
unsigned int i = 0; i < _cornerBuffers.size(); i++){
226 std::fill(_cornerBuffers[i], _cornerBuffers[i] + _cornerHaloSize , 0.0);
// Creates persistent MPI requests for every buffer, transferring elements as
// MPI_DOUBLE. Sends use MPI_Rsend_init (ready-mode: the matching receive must
// already be posted when the request is started). (Fragment: the _isSend
// branch structure around the send/recv pairs is not visible.)
// Tag scheme: send tag = i + 42; the receive uses i + 42 +/- 1 so that buffer
// i receives with the tag its opposite-direction partner (i^1) sends with.
232void HaloBufferOverlap<T>::initCommunicationDouble(){
233 for (
unsigned int i = 0; i < _areaBuffers.size(); i++){
235 MPI_Rsend_init(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42, _comm, &_areaRequests[i]);
241 int indexShift = (i%2 == 0)? +1: -1;
242 MPI_Recv_init(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42 + indexShift, _comm, &_areaRequests[i]);
245 for (
unsigned int i = 0; i < _edgeBuffers.size(); i++){
247 MPI_Rsend_init(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42, _comm, &_edgeRequests[i]);
252 int indexShift = (i%2 == 0)? +1: -1;
253 MPI_Recv_init(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42 + indexShift, _comm, &_edgeRequests[i]);
256 for (
unsigned int i = 0; i < _cornerBuffers.size(); i++){
258 MPI_Rsend_init(_cornerBuffers[i], _cornerHaloSize, MPI_DOUBLE, _cornerNeighbours[i], i + 42, _comm, &_cornerRequests[i]);
262 int indexShift = (i%2 == 0)? +1: -1;
263 MPI_Recv_init(_cornerBuffers[i], _cornerHaloSize, MPI_DOUBLE, _cornerNeighbours[i], i + 42 + indexShift, _comm, &_cornerRequests[i]);
// Posts non-blocking sends/receives for all buffers. (Fragment: the _isSend /
// postProcessing branch structure and the "continue"/skip statements guarded
// by the index checks below are not visible -- the bare if-lines presumably
// skip certain directions in the NT scheme; TODO confirm.)
// Same tag scheme as initCommunicationDouble(): send i + 42, receive
// i + 42 +/- 1 pairing opposite directions.
269void HaloBufferOverlap<T>::communicate(
bool postProcessing){
270 int requestIndex = 0;
271 for (
unsigned int i = 0; i < _areaBuffers.size(); i++){
275 if(i == 0 or i == 4){
280 if(i == 1 or i == 5){
// Standard send in one mode, ready-mode send (MPI_Irsend) in the other.
286 MPI_Isend(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42, _comm, &_areaRequests[requestIndex]);
288 MPI_Irsend(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42, _comm, &_areaRequests[requestIndex]);
297 if(i == 1 or i == 5){
302 if(i == 0 or i == 4){
308 int indexShift = (i%2 == 0)? +1: -1;
309 MPI_Irecv(_areaBuffers[i], _areaHaloSizes[i], MPI_DOUBLE, _areaNeighbours[i], i + 42 + indexShift, _comm, &_areaRequests[requestIndex]);
314 for (
unsigned int i = 0; i < _edgeBuffers.size(); i++){
// Edge subsets {4,6} / {5,7} are skipped depending on postProcessing.
317 if(not(postProcessing)){
318 if(not(i == 4 or i == 6)){
323 if(not(i == 5 or i == 7)){
329 MPI_Isend(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42, _comm, &_edgeRequests[requestIndex]);
331 MPI_Irsend(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42, _comm, &_edgeRequests[requestIndex]);
338 if(not(postProcessing)){
339 if(not(i == 5 or i == 7)){
344 if(not(i == 4 or i == 6)){
349 int indexShift = (i%2 == 0)? +1: -1;
350 MPI_Irecv(_edgeBuffers[i], _edgeHaloSizes[i], MPI_DOUBLE, _edgeNeighbours[i], i + 42 + indexShift, _comm, &_edgeRequests[requestIndex]);
356 for (
unsigned int i = 0; i < _cornerBuffers.size(); i++){
358 MPI_Irsend(_cornerBuffers[i], _cornerHaloSize, MPI_DOUBLE, _cornerNeighbours[i], i + 42, _comm, &_cornerRequests[requestIndex]);
363 int indexShift = (i%2 == 0)? +1: -1;
364 MPI_Irecv(_cornerBuffers[i], _cornerHaloSize, MPI_DOUBLE, _cornerNeighbours[i], i + 42 + indexShift, _comm, &_cornerRequests[requestIndex]);
// Walks the global FMM levels from globalLevel down to stopLevel (clamped to
// a scheme-dependent minimum) and issues the per-level communication.
// offset indexes into the shared _areaRequests array; in the NT scheme the
// topmost level uses _offsetFactor slots and every further level 25.
373void HaloBufferOverlap<T>::communicateGlobalLevels(
int globalLevel,
int stopLevel,
bool backCommunication){
// Fused communication cannot go below level 2 (clamp below).
374 int minimumLevel = (_fuseGlobalCommunication)? 2:1;
375 stopLevel = (stopLevel < minimumLevel)? minimumLevel: stopLevel;
376 for(
int l = globalLevel; l >= stopLevel ; l--){
378 if(_doNT and not _fuseGlobalCommunication){
379 offset = (globalLevel == l) ? 0 : _offsetFactor + (globalLevel - l - 1) * 25;
382 offset = _offsetFactor * (globalLevel - l);
384 communicateLevelGlobal(l,globalLevel,offset, backCommunication);
// Issues the sends/receives for one global level. (Heavily fragmented: loop
// bounds start/end, several declarations (myRank, coords, coordsLevel, rank,
// condition) and most braces are on lines not visible here -- the comments
// below describe only what the visible statements establish.)
389void HaloBufferOverlap<T>::communicateLevelGlobal(
int level,
int globalLevel,
int offset,
bool backCommunication){
// Each coarser level covers 2^(globalLevel-level) global-level cells.
391 int stride = pow(2,globalLevel - level);
394 int coordsFloored[3];
396 int coordsRemainder[3];
397 MPI_Comm_rank(_comm,&myRank);
398 int indexPosition = 0;
// Own position in the cartesian communicator.
400 MPI_Cart_coords(_comm, myRank, 3, coords);
401 for(
int d = 0; d < 3; d++){
// Cell coordinates floored to the enclosing 2*stride block, the remainder
// within one stride, and the coordinate on the current level.
402 coordsFloored[d] = ((coords[d] * _numCellsOnGlobalLevel[d]) / (2 * stride)) * 2 * stride;
403 coordsRemainder[d] = (coords[d] * _numCellsOnGlobalLevel[d]) % (stride);
404 coordsLevel[d] = ((coords[d] * _numCellsOnGlobalLevel[d]) / (stride));
// Iterate the neighbour region (start/end set on hidden lines; narrower when
// _fuseGlobalCommunication).
408 if(_fuseGlobalCommunication){
416 for(
int x = start; x <= end; x++ ){
417 for(
int y = start; y <= end; y++){
418 for(
int z = start; z <= end; z++){
421 int cellsX = _numCellsOnGlobalLevel[0];
422 int cellsY = _numCellsOnGlobalLevel[1];
423 int cellsZ = _numCellsOnGlobalLevel[2];
// NT-scheme selection: sends in forward direction and receives in back
// communication use one half-space condition, the mirrored case the other.
425 if((_isSend and !backCommunication) or (backCommunication and !_isSend)){
426 if(!_fuseGlobalCommunication){
427 condition = ( floor((x * stride + coordsFloored[0] + coordsRemainder[0])/(1.0 * cellsX)) == coords[0] and floor((z * stride + coordsFloored[2] + coordsRemainder[2])/(1.0 * cellsZ)) == coords[2])
428 or (((y < 2 and y >= 0 and cellsY == 2 and level == globalLevel) or floor((y * stride + coordsFloored[1] + coordsRemainder[1])/(1.0 * cellsY)) == coords[1]) and x < 2 and not (x >= 0 and z >= 2));
431 condition = (x == 0 and z == 0)
432 or (y == 0 and x < 1 and not ( x==0 and z == 1));
436 if(!_fuseGlobalCommunication){
438 condition = ( floor((x * stride + coordsFloored[0] + coordsRemainder[0])/(1.0 * cellsX)) == coords[0] and floor((z * stride + coordsFloored[2] + coordsRemainder[2])/(1.0 * cellsZ)) == coords[2])
439 or (((y < 2 and y >= 0 and cellsY == 2 and level == globalLevel) or floor((y * stride + coordsFloored[1] + coordsRemainder[1])/(1.0 * cellsY)) == coords[1]) and x >= 0 and not (x < 2 and z < 0));
// Non-NT selection: everything outside the direct neighbourhood (or the
// whole region when _importWholeGlobalRegion is set).
442 condition = (x == 0 and z == 0)
443 or (y == 0 and x >= 0 and not ( x < 1 and z == -1));
450 if(!_fuseGlobalCommunication){
451 condition = _importWholeGlobalRegion or abs(x +(coordsFloored[0] - coords[0])/stride) >= 2 or abs(y +(coordsFloored[1] - coords[1])/stride) >= 2 or abs(z + (coordsFloored[2] - coords[2])/stride) >= 2;
454 condition = x != 0 or y != 0 or z != 0;
// Map the (possibly out-of-range) neighbour cell to a rank with periodic
// wrap-around via "+ size ... % size" on each _allRanks dimension.
458 int xIndex, yIndex, zIndex;
459 if(!_fuseGlobalCommunication){
460 xIndex = (int) floor(((coordsFloored[0] + (x * stride + coordsRemainder[0]) * 1.0) / _numCellsOnGlobalLevel[0]) + _allRanks.size()) % _allRanks.size();
461 yIndex = (int) floor(((coordsFloored[1] + (y * stride + coordsRemainder[1]) * 1.0) / _numCellsOnGlobalLevel[1]) + _allRanks[0].size()) % _allRanks[0].size();
462 zIndex = (int) floor(((coordsFloored[2] + (z * stride + coordsRemainder[2]) * 1.0) / _numCellsOnGlobalLevel[2]) + _allRanks[0][0].size()) % _allRanks[0][0].size();
// Fused: address the 2x-coarser neighbour block, adjusted by own parity.
465 int xLocal = coordsLevel[0] % 2;
466 int yLocal = coordsLevel[1] % 2;
467 int zLocal = coordsLevel[2] % 2;
468 xIndex = (int) floor(((coordsFloored[0] + ((2 * x + xLocal) * stride + coordsRemainder[0]) * 1.0) / _numCellsOnGlobalLevel[0]) + _allRanks.size()) % _allRanks.size();
469 yIndex = (int) floor(((coordsFloored[1] + ((2 * y + yLocal) * stride + coordsRemainder[1]) * 1.0) / _numCellsOnGlobalLevel[1]) + _allRanks[0].size()) % _allRanks[0].size();
470 zIndex = (int) floor(((coordsFloored[2] + ((2 * z + zLocal) * stride + coordsRemainder[2]) * 1.0) / _numCellsOnGlobalLevel[2]) + _allRanks[0][0].size()) % _allRanks[0][0].size();
472 rank = _allRanks[xIndex][yIndex][zIndex];
// Sub-cell offsets (0/1 per dimension) select which of up to 8 buffers of the
// target rank is addressed; encoded into the tag as 1000 + z*4 + y*2 + x.
474 const int xOffset = (_fuseGlobalCommunication)? 0 : abs(x * stride + coordsRemainder[0]) % _numCellsOnGlobalLevel[0];
475 const int yOffset = (_fuseGlobalCommunication)? 0 : abs(y * stride + coordsRemainder[1]) % _numCellsOnGlobalLevel[1];
476 const int zOffset = (_fuseGlobalCommunication)? 0 : abs(z * stride + coordsRemainder[2]) % _numCellsOnGlobalLevel[2];
481 int numCellsOnLevel = (level == globalLevel)? _numCellsOnGlobalLevel[0] * _numCellsOnGlobalLevel[1] * _numCellsOnGlobalLevel[2] : 1;
// Send side: ready-mode send for the forward pass, standard send for back
// communication (fused transfers send 1/8 buffer per cell on the level).
483 if(!backCommunication){
484 MPI_Irsend(_areaBuffers[8*(globalLevel-level) + 4*zOffset + 2*yOffset + xOffset], _areaHaloSizes[0], MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
487 if(_fuseGlobalCommunication){
488 MPI_Isend(_areaBuffers[indexPosition + offset], _areaHaloSizes[0] / 8 * numCellsOnLevel , MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
491 MPI_Isend(_areaBuffers[indexPosition + offset], _areaHaloSizes[0], MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
// Receive side, mirroring the send tags.
500 if(!backCommunication){
501 MPI_Irecv(_areaBuffers[indexPosition + offset], _areaHaloSizes[0], MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
504 if(_fuseGlobalCommunication){
505 MPI_Irecv(_areaBuffers[indexPosition + offset], _areaHaloSizes[0] / 8 * numCellsOnLevel, MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
508 MPI_Irecv(_areaBuffers[indexPosition + offset], _areaHaloSizes[0], MPI_DOUBLE, rank, 1000 + zOffset * 4 + yOffset * 2 + xOffset + 8 * (globalLevel - level), _comm, &_areaRequests[indexPosition + offset]);
// Sanity check: the number of requests issued must match the precomputed
// _offsetFactor (or 25 for non-top NT levels), otherwise synchronization in
// wait()/testIfFinished() would be broken.
519 if((indexPosition != _offsetFactor and (not(_doNT) or _fuseGlobalCommunication or level == globalLevel)) or(indexPosition != 25 and _doNT and level != globalLevel and not _fuseGlobalCommunication)){
520 std::cout <<
"Error offsetFactor is calculated wrong or too few sends!!! -> synchronization possibly broken!!! \n";
521 std::cout << indexPosition <<
" " << _offsetFactor <<
" \n";
// Starts the persistent requests created in initCommunicationDouble().
// (Fragment: the branch condition separating the all-buffers path from the
// reduced 4-area/2-edge path is not visible -- presumably the NT scheme uses
// the reduced set; TODO confirm.)
528void HaloBufferOverlap<T>::startCommunication(){
531 MPI_Startall(_areaBuffers.size(), _areaRequests);
532 MPI_Startall(_edgeBuffers.size(), _edgeRequests);
533 MPI_Startall(_cornerBuffers.size(), _cornerRequests);
536 MPI_Startall(4, _areaRequests);
537 MPI_Startall(2, _edgeRequests);
// Blocks until all outstanding requests complete. (Fragment: branch structure
// not visible; mirrors startCommunication's all-buffers vs. 4-area/2-edge
// split.) NOTE(review): the MPI_Status arrays are allocated with new[] and no
// matching delete[] is visible in this fragment -- possible leak; the deletes
// may be on hidden lines, confirm in the full source.
543void HaloBufferOverlap<T>::wait(){
546 MPI_Status * areaStatusArray =
new MPI_Status[_areaBuffers.size()];
547 MPI_Waitall(_areaBuffers.size(),_areaRequests, areaStatusArray);
549 MPI_Status * edgeStatusArray =
new MPI_Status[_edgeBuffers.size()];
550 MPI_Waitall(_edgeBuffers.size(),_edgeRequests, edgeStatusArray);
552 MPI_Status * cornerStatusArray =
new MPI_Status[_cornerBuffers.size()];
553 MPI_Waitall(_cornerBuffers.size(),_cornerRequests, cornerStatusArray);
556 MPI_Status * areaStatusArray =
new MPI_Status[4];
557 MPI_Waitall(4,_areaRequests, areaStatusArray);
559 MPI_Status * edgeStatusArray =
new MPI_Status[2];
560 MPI_Waitall(2,_edgeRequests, edgeStatusArray);
// Non-blocking completion test; returns nonzero only when every tested
// request set has finished (product of MPI_Testall flags). (Fragment: the
// branch conditions selecting between the three paths -- all buffers,
// reduced 4-area/2-edge set, and global-level request count -- are not
// visible here.)
565int HaloBufferOverlap<T>::testIfFinished(){
566 int areaFlag, edgeFlag, cornerFlag;
570 std::vector<MPI_Status> areaStatusArray(_areaBuffers.size());
571 MPI_Testall(_areaBuffers.size(),_areaRequests, &areaFlag, areaStatusArray.data());
573 std::vector<MPI_Status> edgeStatusArray(_edgeBuffers.size());
574 MPI_Testall(_edgeBuffers.size(),_edgeRequests, &edgeFlag, edgeStatusArray.data());
576 std::vector<MPI_Status> cornerStatusArray(_cornerBuffers.size());
577 MPI_Testall(_cornerBuffers.size(),_cornerRequests, &cornerFlag, cornerStatusArray.data());
579 return areaFlag * edgeFlag * cornerFlag;
// Degenerate case: nothing to test counts as finished.
583 if(_areaBuffers.size() == 0)
return true;
584 std::vector<MPI_Status> areaStatusArray(_areaBuffers.size());
585 MPI_Testall(_areaBuffers.size(),_areaRequests, &areaFlag, areaStatusArray.data());
591 MPI_Status areaStatusArray[4];
592 MPI_Testall(4,_areaRequests, &areaFlag, areaStatusArray);
594 MPI_Status edgeStatusArray[2];
595 MPI_Testall(2,_edgeRequests, &edgeFlag, edgeStatusArray);
596 return areaFlag * edgeFlag;
// Global-level path: request count depends on how many levels are buffered
// (25 extra slots per additional level in the non-fused NT scheme, matching
// communicateGlobalLevels).
601 if(!_fuseGlobalCommunication){
602 numRequests = (_globalLevelsInBuffer == 1) ? _offsetFactor : _offsetFactor + (_globalLevelsInBuffer - 1) * 25;
605 numRequests = _globalLevelsInBuffer * _offsetFactor ;
607 if(numRequests == 0)
return true;
608 std::vector<MPI_Status> areaStatusArray(numRequests);
609 MPI_Testall(numRequests,_areaRequests, &areaFlag, areaStatusArray.data());
Definition: HaloBufferOverlap.h:19
Definition: L2PCellProcessor.cpp:15