diff --git a/DataManager.cpp b/DataManager.cpp index ecdc4dcb..482e9f46 100644 --- a/DataManager.cpp +++ b/DataManager.cpp @@ -456,7 +456,6 @@ void DataManager::resetReadOnly(Parameters param, const CkCallback &cb) verbosity = param.iVerbosity; dExtraStore = param.dExtraStore; dMaxBalance = param.dMaxBalance; - dFracLoadBalance = param.dFracLoadBalance; nIOProcessor = param.nIOProcessor; theta = param.dTheta; thetaMono = theta*theta*theta*theta; diff --git a/ParallelGravity.ci b/ParallelGravity.ci index 51ae1d1d..13e102f4 100644 --- a/ParallelGravity.ci +++ b/ParallelGravity.ci @@ -31,7 +31,6 @@ mainmodule ParallelGravity { readonly DomainsDec domainDecomposition; readonly double dExtraStore; readonly double dMaxBalance; - readonly double dFracLoadBalance; readonly double dGlassDamper; readonly int bUseCkLoopPar; readonly int peanoKey; @@ -514,13 +513,8 @@ mainmodule ParallelGravity { entry [local] void receiveParticlesFullCallback(GravityParticle *egp, int num, int chunk, int reqID, Tree::NodeKey &remoteBucket, int awi, void *source); // jetley - entry void startlb(const CkCallback &cb, int activeRung); + entry void startlb(const CkCallback &cb, int activeRung, bool bDoLB); entry void ResumeFromSync(); - entry [reductiontarget] void getParticleInfoForLB(int64_t active_part, - int64_t total_part); - //jetley - // entry void receiveProxy(CkGroupID); - entry void doAtSync(); entry void outputASCII(CkReference, int bParaWrite, const CkCallback& cb); diff --git a/ParallelGravity.cpp b/ParallelGravity.cpp index 8624335f..4d772d41 100644 --- a/ParallelGravity.cpp +++ b/ParallelGravity.cpp @@ -101,8 +101,6 @@ unsigned int _yieldPeriod; DomainsDec domainDecomposition; double dExtraStore; ///< fraction of extra particle storage double dMaxBalance; ///< Max piece imbalance for load balancing -double dFracLoadBalance; ///< Min fraction of particles active - /// for doing load balancing. double dGlassDamper; // Damping inverse timescale for making glasses int iGasModel; ///< For backward compatibility int peanoKey; @@ -976,7 +974,6 @@ Main::Main(CkArgMsg* m) { thetaMono = theta*theta*theta*theta; dExtraStore = param.dExtraStore; dMaxBalance = param.dMaxBalance; - dFracLoadBalance = param.dFracLoadBalance; dGlassDamper = param.dGlassDamper; _cacheLineDepth = param.cacheLineDepth; verbosity = param.iVerbosity; @@ -1698,14 +1695,25 @@ Main::loadBalance(int iPhase) } else { double startTime = CkWallTimer(); - if(iPhase == PHASE_FEEDBACK) { - CkPrintf("Load balancer for star formation/feedback... "); + + bool bDoLB = true; + if(iPhase != -1) { + int64_t nActivePart; + if(iPhase == PHASE_FEEDBACK) { + CkPrintf("Load balancer for star formation/feedback... "); + nActivePart = nTotalSPH + nTotalStar; + } + else { + CkPrintf("Load balancer ... "); + nActivePart = nActiveGrav; + if(nActiveSPH > nActivePart) nActivePart = nActiveSPH; + } + bDoLB = ((float)nActivePart/nTotalParticles > param.dFracLoadBalance) ? + true : false; } - else { + else CkPrintf("Load balancer ... "); - } - - treeProxy.startlb(CkCallbackResumeThread(), iPhase); + treeProxy.startlb(CkCallbackResumeThread(), iPhase, bDoLB); double tLB = CkWallTimer()-startTime; timings[iPhase].tLoadB += tLB; CkPrintf("took %g seconds.\n", tLB); diff --git a/ParallelGravity.h b/ParallelGravity.h index c1dcd3a9..97304447 100644 --- a/ParallelGravity.h +++ b/ParallelGravity.h @@ -149,7 +149,6 @@ extern unsigned int _yieldPeriod; extern DomainsDec domainDecomposition; extern double dExtraStore; extern double dMaxBalance; -extern double dFracLoadBalance; extern double dGlassDamper; extern int bUseCkLoopPar; extern GenericTrees useTree; @@ -1972,8 +1971,7 @@ class TreePiece : public CBase_TreePiece { void flushSmoothParticles(CkCacheFillMsg *msg); void processReqSmoothParticles(); - void getParticleInfoForLB(int64_t active_part, int64_t total_part); - void startlb(const CkCallback &cb, int activeRung); + void startlb(const CkCallback &cb, int activeRung, bool bDoLB); void setTreePieceLoad(int activeRung); void populateSavedPhaseData(int phase, double tpload, unsigned int activeparts); bool havePhaseData(int phase); @@ -2034,7 +2032,6 @@ class TreePiece : public CBase_TreePiece { void receiveNodeCallback(GenericTreeNode *node, int chunk, int reqID, int awi, void *source); void receiveParticlesCallback(ExternalGravityParticle *egp, int num, int chunk, int reqID, Tree::NodeKey &remoteBucket, int awi, void *source); void receiveParticlesFullCallback(GravityParticle *egp, int num, int chunk, int reqID, Tree::NodeKey &remoteBucket, int awi, void *source); - void doAtSync(); void balanceBeforeInitialForces(const CkCallback &cb); diff --git a/TreePiece.cpp b/TreePiece.cpp index 4d2b8130..00ea44d3 100644 --- a/TreePiece.cpp +++ b/TreePiece.cpp @@ -1885,14 +1885,24 @@ void TreePiece::countActive(int activeRung, const CkCallback& cb) { int64_t nActive[2]; nActive[0] = nActive[1] = 0; - for(unsigned int i = 1; i <= myNumParticles; ++i) { - if(myParticles[i].rung >= activeRung) { - nActive[0]++; - if(TYPETest(&myParticles[i], TYPE_GAS)) { - nActive[1]++; - } - } + if(activeRung == 0){ + nActive[0] = myNumParticles; + nActive[1] = myNumSPH; + } + else if(activeRung == PHASE_FEEDBACK) { + nActive[0] = myNumSPH + myNumStar; + } + else{ + for(unsigned int i = 1; i <= myNumParticles; ++i) { + if(myParticles[i].rung >= activeRung) { + nActive[0]++; + if(TYPETest(&myParticles[i], TYPE_GAS)) { + nActive[1]++; + } + } } + } + numActiveParticles = nActive[0]; contribute(2*sizeof(int64_t), nActive, CkReduction::sum_long, cb); } @@ -5413,9 +5423,11 @@ void TreePiece::setTreePieceLoad(int activeRung) { setObjTime(dLoadExp); } - // jetley - contribute your centroid. AtSync is now called by the load balancer (broadcast) when it has - // all centroids. -void TreePiece::startlb(const CkCallback &cb, int activeRung){ +/// @brief Save piece loads and call AtSync() if we should load balance. +/// @param cb Callback for ResumeFromSync(). +/// @param activeRung Rung we are load balancing for. +/// @param bDoLB Whether we should call AtSync() +void TreePiece::startlb(const CkCallback &cb, int activeRung, bool bDoLB){ if(verbosity > 1) CkPrintf("[%d] actual load: %g\n", thisIndex, getObjTime()); @@ -5424,34 +5436,9 @@ void TreePiece::startlb(const CkCallback &cb, int activeRung){ iActiveRungLB = activeRung; if(verbosity > 1) CkPrintf("[%d] TreePiece %d calling AtSync()\n",CkMyPe(),thisIndex); - - unsigned int i; - if(activeRung == 0){ - numActiveParticles = myNumParticles; - } - else if(activeRung == PHASE_FEEDBACK) { - numActiveParticles = myNumSPH + myNumStar; - } - else{ - for(numActiveParticles = 0, i = 1; i <= myNumParticles; i++) - if(myParticles[i].rung >= activeRung) - numActiveParticles++; - } - - int64_t active_tp[2]; - active_tp[0] = numActiveParticles; - active_tp[1] = myNumParticles; - - contribute(2*sizeof(int64_t), &active_tp, CkReduction::sum_long, - CkCallback(CkReductionTarget(TreePiece,getParticleInfoForLB),thisProxy)); -} - -// This is called by startlb to check whether to call the load balancer -void TreePiece::getParticleInfoForLB(int64_t active_part, int64_t total_part) { - bool doLB = ((float)active_part/total_part > dFracLoadBalance) ? true : false; - // Don't do LB - if (!doLB) { + // Don't do LB; just save and reset loads. + if (!bDoLB) { setTreePieceLoad(iActiveRungLB); iPrevRungLB = iActiveRungLB; setObjTime(0.0); @@ -5459,6 +5446,23 @@ void TreePiece::getParticleInfoForLB(int64_t active_part, int64_t total_part) { return; } + // We need to recount the number of active particles since DD has + // moved particles around + if(activeRung == 0){ // Everybody is active; no need to count + numActiveParticles = myNumParticles; + } + else if(activeRung == PHASE_FEEDBACK) { // Also no need to recount + numActiveParticles = myNumSPH + myNumStar; + } + else{ + numActiveParticles = 0; + for(unsigned int i = 1; i <= myNumParticles; ++i) { + if(myParticles[i].rung >= activeRung) { + numActiveParticles++; + } + } + } + LDObjHandle myHandle = myRec->getLdHandle(); TaggedVector3D tv(savedCentroid, myHandle, numActiveParticles, myNumParticles, @@ -5474,11 +5478,8 @@ void TreePiece::getParticleInfoForLB(int64_t active_part, int64_t total_part) { *(TaggedVector3D *) data = tv; } } - thisProxy[thisIndex].doAtSync(); iPrevRungLB = iActiveRungLB; -} -void TreePiece::doAtSync(){ if(verbosity > 1) CkPrintf("[%d] TreePiece %d calling AtSync() at %g\n",CkMyPe(),thisIndex, CkWallTimer()); AtSync(); @@ -6577,10 +6578,9 @@ void TreePiece::balanceBeforeInitialForces(const CkCallback &cb){ *(TaggedVector3D *)data = tv; } } - thisProxy[thisIndex].doAtSync(); - // this will be called in resumeFromSync() callback = cb; + AtSync(); } // Choose a piece from among the owners from which to