Skip to content

Commit

Permalink
GPU: Fix copying of cluster data to GPU when not all processing steps…
Browse files Browse the repository at this point in the history
… are running on GPU
  • Loading branch information
davidrohr committed Jan 9, 2025
1 parent f70fb9b commit 926d344
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 14 deletions.
1 change: 1 addition & 0 deletions GPU/GPUTracking/Global/GPUChainTracking.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega
void RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType);
void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType);
void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function<o2::tpc::ClusterNative*(size_t)> allocator, bool applyClusterCuts);
bool NeedTPCClustersOnGPU();

std::atomic_flag mLockAtomicOutputBuffer = ATOMIC_FLAG_INIT;
std::mutex mMutexUpdateCalib;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)

auto* digitsMC = propagateMCLabels ? processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC : nullptr;

bool buildNativeGPU = (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression);
bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
bool buildNativeHost = (mRec->GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings().deterministicGPUReconstruction; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output

mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mRec->MemoryScalers()->nTPCHits * tpcHitLowOccupancyScalingFactor;
Expand Down
29 changes: 16 additions & 13 deletions GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
using namespace GPUCA_NAMESPACE::gpu;
using namespace o2::tpc;

/// Reports whether any of the reconstruction steps checked below is enabled
/// in the GPU step mask returned by mRec->GetRecoStepsGPU().
///
/// The steps tested are TPCConversion, TPCSliceTracking, TPCMerging and
/// TPCCompression. NOTE(review): presumably these are the steps that read the
/// native TPC clusters on the device - confirm against the kernels.
///
/// @return true if at least one of the four steps is set in the GPU mask,
///         false otherwise.
bool GPUChainTracking::NeedTPCClustersOnGPU()
{
  // Read the mask once; every test below is against the same value.
  const auto stepsOnGPU = mRec->GetRecoStepsGPU();

  if (stepsOnGPU & GPUDataTypes::RecoStep::TPCConversion) {
    return true;
  }
  if (stepsOnGPU & GPUDataTypes::RecoStep::TPCSliceTracking) {
    return true;
  }
  if (stepsOnGPU & GPUDataTypes::RecoStep::TPCMerging) {
    return true;
  }
  if (stepsOnGPU & GPUDataTypes::RecoStep::TPCCompression) {
    return true;
  }
  return false;
}

int32_t GPUChainTracking::ConvertNativeToClusterData()
{
#ifdef GPUCA_HAVE_O2HEADERS
Expand All @@ -42,19 +47,17 @@ int32_t GPUChainTracking::ConvertNativeToClusterData()
GPUTPCConvert& convertShadow = doGPU ? processorsShadow()->tpcConverter : convert;

bool transferClusters = false;
if (doGPU) {
if (!(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding)) {
mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mIOPtrs.clustersNative->nClustersTotal;
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
*mInputsHost->mPclusterNativeAccess = *mIOPtrs.clustersNative;
mInputsHost->mPclusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
mInputsHost->mPclusterNativeAccess->setOffsetPtrs();
GPUMemCpy(RecoStep::TPCConversion, mInputsShadow->mPclusterNativeBuffer, mIOPtrs.clustersNative->clustersLinear, sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * mIOPtrs.clustersNative->nClustersTotal, 0, true);
TransferMemoryResourceLinkToGPU(RecoStep::TPCConversion, mInputsHost->mResourceClusterNativeAccess, 0);
transferClusters = true;
}
if (mRec->IsGPU() && !(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding) && NeedTPCClustersOnGPU()) {
mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mIOPtrs.clustersNative->nClustersTotal;
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
*mInputsHost->mPclusterNativeAccess = *mIOPtrs.clustersNative;
mInputsHost->mPclusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
mInputsHost->mPclusterNativeAccess->setOffsetPtrs();
GPUMemCpy(RecoStep::TPCConversion, mInputsShadow->mPclusterNativeBuffer, mIOPtrs.clustersNative->clustersLinear, sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * mIOPtrs.clustersNative->nClustersTotal, 0, true);
TransferMemoryResourceLinkToGPU(RecoStep::TPCConversion, mInputsHost->mResourceClusterNativeAccess, 0);
transferClusters = true;
}
if (!param().par.earlyTpcTransform) {
if (GetProcessingSettings().debugLevel >= 3) {
Expand Down

0 comments on commit 926d344

Please sign in to comment.