From 498231e6efd5a76bd610a42ce37328fddea364a4 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 13 Mar 2019 10:04:32 -0500 Subject: [PATCH] Next prototype of the framework integration (cms-patatrack#100) Provide a mechanism for a chain of modules to share a resource, that can be e.g. CUDA device memory or a CUDA stream. Minimize data movements between the CPU and the device, and support multiple devices. Allow the same job configuration to be used on all hardware combinations. See HeterogeneousCore/CUDACore/README.md for a more detailed description and examples. --- .../TrackerHitAssociation/plugins/ClusterSLOnGPU.cu | 10 ++++------ .../TrackerHitAssociation/plugins/ClusterSLOnGPU.h | 5 ++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index b402daef07a05..dfa08c1fa2043 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -3,8 +3,6 @@ #include #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" @@ -14,7 +12,7 @@ using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; __global__ -void simLink(const SiPixelDigisCUDA::DeviceConstView *dd, uint32_t ndigis, const SiPixelClustersCUDA::DeviceConstView *cc, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n) +void simLink(const SiPixelDigisCUDA::DeviceConstView *dd, uint32_t ndigis, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n) { assert(slp == slp->me_d); @@ -35,7 +33,7 @@ void simLink(const SiPixelDigisCUDA::DeviceConstView *dd, uint32_t ndigis, const auto ch = pixelgpudetails::pixelToChannel(dd->xx(i), dd->yy(i)); auto first = hh.hitsModuleStart_d[id]; - auto cl = first + cc->clus(i); + auto cl = first + dd->clus(i); assert(cl < 2000 * blockDim.x); const std::array me{{id, ch, 0, 0}}; @@ -162,7 +160,7 @@ namespace clusterSLOnGPU { cudaCheck(cudaMemsetAsync(slgpu.n2_d, 0, (ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream)); } - void Kernel::algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream) { + void Kernel::algo(SiPixelDigisCUDA const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream) { zero(stream.id()); ClusterSLGPU const & sl = slgpu; @@ -177,7 +175,7 @@ namespace clusterSLOnGPU { blocks = (ndigis + threadsPerBlock - 1) / threadsPerBlock; assert(sl.me_d); - simLink<<>>(dd.digis_d.view(), ndigis, dd.clusters_d.view(), hh.gpu_d, sl.me_d, n); + simLink<<>>(dd.view(), ndigis, hh.gpu_d, sl.me_d, n); cudaCheck(cudaGetLastError()); if (doDump) { diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h index 00b0e34b301c8..23976cb418e16 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h @@ -4,8 +4,8 @@ #include #include +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h" #include "RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h" #include "trackerHitAssociationHeterogeneousProduct.h" @@ -15,7 +15,6 @@ namespace clusterSLOnGPU { using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU; using GPUProduct = trackerHitAssociationHeterogeneousProduct::GPUProduct; - using DigisOnGPU = siPixelRawToClusterHeterogeneousProduct::GPUProduct; using HitsOnGPU = siPixelRecHitsHeterogeneousProduct::HitsOnGPU; using HitsOnCPU = siPixelRecHitsHeterogeneousProduct::HitsOnCPU; @@ -23,7 +22,7 @@ namespace clusterSLOnGPU { public: Kernel(cuda::stream_t<>& stream, bool dump); ~Kernel() {deAlloc();} - void algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream); + void algo(SiPixelDigisCUDA const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream); GPUProduct getProduct() { return GPUProduct{slgpu.me_d};} private: