Skip to content

Commit

Permalink
Add infrastructure around cub CachingDeviceAllocator, and use it in SiPixelRawToCluster (#172)
Browse files Browse the repository at this point in the history

Add infrastructure around cub CachingDeviceAllocator for device
memory allocations, and CachingHostAllocator for pinned (or managed)
host memory.

CUDAService uses the CachingHostAllocator to allocate requested
GPU->CPU/CPU->GPU buffers and data products.
Configuration options can be used to request:
  - to print all memory (re)allocations and frees;
  - to preallocate device and host buffers.

SiPixelRawToCluster uses the CachingDeviceAllocator for temporary
buffers and data products.

Fix a memory problem with SiPixelFedCablingMapGPUWrapper::ModulesToUnpack.
  • Loading branch information
fwyzard committed Nov 27, 2020
1 parent 6d4266d commit adc1c70
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <mutex>

#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h"
#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
Expand All @@ -12,29 +14,28 @@
using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU;

__global__
void simLink(clusterSLOnGPU::DigisOnGPU const * ddp, uint32_t ndigis, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n)
void simLink(const SiPixelDigisCUDA::DeviceConstView *dd, uint32_t ndigis, const SiPixelClustersCUDA::DeviceConstView *cc, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n)
{
assert(slp == slp->me_d);

constexpr int32_t invTK = 0; // std::numeric_limits<int32_t>::max();
constexpr uint16_t InvId = 9999; // must be > MaxNumModules

auto const & dd = *ddp;
auto const & hh = *hhp;
auto const & sl = *slp;
auto i = blockIdx.x * blockDim.x + threadIdx.x;

if (i >= ndigis)
return;

auto id = dd.moduleInd_d[i];
auto id = dd->moduleInd(i);
if (InvId == id)
return;
assert(id < 2000);

auto ch = pixelgpudetails::pixelToChannel(dd.xx_d[i], dd.yy_d[i]);
auto ch = pixelgpudetails::pixelToChannel(dd->xx(i), dd->yy(i));
auto first = hh.hitsModuleStart_d[id];
auto cl = first + dd.clus_d[i];
auto cl = first + cc->clus(i);
assert(cl < 2000 * blockDim.x);

const std::array<uint32_t, 4> me{{id, ch, 0, 0}};
Expand Down Expand Up @@ -176,7 +177,7 @@ namespace clusterSLOnGPU {
blocks = (ndigis + threadsPerBlock - 1) / threadsPerBlock;

assert(sl.me_d);
simLink<<<blocks, threadsPerBlock, 0, stream.id()>>>(dd.me_d, ndigis, hh.gpu_d, sl.me_d, n);
simLink<<<blocks, threadsPerBlock, 0, stream.id()>>>(dd.digis_d.view(), ndigis, dd.clusters_d.view(), hh.gpu_d, sl.me_d, n);
cudaCheck(cudaGetLastError());

if (doDump) {
Expand Down

0 comments on commit adc1c70

Please sign in to comment.