diff --git a/CUDADataFormats/BeamSpot/BuildFile.xml b/CUDADataFormats/BeamSpot/BuildFile.xml
new file mode 100644
index 0000000000000..b4a05240b567d
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/BuildFile.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h b/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
new file mode 100644
index 0000000000000..70c3f1b3f9d11
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
@@ -0,0 +1,32 @@
+#ifndef CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h
+#define CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h
+
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+
+#include
+
+class BeamSpotCUDA {
+public:
+ // alignas(128) doesn't really make sense as there is only one
+ // beamspot per event?
+ struct Data {
+ float x,y,z; // position
+ // TODO: add covariance matrix
+
+ float sigmaZ;
+ float beamWidthX, beamWidthY;
+ float dxdz, dydz;
+ float emittanceX, emittanceY;
+ float betaStar;
+ };
+
+ BeamSpotCUDA() = default;
+ BeamSpotCUDA(Data const* data_h, cuda::stream_t<>& stream);
+
+ Data const* data() const { return data_d_.get(); }
+
+private:
+ cudautils::device::unique_ptr data_d_;
+};
+
+#endif
diff --git a/CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc b/CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc
new file mode 100644
index 0000000000000..2714df51d2456
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc
@@ -0,0 +1,11 @@
+#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
+
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+
+BeamSpotCUDA::BeamSpotCUDA(Data const* data_h, cuda::stream_t<>& stream) {
+ edm::Service cs;
+
+ data_d_ = cs->make_device_unique(stream);
+ cuda::memory::async::copy(data_d_.get(), data_h, sizeof(Data), stream.id());
+}
diff --git a/CUDADataFormats/BeamSpot/src/classes.h b/CUDADataFormats/BeamSpot/src/classes.h
new file mode 100644
index 0000000000000..62f990c0ba3b3
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/src/classes.h
@@ -0,0 +1,8 @@
+#ifndef CUDADataFormats_BeamSpot_classes_h
+#define CUDADataFormats_BeamSpot_classes_h
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+
+#endif
diff --git a/CUDADataFormats/BeamSpot/src/classes_def.xml b/CUDADataFormats/BeamSpot/src/classes_def.xml
new file mode 100644
index 0000000000000..29a0eafa04005
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/src/classes_def.xml
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/CUDADataFormats/Common/interface/CUDAProduct.h b/CUDADataFormats/Common/interface/CUDAProduct.h
index 181024f068c7a..fb90496f33661 100644
--- a/CUDADataFormats/Common/interface/CUDAProduct.h
+++ b/CUDADataFormats/Common/interface/CUDAProduct.h
@@ -45,6 +45,12 @@ class CUDAProduct: public CUDAProductBase {
data_(std::move(data))
{}
+ template
+ explicit CUDAProduct(int device, std::shared_ptr> stream, std::shared_ptr event, Args&&... args):
+ CUDAProductBase(device, std::move(stream), std::move(event)),
+ data_(std::forward(args)...)
+ {}
+
T data_; //!
};
diff --git a/Configuration/StandardSequences/python/Reconstruction_cff.py b/Configuration/StandardSequences/python/Reconstruction_cff.py
index 36ac26b2197c6..8bb2e5cb2afcb 100644
--- a/Configuration/StandardSequences/python/Reconstruction_cff.py
+++ b/Configuration/StandardSequences/python/Reconstruction_cff.py
@@ -198,9 +198,9 @@
reconstruction_trackingOnly = cms.Sequence(localreco*globalreco_tracking)
reconstruction_pixelTrackingOnly = cms.Sequence(
pixeltrackerlocalreco*
- offlineBeamSpot*
siPixelClusterShapeCachePreSplitting*
- recopixelvertexing
+ recopixelvertexing,
+ offlineBeamSpotTask
)
#need a fully expanded sequence copy
diff --git a/HeterogeneousCore/CUDAUtilities/BuildFile.xml b/HeterogeneousCore/CUDAUtilities/BuildFile.xml
index 289f0208fd5e7..153430997064d 100644
--- a/HeterogeneousCore/CUDAUtilities/BuildFile.xml
+++ b/HeterogeneousCore/CUDAUtilities/BuildFile.xml
@@ -1,5 +1,6 @@
+
diff --git a/HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h b/HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h
new file mode 100644
index 0000000000000..c9f9aff89d975
--- /dev/null
+++ b/HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h
@@ -0,0 +1,65 @@
+#ifndef HeterogeneousCore_CUDAUtilities_interface_host_noncached_unique_ptr_h
+#define HeterogeneousCore_CUDAUtilities_interface_host_noncached_unique_ptr_h
+
+#include
+
+#include
+#include
+
+namespace cudautils {
+ namespace host {
+ namespace noncached {
+ namespace impl {
+ // Additional layer of types to distinguish from host::unique_ptr
+ class HostDeleter {
+ public:
+ void operator()(void *ptr) {
+ cuda::throw_if_error(cudaFreeHost(ptr));
+ }
+ };
+ }
+
+ template
+ using unique_ptr = std::unique_ptr;
+
+ namespace impl {
+ template
+ struct make_host_unique_selector { using non_array = cudautils::host::noncached::unique_ptr; };
+ template
+ struct make_host_unique_selector { using unbounded_array = cudautils::host::noncached::unique_ptr; };
+ template
+ struct make_host_unique_selector { struct bounded_array {}; };
+ }
+ }
+ }
+
+ /**
+ * The difference wrt. CUDAService::make_host_unique is that these
+ * do not cache, so they should not be called per-event.
+ */
+ template
+ typename host::noncached::impl::make_host_unique_selector::non_array
+ make_host_noncached_unique(unsigned int flags = cudaHostAllocDefault) {
+ static_assert(std::is_trivially_constructible::value, "Allocating with non-trivial constructor on the pinned host memory is not supported");
+ void *mem;
+ cuda::throw_if_error(cudaHostAlloc(&mem, sizeof(T), flags));
+ return typename cudautils::host::noncached::impl::make_host_unique_selector::non_array(reinterpret_cast(mem));
+ }
+
+ template
+ typename host::noncached::impl::make_host_unique_selector::unbounded_array
+ make_host_noncached_unique(size_t n, unsigned int flags = cudaHostAllocDefault) {
+ using element_type = typename std::remove_extent::type;
+ static_assert(std::is_trivially_constructible::value, "Allocating with non-trivial constructor on the pinned host memory is not supported");
+ void *mem;
+ cuda::throw_if_error(cudaHostAlloc(&mem, n*sizeof(element_type), flags));
+ return typename cudautils::host::noncached::impl::make_host_unique_selector::unbounded_array(reinterpret_cast(mem));
+ }
+
+ template
+ typename cudautils::host::noncached::impl::make_host_unique_selector::bounded_array
+ make_host_noncached_unique(Args&&...) = delete;
+}
+
+#endif
+
diff --git a/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml b/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml
index 0f48f95a8e4ad..0d06484b72a5e 100644
--- a/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml
+++ b/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml
@@ -68,7 +68,7 @@
-
+
diff --git a/HeterogeneousCore/CUDAUtilities/test/host_noncached_unique_ptr_t.cpp b/HeterogeneousCore/CUDAUtilities/test/host_noncached_unique_ptr_t.cpp
new file mode 100644
index 0000000000000..ae9e3c9a3849d
--- /dev/null
+++ b/HeterogeneousCore/CUDAUtilities/test/host_noncached_unique_ptr_t.cpp
@@ -0,0 +1,22 @@
+#include "catch.hpp"
+
+#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
+
+TEST_CASE("host_noncached_unique_ptr", "[cudaMemTools]") {
+ exitSansCUDADevices();
+
+ SECTION("Single element") {
+ auto ptr1 = cudautils::make_host_noncached_unique();
+ REQUIRE(ptr1 != nullptr);
+ auto ptr2 = cudautils::make_host_noncached_unique(cudaHostAllocPortable | cudaHostAllocWriteCombined);
+ REQUIRE(ptr2 != nullptr);
+ }
+
+ SECTION("Multiple elements") {
+ auto ptr1 = cudautils::make_host_noncached_unique(10);
+ REQUIRE(ptr1 != nullptr);
+ auto ptr2 = cudautils::make_host_noncached_unique(10, cudaHostAllocPortable | cudaHostAllocWriteCombined);
+ REQUIRE(ptr2 != nullptr);
+ }
+}
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc
index b23faad9e78d3..f2dacd5fbc415 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc
@@ -24,7 +24,9 @@
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h"
#include "RecoTracker/Record/interface/CkfComponentsRecord.h"
@@ -62,6 +64,7 @@ class SiPixelRawToClusterCUDA: public edm::stream::EDProducer
std::unique_ptr regions_;
pixelgpudetails::SiPixelRawToClusterGPUKernel gpuAlgo_;
+ std::unique_ptr wordFedAppender_;
PixelDataFormatter::Errors errors_;
const bool includeErrors_;
@@ -88,6 +91,11 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi
}
if(usePilotBlade_) edm::LogInfo("SiPixelRawToCluster") << " Use pilot blade data (FED 40)";
+
+ edm::Service cs;
+ if(cs->enabled()) {
+ wordFedAppender_ = std::make_unique();
+ }
}
void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
@@ -161,7 +169,6 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::Event
// In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData()
ErrorChecker errorcheck;
- auto wordFedAppender = pixelgpudetails::SiPixelRawToClusterGPUKernel::WordFedAppender(ctx.stream());
for(int fedId: fedIds_) {
if (!usePilotBlade_ && (fedId==40) ) continue; // skip pilot blade data
if (regions_ && !regions_->mayUnpackFED(fedId)) continue;
@@ -209,13 +216,13 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::Event
const cms_uint32_t * ew = (const cms_uint32_t *)(trailer);
assert(0 == (ew-bw)%2);
- wordFedAppender.initializeWordFed(fedId, wordCounterGPU, bw, (ew-bw));
+ wordFedAppender_->initializeWordFed(fedId, wordCounterGPU, bw, (ew-bw));
wordCounterGPU+=(ew-bw);
} // end of for loop
gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack, gpuGains,
- wordFedAppender,
+ *wordFedAppender_,
std::move(errors_),
wordCounterGPU, fedCounter,
useQuality_, includeErrors_,
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
index 3d4e377eb8221..8fdb2ed8c90d5 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
@@ -45,10 +45,9 @@ namespace pixelgpudetails {
// number of words for all the FEDs
constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD;
- SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender(cuda::stream_t<>& cudaStream) {
- edm::Service cs;
- word_ = cs->make_host_unique(MAX_FED_WORDS, cudaStream);
- fedId_ = cs->make_host_unique(MAX_FED_WORDS, cudaStream);
+ SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender() {
+ word_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined);
+ fedId_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined);
}
void SiPixelRawToClusterGPUKernel::WordFedAppender::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) {
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
index a0f89dc241c64..0d2b6a8c7fc65 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
@@ -11,6 +11,7 @@
#include "FWCore/Utilities/interface/typedefs.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
#include "DataFormats/SiPixelDigi/interface/PixelErrors.h"
struct SiPixelFedCablingMapGPU;
@@ -159,7 +160,7 @@ namespace pixelgpudetails {
public:
class WordFedAppender {
public:
- WordFedAppender(cuda::stream_t<>& cudaStream);
+ WordFedAppender();
~WordFedAppender() = default;
void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length);
@@ -168,8 +169,8 @@ namespace pixelgpudetails {
const unsigned char *fedId() const { return fedId_.get(); }
private:
- cudautils::host::unique_ptr word_;
- cudautils::host::unique_ptr fedId_;
+ cudautils::host::noncached::unique_ptr word_;
+ cudautils::host::noncached::unique_ptr fedId_;
};
SiPixelRawToClusterGPUKernel() = default;
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml
index a8af0c8a7c4f9..27ee3af86e102 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml
@@ -1,3 +1,4 @@
+
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu
index 80be13dedd26b..6ac70fce95a88 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu
@@ -36,7 +36,6 @@ namespace pixelgpudetails {
constexpr auto MAX_HITS = siPixelRecHitsHeterogeneousProduct::maxHits();
- cudaCheck(cudaMalloc((void **) & gpu_.bs_d, 3 * sizeof(float)));
cudaCheck(cudaMalloc((void **) & gpu_.hitsLayerStart_d, 11 * sizeof(uint32_t)));
// Coalesce all 32bit and 16bit arrays to two big blobs
@@ -111,7 +110,6 @@ namespace pixelgpudetails {
#endif
}
PixelRecHitGPUKernel::~PixelRecHitGPUKernel() {
- cudaCheck(cudaFree(gpu_.bs_d));
cudaCheck(cudaFree(gpu_.hitsLayerStart_d));
cudaCheck(cudaFree(gpu_.owner_32bit_));
cudaCheck(cudaFree(gpu_.owner_16bit_));
@@ -131,11 +129,10 @@ namespace pixelgpudetails {
void PixelRecHitGPUKernel::makeHitsAsync(SiPixelDigisCUDA const& digis_d,
SiPixelClustersCUDA const& clusters_d,
- float const * bs,
+ BeamSpotCUDA const& bs_d,
pixelCPEforGPU::ParamsOnGPU const * cpeParams,
bool transferToCPU,
cuda::stream_t<>& stream) {
- cudaCheck(cudaMemcpyAsync(gpu_.bs_d, bs, 3 * sizeof(float), cudaMemcpyDefault, stream.id()));
gpu_.hitsModuleStart_d = clusters_d.clusModuleStart();
gpu_.cpeParams = cpeParams; // copy it for use in clients
cudaCheck(cudaMemcpyAsync(gpu_d, &gpu_, sizeof(HitsOnGPU), cudaMemcpyDefault, stream.id()));
@@ -148,7 +145,7 @@ namespace pixelgpudetails {
#endif
gpuPixelRecHits::getHits<<>>(
cpeParams,
- gpu_.bs_d,
+ bs_d.data(),
digis_d.moduleInd(),
digis_d.xx(), digis_d.yy(), digis_d.adc(),
clusters_d.moduleStart(),
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h
index 49164d24ab335..8e5599c239789 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h
@@ -1,6 +1,7 @@
#ifndef RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHits_h
#define RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHits_h
+#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusteringConstants.h"
@@ -34,7 +35,7 @@ namespace pixelgpudetails {
void makeHitsAsync(SiPixelDigisCUDA const& digis_d,
SiPixelClustersCUDA const& clusters_d,
- float const * bs,
+ BeamSpotCUDA const& bs_d,
pixelCPEforGPU::ParamsOnGPU const * cpeParams,
bool transferToCPU,
cuda::stream_t<>& stream);
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitHeterogeneous.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitHeterogeneous.cc
index e26ec84ddf5b5..be4ca76f23ad5 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitHeterogeneous.cc
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitHeterogeneous.cc
@@ -1,9 +1,9 @@
#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "DataFormats/Common/interface/DetSetVectorNew.h"
#include "DataFormats/Common/interface/Handle.h"
-#include "DataFormats/BeamSpot/interface/BeamSpot.h"
#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h"
#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h"
#include "FWCore/Framework/interface/ESHandle.h"
@@ -64,8 +64,8 @@ class SiPixelRecHitHeterogeneous: public HeterogeneousEDProducer& inputhandle, SiPixelRecHitCollectionNew &output, const pixelgpudetails::HitsOnCPU& hoc) const;
- edm::EDGetTokenT tBeamSpot;
// The mess with inputs will be cleaned up when migrating to the new framework
+ edm::EDGetTokenT> tBeamSpot;
edm::EDGetTokenT> token_;
edm::EDGetTokenT> tokenDigi_;
edm::EDGetTokenT clusterToken_;
@@ -82,7 +82,7 @@ class SiPixelRecHitHeterogeneous: public HeterogeneousEDProducer(iConfig.getParameter("beamSpot"))),
+ tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))),
token_(consumes>(iConfig.getParameter("heterogeneousSrc"))),
tokenDigi_(consumes>(iConfig.getParameter("heterogeneousSrc"))),
cpeName_(iConfig.getParameter("CPE"))
@@ -100,7 +100,7 @@ SiPixelRecHitHeterogeneous::SiPixelRecHitHeterogeneous(const edm::ParameterSet&
void SiPixelRecHitHeterogeneous::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
- desc.add("beamSpot", edm::InputTag("offlineBeamSpot"));
+ desc.add("beamSpot", edm::InputTag("offlineBeamSpotCUDA"));
desc.add("heterogeneousSrc", edm::InputTag("siPixelClustersCUDAPreSplitting"));
desc.add("src", edm::InputTag("siPixelClustersPreSplitting"));
desc.add("CPE", "PixelCPEFast");
@@ -183,23 +183,22 @@ void SiPixelRecHitHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEvent& i
iEvent.getByToken(tokenDigi_, hdigis);
auto const& digis = ctx.get(*hdigis);
+ edm::Handle> hbs;
+ iEvent.getByToken(tBeamSpot, hbs);
+ auto const& bs = ctx.get(*hbs);
+
// We're processing in a stream given by base class, so need to
// synchronize explicitly (implementation is from
// CUDAScopedContext). In practice these should not be needed
// (because of synchronizations upstream), but let's play generic.
- if(not hclusters->isAvailable() && hclusters->event()->has_occurred()) {
+ if(not hclusters->isAvailable() and not hclusters->event()->has_occurred()) {
cudaCheck(cudaStreamWaitEvent(cudaStream.id(), hclusters->event()->id(), 0));
}
- if(not hdigis->isAvailable() && hdigis->event()->has_occurred()) {
+ if(not hdigis->isAvailable() and not hdigis->event()->has_occurred()) {
cudaCheck(cudaStreamWaitEvent(cudaStream.id(), hclusters->event()->id(), 0));
}
-
- edm::Handle bsHandle;
- iEvent.getByToken( tBeamSpot, bsHandle);
- float bs[3] = {0.f};
- if(bsHandle.isValid()) {
- const auto & bsh = *bsHandle;
- bs[0]=bsh.x0(); bs[1]=bsh.y0(); bs[2]=bsh.z0();
+ if(not hbs->isAvailable() and not hbs->event()->has_occurred()) {
+ cudaCheck(cudaStreamWaitEvent(cudaStream.id(), hbs->event()->id(), 0));
}
gpuAlgo_->makeHitsAsync(digis, clusters, bs, fcpe->getGPUProductAsync(cudaStream), enableTransfer_, cudaStream);
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h
index 6864a046bf1dc..cbd354e71143e 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h
@@ -5,6 +5,7 @@
#include
#include
+#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
#include "DataFormats/Math/interface/approx_atan2.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
@@ -15,7 +16,7 @@ namespace gpuPixelRecHits {
__global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams,
- float const * __restrict__ bs,
+ BeamSpotCUDA::Data const * __restrict__ bs,
uint16_t const * __restrict__ id,
uint16_t const * __restrict__ x,
uint16_t const * __restrict__ y,
@@ -143,9 +144,9 @@ namespace gpuPixelRecHits {
// to global and compute phi...
cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
// here correct for the beamspot...
- xg[h]-=bs[0];
- yg[h]-=bs[1];
- zg[h]-=bs[2];
+ xg[h]-=bs->x;
+ yg[h]-=bs->y;
+ zg[h]-=bs->z;
rg[h] = std::sqrt(xg[h]*xg[h]+yg[h]*yg[h]);
iph[h] = unsafe_atan2s<7>(yg[h],xg[h]);
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h b/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h
index ea6eaf8458dde..5bed073c01c3a 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h
@@ -22,7 +22,6 @@ namespace siPixelRecHitsHeterogeneousProduct {
struct HitsOnGPU{
pixelCPEforGPU::ParamsOnGPU const * cpeParams = nullptr; // forwarded from setup, NOT owned
- float * bs_d;
const uint32_t * hitsModuleStart_d; // forwarded from clusters
uint32_t * hitsLayerStart_d;
int32_t * charge_d;
diff --git a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc
new file mode 100644
index 0000000000000..9b15f08b8dfc2
--- /dev/null
+++ b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc
@@ -0,0 +1,88 @@
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
+#include "DataFormats/BeamSpot/interface/BeamSpot.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+#include "FWCore/Framework/interface/global/EDProducer.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
+
+#include
+
+namespace {
+ class BSHost {
+ public:
+ BSHost():
+ bs{cudautils::make_host_noncached_unique(cudaHostAllocWriteCombined)}
+ {}
+ BeamSpotCUDA::Data *get() { return bs.get(); }
+
+ private:
+ cudautils::host::noncached::unique_ptr bs;
+ };
+}
+
+class BeamSpotToCUDA: public edm::global::EDProducer > {
+public:
+ explicit BeamSpotToCUDA(const edm::ParameterSet& iConfig);
+ ~BeamSpotToCUDA() override = default;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+ std::unique_ptr beginStream(edm::StreamID) const {
+ edm::Service cs;
+ if(cs->enabled()) {
+ return std::make_unique();
+ }
+ else {
+ return nullptr;
+ }
+ }
+ void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override;
+
+private:
+ edm::EDGetTokenT bsGetToken_;
+ edm::EDPutTokenT> bsPutToken_;
+};
+
+BeamSpotToCUDA::BeamSpotToCUDA(const edm::ParameterSet& iConfig):
+ bsGetToken_{consumes(iConfig.getParameter("src"))},
+ bsPutToken_{produces>()}
+{}
+
+void BeamSpotToCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+ desc.add("src", edm::InputTag("offlineBeamSpot"));
+ descriptions.addWithDefaultLabel(desc);
+}
+
+void BeamSpotToCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const {
+ CUDAScopedContext ctx{streamID};
+
+ const reco::BeamSpot& bs = iEvent.get(bsGetToken_);
+
+ BeamSpotCUDA::Data *bsHost = streamCache(streamID)->get();
+
+ bsHost->x = bs.x0();
+ bsHost->y = bs.y0();
+ bsHost->z = bs.z0();
+
+ bsHost->sigmaZ = bs.sigmaZ();
+ bsHost->beamWidthX = bs.BeamWidthX();
+ bsHost->beamWidthY = bs.BeamWidthY();
+ bsHost->dxdz = bs.dxdz();
+ bsHost->dydz = bs.dydz();
+ bsHost->emittanceX = bs.emittanceX();
+ bsHost->emittanceY = bs.emittanceY();
+ bsHost->betaStar = bs.betaStar();
+
+ ctx.emplace(iEvent, bsPutToken_, bsHost, ctx.stream());
+}
+
+DEFINE_FWK_MODULE(BeamSpotToCUDA);
+
diff --git a/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml b/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml
index bb4502c33b332..e3ce85df9a81a 100644
--- a/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml
+++ b/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml
@@ -14,6 +14,14 @@
+
+
+
+
+
+
+
+
diff --git a/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py b/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py
index deb62255199e5..9654f9ab410b8 100644
--- a/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py
+++ b/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py
@@ -1,4 +1,11 @@
import FWCore.ParameterSet.Config as cms
from RecoVertex.BeamSpotProducer.BeamSpot_cfi import *
+from RecoVertex.BeamSpotProducer.beamSpotToCUDA_cfi import beamSpotToCUDA as _beamSpotToCUDA
+offlineBeamSpotCUDA = _beamSpotToCUDA.clone()
+
+offlineBeamSpotTask = cms.Task(
+ offlineBeamSpot,
+ offlineBeamSpotCUDA
+)