diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index 22d9ff9d103ba..ca8a75d178b6c 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -49,11 +49,11 @@ class SiPixelClustersCUDA { friend SiPixelClustersCUDA; private: - uint32_t const *moduleStart_ = nullptr; - int32_t const *clus_ = nullptr; - uint32_t const *clusInModule_ = nullptr; - uint32_t const *moduleId_ = nullptr; - uint32_t const *clusModuleStart_ = nullptr; + uint32_t const *moduleStart_; + int32_t const *clus_; + uint32_t const *clusInModule_; + uint32_t const *moduleId_; + uint32_t const *clusModuleStart_; }; DeviceConstView *view() const { return view_d.get(); } diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 25e8b54a743c2..66ca680effd19 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -46,10 +46,10 @@ class SiPixelDigisCUDA { friend class SiPixelDigisCUDA; private: - uint16_t const *xx_ = nullptr; - uint16_t const *yy_ = nullptr; - uint16_t const *adc_ = nullptr; - uint16_t const *moduleInd_ = nullptr; + uint16_t const *xx_; + uint16_t const *yy_; + uint16_t const *adc_; + uint16_t const *moduleInd_; }; const DeviceConstView *view() const { return view_d.get(); } diff --git a/HeterogeneousCore/CUDAServices/interface/CUDAService.h b/HeterogeneousCore/CUDAServices/interface/CUDAService.h index feeb557042548..9555321f5153a 100644 --- a/HeterogeneousCore/CUDAServices/interface/CUDAService.h +++ b/HeterogeneousCore/CUDAServices/interface/CUDAService.h @@ -72,6 +72,7 @@ class CUDAService { template typename cudaserviceimpl::make_device_unique_selector::non_array make_device_unique(cuda::stream_t<>& stream) { + static_assert(std::is_trivially_constructible::value, "Allocating with non-trivial constructor on the device memory is not supported"); int dev = getCurrentDevice(); void *mem = allocate_device(dev, sizeof(T), stream); return typename cudaserviceimpl::make_device_unique_selector::non_array(reinterpret_cast(mem), @@ -83,8 +84,9 @@ class CUDAService { template typename cudaserviceimpl::make_device_unique_selector::unbounded_array make_device_unique(size_t n, cuda::stream_t<>& stream) { - int dev = getCurrentDevice(); using element_type = typename std::remove_extent::type; + static_assert(std::is_trivially_constructible::value, "Allocating with non-trivial constructor on the device memory is not supported"); + int dev = getCurrentDevice(); void *mem = allocate_device(dev, n*sizeof(element_type), stream); return typename cudaserviceimpl::make_device_unique_selector::unbounded_array(reinterpret_cast(mem), [this, dev](void *ptr) { @@ -100,6 +102,7 @@ class CUDAService { template typename cudaserviceimpl::make_host_unique_selector::non_array make_host_unique(cuda::stream_t<>& stream) { + static_assert(std::is_trivially_constructible::value, "Allocating with non-trivial constructor on the pinned host memory is not supported"); void *mem = allocate_host(sizeof(T), stream); return typename cudaserviceimpl::make_host_unique_selector::non_array(reinterpret_cast(mem), [this](void *ptr) { @@ -111,6 +114,7 @@ class CUDAService { typename cudaserviceimpl::make_host_unique_selector::unbounded_array make_host_unique(size_t n, cuda::stream_t<>& stream) { using element_type = typename std::remove_extent::type; + static_assert(std::is_trivially_constructible::value, "Allocating with non-trivial constructor on the pinned host memory is not supported"); void *mem = allocate_host(n*sizeof(element_type), stream); return typename cudaserviceimpl::make_host_unique_selector::unbounded_array(reinterpret_cast(mem), [this](void *ptr) { diff --git a/HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h b/HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h index 47592fd2063d6..8652aa97a709f 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h +++ b/HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h @@ -9,16 +9,15 @@ #include namespace GPU { - template struct SimpleVector { - // Constructors - constexpr SimpleVector(int capacity, T *data) // ownership of m_data stays within the caller - : m_size(0), m_capacity(capacity), m_data(data) - { - static_assert(std::is_trivially_destructible::value); - } + constexpr SimpleVector() = default; - constexpr SimpleVector() : SimpleVector(0) {} + // ownership of m_data stays within the caller + constexpr void construct(int capacity, T *data) { + m_size = 0; + m_capacity = capacity; + m_data = data; + } inline constexpr int push_back_unsafe(const T &element) { auto previousSize = m_size; @@ -98,6 +97,22 @@ template struct SimpleVector { T *m_data; }; + // ownership of m_data stays within the caller + template + SimpleVector make_SimpleVector(int capacity, T *data) { + SimpleVector ret; + ret.construct(capacity, data); + return ret; + } + + // ownership of m_data stays within the caller + template + SimpleVector *make_SimpleVector(SimpleVector *mem, int capacity, T *data) { + auto ret = new(mem) SimpleVector(); + ret->construct(capacity, data); + return ret; + } + } // namespace GPU #endif // HeterogeneousCore_CUDAUtilities_interface_GPUSimpleVector_h diff --git a/HeterogeneousCore/CUDAUtilities/test/test_GPUSimpleVector.cu b/HeterogeneousCore/CUDAUtilities/test/test_GPUSimpleVector.cu index fe65ff35864d0..d98710c00a324 100644 --- a/HeterogeneousCore/CUDAUtilities/test/test_GPUSimpleVector.cu +++ b/HeterogeneousCore/CUDAUtilities/test/test_GPUSimpleVector.cu @@ -36,10 +36,10 @@ int main() { cudaCheck(cudaMallocHost(&data_ptr, maxN * sizeof(int))); cudaCheck(cudaMalloc(&d_data_ptr, maxN * sizeof(int))); - auto v = new (obj_ptr) GPU::SimpleVector(maxN, data_ptr); + auto v = GPU::make_SimpleVector(obj_ptr, maxN, data_ptr); cudaCheck(cudaMallocHost(&tmp_obj_ptr, sizeof(GPU::SimpleVector))); - new (tmp_obj_ptr) GPU::SimpleVector(maxN, d_data_ptr); + GPU::make_SimpleVector(tmp_obj_ptr, maxN, d_data_ptr); assert(tmp_obj_ptr->size() == 0); assert(tmp_obj_ptr->capacity() == static_cast(maxN)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index d39662f5ee955..c3f4292d4c4c4 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -432,7 +432,7 @@ namespace pixelgpudetails { if (includeErrors and skipROC) { uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); - err->emplace_back(rID, ww, errorType, fedId); + err->push_back(pixelgpudetails::error_obj{rID, ww, errorType, fedId}); continue; } @@ -476,7 +476,7 @@ namespace pixelgpudetails { if (includeErrors) { if (not rocRowColIsValid(row, col)) { uint32_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays - err->emplace_back(rawId, ww, error, fedId); + err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId}); if(debug) printf("BPIX1 Error status: %i\n", error); continue; } @@ -491,7 +491,7 @@ namespace pixelgpudetails { localPix.col = col; if (includeErrors and not dcolIsValid(dcol, pxid)) { uint32_t error = conversionError(fedId, 3, debug); - err->emplace_back(rawId, ww, error, fedId); + err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId}); if(debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); continue; } @@ -541,7 +541,7 @@ namespace pixelgpudetails { auto data_d = cs->make_device_unique(MAX_FED_WORDS, stream); cudaCheck(cudaMemsetAsync(data_d.get(), 0x00, MAX_ERROR_SIZE, stream.id())); auto error_h_tmp = cs->make_host_unique>(stream); - new (error_h_tmp.get()) GPU::SimpleVector(MAX_FED_WORDS, data_d.get()); // should make_host_unique() call the constructor as well? note that even if std::make_unique does that, we can't do that in make_device_unique + GPU::make_SimpleVector(error_h_tmp.get(), MAX_FED_WORDS, data_d.get()); assert(error_h_tmp->size() == 0); assert(error_h_tmp->capacity() == static_cast(MAX_FED_WORDS)); @@ -579,7 +579,7 @@ namespace pixelgpudetails { if (includeErrors) { digis_clusters_h.data = cs->make_host_unique(MAX_FED_WORDS, stream); digis_clusters_h.error = cs->make_host_unique>(stream); - new (digis_clusters_h.error.get()) GPU::SimpleVector(MAX_FED_WORDS, digis_clusters_h.data.get()); + GPU::make_SimpleVector(digis_clusters_h.error.get(), MAX_FED_WORDS, digis_clusters_h.data.get()); assert(digis_clusters_h.error->size() == 0); assert(digis_clusters_h.error->capacity() == static_cast(MAX_FED_WORDS)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h b/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h index 0cab299582410..3b81e4a16f017 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h @@ -15,14 +15,6 @@ namespace siPixelRawToClusterHeterogeneousProduct { uint32_t word; unsigned char errorType; unsigned char fedId; - - constexpr - error_obj(uint32_t a, uint32_t b, unsigned char c, unsigned char d): - rawId(a), - word(b), - errorType(c), - fedId(d) - { } }; // FIXME split in two diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cc index 0b23943bcacdb..ea905e013d335 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cc @@ -303,11 +303,11 @@ void CAHitQuadrupletGeneratorGPU::allocateOnGPU() for (int i = 0; i < maxNumberOfRegions_; ++i) { cudaCheck(cudaMallocHost(&h_foundNtupletsData_[i], sizeof(Quadruplet) * maxNumberOfQuadruplets_)); cudaCheck(cudaMallocHost(&h_foundNtupletsVec_[i], sizeof(GPU::SimpleVector))); - new(h_foundNtupletsVec_[i]) GPU::SimpleVector(maxNumberOfQuadruplets_, h_foundNtupletsData_[i]); + GPU::make_SimpleVector(h_foundNtupletsVec_[i], maxNumberOfQuadruplets_, h_foundNtupletsData_[i]); cudaCheck(cudaMalloc(&d_foundNtupletsData_[i], sizeof(Quadruplet) * maxNumberOfQuadruplets_)); cudaCheck(cudaMemset(d_foundNtupletsData_[i], 0x00, sizeof(Quadruplet) * maxNumberOfQuadruplets_)); cudaCheck(cudaMalloc(&d_foundNtupletsVec_[i], sizeof(GPU::SimpleVector))); - GPU::SimpleVector tmp_foundNtuplets(maxNumberOfQuadruplets_, d_foundNtupletsData_[i]); + auto tmp_foundNtuplets = GPU::make_SimpleVector(maxNumberOfQuadruplets_, d_foundNtupletsData_[i]); cudaCheck(cudaMemcpy(d_foundNtupletsVec_[i], & tmp_foundNtuplets, sizeof(GPU::SimpleVector), cudaMemcpyDefault)); }