Skip to content

Commit

Permalink
Require allocated type to have only a trivial constructor for make_device_unique and make_host_unique (#204)
Browse files Browse the repository at this point in the history
  • Loading branch information
makortel authored and fwyzard committed Dec 7, 2018
1 parent a67b49d commit 13a99e3
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 35 deletions.
10 changes: 5 additions & 5 deletions CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ class SiPixelClustersCUDA {
friend SiPixelClustersCUDA;

private:
uint32_t const *moduleStart_ = nullptr;
int32_t const *clus_ = nullptr;
uint32_t const *clusInModule_ = nullptr;
uint32_t const *moduleId_ = nullptr;
uint32_t const *clusModuleStart_ = nullptr;
uint32_t const *moduleStart_;
int32_t const *clus_;
uint32_t const *clusInModule_;
uint32_t const *moduleId_;
uint32_t const *clusModuleStart_;
};

DeviceConstView *view() const { return view_d.get(); }
Expand Down
8 changes: 4 additions & 4 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ class SiPixelDigisCUDA {
friend class SiPixelDigisCUDA;

private:
uint16_t const *xx_ = nullptr;
uint16_t const *yy_ = nullptr;
uint16_t const *adc_ = nullptr;
uint16_t const *moduleInd_ = nullptr;
uint16_t const *xx_;
uint16_t const *yy_;
uint16_t const *adc_;
uint16_t const *moduleInd_;
};

const DeviceConstView *view() const { return view_d.get(); }
Expand Down
6 changes: 5 additions & 1 deletion HeterogeneousCore/CUDAServices/interface/CUDAService.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class CUDAService {
template <typename T>
typename cudaserviceimpl::make_device_unique_selector<T>::non_array
make_device_unique(cuda::stream_t<>& stream) {
static_assert(std::is_trivially_constructible<T>::value, "Allocating with non-trivial constructor on the device memory is not supported");
int dev = getCurrentDevice();
void *mem = allocate_device(dev, sizeof(T), stream);
return typename cudaserviceimpl::make_device_unique_selector<T>::non_array(reinterpret_cast<T *>(mem),
Expand All @@ -83,8 +84,9 @@ class CUDAService {
template <typename T>
typename cudaserviceimpl::make_device_unique_selector<T>::unbounded_array
make_device_unique(size_t n, cuda::stream_t<>& stream) {
int dev = getCurrentDevice();
using element_type = typename std::remove_extent<T>::type;
static_assert(std::is_trivially_constructible<element_type>::value, "Allocating with non-trivial constructor on the device memory is not supported");
int dev = getCurrentDevice();
void *mem = allocate_device(dev, n*sizeof(element_type), stream);
return typename cudaserviceimpl::make_device_unique_selector<T>::unbounded_array(reinterpret_cast<element_type *>(mem),
[this, dev](void *ptr) {
Expand All @@ -100,6 +102,7 @@ class CUDAService {
template <typename T>
typename cudaserviceimpl::make_host_unique_selector<T>::non_array
make_host_unique(cuda::stream_t<>& stream) {
static_assert(std::is_trivially_constructible<T>::value, "Allocating with non-trivial constructor on the pinned host memory is not supported");
void *mem = allocate_host(sizeof(T), stream);
return typename cudaserviceimpl::make_host_unique_selector<T>::non_array(reinterpret_cast<T *>(mem),
[this](void *ptr) {
Expand All @@ -111,6 +114,7 @@ class CUDAService {
typename cudaserviceimpl::make_host_unique_selector<T>::unbounded_array
make_host_unique(size_t n, cuda::stream_t<>& stream) {
using element_type = typename std::remove_extent<T>::type;
static_assert(std::is_trivially_constructible<element_type>::value, "Allocating with non-trivial constructor on the pinned host memory is not supported");
void *mem = allocate_host(n*sizeof(element_type), stream);
return typename cudaserviceimpl::make_host_unique_selector<T>::unbounded_array(reinterpret_cast<element_type *>(mem),
[this](void *ptr) {
Expand Down
31 changes: 23 additions & 8 deletions HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,15 @@
#include <cuda.h>

namespace GPU {

template <class T> struct SimpleVector {
// Constructors
constexpr SimpleVector(int capacity, T *data) // ownership of m_data stays within the caller
: m_size(0), m_capacity(capacity), m_data(data)
{
static_assert(std::is_trivially_destructible<T>::value);
}
constexpr SimpleVector() = default;

constexpr SimpleVector() : SimpleVector(0) {}
// ownership of m_data stays within the caller
constexpr void construct(int capacity, T *data) {
m_size = 0;
m_capacity = capacity;
m_data = data;
}

inline constexpr int push_back_unsafe(const T &element) {
auto previousSize = m_size;
Expand Down Expand Up @@ -98,6 +97,22 @@ template <class T> struct SimpleVector {
T *m_data;
};

// Build a SimpleVector by value over a caller-supplied buffer.
// Ownership of `data` stays with the caller; the vector only references it.
template <class T>
SimpleVector<T> make_SimpleVector(int capacity, T *data) {
  SimpleVector<T> vec;
  vec.construct(capacity, data);
  return vec;
}

// Placement-construct a SimpleVector into caller-provided storage `mem`
// (e.g. pinned host memory), then initialize it over `data`.
// Ownership of both `mem` and `data` stays with the caller.
template <class T>
SimpleVector<T> *make_SimpleVector(SimpleVector<T> *mem, int capacity, T *data) {
  SimpleVector<T> *vec = new (mem) SimpleVector<T>();
  vec->construct(capacity, data);
  return vec;
}

} // namespace GPU

#endif // HeterogeneousCore_CUDAUtilities_interface_GPUSimpleVector_h
4 changes: 2 additions & 2 deletions HeterogeneousCore/CUDAUtilities/test/test_GPUSimpleVector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ int main() {
cudaCheck(cudaMallocHost(&data_ptr, maxN * sizeof(int)));
cudaCheck(cudaMalloc(&d_data_ptr, maxN * sizeof(int)));

auto v = new (obj_ptr) GPU::SimpleVector<int>(maxN, data_ptr);
auto v = GPU::make_SimpleVector(obj_ptr, maxN, data_ptr);

cudaCheck(cudaMallocHost(&tmp_obj_ptr, sizeof(GPU::SimpleVector<int>)));
new (tmp_obj_ptr) GPU::SimpleVector<int>(maxN, d_data_ptr);
GPU::make_SimpleVector(tmp_obj_ptr, maxN, d_data_ptr);
assert(tmp_obj_ptr->size() == 0);
assert(tmp_obj_ptr->capacity() == static_cast<int>(maxN));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ namespace pixelgpudetails {
if (includeErrors and skipROC)
{
uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug);
err->emplace_back(rID, ww, errorType, fedId);
err->push_back(pixelgpudetails::error_obj{rID, ww, errorType, fedId});
continue;
}

Expand Down Expand Up @@ -476,7 +476,7 @@ namespace pixelgpudetails {
if (includeErrors) {
if (not rocRowColIsValid(row, col)) {
uint32_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays
err->emplace_back(rawId, ww, error, fedId);
err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId});
if(debug) printf("BPIX1 Error status: %i\n", error);
continue;
}
Expand All @@ -491,7 +491,7 @@ namespace pixelgpudetails {
localPix.col = col;
if (includeErrors and not dcolIsValid(dcol, pxid)) {
uint32_t error = conversionError(fedId, 3, debug);
err->emplace_back(rawId, ww, error, fedId);
err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId});
if(debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
continue;
}
Expand Down Expand Up @@ -541,7 +541,7 @@ namespace pixelgpudetails {
auto data_d = cs->make_device_unique<pixelgpudetails::error_obj[]>(MAX_FED_WORDS, stream);
cudaCheck(cudaMemsetAsync(data_d.get(), 0x00, MAX_ERROR_SIZE, stream.id()));
auto error_h_tmp = cs->make_host_unique<GPU::SimpleVector<pixelgpudetails::error_obj>>(stream);
new (error_h_tmp.get()) GPU::SimpleVector<pixelgpudetails::error_obj>(MAX_FED_WORDS, data_d.get()); // should make_host_unique() call the constructor as well? note that even if std::make_unique does that, we can't do that in make_device_unique
GPU::make_SimpleVector(error_h_tmp.get(), MAX_FED_WORDS, data_d.get());
assert(error_h_tmp->size() == 0);
assert(error_h_tmp->capacity() == static_cast<int>(MAX_FED_WORDS));

Expand Down Expand Up @@ -579,7 +579,7 @@ namespace pixelgpudetails {
if (includeErrors) {
digis_clusters_h.data = cs->make_host_unique<pixelgpudetails::error_obj[]>(MAX_FED_WORDS, stream);
digis_clusters_h.error = cs->make_host_unique<GPU::SimpleVector<pixelgpudetails::error_obj>>(stream);
new (digis_clusters_h.error.get()) GPU::SimpleVector<pixelgpudetails::error_obj>(MAX_FED_WORDS, digis_clusters_h.data.get());
GPU::make_SimpleVector(digis_clusters_h.error.get(), MAX_FED_WORDS, digis_clusters_h.data.get());
assert(digis_clusters_h.error->size() == 0);
assert(digis_clusters_h.error->capacity() == static_cast<int>(MAX_FED_WORDS));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,6 @@ namespace siPixelRawToClusterHeterogeneousProduct {
uint32_t word;
unsigned char errorType;
unsigned char fedId;

constexpr
error_obj(uint32_t a, uint32_t b, unsigned char c, unsigned char d):
rawId(a),
word(b),
errorType(c),
fedId(d)
{ }
};

// FIXME split in two
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,11 @@ void CAHitQuadrupletGeneratorGPU::allocateOnGPU()
for (int i = 0; i < maxNumberOfRegions_; ++i) {
cudaCheck(cudaMallocHost(&h_foundNtupletsData_[i], sizeof(Quadruplet) * maxNumberOfQuadruplets_));
cudaCheck(cudaMallocHost(&h_foundNtupletsVec_[i], sizeof(GPU::SimpleVector<Quadruplet>)));
new(h_foundNtupletsVec_[i]) GPU::SimpleVector<Quadruplet>(maxNumberOfQuadruplets_, h_foundNtupletsData_[i]);
GPU::make_SimpleVector(h_foundNtupletsVec_[i], maxNumberOfQuadruplets_, h_foundNtupletsData_[i]);
cudaCheck(cudaMalloc(&d_foundNtupletsData_[i], sizeof(Quadruplet) * maxNumberOfQuadruplets_));
cudaCheck(cudaMemset(d_foundNtupletsData_[i], 0x00, sizeof(Quadruplet) * maxNumberOfQuadruplets_));
cudaCheck(cudaMalloc(&d_foundNtupletsVec_[i], sizeof(GPU::SimpleVector<Quadruplet>)));
GPU::SimpleVector<Quadruplet> tmp_foundNtuplets(maxNumberOfQuadruplets_, d_foundNtupletsData_[i]);
auto tmp_foundNtuplets = GPU::make_SimpleVector<Quadruplet>(maxNumberOfQuadruplets_, d_foundNtupletsData_[i]);
cudaCheck(cudaMemcpy(d_foundNtupletsVec_[i], & tmp_foundNtuplets, sizeof(GPU::SimpleVector<Quadruplet>), cudaMemcpyDefault));
}

Expand Down

0 comments on commit 13a99e3

Please sign in to comment.