Skip to content

Commit

Permalink
[WIP] triangle geometry is now host/device, scene next
Browse files Browse the repository at this point in the history
  • Loading branch information
freibold committed Sep 26, 2024
1 parent 0e9856f commit f1672f5
Show file tree
Hide file tree
Showing 13 changed files with 478 additions and 28 deletions.
46 changes: 45 additions & 1 deletion common/sys/alloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,38 @@ namespace embree

return ptr;
}

void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type)
{
assert(context);
assert(device);

if (size == 0)
return nullptr;

assert((align & (align-1)) == 0);
total_allocations++;

void* ptr = nullptr;
if (type == EmbreeMemoryType::SHARED) {
if (mode == EMBREE_USM_SHARED_DEVICE_READ_ONLY)
ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
else
ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
}
else if (type == EmbreeMemoryType::HOST) {
ptr = sycl::aligned_alloc_host(align,size,*context);
} else if (type == EmbreeMemoryType::DEVICE) {
ptr = sycl::aligned_alloc_device(align,size,*device,*context);
} else {
ptr = alignedMalloc(size,align);
}

if (size != 0 && ptr == nullptr)
throw std::bad_alloc();

return ptr;
}

static MutexSys g_alloc_mutex;

Expand All @@ -108,11 +140,23 @@ namespace embree
return nullptr;
}

void* alignedSYCLMalloc(size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type)
{
if (tls_context_tutorial) return alignedSYCLMalloc(tls_context_tutorial, tls_device_tutorial, size, align, mode, type);
if (tls_context_embree ) return alignedSYCLMalloc(tls_context_embree, tls_device_embree, size, align, mode, type);
return nullptr;
}

void alignedSYCLFree(sycl::context* context, void* ptr)
{
assert(context);
if (ptr) {
sycl::free(ptr,*context);
sycl::usm::alloc type = sycl::get_pointer_type(ptr, *context);
if (type == sycl::usm::alloc::host || type == sycl::usm::alloc::device || type == sycl::usm::alloc::shared)
sycl::free(ptr,*context);
else {
alignedFree(ptr);
}
}
}

Expand Down
8 changes: 8 additions & 0 deletions common/sys/alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ namespace embree
EMBREE_USM_SHARED_DEVICE_READ_WRITE = 0,
EMBREE_USM_SHARED_DEVICE_READ_ONLY = 1
};

enum EmbreeMemoryType {
HOST = 0,
DEVICE = 1,
SHARED = 2,
UNKNOWN = 3
};

/*! aligned allocation */
void* alignedMalloc(size_t size, size_t align);
Expand All @@ -61,6 +68,7 @@ namespace embree

/*! aligned allocation using SYCL USM */
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode);
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type);
void alignedSYCLFree(sycl::context* context, void* ptr);

// deleter functor to use as deleter in std unique or shared pointers that
Expand Down
4 changes: 4 additions & 0 deletions include/embree4/rtcore_geometry.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,13 @@ RTC_API void rtcSetGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type,
/* Sets a shared geometry buffer. */
RTC_API void rtcSetSharedGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot, enum RTCFormat format, const void* ptr, size_t byteOffset, size_t byteStride, size_t itemCount);

RTC_API void rtcSetSharedGeometryBufferXPU(RTCGeometry geometry, enum RTCBufferType bufferType, unsigned int slot, enum RTCFormat format, const void* ptr, const void* dptr, size_t byteOffset, size_t byteStride, size_t itemCount);

/* Creates and sets a new geometry buffer. */
RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot, enum RTCFormat format, size_t byteStride, size_t itemCount);

RTC_API void* rtcSetNewGeometryBufferXPU(RTCGeometry geometry, enum RTCBufferType bufferType, unsigned int slot, enum RTCFormat format, size_t byteStride, size_t itemCount, void** dptr);

/* Returns the pointer to the data of a buffer. */
RTC_API void* rtcGetGeometryBufferData(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot);

Expand Down
63 changes: 56 additions & 7 deletions kernels/common/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,23 @@ namespace embree
alloc();
}
}

Buffer(Device* device, EmbreeMemoryType type, size_t numBytes_in, void* ptr_in = nullptr)
: device(device), numBytes(numBytes_in)
{
device->refInc();

if (ptr_in)
{
shared = true;
ptr = (char*)ptr_in;
}
else
{
shared = false;
alloc(type);
}
}

/*! Buffer destruction */
~Buffer() {
Expand Down Expand Up @@ -75,11 +92,11 @@ namespace embree
}

/*! allocated buffer */
void alloc()
void alloc(EmbreeMemoryType type = EmbreeMemoryType::SHARED)
{
device->memoryMonitor(this->bytes(), false);
size_t b = (this->bytes()+15) & ssize_t(-16);
ptr = (char*)device->malloc(b,16);
ptr = (char*)device->malloc(b,16,type);
}

/*! frees the buffer */
Expand Down Expand Up @@ -148,16 +165,40 @@ namespace embree
buffer = buffer_in;
}

void set(const Ref<Buffer>& buffer_in, const Ref<Buffer>& dbuffer_in, size_t offset_in, size_t stride_in, size_t num_in, RTCFormat format_in)
{
if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes))
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds");

ptr_ofs = buffer_in->ptr + offset_in;
dptr_ofs = dbuffer_in->ptr + offset_in;
stride = stride_in;
num = num_in;
format = format_in;
modCounter++;
modified = true;
buffer = buffer_in;
dbuffer = dbuffer_in;
}

/*! returns pointer to the first element */
__forceinline char* getPtr() const {
return ptr_ofs;
#if defined(__SYCL_DEVICE_ONLY__)
return dptr_ofs;
#else
return ptr_ofs;
#endif
}

/*! returns pointer to the i'th element */
__forceinline char* getPtr(size_t i) const
{
assert(i<num);
return ptr_ofs + i*stride;
#if defined(__SYCL_DEVICE_ONLY__)
assert(i<num);
return dptr_ofs + i*stride;
#else
return ptr_ofs + i*stride;
#endif
}

/*! returns the number of elements of the buffer */
Expand Down Expand Up @@ -217,13 +258,15 @@ namespace embree

public:
char* ptr_ofs; //!< base pointer plus offset
char* dptr_ofs; //!< base pointer plus offset in device memory
size_t stride; //!< stride of the buffer in bytes
size_t num; //!< number of elements in the buffer
RTCFormat format; //!< format of the buffer
unsigned int modCounter; //!< version ID of this buffer
bool modified; //!< local modified data
int userData; //!< special data
Ref<Buffer> buffer; //!< reference to the parent buffer
Ref<Buffer> dbuffer; //!< reference to the parent device buffer
};

/*! A typed contiguous range of a buffer. This class does not own the buffer content. */
Expand All @@ -233,9 +276,15 @@ namespace embree
public:
typedef T value_type;

#if defined(__SYCL_DEVICE_ONLY__)
/*! access to the ith element of the buffer */
__forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(dptr_ofs + i*stride); }
__forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(dptr_ofs + i*stride); }
#else
/*! access to the ith element of the buffer */
__forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
__forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
#endif
};

template<>
Expand All @@ -250,14 +299,14 @@ namespace embree
__forceinline const Vec3fa operator [](size_t i) const
{
assert(i<num);
return Vec3fa::loadu(ptr_ofs + i*stride);
return Vec3fa::loadu(dptr_ofs + i*stride);
}

/*! writes the i'th element */
__forceinline void store(size_t i, const Vec3fa& v)
{
assert(i<num);
Vec3fa::storeu(ptr_ofs + i*stride, v);
Vec3fa::storeu(dptr_ofs + i*stride, v);
}

#else
Expand Down
8 changes: 8 additions & 0 deletions kernels/common/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,10 @@ namespace embree
return alignedMalloc(size,align);
}

void* Device::malloc(size_t size, size_t align, EmbreeMemoryType type) {
return alignedMalloc(size,align);
}

void Device::free(void* ptr) {
alignedFree(ptr);
}
Expand Down Expand Up @@ -727,6 +731,10 @@ namespace embree
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY);
}

void* DeviceGPU::malloc(size_t size, size_t align, EmbreeMemoryType type) {
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY,type);
}

void DeviceGPU::free(void* ptr) {
alignedSYCLFree(&gpu_context,ptr);
}
Expand Down
8 changes: 6 additions & 2 deletions kernels/common/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace embree

__forceinline pointer allocate( size_type n ) {
assert(device);
return (pointer) device->malloc(n*sizeof(T),alignment);
return (pointer) device->malloc(n*sizeof(T),alignment,EmbreeMemoryType::UNKNOWN);
}

__forceinline void deallocate( pointer p, size_type n ) {
Expand Down Expand Up @@ -117,9 +117,12 @@ namespace embree
/*! leave device by setting up some global state */
virtual void leave() {}

/*! buffer allocation */
/*! buffer allocation - using USM shared */
virtual void* malloc(size_t size, size_t align);

/*! buffer allocation */
virtual void* malloc(size_t size, size_t align, EmbreeMemoryType type);

/*! buffer deallocation */
virtual void free(void* ptr);

Expand Down Expand Up @@ -171,6 +174,7 @@ namespace embree
virtual void enter() override;
virtual void leave() override;
virtual void* malloc(size_t size, size_t align) override;
virtual void* malloc(size_t size, size_t align, EmbreeMemoryType type) override;
virtual void free(void* ptr) override;

/* set SYCL device */
Expand Down
9 changes: 8 additions & 1 deletion kernels/common/geometry.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ namespace embree
/*! Base class all geometries are derived from */
class Geometry : public RefCount
{
ALIGNED_CLASS_USM_(16);
ALIGNED_CLASS_(16);

friend class Scene;
public:
Expand Down Expand Up @@ -468,6 +468,11 @@ namespace embree
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}

/*! Sets specified buffer. */
virtual void setBuffer(RTCBufferType bufferType, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, const Ref<Buffer>& dbuffer, size_t offset, size_t stride, unsigned int num) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}

/*! Gets specified buffer. */
virtual void* getBuffer(RTCBufferType type, unsigned int slot) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
Expand Down Expand Up @@ -637,6 +642,8 @@ namespace embree
public:
Device* device; //!< device this geometry belongs to

Geometry* twin; //!< representation of this geometry on the device

void* userPtr; //!< user pointer
unsigned int numPrimitives; //!< number of primitives of this geometry

Expand Down
55 changes: 54 additions & 1 deletion kernels/common/rtcore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ RTC_NAMESPACE_BEGIN;
scene->commit(false);

#if defined(EMBREE_SYCL_SUPPORT)
prefetchUSMSharedOnGPU(hscene);
//prefetchUSMSharedOnGPU(hscene);
#endif

RTC_CATCH_END2(scene);
Expand Down Expand Up @@ -1678,6 +1678,28 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
RTC_CATCH_END2(geometry);
}

RTC_API void rtcSetSharedGeometryBufferXPU(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, const void* ptr, const void* dptr, size_t byteOffset, size_t byteStride, size_t itemCount)
{
Geometry* geometry = (Geometry*) hgeometry;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcSetSharedGeometryBuffer);
RTC_VERIFY_HANDLE(hgeometry);
RTC_ENTER_DEVICE(hgeometry);

if (itemCount > 0xFFFFFFFFu)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");

Ref<Buffer> buffer = new Buffer(geometry->device, itemCount*byteStride, (char*)ptr + byteOffset);

if (dptr) {
Ref<Buffer> dbuffer = new Buffer(geometry->device, itemCount*byteStride, (char*)dptr + byteOffset);
geometry->setBuffer(type, slot, format, buffer, dbuffer, 0, byteStride, (unsigned int)itemCount);
} else {
geometry->setBuffer(type, slot, format, buffer, buffer, 0, byteStride, (unsigned int)itemCount);
}
RTC_CATCH_END2(geometry);
}

RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount)
{
Geometry* geometry = (Geometry*) hgeometry;
Expand All @@ -1701,6 +1723,37 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
return nullptr;
}

RTC_API void* rtcSetNewGeometryBufferXPU(RTCGeometry hgeometry, RTCBufferType bufferType, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount, void** dptr)
{
Geometry* geometry = (Geometry*) hgeometry;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcSetNewGeometryBuffer);
RTC_VERIFY_HANDLE(hgeometry);
RTC_ENTER_DEVICE(hgeometry);

if (itemCount > 0xFFFFFFFFu)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");

/* vertex buffers need to get overallocated slightly as elements are accessed using SSE loads */
size_t bytes = itemCount*byteStride;
if (bufferType == RTC_BUFFER_TYPE_VERTEX || bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
bytes += (16 - (byteStride%16))%16;

Ref<Buffer> buffer = new Buffer(geometry->device, EmbreeMemoryType::HOST, bytes);

if (dptr) {
Ref<Buffer> dbuffer = new Buffer(geometry->device, EmbreeMemoryType::DEVICE, bytes);
geometry->setBuffer(bufferType, slot, format, buffer, dbuffer, 0, byteStride, (unsigned int)itemCount);
*dptr = dbuffer->data();
} else {
geometry->setBuffer(bufferType, slot, format, buffer, buffer, 0, byteStride, (unsigned int)itemCount);
}
return buffer->data();

RTC_CATCH_END2(geometry);
return nullptr;
}

RTC_API void* rtcGetGeometryBufferData(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
{
Geometry* geometry = (Geometry*) hgeometry;
Expand Down
Loading

0 comments on commit f1672f5

Please sign in to comment.