From e4e3cbf9203be32bfaee9b5db9c16f0acab98672 Mon Sep 17 00:00:00 2001 From: Jonathan Hoffstadt Date: Thu, 12 Dec 2024 20:42:27 -0600 Subject: [PATCH] WIP --- .github/workflows/build.yml | 6 +- extensions/pl_draw_backend_ext.c | 3 +- extensions/pl_ecs_ext.c | 2 +- extensions/pl_graphics_ext.h | 37 ++++- extensions/pl_graphics_metal.m | 234 +++++++++++++++++++----------- extensions/pl_graphics_vulkan.c | 2 +- extensions/pl_renderer_ext.c | 66 +++++++-- extensions/pl_renderer_internal.c | 61 +++++--- extensions/pl_shader_ext.c | 1 + sandbox/app.c | 2 +- scripts/package.py | 15 +- shaders/panorama_to_cubemap.comp | 7 +- 12 files changed, 293 insertions(+), 143 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 70c757a7..eef1be9c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -65,7 +65,7 @@ jobs: if not exist ../out/example_9.dll exit 1 if not exist ../out/pilot_light.exe exit 1 if not exist ../out/pl_unity_ext.dll exit 1 - if not exist ../out/pl_script_camerad.dll exit 1 + if not exist ../out/pl_script_camera.dll exit 1 cd .. - name: Prep 2 @@ -188,7 +188,7 @@ jobs: test -f ./out/example_8.dylib || exit 1 test -f ./out/example_9.dylib || exit 1 test -f ./out/pl_unity_ext.dylib || exit 1 - test -f ./out/pl_script_camerad.dylib || exit 1 + test -f ./out/pl_script_camera.dylib || exit 1 - name: Prep 2 run: | @@ -325,7 +325,7 @@ jobs: test -f ./out/example_8.so || exit 1 test -f ./out/example_9.so || exit 1 test -f ./out/pl_unity_ext.so || exit 1 - test -f ./out/pl_script_camerad.so || exit 1 + test -f ./out/pl_script_camera.so || exit 1 - name: Prep 2 run: | diff --git a/extensions/pl_draw_backend_ext.c b/extensions/pl_draw_backend_ext.c index 300ebd2f..4fedcdd5 100644 --- a/extensions/pl_draw_backend_ext.c +++ b/extensions/pl_draw_backend_ext.c @@ -195,8 +195,7 @@ pl_cleanup_draw_backend(void) gptGfx->destroy_buffer(ptDevice, gptDrawBackendCtx->atBufferInfo[i].tVertexBuffer); gptGfx->destroy_buffer(ptDevice, gptDrawBackendCtx->at3DBufferInfo[i].tVertexBuffer); gptGfx->destroy_buffer(ptDevice, gptDrawBackendCtx->atLineBufferInfo[i].tVertexBuffer); - gptGfx->destroy_buffer(ptDevice, gptDrawBackendCtx->atIndexBuffer[i]); - gptGfx->destroy_buffer(ptDevice, gptDrawBackendCtx->atIndexBuffer[i]); + gptGfx->destroy_buffer(ptDevice, gptDrawBackendCtx->atIndexBuffer[i]); } gptGfx->cleanup_bind_group_pool(gptDrawBackendCtx->ptBindGroupPool); diff --git a/extensions/pl_ecs_ext.c b/extensions/pl_ecs_ext.c index d42e5320..982b71f6 100644 --- a/extensions/pl_ecs_ext.c +++ b/extensions/pl_ecs_ext.c @@ -332,7 +332,7 @@ pl_ecs_remove_entity(plComponentLibrary* ptLibrary, plEntity tEntity) { plEntity tLastEntity = pl_sb_back(ptLibrary->_ptManagers[i]->sbtEntities); pl_hm_remove(ptLibrary->_ptManagers[i]->ptHashmap, tLastEntity.uIndex); - pl_hm_get_free_index(ptLibrary->_ptManagers[i]->ptHashmap); // burn slot + uint32_t _unUsed = pl_hm_get_free_index(ptLibrary->_ptManagers[i]->ptHashmap); // burn slot pl_hm_insert(ptLibrary->_ptManagers[i]->ptHashmap, tLastEntity.uIndex, uEntityValue); } diff --git a/extensions/pl_graphics_ext.h b/extensions/pl_graphics_ext.h index 2dc604d0..1736becd 100644 --- a/extensions/pl_graphics_ext.h +++ b/extensions/pl_graphics_ext.h @@ -95,6 +95,11 @@ typedef struct _plComputeEncoder plComputeEncoder; // opaque type for command b typedef struct _plBlitEncoder plBlitEncoder; // opaque type for command buffer encoder for blit ops typedef struct _plBufferImageCopy plBufferImageCopy;// used for copying between buffers & textures with blit encoder +// new +typedef struct _plPassTextureResource plPassTextureResource; +typedef struct _plPassBufferResource plPassBufferResource; +typedef struct _plPassResources plPassResources; + // bind groups typedef struct _plBindGroup plBindGroup; // bind group resource typedef struct _plBindGroupLayout plBindGroupLayout; // bind group layout decription @@ -192,6 +197,7 @@ typedef int plVendorId; // -> enum _plVendorId // Enu typedef int plDeviceType; // -> enum _plDeviceType // Enum: device type (PL_DEVICE_TYPE_XXXX) typedef int plDeviceCapability; // -> enum _plDeviceCapability // Flags: device capabilities (PL_DEVICE_CAPABILITY_XXXX) typedef int plCommandPoolResetFlags; // -> enum _plCommandPoolResetFlags // Flags: device capabilities (PL_DEVICE_CAPABILITY_XXXX) +typedef int plPassResourceUsageFlags; // external typedef struct _plWindow plWindow; // pl_os.h @@ -281,7 +287,7 @@ typedef struct _plGraphicsI void (*bind_shader) (plRenderEncoder*, plShaderHandle); // compute encoder - plComputeEncoder* (*begin_compute_pass) (plCommandBuffer*); // do not store + plComputeEncoder* (*begin_compute_pass) (plCommandBuffer*, const plPassResources*); // do not store void (*end_compute_pass) (plComputeEncoder*); void (*dispatch) (plComputeEncoder*, uint32_t dispatchCount, const plDispatch*); void (*bind_compute_shader) (plComputeEncoder*, plComputeShaderHandle); @@ -556,6 +562,28 @@ typedef struct _plBindGroupUpdateData const plBindGroupUpdateSamplerData* atSamplerBindings; } plBindGroupUpdateData; +typedef struct _plPassTextureResource +{ + plTextureHandle tHandle; + plStageFlags tStages; + plPassResourceUsageFlags tUsage; +} plPassTextureResource; + +typedef struct _plPassBufferResource +{ + plBufferHandle tHandle; + plStageFlags tStages; + plPassResourceUsageFlags tUsage; +} plPassBufferResource; + +typedef struct _plPassResources +{ + uint32_t uBufferCount; + uint32_t uTextureCount; + const plPassBufferResource* atBuffers; + const plPassTextureResource* atTextures; +} plPassResources; + typedef struct _plBindGroupLayout { @@ -961,6 +989,13 @@ typedef struct _plDeviceInit // [SECTION] enums //----------------------------------------------------------------------------- +enum _plPassResourceUsageFlags +{ + PL_PASS_RESOURCE_USAGE_NONE = 0, + PL_PASS_RESOURCE_USAGE_READ = 1 << 0, + PL_PASS_RESOURCE_USAGE_WRITE = 1 << 1, +}; + enum _plBindGroupPoolFlags { PL_BIND_GROUP_POOL_FLAGS_NONE = 0, diff --git a/extensions/pl_graphics_metal.m b/extensions/pl_graphics_metal.m index d35229e3..0fcdb06c 100644 --- a/extensions/pl_graphics_metal.m +++ b/extensions/pl_graphics_metal.m @@ -14,6 +14,8 @@ // [SECTION] unity build */ + + //----------------------------------------------------------------------------- // [SECTION] includes //----------------------------------------------------------------------------- @@ -46,6 +48,7 @@ uint32_t uCurrentSubpass; id tEncoder; plRenderEncoder* ptNext; + uint64_t uHeapUsageMask; } plRenderEncoder; typedef struct _plComputeEncoder @@ -53,6 +56,7 @@ plCommandBuffer* ptCommandBuffer; id tEncoder; plComputeEncoder* ptNext; + uint64_t uHeapUsageMask; } plComputeEncoder; typedef struct _plBlitEncoder @@ -89,7 +93,7 @@ typedef struct _plMetalBuffer { id tBuffer; - id tHeap; + uint64_t uHeap; } plMetalBuffer; typedef struct _plCommandPool @@ -119,7 +123,7 @@ typedef struct _plMetalTexture { id tTexture; - id tHeap; + uint64_t uHeap; MTLTextureDescriptor* ptTextureDescriptor; bool bOriginalView; } plMetalTexture; @@ -141,10 +145,8 @@ { id tShaderArgumentBuffer; plBindGroupLayout tLayout; - plBufferHandle atBufferBindings[PL_MAX_BUFFERS_PER_BIND_GROUP]; plSamplerHandle atSamplerBindings[PL_MAX_TEXTURES_PER_BIND_GROUP]; - uint32_t uHeapCount; - id atRequiredHeaps[PL_MAX_TEXTURES_PER_BIND_GROUP * PL_MAX_BUFFERS_PER_BIND_GROUP]; + uint64_t uHeapUsageMask; uint32_t uOffset; plTextureHandle* sbtTextures; } plMetalBindGroup; @@ -248,6 +250,9 @@ // metal specifics id tDevice; + + id atHeaps[64]; + uint64_t* sbuFreeHeaps; } plDevice; @@ -688,7 +693,7 @@ tStorageMode = MTLResourceStorageModePrivate; } - ptMetalBuffer->tBuffer = [(id)ptAllocation->uHandle newBufferWithLength:ptAllocation->ulSize options:tStorageMode offset:ptAllocation->ulOffset]; + ptMetalBuffer->tBuffer = [ptDevice->atHeaps[ptAllocation->uHandle] newBufferWithLength:ptAllocation->ulSize options:tStorageMode offset:ptAllocation->ulOffset]; ptMetalBuffer->tBuffer.label = [NSString stringWithUTF8String:ptBuffer->tDesc.pcDebugName]; if(ptAllocation->tMemoryMode != PL_MEMORY_GPU) @@ -696,7 +701,7 @@ memset(ptMetalBuffer->tBuffer.contents, 0, ptAllocation->ulSize); ptBuffer->tMemoryAllocation.pHostMapped = ptMetalBuffer->tBuffer.contents; } - ptMetalBuffer->tHeap = (id)ptAllocation->uHandle; + ptMetalBuffer->uHeap = ptAllocation->uHandle; } static void @@ -871,7 +876,7 @@ MTLSizeAndAlign tSizeAlign = [ptDevice->tDevice heapBufferSizeAndAlignWithLength:argumentBufferLength options:MTLResourceStorageModeShared]; plMetalBindGroup tMetalBindGroup = { - .tLayout = *ptLayout + .tLayout = *ptLayout, }; tMetalBindGroup.tShaderArgumentBuffer = ptDesc->ptPool->tArgumentBuffer.tBuffer; @@ -892,7 +897,6 @@ plMetalBindGroup* ptMetalBindGroup = &ptDevice->sbtBindGroupsHot[tHandle.uIndex]; plBindGroup* ptBindGroup = pl__get_bind_group(ptDevice, tHandle); - // ptMetalBindGroup->uHeapCount = 0; const char* pcDescriptorStart = ptMetalBindGroup->tShaderArgumentBuffer.contents; uint64_t* pulDescriptorStart = (uint64_t*)&pcDescriptorStart[ptMetalBindGroup->uOffset]; @@ -903,23 +907,8 @@ plMetalBuffer* ptMetalBuffer = &ptDevice->sbtBuffersHot[ptUpdate->tBuffer.uIndex]; uint64_t* ppfDestination = &pulDescriptorStart[ptUpdate->uSlot]; *ppfDestination = ptMetalBuffer->tBuffer.gpuAddress; - ptMetalBindGroup->atBufferBindings[i] = ptUpdate->tBuffer; - bool bHeapFound = false; - for(uint32_t j = 0; j < ptMetalBindGroup->uHeapCount; j++) - { - if(ptMetalBindGroup->atRequiredHeaps[j] == ptMetalBuffer->tHeap) - { - bHeapFound = true; - break; - } - } - - if(!bHeapFound) - { - ptMetalBindGroup->atRequiredHeaps[ptMetalBindGroup->uHeapCount] = ptMetalBuffer->tHeap; - ptMetalBindGroup->uHeapCount++; - } + ptMetalBindGroup->uHeapUsageMask |= (1ULL << ptMetalBuffer->uHeap); } for(uint32_t i = 0; i < ptData->uTextureCount; i++) @@ -931,21 +920,7 @@ *pptDestination = ptMetalTexture->tTexture.gpuResourceID; pl_sb_push(ptMetalBindGroup->sbtTextures, ptUpdate->tTexture); - bool bHeapFound = false; - for(uint32_t j = 0; j < ptMetalBindGroup->uHeapCount; j++) - { - if(ptMetalBindGroup->atRequiredHeaps[j] == ptMetalTexture->tHeap) - { - bHeapFound = true; - break; - } - } - - if(!bHeapFound) - { - ptMetalBindGroup->atRequiredHeaps[ptMetalBindGroup->uHeapCount] = ptMetalTexture->tHeap; - ptMetalBindGroup->uHeapCount++; - } + ptMetalBindGroup->uHeapUsageMask |= (1ULL << ptMetalTexture->uHeap); } for(uint32_t i = 0; i < ptData->uSamplerCount; i++) @@ -1017,8 +992,8 @@ } ptMetalTexture->ptTextureDescriptor.storageMode = tStorageMode; - ptMetalTexture->tTexture = [(id)ptAllocation->uHandle newTextureWithDescriptor:ptMetalTexture->ptTextureDescriptor offset:ptAllocation->ulOffset]; - ptMetalTexture->tHeap = (id)ptAllocation->uHandle; + ptMetalTexture->tTexture = [ptDevice->atHeaps[ptAllocation->uHandle] newTextureWithDescriptor:ptMetalTexture->ptTextureDescriptor offset:ptAllocation->ulOffset]; + ptMetalTexture->uHeap = ptAllocation->uHandle; ptMetalTexture->tTexture.label = [NSString stringWithUTF8String:ptTexture->tDesc.pcDebugName]; [ptMetalTexture->ptTextureDescriptor release]; ptMetalTexture->ptTextureDescriptor = nil; @@ -1087,7 +1062,7 @@ slices:tSliceRange]; ptNewMetalTexture->tTexture.label = [NSString stringWithUTF8String:ptViewDesc->pcDebugName]; - ptNewMetalTexture->tHeap = ptOldMetalTexture->tHeap; + ptNewMetalTexture->uHeap = ptOldMetalTexture->uHeap; return tHandle; } @@ -1110,7 +1085,7 @@ pl_sprintf(atNameBuffer, "D-BUF-F%d-%d", (int)gptGraphics->uCurrentFrameIndex, (int)ptFrame->uCurrentBufferIndex); ptDynamicBuffer->tMemory = ptDevice->ptDynamicAllocator->allocate(ptDevice->ptDynamicAllocator->ptInst, 0, ptDevice->tInit.szDynamicBufferBlockSize, 0, atNameBuffer); - ptDynamicBuffer->tBuffer = [(id)ptDynamicBuffer->tMemory.uHandle newBufferWithLength:ptDevice->tInit.szDynamicBufferBlockSize options:MTLResourceStorageModeShared offset:0]; + ptDynamicBuffer->tBuffer = [ptDevice->atHeaps[ptDynamicBuffer->tMemory.uHandle] newBufferWithLength:ptDevice->tInit.szDynamicBufferBlockSize options:MTLResourceStorageModeShared offset:0]; ptDynamicBuffer->tBuffer.label = [NSString stringWithUTF8String:"buddy allocator"]; gptGraphics->szHostMemoryInUse += ptDevice->tInit.szDynamicBufferBlockSize; } @@ -1405,7 +1380,7 @@ plDeviceMemoryAllocation tAllocation = { .pHostMapped = NULL, - .uHandle = 0, + .uHandle = UINT64_MAX, .ulOffset = 0, .ulSize = ulSize, .ptAllocator = ptData->ptAllocator, @@ -1427,7 +1402,7 @@ plDeviceMemoryAllocation tBlock = {.uHandle = ptAllocation->uHandle}; pl_free_memory(ptData->ptDevice, &tBlock); gptGraphics->szHostMemoryInUse -= ptAllocation->ulSize; - ptAllocation->uHandle = 0; + ptAllocation->uHandle = UINT64_MAX; ptAllocation->ulSize = 0; ptAllocation->ulOffset = 0; } @@ -1507,6 +1482,10 @@ memset(ptDevice, 0, sizeof(plDevice)); ptDevice->tInit = *ptInit; + pl_sb_resize(ptDevice->sbuFreeHeaps, 64); + for(uint64_t i = 0; i < 64; i++) + ptDevice->sbuFreeHeaps[i] = 63 - i; + pl_sb_add(ptDevice->sbtRenderPassLayoutsHot); pl_sb_add(ptDevice->sbtRenderPassesHot); pl_sb_add(ptDevice->sbtShadersHot); @@ -1561,13 +1540,6 @@ ptDevice->ptDynamicAllocator = &tAllocator; plDeviceMemoryAllocatorI* ptDynamicAllocator = &tAllocator; - MTLHeapDescriptor* ptHeapDescriptor = [MTLHeapDescriptor new]; - ptHeapDescriptor.storageMode = MTLStorageModeShared; - ptHeapDescriptor.size = ptDevice->szDynamicArgumentBufferHeapSize; - ptHeapDescriptor.type = MTLHeapTypePlacement; - ptHeapDescriptor.hazardTrackingMode = MTLHazardTrackingModeUntracked; - // ptHeapDescriptor.sparsePageSize = MTLSparsePageSize256; - pl_sb_resize(ptDevice->sbtGarbage, gptGraphics->uFramesInFlight + 1); gptGraphics->tFence = [ptDevice->tDevice newFence]; plTempAllocator tTempAllocator = {0}; @@ -1580,7 +1552,7 @@ static char atNameBuffer[PL_MAX_NAME_LENGTH] = {0}; pl_sprintf(atNameBuffer, "D-BUF-F%d-0", (int)i); tFrame.sbtDynamicBuffers[0].tMemory = ptDevice->ptDynamicAllocator->allocate(ptDevice->ptDynamicAllocator->ptInst, 0, ptDevice->tInit.szDynamicBufferBlockSize, 0,atNameBuffer); - tFrame.sbtDynamicBuffers[0].tBuffer = [(id)tFrame.sbtDynamicBuffers[0].tMemory.uHandle newBufferWithLength:ptDevice->tInit.szDynamicBufferBlockSize options:MTLResourceStorageModeShared offset:0]; + tFrame.sbtDynamicBuffers[0].tBuffer = [ptDevice->atHeaps[tFrame.sbtDynamicBuffers[0].tMemory.uHandle] newBufferWithLength:ptDevice->tInit.szDynamicBufferBlockSize options:MTLResourceStorageModeShared offset:0]; tFrame.sbtDynamicBuffers[0].tBuffer.label = [NSString stringWithUTF8String:pl_temp_allocator_sprintf(&tTempAllocator, "Dynamic Buffer: %u, 0", i)]; pl_sb_push(ptDevice->sbtFrames, tFrame); } @@ -1852,6 +1824,7 @@ plCommandBuffer* ptCmdBuffer = ptEncoder->ptCommandBuffer; plDevice* ptDevice = ptCmdBuffer->ptDevice; ptEncoder->uCurrentSubpass++; + ptEncoder->uHeapUsageMask = 0; plMetalRenderPass* ptMetalRenderPass = &ptDevice->sbtRenderPassesHot[ptEncoder->tRenderPassHandle.uIndex]; @@ -1869,6 +1842,7 @@ { plDevice* ptDevice = ptCmdBuffer->ptDevice; plRenderEncoder* ptEncoder = pl__get_new_render_encoder(); + ptEncoder->uHeapUsageMask = 0; plRenderPass* ptRenderPass = pl_get_render_pass(ptDevice, tPass); plMetalRenderPass* ptMetalRenderPass = &ptDevice->sbtRenderPassesHot[tPass.uIndex]; @@ -1981,17 +1955,67 @@ } static plComputeEncoder* -pl_begin_compute_pass(plCommandBuffer* ptCmdBuffer) +pl_begin_compute_pass(plCommandBuffer* ptCmdBuffer, const plPassResources* ptResources) { plComputeEncoder* ptEncoder = pl__get_new_compute_encoder(); ptEncoder->tEncoder = [ptCmdBuffer->tCmdBuffer computeCommandEncoder]; ptEncoder->ptCommandBuffer = ptCmdBuffer; + ptEncoder->uHeapUsageMask = 0; + + if(ptResources) + { + for(uint32_t i = 0; i < ptResources->uBufferCount; i++) + { + const plPassBufferResource* ptResource = &ptResources->atBuffers[i]; + const plBuffer* ptBuffer = &ptCmdBuffer->ptDevice->sbtBuffersCold[ptResource->tHandle.uIndex]; + const plMetalBuffer* ptMetalBuffer = &ptCmdBuffer->ptDevice->sbtBuffersHot[ptResource->tHandle.uIndex]; + + MTLResourceUsage tUsage = 0; + if(ptResource->tUsage & PL_PASS_RESOURCE_USAGE_READ) + tUsage |= MTLResourceUsageRead; + if(ptResource->tUsage & PL_PASS_RESOURCE_USAGE_WRITE) + tUsage |= MTLResourceUsageWrite; + + if(!(ptEncoder->uHeapUsageMask & (1ULL << ptBuffer->tMemoryAllocation.uHandle))) + { + [ptEncoder->tEncoder useHeap:ptCmdBuffer->ptDevice->atHeaps[ptBuffer->tMemoryAllocation.uHandle]]; + } + + ptEncoder->uHeapUsageMask |= (1ULL << ptBuffer->tMemoryAllocation.uHandle); + + [ptEncoder->tEncoder useResource:ptMetalBuffer->tBuffer usage:tUsage]; + } + + for(uint32_t i = 0; i < ptResources->uTextureCount; i++) + { + const plPassTextureResource* ptResource = &ptResources->atTextures[i]; + const plTexture* ptTexture = &ptCmdBuffer->ptDevice->sbtTexturesCold[ptResource->tHandle.uIndex]; + const plMetalTexture* ptMetalTexture = &ptCmdBuffer->ptDevice->sbtTexturesHot[ptResource->tHandle.uIndex]; + + MTLResourceUsage tUsage = 0; + if(ptResource->tUsage & PL_PASS_RESOURCE_USAGE_READ) + tUsage |= MTLResourceUsageRead; + if(ptResource->tUsage & PL_PASS_RESOURCE_USAGE_WRITE) + tUsage |= MTLResourceUsageWrite; + + if(!(ptEncoder->uHeapUsageMask & (1ULL << ptTexture->tMemoryAllocation.uHandle))) + { + [ptEncoder->tEncoder useHeap:ptCmdBuffer->ptDevice->atHeaps[ptTexture->tMemoryAllocation.uHandle]]; + } + + ptEncoder->uHeapUsageMask |= (1ULL << ptTexture->tMemoryAllocation.uHandle); + + [ptEncoder->tEncoder useResource:ptMetalTexture->tTexture usage:tUsage]; + } + } + return ptEncoder; } static void pl_end_compute_pass(plComputeEncoder* ptEncoder) { + plDevice* ptDevice = ptEncoder->ptCommandBuffer->ptDevice; [ptEncoder->tEncoder endEncoding]; pl__return_compute_encoder(ptEncoder); } @@ -2022,20 +2046,20 @@ [ptEncoder->tEncoder setBuffer:ptFrame->sbtDynamicBuffers[ptDynamicBinding->uBufferHandle].tBuffer offset:ptDynamicBinding->uByteOffset atIndex:3]; } - // for(uint32_t i = 0; i < gptGraphics->uFramesInFlight; i++) - // { - // [ptEncoder->tEncoder useHeap:ptDevice->sbtFrames[i].tDescriptorHeap]; - // } - for(uint32_t i = 0; i < uCount; i++) { plMetalBindGroup* ptBindGroup = &ptDevice->sbtBindGroupsHot[atBindGroups[i].uIndex]; - - for(uint32 j = 0; j < ptBindGroup->uHeapCount; j++) + + ptEncoder->uHeapUsageMask |= ptBindGroup->uHeapUsageMask; + + for(uint64_t k = 0; k < 64; k++) { - [ptEncoder->tEncoder useHeap:ptBindGroup->atRequiredHeaps[j]]; + if(ptEncoder->uHeapUsageMask & (1ULL << k)) + { + [ptEncoder->tEncoder useHeap:ptDevice->atHeaps[k]]; + } } - + for(uint32_t k = 0; k < pl_sb_size(ptBindGroup->sbtTextures); k++) { const plTextureHandle tTextureHandle = ptBindGroup->sbtTextures[k]; @@ -2066,9 +2090,14 @@ { plMetalBindGroup* ptBindGroup = &ptDevice->sbtBindGroupsHot[atBindGroups[i].uIndex]; - for(uint32 j = 0; j < ptBindGroup->uHeapCount; j++) + ptEncoder->uHeapUsageMask |= ptBindGroup->uHeapUsageMask; + + for(uint64_t k = 0; k < 64; k++) { - [ptEncoder->tEncoder useHeap:ptBindGroup->atRequiredHeaps[j] stages:MTLRenderStageVertex | MTLRenderStageFragment]; + if(ptEncoder->uHeapUsageMask & (1ULL << k)) + { + [ptEncoder->tEncoder useHeap:ptDevice->atHeaps[k] stages:MTLRenderStageVertex | MTLRenderStageFragment]; + } } for(uint32_t k = 0; k < pl_sb_size(ptBindGroup->sbtTextures); k++) @@ -2193,11 +2222,6 @@ plDevice* ptDevice = ptCmdBuffer->ptDevice; plFrameContext* ptFrame = pl__get_frame_resources(ptDevice); - // for(uint32_t i = 0; i < gptGraphics->uFramesInFlight; i++) - // { - // [ptEncoder->tEncoder useHeap:ptDevice->sbtFrames[i].tDescriptorHeap stages:MTLRenderStageVertex | MTLRenderStageFragment]; - // } - for(uint32_t i = 0; i < uAreaCount; i++) { plDrawArea* ptArea = &atAreas[i]; @@ -2269,9 +2293,14 @@ const plBindGroupHandle tBindGroupHandle = {.uData = ptStream->_auStream[uCurrentStreamIndex] }; plMetalBindGroup* ptMetalBindGroup = &ptDevice->sbtBindGroupsHot[tBindGroupHandle.uIndex]; - for(uint32 j = 0; j < ptMetalBindGroup->uHeapCount; j++) + ptEncoder->uHeapUsageMask |= ptMetalBindGroup->uHeapUsageMask; + + for(uint64_t k = 0; k < 64; k++) { - [ptEncoder->tEncoder useHeap:ptMetalBindGroup->atRequiredHeaps[j] stages:MTLRenderStageVertex | MTLRenderStageFragment]; + if(ptEncoder->uHeapUsageMask & (1ULL << k)) + { + [ptEncoder->tEncoder useHeap:ptDevice->atHeaps[k] stages:MTLRenderStageVertex | MTLRenderStageFragment]; + } } for(uint32_t k = 0; k < pl_sb_size(ptMetalBindGroup->sbtTextures); k++) @@ -2291,9 +2320,14 @@ const plBindGroupHandle tBindGroupHandle = {.uData = ptStream->_auStream[uCurrentStreamIndex] }; plMetalBindGroup* ptMetalBindGroup = &ptDevice->sbtBindGroupsHot[tBindGroupHandle.uIndex]; - for(uint32 j = 0; j < ptMetalBindGroup->uHeapCount; j++) + ptEncoder->uHeapUsageMask |= ptMetalBindGroup->uHeapUsageMask; + + for(uint64_t k = 0; k < 64; k++) { - [ptEncoder->tEncoder useHeap:ptMetalBindGroup->atRequiredHeaps[j] stages:MTLRenderStageVertex | MTLRenderStageFragment]; + if(ptEncoder->uHeapUsageMask & (1ULL << k)) + { + [ptEncoder->tEncoder useHeap:ptDevice->atHeaps[k] stages:MTLRenderStageVertex | MTLRenderStageFragment]; + } } for(uint32_t k = 0; k < pl_sb_size(ptMetalBindGroup->sbtTextures); k++) @@ -2312,19 +2346,20 @@ { const plBindGroupHandle tBindGroupHandle = {.uData = ptStream->_auStream[uCurrentStreamIndex] }; plMetalBindGroup* ptMetalBindGroup = &ptDevice->sbtBindGroupsHot[tBindGroupHandle.uIndex]; - - for(uint32 j = 0; j < ptMetalBindGroup->uHeapCount; j++) + + ptEncoder->uHeapUsageMask |= ptMetalBindGroup->uHeapUsageMask; + + for(uint64_t k = 0; k < 64; k++) { - [ptEncoder->tEncoder useHeap:ptMetalBindGroup->atRequiredHeaps[j] stages:MTLRenderStageVertex | MTLRenderStageFragment]; + if(ptEncoder->uHeapUsageMask & (1ULL << k)) + { + [ptEncoder->tEncoder useHeap:ptDevice->atHeaps[k] stages:MTLRenderStageVertex | MTLRenderStageFragment]; + } } - + for(uint32_t k = 0; k < pl_sb_size(ptMetalBindGroup->sbtTextures); k++) { const plTextureHandle tTextureHandle = ptMetalBindGroup->sbtTextures[k]; - if(tTextureHandle.uIndex > 21 && tTextureHandle.uIndex < 26) - { - int a = 5; - } [ptEncoder->tEncoder useResource:ptDevice->sbtTexturesHot[tTextureHandle.uIndex].tTexture usage:MTLResourceUsageRead stages:MTLRenderStageVertex | MTLRenderStageFragment]; } @@ -2335,7 +2370,16 @@ if(uDirtyMask & PL_DRAW_STREAM_BIT_DYNAMIC_BUFFER_0) { - + ptEncoder->uHeapUsageMask |= (1ULL << ptFrame->sbtDynamicBuffers[ptStream->_auStream[uCurrentStreamIndex]].tMemory.uHandle); + + for(uint64_t k = 0; k < 64; k++) + { + if(ptEncoder->uHeapUsageMask & (1ULL << k)) + { + [ptEncoder->tEncoder useHeap:ptDevice->atHeaps[k] stages:MTLRenderStageVertex | MTLRenderStageFragment]; + } + } + [ptEncoder->tEncoder setVertexBuffer:ptFrame->sbtDynamicBuffers[ptStream->_auStream[uCurrentStreamIndex]].tBuffer offset:0 atIndex:3]; [ptEncoder->tEncoder setFragmentBuffer:ptFrame->sbtDynamicBuffers[ptStream->_auStream[uCurrentStreamIndex]].tBuffer offset:0 atIndex:3]; @@ -2947,6 +2991,7 @@ { const uint16_t iBindGroupIndex = ptGarbage->sbtBindGroups[i].uIndex; plMetalBindGroup* ptMetalResource = &ptDevice->sbtBindGroupsHot[iBindGroupIndex]; + pl_sb_reset(ptMetalResource->sbtTextures); [ptMetalResource->tShaderArgumentBuffer release]; ptMetalResource->tShaderArgumentBuffer = nil; pl_sb_push(ptDevice->sbtBindGroupFreeIndices, iBindGroupIndex); @@ -3014,7 +3059,7 @@ } plDeviceMemoryAllocation tBlock = { - .uHandle = 0, + .uHandle = UINT64_MAX, .ulSize = (uint64_t)szSize, .tMemoryMode = tMemoryMode }; @@ -3037,7 +3082,18 @@ id tNewHeap = [ptDevice->tDevice newHeapWithDescriptor:ptHeapDescriptor]; tNewHeap.label = [NSString stringWithUTF8String:pcName]; - tBlock.uHandle = (uint64_t)tNewHeap; + // tBlock.uHandle = (uint64_t)tNewHeap; + + if(pl_sb_size(ptDevice->sbuFreeHeaps) > 0) + { + uint64_t uFreeIndex = pl_sb_pop(ptDevice->sbuFreeHeaps); + ptDevice->atHeaps[uFreeIndex] = tNewHeap; + tBlock.uHandle = uFreeIndex; + } + else + { + PL_ASSERT(false && "only 64 allocations allowed"); + } [ptHeapDescriptor release]; return tBlock; @@ -3046,11 +3102,13 @@ static void pl_free_memory(plDevice* ptDevice, plDeviceMemoryAllocation* ptBlock) { - id tHeap = (id)ptBlock->uHandle; + id tHeap = ptDevice->atHeaps[ptBlock->uHandle]; + pl_sb_push(ptDevice->sbuFreeHeaps, ptBlock->uHandle); [tHeap setPurgeableState:MTLPurgeableStateEmpty]; [tHeap release]; tHeap = nil; + ptDevice->atHeaps[ptBlock->uHandle] = nil; if(ptBlock->tMemoryMode == PL_MEMORY_GPU) { @@ -3060,7 +3118,7 @@ { gptGraphics->szHostMemoryInUse -= ptBlock->ulSize; } - ptBlock->uHandle = 0; + ptBlock->uHandle = UINT64_MAX; ptBlock->pHostMapped = NULL; ptBlock->ulSize = 0; ptBlock->tMemoryMode = 0; diff --git a/extensions/pl_graphics_vulkan.c b/extensions/pl_graphics_vulkan.c index 5f6e8710..53d4ab1b 100644 --- a/extensions/pl_graphics_vulkan.c +++ b/extensions/pl_graphics_vulkan.c @@ -3336,7 +3336,7 @@ pl_cleanup_device(plDevice* ptDevice) } plComputeEncoder* -pl_begin_compute_pass(plCommandBuffer* ptCmdBuffer) +pl_begin_compute_pass(plCommandBuffer* ptCmdBuffer, const plPassResources* ptResources) { VkMemoryBarrier tMemoryBarrier = { .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, diff --git a/extensions/pl_renderer_ext.c b/extensions/pl_renderer_ext.c index aba87dc9..879c87da 100644 --- a/extensions/pl_renderer_ext.c +++ b/extensions/pl_renderer_ext.c @@ -1390,7 +1390,23 @@ pl_refr_load_skybox_from_panorama(uint32_t uSceneHandle, const char* pcPath, int plCommandBuffer* ptCommandBuffer = gptGfx->request_command_buffer(ptCmdPool); gptGfx->begin_command_recording(ptCommandBuffer, NULL); - plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer); + + const plPassBufferResource atPassBuffers[] = { + { .tHandle = atComputeBuffers[0], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_READ }, + { .tHandle = atComputeBuffers[1], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atComputeBuffers[2], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atComputeBuffers[3], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atComputeBuffers[4], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atComputeBuffers[5], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atComputeBuffers[6], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + }; + + const plPassResources tPassResources = { + .uBufferCount = 7, + .atBuffers = atPassBuffers + }; + + plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer, &tPassResources); gptGfx->bind_compute_bind_groups(ptComputeEncoder, tPanoramaShader, 0, 1, &tComputeBindGroup, 0, NULL); gptGfx->bind_compute_shader(ptComputeEncoder, tPanoramaShader); gptGfx->dispatch(ptComputeEncoder, 1, &tDispach); @@ -1682,7 +1698,23 @@ pl_refr_load_skybox_from_panorama(uint32_t uSceneHandle, const char* pcPath, int memset(ptLutBuffer->tMemoryAllocation.pHostMapped, 0, uFaceSize); plCommandBuffer* ptCommandBuffer = gptGfx->request_command_buffer(ptCmdPool); gptGfx->begin_command_recording(ptCommandBuffer, NULL); - plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer); + + const plPassBufferResource atPassBuffers[] = { + { .tHandle = atLutBuffers[0], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atLutBuffers[1], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atLutBuffers[2], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atLutBuffers[3], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atLutBuffers[4], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atLutBuffers[5], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atLutBuffers[6], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + }; + + const plPassResources tPassResources = { + .uBufferCount = 7, + .atBuffers = atPassBuffers + }; + + plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer, &tPassResources); gptGfx->bind_compute_bind_groups(ptComputeEncoder, tLUTShader, 0, 1, &tLutBindGroup, 0, NULL); gptGfx->bind_compute_shader(ptComputeEncoder, tLUTShader); gptGfx->dispatch(ptComputeEncoder, 1, &tDispach); @@ -1693,8 +1725,6 @@ pl_refr_load_skybox_from_panorama(uint32_t uSceneHandle, const char* pcPath, int gptGfx->return_command_buffer(ptCommandBuffer); gptGfx->queue_compute_shader_for_deletion(ptDevice, tLUTShader); - - const plTextureDesc tTextureDesc = { .tDimensions = {(float)iResolution, (float)iResolution, 1}, .tFormat = PL_FORMAT_R32G32B32A32_FLOAT, @@ -1709,7 +1739,7 @@ pl_refr_load_skybox_from_panorama(uint32_t uSceneHandle, const char* pcPath, int pl_begin_cpu_sample(gptProfile, 0, "step 4"); ptCommandBuffer = gptGfx->request_command_buffer(ptCmdPool); gptGfx->begin_command_recording(ptCommandBuffer, NULL); - ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer); + ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer, &tPassResources); gptGfx->bind_compute_bind_groups(ptComputeEncoder, tIrradianceShader, 0, 1, &tLutBindGroup, 0, NULL); gptGfx->bind_compute_shader(ptComputeEncoder, tIrradianceShader); gptGfx->dispatch(ptComputeEncoder, 1, &tDispach); @@ -1836,7 +1866,7 @@ pl_refr_load_skybox_from_panorama(uint32_t uSceneHandle, const char* pcPath, int { int currentWidth = iResolution >> i; - const size_t uCurrentFaceSize = (size_t)currentWidth * (size_t)currentWidth * 4 * sizeof(float); + // const size_t uCurrentFaceSize = (size_t)currentWidth * (size_t)currentWidth * 4 * sizeof(float); const plDispatch tDispach = { .uGroupCountX = (uint32_t)currentWidth / 16, @@ -1849,7 +1879,23 @@ pl_refr_load_skybox_from_panorama(uint32_t uSceneHandle, const char* pcPath, int plCommandBuffer* ptCommandBuffer = gptGfx->request_command_buffer(ptCmdPool); gptGfx->begin_command_recording(ptCommandBuffer, NULL); - plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer); + + const plPassBufferResource atInnerPassBuffers[] = { + { .tHandle = atInnerComputeBuffers[0], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atInnerComputeBuffers[1], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atInnerComputeBuffers[2], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atInnerComputeBuffers[3], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atInnerComputeBuffers[4], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atInnerComputeBuffers[5], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = atInnerComputeBuffers[6], .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + }; + + const plPassResources tInnerPassResources = { + .uBufferCount = 7, + .atBuffers = atInnerPassBuffers + }; + + plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer, &tInnerPassResources); gptGfx->bind_compute_bind_groups(ptComputeEncoder, atSpecularComputeShaders[i], 0, 1, &tLutBindGroup, 0, NULL); gptGfx->bind_compute_shader(ptComputeEncoder, atSpecularComputeShaders[i]); gptGfx->dispatch(ptComputeEncoder, 1, &tDispach); @@ -3794,13 +3840,13 @@ pl_refr_render_scene(uint32_t uSceneHandle, uint32_t uViewHandle, plViewOptions .tTexture = ptView->atUVMaskTexture1[uFrameIdx], .uSlot = 0, .tType = PL_TEXTURE_BINDING_TYPE_STORAGE, - .tCurrentUsage = PL_TEXTURE_USAGE_STORAGE + .tCurrentUsage = PL_TEXTURE_USAGE_STORAGE }, { .tTexture = ptView->atUVMaskTexture0[uFrameIdx], .uSlot = 1, .tType = PL_TEXTURE_BINDING_TYPE_STORAGE, - .tCurrentUsage = PL_TEXTURE_USAGE_STORAGE + .tCurrentUsage = PL_TEXTURE_USAGE_STORAGE } }; @@ -3829,7 +3875,7 @@ pl_refr_render_scene(uint32_t uSceneHandle, uint32_t uViewHandle, plViewOptions gptGfx->begin_command_recording(ptCommandBuffer, &tBeginInfo); // begin main renderpass (directly to swapchain) - plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer); + plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer, NULL); ptView->tLastUVMask = (i % 2 == 0) ? ptView->atUVMaskTexture1[uFrameIdx] : ptView->atUVMaskTexture0[uFrameIdx]; diff --git a/extensions/pl_renderer_internal.c b/extensions/pl_renderer_internal.c index 8f382413..2441e634 100644 --- a/extensions/pl_renderer_internal.c +++ b/extensions/pl_renderer_internal.c @@ -689,33 +689,48 @@ pl_refr_perform_skinning(plCommandBuffer* ptCommandBuffer, uint32_t uSceneHandle int iUnused; } SkinDynamicData; - plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer); - - for(uint32_t i = 0; i < uSkinCount; i++) + if(uSkinCount) { - plDynamicBinding tDynamicBinding = pl__allocate_dynamic_data(ptDevice); - SkinDynamicData* ptDynamicData = (SkinDynamicData*)tDynamicBinding.pcData; - ptDynamicData->iSourceDataOffset = ptScene->sbtSkinData[i].iSourceDataOffset; - ptDynamicData->iDestDataOffset = ptScene->sbtSkinData[i].iDestDataOffset; - ptDynamicData->iDestVertexOffset = ptScene->sbtSkinData[i].iDestVertexOffset; - - const plDispatch tDispach = { - .uGroupCountX = ptScene->sbtSkinData[i].uVertexCount, - .uGroupCountY = 1, - .uGroupCountZ = 1, - .uThreadPerGroupX = 1, - .uThreadPerGroupY = 1, - .uThreadPerGroupZ = 1 + + const plPassBufferResource atPassBuffers[] = { + { .tHandle = ptScene->tSkinStorageBuffer, .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_READ }, + { .tHandle = ptScene->tVertexBuffer, .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, + { .tHandle = ptScene->tStorageBuffer, .tStages = PL_STAGE_COMPUTE, .tUsage = PL_PASS_RESOURCE_USAGE_WRITE }, }; - const plBindGroupHandle atBindGroups[] = { - ptScene->tSkinBindGroup0, - ptScene->sbtSkinData[i].tTempBindGroup + + const plPassResources tPassResources = { + .uBufferCount = 3, + .atBuffers = atPassBuffers }; - gptGfx->bind_compute_bind_groups(ptComputeEncoder, ptScene->sbtSkinData[i].tShader, 0, 2, atBindGroups, 1, &tDynamicBinding); - gptGfx->bind_compute_shader(ptComputeEncoder, ptScene->sbtSkinData[i].tShader); - gptGfx->dispatch(ptComputeEncoder, 1, &tDispach); + + plComputeEncoder* ptComputeEncoder = gptGfx->begin_compute_pass(ptCommandBuffer, &tPassResources); + + for(uint32_t i = 0; i < uSkinCount; i++) + { + plDynamicBinding tDynamicBinding = pl__allocate_dynamic_data(ptDevice); + SkinDynamicData* ptDynamicData = (SkinDynamicData*)tDynamicBinding.pcData; + ptDynamicData->iSourceDataOffset = ptScene->sbtSkinData[i].iSourceDataOffset; + ptDynamicData->iDestDataOffset = ptScene->sbtSkinData[i].iDestDataOffset; + ptDynamicData->iDestVertexOffset = ptScene->sbtSkinData[i].iDestVertexOffset; + + const plDispatch tDispach = { + .uGroupCountX = ptScene->sbtSkinData[i].uVertexCount, + .uGroupCountY = 1, + .uGroupCountZ = 1, + .uThreadPerGroupX = 1, + .uThreadPerGroupY = 1, + .uThreadPerGroupZ = 1 + }; + const plBindGroupHandle atBindGroups[] = { + ptScene->tSkinBindGroup0, + ptScene->sbtSkinData[i].tTempBindGroup + }; + gptGfx->bind_compute_bind_groups(ptComputeEncoder, ptScene->sbtSkinData[i].tShader, 0, 2, atBindGroups, 1, &tDynamicBinding); + gptGfx->bind_compute_shader(ptComputeEncoder, ptScene->sbtSkinData[i].tShader); + gptGfx->dispatch(ptComputeEncoder, 1, &tDispach); + } + gptGfx->end_compute_pass(ptComputeEncoder); } - gptGfx->end_compute_pass(ptComputeEncoder); pl_end_cpu_sample(gptProfile, 0); } diff --git a/extensions/pl_shader_ext.c b/extensions/pl_shader_ext.c index d7402737..afa88cf4 100644 --- a/extensions/pl_shader_ext.c +++ b/extensions/pl_shader_ext.c @@ -373,6 +373,7 @@ pl_compile_glsl(const char* pcShader, const char* pcEntryFunc, plShaderOptions* spvc_compiler_options_set_uint(tOptions, SPVC_COMPILER_OPTION_MSL_VERSION, 30000); spvc_compiler_options_set_bool(tOptions, SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS, true); spvc_compiler_options_set_bool(tOptions, SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING, true); + spvc_compiler_options_set_bool(tOptions, SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES, true); spvc_compiler_rename_entry_point(tMslCompiler, tModule.pcEntryFunc, "kernel_main", SpvExecutionModelGLCompute); diff --git a/sandbox/app.c b/sandbox/app.c index c6e104c4..26c4b794 100644 --- a/sandbox/app.c +++ b/sandbox/app.c @@ -97,7 +97,7 @@ pl_app_load(plApiRegistryI* ptApiRegistry, plEditorData* ptEditorData) "../shaders/" }, #ifndef PL_OFFLINE_SHADERS_ONLY - .tFlags = PL_SHADER_FLAGS_ALWAYS_COMPILE + .tFlags = PL_SHADER_FLAGS_ALWAYS_COMPILE | PL_SHADER_FLAGS_INCLUDE_DEBUG #endif }; gptShader->initialize(&tDefaultShaderOptions); diff --git a/scripts/package.py b/scripts/package.py index 2813835c..51199420 100644 --- a/scripts/package.py +++ b/scripts/package.py @@ -144,23 +144,16 @@ # copy scripts for script in scripts: if platform.system() == "Windows": - if debug_package: - shutil.move("../out/" + script + "d.dll", target_directory + "/bin/") - else: - shutil.move("../out/" + script + ".dll", target_directory + "/bin/") + shutil.move("../out/" + script + ".dll", target_directory + "/bin/") for file in glob.glob("../out/" + script + "d_*.pdb"): shutil.move(file, target_directory + "/bin/") elif platform.system() == "Darwin": + shutil.move("../out/" + script + ".dylib", target_directory + "/bin/") if debug_package: - shutil.move("../out/" + script + "d.dylib", target_directory + "/bin/") shutil.copytree("../out/" + script + "d.dylib.dSYM", target_directory + "/bin/" + script + "d.dylib.dSYM") - else: - shutil.move("../out/" + script + ".dylib", target_directory + "/bin/") + elif platform.system() == "Linux": - if debug_package: - shutil.move("../out/" + script + "d.so", target_directory + "/bin/") - else: - shutil.move("../out/" + script + ".so", target_directory + "/bin/") + shutil.move("../out/" + script + ".so", target_directory + "/bin/") # copy libs & executable if platform.system() == "Windows": diff --git a/shaders/panorama_to_cubemap.comp b/shaders/panorama_to_cubemap.comp index fa66b834..75bca35b 100644 --- a/shaders/panorama_to_cubemap.comp +++ b/shaders/panorama_to_cubemap.comp @@ -84,6 +84,9 @@ main() const int iSrcpixelIndex = iColumnindex + iRowindex * iWidth; - const vec3 tColor = vec3(tBufferIn.atPixelData[iSrcpixelIndex].r, tBufferIn.atPixelData[iSrcpixelIndex].g, tBufferIn.atPixelData[iSrcpixelIndex].b); - pl_write_face(iCurrentPixel, iFace, tColor); + if(iSrcpixelIndex < iWidth * iHeight) + { + const vec3 tColor = vec3(tBufferIn.atPixelData[iSrcpixelIndex].r, tBufferIn.atPixelData[iSrcpixelIndex].g, tBufferIn.atPixelData[iSrcpixelIndex].b); + pl_write_face(iCurrentPixel, iFace, tColor); + } }