From 3bb101a9b8fdfe0344f4dbe9ddd8098be41084a1 Mon Sep 17 00:00:00 2001 From: Alex Hirsch Date: Thu, 13 Feb 2020 00:04:40 +0100 Subject: [PATCH] Add pipeline barriers between shaders operating on shared images --- raygun/compute/compute_system.cpp | 2 +- raygun/gpu/gpu_buffer.hpp | 2 +- raygun/gpu/image.hpp | 2 + raygun/render/acceleration_structure.cpp | 4 +- raygun/render/raytracer.cpp | 58 +++++++++++++++++++++++- raygun/render/raytracer.hpp | 5 ++ raygun/render/render_system.cpp | 28 ++++++++++-- 7 files changed, 91 insertions(+), 10 deletions(-) diff --git a/raygun/compute/compute_system.cpp b/raygun/compute/compute_system.cpp index 2f5e58c..42ffc7a 100644 --- a/raygun/compute/compute_system.cpp +++ b/raygun/compute/compute_system.cpp @@ -39,7 +39,7 @@ void ComputePass::dispatch(vk::CommandBuffer& cmd, uint32_t width, uint32_t heig cmd.dispatch(width, height, depth); } -ComputePass::ComputePass(string_view name) : cs(RG().computeSystem()), computeShader(RG().resourceManager().loadShader(name)) +ComputePass::ComputePass(string_view name) : computeShader(RG().resourceManager().loadShader(name)), cs(RG().computeSystem()) { auto shaderStageInfo = computeShader->shaderStageInfo(vk::ShaderStageFlagBits::eCompute); diff --git a/raygun/gpu/gpu_buffer.hpp b/raygun/gpu/gpu_buffer.hpp index 201fa98..a2524f7 100644 --- a/raygun/gpu/gpu_buffer.hpp +++ b/raygun/gpu/gpu_buffer.hpp @@ -32,7 +32,7 @@ class Buffer { Buffer(vk::DeviceSize size, vk::BufferUsageFlags usage, vk::MemoryPropertyFlags memoryType); ~Buffer(); - operator vk::Buffer() { return *m_buffer; } + operator vk::Buffer() const { return *m_buffer; } vk::DeviceSize size() const { return m_info.range; } diff --git a/raygun/gpu/image.hpp b/raygun/gpu/image.hpp index eec0575..0d33482 100644 --- a/raygun/gpu/image.hpp +++ b/raygun/gpu/image.hpp @@ -31,6 +31,8 @@ class Image { Image(vk::Extent2D extent, vk::Format format = vk::Format::eR16G16B16A16Sfloat, uint32_t numMipLayers = 1, vk::SampleCountFlagBits samples = vk::SampleCountFlagBits::e1, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral); + operator vk::Image() const { return *m_image; } + const vk::Extent2D& extent() const { return m_extent; } const vk::Format& format() const { return m_format; } uint32_t numMips() const { return m_numMips; } diff --git a/raygun/render/acceleration_structure.cpp b/raygun/render/acceleration_structure.cpp index 6204122..18e4338 100644 --- a/raygun/render/acceleration_structure.cpp +++ b/raygun/render/acceleration_structure.cpp @@ -56,8 +56,8 @@ namespace { const auto transform = glm::transpose(entity.globalTransform().toMat4()); memcpy(&instance.transform, &transform, sizeof(instance.transform)); - const auto result = device.getAccelerationStructureHandleNV(*entity.model->bottomLevelAS, sizeof(instance.accelerationStructureHandle), - &instance.accelerationStructureHandle); + [[maybe_unused]] const auto result = device.getAccelerationStructureHandleNV(*entity.model->bottomLevelAS, sizeof(instance.accelerationStructureHandle), + &instance.accelerationStructureHandle); RAYGUN_ASSERT(result == vk::Result::eSuccess); return instance; diff --git a/raygun/render/raytracer.cpp b/raygun/render/raytracer.cpp index 86d502d..7ea8239 100644 --- a/raygun/render/raytracer.cpp +++ b/raygun/render/raytracer.cpp @@ -62,7 +62,7 @@ void Raytracer::setupBottomLevelAS() cmd->begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}); - auto& models = RG().resourceManager().models(); + auto models = RG().resourceManager().models(); for(auto& model: models) { if(!model->bottomLevelAS) { model->bottomLevelAS = std::make_unique(*cmd, *model->mesh); @@ -95,6 +95,8 @@ const gpu::Image& Raytracer::doRaytracing(vk::CommandBuffer& cmd) RG().profiler().writeTimestamp(cmd, TimestampQueryID::RTOnlyStart); + initialImageBarrier(cmd); + const auto stride = raytracingProperties.shaderGroupHandleSize; cmd.traceRaysNV(*m_sbtBuffer, m_raygenGroupIndex * stride, // @@ -103,6 +105,8 @@ const gpu::Image& Raytracer::doRaytracing(vk::CommandBuffer& cmd) *m_sbtBuffer, 0, 0, // vc.windowSize.width, vc.windowSize.height, 1); + computeShaderImageBarrier(cmd, {m_baseImage.get(), m_normalImage.get(), m_roughImage.get()}, vk::PipelineStageFlagBits::eRayTracingShaderNV); + RG().profiler().writeTimestamp(cmd, TimestampQueryID::RTOnlyEnd); RG().profiler().writeTimestamp(cmd, TimestampQueryID::PostprocStart); @@ -111,14 +115,29 @@ const gpu::Image& Raytracer::doRaytracing(vk::CommandBuffer& cmd) int dispatchHeight = vc.windowSize.height / COMPUTE_WG_Y_SIZE + ((vc.windowSize.height % COMPUTE_WG_Y_SIZE) > 0 ? 1 : 0); RG().profiler().writeTimestamp(cmd, TimestampQueryID::RoughStart); + m_roughPrepare->dispatch(cmd, dispatchWidth, dispatchHeight); + computeShaderImageBarrier(cmd, {m_roughTransitions.get(), m_roughColorsA.get(), m_roughColorsB.get()}); + for(int i = 0; i < 10; ++i) { m_roughBlurH->dispatch(cmd, dispatchWidth, dispatchHeight); + computeShaderImageBarrier(cmd, {m_roughColorsA.get(), m_roughColorsB.get()}); m_roughBlurV->dispatch(cmd, dispatchWidth, dispatchHeight); + computeShaderImageBarrier(cmd, {m_roughColorsA.get(), m_roughColorsB.get()}); } + RG().profiler().writeTimestamp(cmd, TimestampQueryID::RoughEnd); m_postprocess->dispatch(cmd, dispatchWidth, dispatchHeight); + computeShaderImageBarrier(cmd, { + m_baseImage.get(), + m_normalImage.get(), + m_roughImage.get(), + m_finalImage.get(), + m_roughTransitions.get(), + m_roughColorsA.get(), + m_roughColorsB.get(), + }); ImGui::Checkbox("Use FXAA", &m_useFXAA); if(m_useFXAA) { @@ -324,4 +343,41 @@ const gpu::Image& Raytracer::selectResultImage() return *images[selectedResult]; } +void Raytracer::initialImageBarrier(vk::CommandBuffer& cmd) +{ + const auto images = {m_baseImage.get(), m_normalImage.get(), m_roughImage.get(), m_finalImage.get(), + m_roughTransitions.get(), m_roughColorsA.get(), m_roughColorsB.get()}; + + std::vector imageBarriers; + imageBarriers.reserve(images.size()); + for(auto& image: images) { + auto& barrier = imageBarriers.emplace_back(); + barrier.setImage(*image); + barrier.setOldLayout(vk::ImageLayout::eUndefined); + barrier.setNewLayout(vk::ImageLayout::eGeneral); + barrier.setSubresourceRange(gpu::defaultImageSubresourceRange()); + } + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eRayTracingShaderNV, // + vk::DependencyFlagBits::eByRegion, {}, {}, imageBarriers); +} + +void Raytracer::computeShaderImageBarrier(vk::CommandBuffer& cmd, std::initializer_list images, vk::PipelineStageFlags srcStageMask) +{ + std::vector imageBarriers; + imageBarriers.reserve(images.size()); + for(auto& image: images) { + auto& barrier = imageBarriers.emplace_back(); + barrier.setImage(*image); + barrier.setSrcAccessMask(vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + barrier.setDstAccessMask(vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + barrier.setOldLayout(vk::ImageLayout::eGeneral); + barrier.setNewLayout(vk::ImageLayout::eGeneral); + barrier.setSubresourceRange(gpu::defaultImageSubresourceRange()); + } + + cmd.pipelineBarrier(srcStageMask, vk::PipelineStageFlagBits::eComputeShader, // + vk::DependencyFlagBits::eByRegion, {}, {}, imageBarriers); +} + } // namespace raygun::render diff --git a/raygun/render/raytracer.hpp b/raygun/render/raytracer.hpp index 2c5ba96..0635981 100644 --- a/raygun/render/raytracer.hpp +++ b/raygun/render/raytracer.hpp @@ -58,6 +58,11 @@ struct Raytracer { const gpu::Image& selectResultImage(); + void initialImageBarrier(vk::CommandBuffer& cmd); + + void computeShaderImageBarrier(vk::CommandBuffer& cmd, std::initializer_list images, + vk::PipelineStageFlags srcStageMask = vk::PipelineStageFlagBits::eComputeShader); + vk::PhysicalDeviceRayTracingPropertiesNV raytracingProperties = {}; std::vector m_shaderGroups; diff --git a/raygun/render/render_system.cpp b/raygun/render/render_system.cpp index 816578c..b836fa0 100644 --- a/raygun/render/render_system.cpp +++ b/raygun/render/render_system.cpp @@ -99,19 +99,35 @@ void RenderSystem::render(Scene& scene) const auto& raytracerResultImage = m_raytracer->doRaytracing(*m_commandBuffer); - auto& image = m_swapchain->image(m_framebufferIndex); + // Ensure ray traced image is ready for transfer. + { + vk::ImageMemoryBarrier barrier; + barrier.setImage(raytracerResultImage); + barrier.setOldLayout(vk::ImageLayout::eGeneral); + barrier.setNewLayout(vk::ImageLayout::eTransferSrcOptimal); + barrier.setSrcAccessMask(vk::AccessFlagBits::eShaderWrite); + barrier.setDstAccessMask(vk::AccessFlagBits::eTransferRead); + barrier.setSubresourceRange(gpu::defaultImageSubresourceRange()); + + m_commandBuffer->pipelineBarrier(vk::PipelineStageFlagBits::eRayTracingShaderNV | vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, {}, {}, barrier); + } + + auto& resultImage = m_swapchain->image(m_framebufferIndex); + // Transition result image layout for blit. { vk::ImageMemoryBarrier barr; - barr.setImage(image); + barr.setImage(resultImage); barr.setDstAccessMask(vk::AccessFlagBits::eTransferWrite); barr.setNewLayout(vk::ImageLayout::eTransferDstOptimal); barr.setSubresourceRange(gpu::defaultImageSubresourceRange()); - m_commandBuffer->pipelineBarrier(vk::PipelineStageFlagBits::eRayTracingShaderNV, vk::PipelineStageFlagBits::eTransfer, + m_commandBuffer->pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, // vk::DependencyFlagBits::eByRegion, {}, {}, barr); } + // Copy ray traced image -> result image. { vk::Offset3D offset = {0, 0, 0}; vk::Offset3D bound = {(int32_t)vc.windowSize.width, (int32_t)vc.windowSize.height, 1}; @@ -122,8 +138,9 @@ void RenderSystem::render(Scene& scene) blit.setSrcOffsets({offset, bound}); blit.setSrcSubresource(gpu::defaultImageSubresourceLayers()); - m_commandBuffer->blitImage(raytracerResultImage.image(), raytracerResultImage.initialLayout(), image, vk::ImageLayout::eTransferDstOptimal, blit, - vk::Filter::eNearest); + m_commandBuffer->blitImage(raytracerResultImage, vk::ImageLayout::eTransferSrcOptimal, // + resultImage, vk::ImageLayout::eTransferDstOptimal, // + blit, vk::Filter::eNearest); } beginRenderPass(); @@ -377,6 +394,7 @@ void RenderSystem::setupRenderPass() attachments[0].setFormat(vc.surfaceFormat); attachments[0].setSamples(SAMPLES); attachments[0].setLoadOp(vk::AttachmentLoadOp::eLoad); + attachments[0].setInitialLayout(vk::ImageLayout::eTransferDstOptimal); attachments[0].setFinalLayout(vk::ImageLayout::ePresentSrcKHR); vk::AttachmentReference colorRef = {};