diff --git a/apps/app.c b/apps/app.c index ce5eb1c1..151546fd 100644 --- a/apps/app.c +++ b/apps/app.c @@ -259,9 +259,10 @@ pl_app_load(plApiRegistryI* ptApiRegistry, plAppData* ptAppData) plModelLoaderData tLoaderData0 = {0}; pl_begin_profile_sample("load models 0"); - gptModelLoader->load_gltf(ptMainComponentLibrary, "../data/glTF-Sample-Assets-main/Models/FlightHelmet/glTF/FlightHelmet.gltf", NULL, &tLoaderData0); + // gptModelLoader->load_gltf(ptMainComponentLibrary, "../data/glTF-Sample-Assets-main/Models/FlightHelmet/glTF/FlightHelmet.gltf", NULL, &tLoaderData0); + gptModelLoader->load_gltf(ptMainComponentLibrary, "../data/glTF-Sample-Assets-main/Models/CesiumMan/glTF/CesiumMan.gltf", NULL, &tLoaderData0); // gptModelLoader->load_gltf(ptMainComponentLibrary, "../data/glTF-Sample-Assets-main/Models/DamagedHelmet/glTF/DamagedHelmet.gltf", NULL, &tLoaderData0); - // gptModelLoader->load_gltf(ptMainComponentLibrary, "../data/glTF-Sample-Assets-main/Models/Sponza/glTF/Sponza.gltf", NULL, &tLoaderData0); + gptModelLoader->load_gltf(ptMainComponentLibrary, "../data/glTF-Sample-Assets-main/Models/Sponza/glTF/Sponza.gltf", NULL, &tLoaderData0); gptModelLoader->load_stl(ptMainComponentLibrary, "../data/pilotlight-assets-master/meshes/monkey.stl", (plVec4){1.0f, 1.0f, 0.0f, 0.80f}, &tTransform0, &tLoaderData0); gptRenderer->add_drawable_objects_to_scene(ptAppData->uSceneHandle0, tLoaderData0.uOpaqueCount, tLoaderData0.atOpaqueObjects, tLoaderData0.uTransparentCount, tLoaderData0.atTransparentObjects); gptModelLoader->free_data(&tLoaderData0); @@ -422,7 +423,8 @@ pl_app_update(plAppData* ptAppData) uint64_t ulValue1 = ulValue0 + 1; uint64_t ulValue2 = ulValue0 + 2; uint64_t ulValue3 = ulValue0 + 3; - ptAppData->aulNextTimelineValue[ptGraphics->uCurrentFrameIndex] = ulValue3; + uint64_t ulValue4 = ulValue0 + 4; + ptAppData->aulNextTimelineValue[ptGraphics->uCurrentFrameIndex] = ulValue4; // first set of work @@ -435,8 +437,8 @@ pl_app_update(plAppData* ptAppData) 
plCommandBuffer tCommandBuffer = gptGfx->begin_command_recording(ptGraphics, &tBeginInfo0); - gptRenderer->update_scene(tCommandBuffer, ptAppData->uSceneHandle0); - gptRenderer->update_scene(tCommandBuffer, ptAppData->uSceneHandle1); + gptRenderer->update_skin_textures(tCommandBuffer, ptAppData->uSceneHandle0); + gptRenderer->update_skin_textures(tCommandBuffer, ptAppData->uSceneHandle1); gptGfx->end_command_recording(ptGraphics, &tCommandBuffer); const plSubmitInfo tSubmitInfo0 = { @@ -446,6 +448,24 @@ pl_app_update(plAppData* ptAppData) }; gptGfx->submit_command_buffer(ptGraphics, &tCommandBuffer, &tSubmitInfo0); + const plBeginCommandInfo tBeginInfo00 = { + .uWaitSemaphoreCount = 1, + .atWaitSempahores = {ptAppData->atSempahore[ptGraphics->uCurrentFrameIndex]}, + .auWaitSemaphoreValues = {ulValue1}, + }; + tCommandBuffer = gptGfx->begin_command_recording(ptGraphics, &tBeginInfo00); + + gptRenderer->perform_skinning(tCommandBuffer, ptAppData->uSceneHandle0); + gptRenderer->perform_skinning(tCommandBuffer, ptAppData->uSceneHandle1); + gptGfx->end_command_recording(ptGraphics, &tCommandBuffer); + + const plSubmitInfo tSubmitInfo00 = { + .uSignalSemaphoreCount = 1, + .atSignalSempahores = {ptAppData->atSempahore[ptGraphics->uCurrentFrameIndex]}, + .auSignalSemaphoreValues = {ulValue2} + }; + gptGfx->submit_command_buffer(ptGraphics, &tCommandBuffer, &tSubmitInfo00); + plViewOptions tViewOptions = { .bShowAllBoundingBoxes = ptAppData->bDrawAllBoundingBoxes, .bShowVisibleBoundingBoxes = ptAppData->bDrawVisibleBoundingBoxes, @@ -473,7 +493,7 @@ pl_app_update(plAppData* ptAppData) const plBeginCommandInfo tBeginInfo1 = { .uWaitSemaphoreCount = 1, .atWaitSempahores = {ptAppData->atSempahore[ptGraphics->uCurrentFrameIndex]}, - .auWaitSemaphoreValues = {ulValue1} + .auWaitSemaphoreValues = {ulValue2} }; tCommandBuffer = gptGfx->begin_command_recording(ptGraphics, &tBeginInfo1); gptRenderer->render_scene(tCommandBuffer, ptAppData->uSceneHandle0, ptAppData->uViewHandle0, 
tViewOptions); @@ -485,7 +505,7 @@ pl_app_update(plAppData* ptAppData) const plSubmitInfo tSubmitInfo1 = { .uSignalSemaphoreCount = 1, .atSignalSempahores = {ptAppData->atSempahore[ptGraphics->uCurrentFrameIndex]}, - .auSignalSemaphoreValues = {ulValue2} + .auSignalSemaphoreValues = {ulValue3} }; gptGfx->submit_command_buffer(ptGraphics, &tCommandBuffer, &tSubmitInfo1); @@ -494,7 +514,7 @@ pl_app_update(plAppData* ptAppData) const plBeginCommandInfo tBeginInfo2 = { .uWaitSemaphoreCount = 1, .atWaitSempahores = {ptAppData->atSempahore[ptGraphics->uCurrentFrameIndex]}, - .auWaitSemaphoreValues = {ulValue2}, + .auWaitSemaphoreValues = {ulValue3}, }; tCommandBuffer = gptGfx->begin_command_recording(ptGraphics, &tBeginInfo2); @@ -601,7 +621,7 @@ pl_app_update(plAppData* ptAppData) const plSubmitInfo tSubmitInfo2 = { .uSignalSemaphoreCount = 1, .atSignalSempahores = {ptAppData->atSempahore[ptGraphics->uCurrentFrameIndex]}, - .auSignalSemaphoreValues = {ulValue3}, + .auSignalSemaphoreValues = {ulValue4}, }; gptGfx->end_command_recording(ptGraphics, &tCommandBuffer); if(!gptGfx->present(ptGraphics, &tCommandBuffer, &tSubmitInfo2)) diff --git a/extensions/pl_ref_renderer_ext.c b/extensions/pl_ref_renderer_ext.c index d5eca131..1571c0fb 100644 --- a/extensions/pl_ref_renderer_ext.c +++ b/extensions/pl_ref_renderer_ext.c @@ -59,9 +59,14 @@ typedef struct _plComputeShaderVariant typedef struct _plSkinData { - plEntity tEntity; - plTextureHandle atDynamicTexture[PL_FRAMES_IN_FLIGHT]; - plBindGroupHandle tTempBindGroup; + plEntity tEntity; + plTextureHandle atDynamicTexture[PL_FRAMES_IN_FLIGHT]; + plBindGroupHandle tTempBindGroup; + plComputeShaderHandle tShader; + uint32_t uVertexCount; + int iSourceDataOffset; + int iDestDataOffset; + int iDestVertexOffset; } plSkinData; typedef struct _plDrawable @@ -180,6 +185,7 @@ typedef struct _plRefView typedef struct _plRefScene { + plShaderHandle tLightingShader; // skybox resources (optional) int iEnvironmentMips; @@ -193,17 +199,22 
@@ typedef struct _plRefScene // lighting (final quad to use for composition) plDrawable tLightingDrawable; + // skins + plBindGroupHandle tSkinBindGroup0; + // CPU buffers plVec3* sbtVertexPosBuffer; plVec4* sbtVertexDataBuffer; uint32_t* sbuIndexBuffer; plGPUMaterial* sbtMaterialBuffer; + plVec4* sbtSkinVertexDataBuffer; // GPU buffers plBufferHandle tVertexBuffer; plBufferHandle tIndexBuffer; plBufferHandle tStorageBuffer; plBufferHandle tMaterialDataBuffer; + plBufferHandle tSkinStorageBuffer; // views uint32_t uViewCount; @@ -231,7 +242,6 @@ typedef struct _plRefRendererData plShaderHandle tOpaqueShader; plShaderHandle tTransparentShader; plShaderHandle tSkyboxShader; - plShaderHandle tLightingShader; // graphics shader variant system uint32_t uVariantCount; @@ -251,7 +261,7 @@ typedef struct _plRefRendererData plSamplerHandle tDefaultSampler; plSamplerHandle tEnvSampler; plTextureHandle tDummyTexture; - plBindGroupHandle tNullSkinBindgroup; + plTextureHandle tDummyTextureCube; // scenes plRefScene* sbtScenes; @@ -316,6 +326,7 @@ static const plJobI* gptJob = NULL; //----------------------------------------------------------------------------- // general helpers +static void pl__add_drawable_skin_data_to_global_buffer(plRefScene*, uint32_t uDrawableIndex, plDrawable* atDrawables); static void pl__add_drawable_data_to_global_buffer(plRefScene*, uint32_t uDrawableIndex, plDrawable* atDrawables); static bool pl__sat_visibility_test(plCameraComponent*, const plAABB*); @@ -387,6 +398,16 @@ pl_refr_initialize(plWindow* ptWindow) }; gptData->tDummyTexture = pl__refr_create_texture_with_data(&tDummyTextureDesc, "dummy", 0, afDummyTextureData, sizeof(afDummyTextureData)); + const plTextureDesc tSkyboxTextureDesc = { + .tDimensions = {1, 1, 1}, + .tFormat = PL_FORMAT_R32G32B32A32_FLOAT, + .uLayers = 6, + .uMips = 1, + .tType = PL_TEXTURE_TYPE_CUBE, + .tUsage = PL_TEXTURE_USAGE_SAMPLED + }; + gptData->tDummyTextureCube = pl__refr_create_texture(&tSkyboxTextureDesc, 
"dummy cube", 0); + // create default sampler const plSamplerDesc tSamplerDesc = { .tFilter = PL_FILTER_LINEAR, @@ -406,26 +427,6 @@ pl_refr_initialize(plWindow* ptWindow) }; gptData->tEnvSampler = gptDevice->create_sampler(&ptGraphics->tDevice, &tEnvSamplerDesc, "ENV sampler"); - // create null skin bind group (to be bound when skinning isn't enabled) - const plBindGroupLayout tBindGroupLayout1 = { - .uTextureBindingCount = 1, - .atTextureBindings = {{.uSlot = 0, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED}} - }; - gptData->tNullSkinBindgroup = gptDevice->create_bind_group(&ptGraphics->tDevice, &tBindGroupLayout1, "null skin bind group"); - - const plBindGroupUpdateTextureData tBGTextureData = { - .tTexture = gptData->tDummyTexture, - .uSlot = 0, - .uIndex = 0, - .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED - }; - - const plBindGroupUpdateData tBGData = { - .uTextureCount = 1, - .atTextures = &tBGTextureData - }; - gptDevice->update_bind_group(&ptGraphics->tDevice, gptData->tNullSkinBindgroup, &tBGData); - // create main render pass layout const plRenderPassLayoutDescription tRenderPassLayoutDesc = { .atRenderTargets = { @@ -461,16 +462,7 @@ pl_refr_initialize(plWindow* ptWindow) // create template shaders - int aiConstantData[6] = {0}; - aiConstantData[5] = PL_RENDERING_FLAG_USE_IBL | PL_RENDERING_FLAG_USE_PUNCTUAL; - - aiConstantData[0] = (int)PL_MESH_FORMAT_FLAG_HAS_NORMAL; - int iFlagCopy = (int)PL_MESH_FORMAT_FLAG_HAS_NORMAL; - while(iFlagCopy) - { - aiConstantData[1] += iFlagCopy & 1; - iFlagCopy >>= 1; - } + int aiConstantData[5] = {0}; plShaderDescription tOpaqueShaderDescription = { @@ -497,7 +489,7 @@ pl_refr_initialize(plWindow* ptWindow) .uByteStride = sizeof(float) * 3, .atAttributes = { {.uByteOffset = 0, .tFormat = PL_FORMAT_R32G32B32_FLOAT}} }, - .uConstantCount = 6, + .uConstantCount = 5, .pTempConstantData = aiConstantData, .atBlendStates = { pl__get_blend_state(PL_BLEND_MODE_OPAQUE), @@ -509,7 +501,7 @@ 
pl_refr_initialize(plWindow* ptWindow) .uBlendStateCount = 5, .tRenderPassLayout = gptData->tRenderPassLayout, .uSubpassIndex = 0, - .uBindGroupLayoutCount = 3, + .uBindGroupLayoutCount = 2, .atBindGroupLayouts = { { .uBufferBindingCount = 3, @@ -558,12 +550,6 @@ pl_refr_initialize(plWindow* ptWindow) {.uSlot = 10, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1}, {.uSlot = 11, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1}, } - }, - { - .uTextureBindingCount = 1, - .atTextureBindings = { - {.uSlot = 0, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED} - } } } }; @@ -600,7 +586,7 @@ pl_refr_initialize(plWindow* ptWindow) .uByteStride = sizeof(float) * 3, .atAttributes = { {.uByteOffset = 0, .tFormat = PL_FORMAT_R32G32B32_FLOAT}} }, - .uConstantCount = 6, + .uConstantCount = 5, .pTempConstantData = aiConstantData, .atBlendStates = { pl__get_blend_state(PL_BLEND_MODE_ALPHA) @@ -608,7 +594,7 @@ pl_refr_initialize(plWindow* ptWindow) .uBlendStateCount = 1, .tRenderPassLayout = gptData->tRenderPassLayout, .uSubpassIndex = 2, - .uBindGroupLayoutCount = 3, + .uBindGroupLayoutCount = 2, .atBindGroupLayouts = { { .uBufferBindingCount = 3, @@ -657,12 +643,6 @@ pl_refr_initialize(plWindow* ptWindow) {.uSlot = 10, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1}, {.uSlot = 11, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1}, } - }, - { - .uTextureBindingCount = 1, - .atTextureBindings = { - {.uSlot = 0, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED} - } } } }; @@ -673,103 +653,6 @@ pl_refr_initialize(plWindow* ptWindow) tTransparentShaderDescription.atConstants[i].tType = PL_DATA_TYPE_INT; } gptData->tTransparentShader = 
gptDevice->create_shader(&gptData->tGraphics.tDevice, &tTransparentShaderDescription); - - // create lighting shader - int aiLightingConstantData[1] = {PL_RENDERING_FLAG_USE_IBL | PL_RENDERING_FLAG_USE_PUNCTUAL}; - plShaderDescription tLightingShaderDesc = { - #ifdef PL_METAL_BACKEND - .pcVertexShader = "../shaders/metal/lighting.metal", - .pcPixelShader = "../shaders/metal/lighting.metal", - .pcVertexShaderEntryFunc = "vertex_main", - .pcPixelShaderEntryFunc = "fragment_main", - #else - .pcVertexShader = "lighting.vert.spv", - .pcPixelShader = "lighting.frag.spv", - .pcVertexShaderEntryFunc = "main", - .pcPixelShaderEntryFunc = "main", - #endif - .tGraphicsState = { - .ulDepthWriteEnabled = 0, - .ulDepthMode = PL_COMPARE_MODE_ALWAYS, - .ulCullMode = PL_CULL_MODE_NONE, - .ulStencilMode = PL_COMPARE_MODE_ALWAYS, - .ulStencilRef = 0xff, - .ulStencilMask = 0xff, - .ulStencilOpFail = PL_STENCIL_OP_KEEP, - .ulStencilOpDepthFail = PL_STENCIL_OP_KEEP, - .ulStencilOpPass = PL_STENCIL_OP_KEEP - }, - .tVertexBufferBinding = { - .uByteStride = 12, - .atAttributes = { {.uByteOffset = 0, .tFormat = PL_FORMAT_R32G32B32_FLOAT}} - }, - .atBlendStates = { - pl__get_blend_state(PL_BLEND_MODE_OPAQUE) - }, - .uConstantCount = 1, - .pTempConstantData = aiLightingConstantData, - .uBlendStateCount = 1, - .uSubpassIndex = 1, - .tRenderPassLayout = gptData->tRenderPassLayout, - .uBindGroupLayoutCount = 3, - .atBindGroupLayouts = { - { - .uBufferBindingCount = 3, - .aBufferBindings = { - { - .tType = PL_BUFFER_BINDING_TYPE_UNIFORM, - .uSlot = 0, - .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL - }, - { - .tType = PL_BUFFER_BINDING_TYPE_STORAGE, - .uSlot = 1, - .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL - }, - { - .tType = PL_BUFFER_BINDING_TYPE_STORAGE, - .uSlot = 2, - .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL - }, - }, - .uSamplerBindingCount = 2, - .atSamplerBindings = { - {.uSlot = 3, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL}, - {.uSlot = 4, .tStages = PL_STAGE_VERTEX | 
PL_STAGE_PIXEL} - }, - .uTextureBindingCount = 3, - .atTextureBindings = { - {.uSlot = 5, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1}, - {.uSlot = 6, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1}, - {.uSlot = 7, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1} - } - }, - { - .uTextureBindingCount = 6, - .atTextureBindings = { - { .uSlot = 0, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, - { .uSlot = 1, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, - { .uSlot = 2, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, - { .uSlot = 3, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, - { .uSlot = 4, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, - { .uSlot = 5, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, - }, - }, - { - .uTextureBindingCount = 1, - .atTextureBindings = { - { .uSlot = 0, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED} - }, - } - } - }; - for(uint32_t i = 0; i < tLightingShaderDesc.uConstantCount; i++) - { - tLightingShaderDesc.atConstants[i].uID = i; - tLightingShaderDesc.atConstants[i].uOffset = i * sizeof(int); - tLightingShaderDesc.atConstants[i].tType = PL_DATA_TYPE_INT; - } - gptData->tLightingShader = gptDevice->create_shader(&ptGraphics->tDevice, &tLightingShaderDesc); } static uint32_t @@ -793,6 +676,11 @@ pl_refr_create_scene(void) ptScene->tSkyboxTexture = (plTextureHandle) {UINT32_MAX, UINT32_MAX}; ptScene->tSkyboxBindGroup = (plBindGroupHandle){UINT32_MAX, UINT32_MAX}; + // IBL defaults + ptScene->tGGXLUTTexture = (plTextureHandle) {UINT32_MAX, UINT32_MAX}; + ptScene->tGGXEnvTexture = (plTextureHandle) {UINT32_MAX, UINT32_MAX}; + 
ptScene->tLambertianEnvTexture = (plTextureHandle) {UINT32_MAX, UINT32_MAX}; + return uSceneHandle; } @@ -2006,6 +1894,11 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~CPU Buffers~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + int iSceneWideRenderingFlags = PL_RENDERING_FLAG_USE_PUNCTUAL; + // punctual lighting is always on; IBL is added only when the scene's GGX environment texture handle is set + if(ptScene->tGGXEnvTexture.uIndex != UINT32_MAX) + iSceneWideRenderingFlags |= PL_RENDERING_FLAG_USE_IBL; + // fill CPU buffers & drawable list pl_begin_profile_sample("fill CPU opaque buffers"); const uint32_t uOpaqueDrawableCount = pl_sb_size(ptScene->sbtOpaqueDrawables); @@ -2026,10 +1919,11 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) // add data to global buffers pl__add_drawable_data_to_global_buffer(ptScene, i, ptScene->sbtOpaqueDrawables); - + pl__add_drawable_skin_data_to_global_buffer(ptScene, i, ptScene->sbtOpaqueDrawables); + // choose shader variant - int aiConstantData0[6] = {0}; - aiConstantData0[5] = PL_RENDERING_FLAG_USE_IBL | PL_RENDERING_FLAG_USE_PUNCTUAL; + int aiConstantData0[5] = {0}; + aiConstantData0[4] = iSceneWideRenderingFlags; aiConstantData0[0] = (int)ptMesh->ulVertexStreamMask; // iTextureMappingFlags flags @@ -2043,7 +1937,6 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) // iMaterialFlags flags aiConstantData0[3] |= PL_INFO_MATERIAL_METALLICROUGHNESS; - aiConstantData0[4] = (int)(ptMesh->tSkinComponent.uIndex != UINT32_MAX); int iFlagCopy0 = (int)ptMesh->ulVertexStreamMask; while(iFlagCopy0) { @@ -2067,30 +1960,6 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) }; ptScene->sbtOpaqueDrawables[i].uShader = pl__get_shader_variant(uSceneHandle, gptData->tOpaqueShader, &tVariant).uIndex; - - if(ptMesh->tSkinComponent.uIndex != UINT32_MAX) - { - - plSkinData tSkinData = {.tEntity = ptMesh->tSkinComponent}; - - plSkinComponent* ptSkinComponent = gptECS->get_component(&ptScene->tComponentLibrary, PL_COMPONENT_TYPE_SKIN, ptMesh->tSkinComponent); - unsigned int textureWidth = (unsigned
int)ceilf(sqrtf((float)(pl_sb_size(ptSkinComponent->sbtJoints) * 8))); - pl_sb_resize(ptSkinComponent->sbtTextureData, textureWidth * textureWidth); - const plTextureDesc tSkinTextureDesc = { - .tDimensions = {(float)textureWidth, (float)textureWidth, 1}, - .tFormat = PL_FORMAT_R32G32B32A32_FLOAT, - .uLayers = 1, - .uMips = 1, - .tType = PL_TEXTURE_TYPE_2D, - .tUsage = PL_TEXTURE_USAGE_SAMPLED - }; - - for(uint32_t uFrameIndex = 0; uFrameIndex < PL_FRAMES_IN_FLIGHT; uFrameIndex++) - tSkinData.atDynamicTexture[uFrameIndex] = pl__refr_create_texture_with_data(&tSkinTextureDesc, "joint texture", uFrameIndex, ptSkinComponent->sbtTextureData, sizeof(float) * 4 * textureWidth * textureWidth); - - ptScene->sbtOpaqueDrawables[i].uSkinIndex = pl_sb_size(ptScene->sbtSkinData); - pl_sb_push(ptScene->sbtSkinData, tSkinData); - } } pl_end_profile_sample(); @@ -2113,10 +1982,11 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) // add data to global buffers pl__add_drawable_data_to_global_buffer(ptScene, i, ptScene->sbtTransparentDrawables); + pl__add_drawable_skin_data_to_global_buffer(ptScene, i, ptScene->sbtTransparentDrawables); // choose shader variant - int aiConstantData0[6] = {0}; - aiConstantData0[5] = PL_RENDERING_FLAG_USE_IBL | PL_RENDERING_FLAG_USE_PUNCTUAL; + int aiConstantData0[5] = {0}; + aiConstantData0[4] = iSceneWideRenderingFlags; aiConstantData0[0] = (int)ptMesh->ulVertexStreamMask; // iTextureMappingFlags flags @@ -2129,7 +1999,6 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) // iMaterialFlags flags aiConstantData0[3] |= PL_INFO_MATERIAL_METALLICROUGHNESS; - aiConstantData0[4] = (int)(ptMesh->tSkinComponent.uIndex != UINT32_MAX); int iFlagCopy0 = (int)ptMesh->ulVertexStreamMask; while(iFlagCopy0) { @@ -2153,30 +2022,6 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) }; ptScene->sbtTransparentDrawables[i].uShader = pl__get_shader_variant(uSceneHandle, gptData->tTransparentShader, &tVariant).uIndex; - - if(ptMesh->tSkinComponent.uIndex != UINT32_MAX) - { - - 
plSkinData tSkinData = {.tEntity = ptMesh->tSkinComponent}; - - plSkinComponent* ptSkinComponent = gptECS->get_component(&ptScene->tComponentLibrary, PL_COMPONENT_TYPE_SKIN, ptMesh->tSkinComponent); - unsigned int textureWidth = (unsigned int)ceilf(sqrtf((float)(pl_sb_size(ptSkinComponent->sbtJoints) * 8))); - pl_sb_resize(ptSkinComponent->sbtTextureData, textureWidth * textureWidth); - const plTextureDesc tSkinTextureDesc = { - .tDimensions = {(float)textureWidth, (float)textureWidth, 1}, - .tFormat = PL_FORMAT_R32G32B32A32_FLOAT, - .uLayers = 1, - .uMips = 1, - .tType = PL_TEXTURE_TYPE_2D, - .tUsage = PL_TEXTURE_USAGE_SAMPLED - }; - - for(uint32_t uFrameIndex = 0; uFrameIndex < PL_FRAMES_IN_FLIGHT; uFrameIndex++) - tSkinData.atDynamicTexture[uFrameIndex] = pl__refr_create_texture_with_data(&tSkinTextureDesc, "joint texture", uFrameIndex, ptSkinComponent->sbtTextureData, sizeof(float) * 4 * textureWidth * textureWidth); - - ptScene->sbtTransparentDrawables[i].uSkinIndex = pl_sb_size(ptScene->sbtSkinData); - pl_sb_push(ptScene->sbtSkinData, tSkinData); - } } pl_end_profile_sample(); @@ -2198,7 +2043,7 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) }; const plBufferDescription tVertexBufferDesc = { - .tUsage = PL_BUFFER_USAGE_VERTEX, + .tUsage = PL_BUFFER_USAGE_VERTEX | PL_BUFFER_USAGE_STORAGE, .uByteSize = sizeof(plVec3) * pl_sb_size(ptScene->sbtVertexPosBuffer) }; @@ -2207,10 +2052,143 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) .uByteSize = sizeof(plVec4) * pl_sb_size(ptScene->sbtVertexDataBuffer) }; - ptScene->tMaterialDataBuffer = pl__refr_create_local_buffer(&tShaderBufferDesc, "shader", uSceneHandle, ptScene->sbtMaterialBuffer); - ptScene->tIndexBuffer = pl__refr_create_local_buffer(&tIndexBufferDesc, "index", uSceneHandle, ptScene->sbuIndexBuffer); - ptScene->tVertexBuffer = pl__refr_create_local_buffer(&tVertexBufferDesc, "vertex", uSceneHandle, ptScene->sbtVertexPosBuffer); - ptScene->tStorageBuffer = 
pl__refr_create_local_buffer(&tStorageBufferDesc, "storage", uSceneHandle, ptScene->sbtVertexDataBuffer); + const plBufferDescription tSkinStorageBufferDesc = { + .tUsage = PL_BUFFER_USAGE_STORAGE, + .uByteSize = sizeof(plVec4) * pl_sb_size(ptScene->sbtSkinVertexDataBuffer) + }; + + ptScene->tMaterialDataBuffer = pl__refr_create_local_buffer(&tShaderBufferDesc, "shader", uSceneHandle, ptScene->sbtMaterialBuffer); + ptScene->tIndexBuffer = pl__refr_create_local_buffer(&tIndexBufferDesc, "index", uSceneHandle, ptScene->sbuIndexBuffer); + ptScene->tVertexBuffer = pl__refr_create_local_buffer(&tVertexBufferDesc, "vertex", uSceneHandle, ptScene->sbtVertexPosBuffer); + ptScene->tStorageBuffer = pl__refr_create_local_buffer(&tStorageBufferDesc, "storage", uSceneHandle, ptScene->sbtVertexDataBuffer); + + if(tSkinStorageBufferDesc.uByteSize > 0) + { + ptScene->tSkinStorageBuffer = pl__refr_create_local_buffer(&tSkinStorageBufferDesc, "skin storage", uSceneHandle, ptScene->sbtSkinVertexDataBuffer); + + const plBindGroupLayout tSkinBindGroupLayout0 = { + .uSamplerBindingCount = 1, + .atSamplerBindings = { + {.uSlot = 3, .tStages = PL_STAGE_COMPUTE} + }, + .uBufferBindingCount = 3, + .aBufferBindings = { + { .uSlot = 0, .tType = PL_BUFFER_BINDING_TYPE_STORAGE, .tStages = PL_STAGE_COMPUTE}, + { .uSlot = 1, .tType = PL_BUFFER_BINDING_TYPE_STORAGE, .tStages = PL_STAGE_COMPUTE}, + { .uSlot = 2, .tType = PL_BUFFER_BINDING_TYPE_STORAGE, .tStages = PL_STAGE_COMPUTE}, + } + }; + ptScene->tSkinBindGroup0 = gptDevice->create_bind_group(ptDevice, &tSkinBindGroupLayout0, "skin bind group 0"); + + const plBindGroupUpdateSamplerData atSamplerData[] = { + { .uSlot = 3, .tSampler = gptData->tDefaultSampler} + }; + const plBindGroupUpdateBufferData atBufferData[] = + { + { .uSlot = 0, .tBuffer = ptScene->tSkinStorageBuffer, .szBufferRange = tSkinStorageBufferDesc.uByteSize}, + { .uSlot = 1, .tBuffer = ptScene->tVertexBuffer, .szBufferRange = tVertexBufferDesc.uByteSize}, + { .uSlot = 2, 
.tBuffer = ptScene->tStorageBuffer, .szBufferRange = tStorageBufferDesc.uByteSize} + + }; + plBindGroupUpdateData tBGData0 = { + .uBufferCount = 3, + .atBuffers = atBufferData, + .uSamplerCount = 1, + .atSamplerBindings = atSamplerData, + }; + gptDevice->update_bind_group(&ptGraphics->tDevice, ptScene->tSkinBindGroup0, &tBGData0); + } + + // create lighting shader + int aiLightingConstantData[1] = {iSceneWideRenderingFlags}; + plShaderDescription tLightingShaderDesc = { + #ifdef PL_METAL_BACKEND + .pcVertexShader = "../shaders/metal/lighting.metal", + .pcPixelShader = "../shaders/metal/lighting.metal", + .pcVertexShaderEntryFunc = "vertex_main", + .pcPixelShaderEntryFunc = "fragment_main", + #else + .pcVertexShader = "lighting.vert.spv", + .pcPixelShader = "lighting.frag.spv", + .pcVertexShaderEntryFunc = "main", + .pcPixelShaderEntryFunc = "main", + #endif + .tGraphicsState = { + .ulDepthWriteEnabled = 0, + .ulDepthMode = PL_COMPARE_MODE_ALWAYS, + .ulCullMode = PL_CULL_MODE_NONE, + .ulStencilMode = PL_COMPARE_MODE_ALWAYS, + .ulStencilRef = 0xff, + .ulStencilMask = 0xff, + .ulStencilOpFail = PL_STENCIL_OP_KEEP, + .ulStencilOpDepthFail = PL_STENCIL_OP_KEEP, + .ulStencilOpPass = PL_STENCIL_OP_KEEP + }, + .tVertexBufferBinding = { + .uByteStride = 12, + .atAttributes = { {.uByteOffset = 0, .tFormat = PL_FORMAT_R32G32B32_FLOAT}} + }, + .atBlendStates = { + pl__get_blend_state(PL_BLEND_MODE_OPAQUE) + }, + .uConstantCount = 1, + .pTempConstantData = aiLightingConstantData, + .uBlendStateCount = 1, + .uSubpassIndex = 1, + .tRenderPassLayout = gptData->tRenderPassLayout, + .uBindGroupLayoutCount = 2, + .atBindGroupLayouts = { + { + .uBufferBindingCount = 3, + .aBufferBindings = { + { + .tType = PL_BUFFER_BINDING_TYPE_UNIFORM, + .uSlot = 0, + .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL + }, + { + .tType = PL_BUFFER_BINDING_TYPE_STORAGE, + .uSlot = 1, + .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL + }, + { + .tType = PL_BUFFER_BINDING_TYPE_STORAGE, + .uSlot = 2, + .tStages = 
PL_STAGE_VERTEX | PL_STAGE_PIXEL + }, + }, + .uSamplerBindingCount = 2, + .atSamplerBindings = { + {.uSlot = 3, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL}, + {.uSlot = 4, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL} + }, + .uTextureBindingCount = 3, + .atTextureBindings = { + {.uSlot = 5, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1}, + {.uSlot = 6, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1}, + {.uSlot = 7, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED, .uDescriptorCount = 1} + } + }, + { + .uTextureBindingCount = 6, + .atTextureBindings = { + { .uSlot = 0, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, + { .uSlot = 1, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, + { .uSlot = 2, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, + { .uSlot = 3, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, + { .uSlot = 4, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, + { .uSlot = 5, .tStages = PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_INPUT_ATTACHMENT}, + }, + } + } + }; + for(uint32_t i = 0; i < tLightingShaderDesc.uConstantCount; i++) + { + tLightingShaderDesc.atConstants[i].uID = i; + tLightingShaderDesc.atConstants[i].uOffset = i * sizeof(int); + tLightingShaderDesc.atConstants[i].tType = PL_DATA_TYPE_INT; + } + ptScene->tLightingShader = gptDevice->create_shader(&ptGraphics->tDevice, &tLightingShaderDesc); pl_end_profile_sample(); } @@ -2230,7 +2208,7 @@ pl_refr_run_ecs(uint32_t uSceneHandle) } static void -pl_refr_update_scene(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle) +pl_refr_update_skin_textures(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle) { pl_begin_profile_sample(__FUNCTION__); plGraphics* ptGraphics = 
&gptData->tGraphics; @@ -2245,7 +2223,7 @@ pl_refr_update_scene(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle) plBindGroupLayout tBindGroupLayout1 = { .uTextureBindingCount = 1, .atTextureBindings = { - {.uSlot = 0, .tStages = PL_STAGE_VERTEX | PL_STAGE_PIXEL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED} + {.uSlot = 0, .tStages = PL_STAGE_ALL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED} } }; ptScene->sbtSkinData[i].tTempBindGroup = gptDevice->get_temporary_bind_group(ptDevice, &tBindGroupLayout1, "skin temporary bind group"); @@ -2271,6 +2249,56 @@ pl_refr_update_scene(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle) gptGfx->copy_buffer_to_texture(&tBlitEncoder, gptData->tStagingBufferHandle[ptGraphics->uCurrentFrameIndex], ptScene->sbtSkinData[i].atDynamicTexture[ptGraphics->uCurrentFrameIndex], 1, &tBufferImageCopy); } gptGfx->end_blit_pass(&tBlitEncoder); + + pl_end_profile_sample(); +} + +static void +pl_refr_perform_skinning(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle) +{ + pl_begin_profile_sample(__FUNCTION__); + plGraphics* ptGraphics = &gptData->tGraphics; + plDevice* ptDevice = &ptGraphics->tDevice; + plRefScene* ptScene = &gptData->sbtScenes[uSceneHandle]; + + // update skin textures + const uint32_t uSkinCount = pl_sb_size(ptScene->sbtSkinData); + + typedef struct _SkinDynamicData + { + int iSourceDataOffset; + int iDestDataOffset; + int iDestVertexOffset; + int iUnused; + } SkinDynamicData; + + plComputeEncoder tComputeEncoder = gptGfx->begin_compute_pass(ptGraphics, &tCommandBuffer); + + for(uint32_t i = 0; i < uSkinCount; i++) + { + plDynamicBinding tDynamicBinding = gptDevice->allocate_dynamic_data(ptDevice, sizeof(SkinDynamicData)); + SkinDynamicData* ptDynamicData = (SkinDynamicData*)tDynamicBinding.pcData; + ptDynamicData->iSourceDataOffset = ptScene->sbtSkinData[i].iSourceDataOffset; + ptDynamicData->iDestDataOffset = ptScene->sbtSkinData[i].iDestDataOffset; + ptDynamicData->iDestVertexOffset = 
ptScene->sbtSkinData[i].iDestVertexOffset; + + const plDispatch tDispach = { + .uGroupCountX = ptScene->sbtSkinData[i].uVertexCount, + .uGroupCountY = 1, + .uGroupCountZ = 1, + .uThreadPerGroupX = 1, + .uThreadPerGroupY = 1, + .uThreadPerGroupZ = 1 + }; + const plBindGroupHandle atBindGroups[] = { + ptScene->tSkinBindGroup0, + ptScene->sbtSkinData[i].tTempBindGroup + }; + gptGfx->bind_compute_bind_groups(&tComputeEncoder, ptScene->sbtSkinData[i].tShader, 0, 2, atBindGroups, &tDynamicBinding); + gptGfx->bind_compute_shader(&tComputeEncoder, ptScene->sbtSkinData[i].tShader); + gptGfx->dispatch(&tComputeEncoder, 1, &tDispach); + } + gptGfx->end_compute_pass(&tComputeEncoder); pl_end_profile_sample(); } @@ -2447,17 +2475,17 @@ pl_refr_render_scene(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle, uint }; const plBindGroupUpdateTextureData tTextureData[] = { { - .tTexture = ptScene->tLambertianEnvTexture, + .tTexture = ptScene->tLambertianEnvTexture.uIndex != UINT32_MAX ? ptScene->tLambertianEnvTexture : gptData->tDummyTextureCube, .uSlot = 5, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED }, { - .tTexture = ptScene->tGGXEnvTexture, + .tTexture = ptScene->tGGXEnvTexture.uIndex != UINT32_MAX ? ptScene->tGGXEnvTexture : gptData->tDummyTextureCube, .uSlot = 6, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED }, { - .tTexture = ptScene->tGGXLUTTexture, + .tTexture = ptScene->tGGXLUTTexture.uIndex != UINT32_MAX ? ptScene->tGGXLUTTexture : gptData->tDummyTexture, .uSlot = 7, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED }, @@ -2527,7 +2555,7 @@ pl_refr_render_scene(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle, uint .uIndexOffset = tDrawable.uIndexOffset, .uTriangleCount = tDrawable.uIndexCount == 0 ? tDrawable.uVertexCount / 3 : tDrawable.uIndexCount / 3, .uBindGroup1 = tDrawable.tMaterialBindGroup.uIndex, - .uBindGroup2 = tDrawable.uSkinIndex == UINT32_MAX ? 
gptData->tNullSkinBindgroup.uIndex : ptScene->sbtSkinData[tDrawable.uSkinIndex].tTempBindGroup.uIndex, + .uBindGroup2 = UINT32_MAX, .uDynamicBufferOffset = tDynamicBinding.uByteOffset, .uInstanceStart = 0, .uInstanceCount = 1 @@ -2550,14 +2578,14 @@ pl_refr_render_scene(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle, uint gptGfx->add_to_stream(ptStream, (plStreamDraw) { - .uShaderVariant = gptData->tLightingShader.uIndex, + .uShaderVariant = ptScene->tLightingShader.uIndex, .uDynamicBuffer = tLightingDynamicData.uBufferHandle, .uVertexBuffer = ptScene->tVertexBuffer.uIndex, .uIndexBuffer = ptScene->tIndexBuffer.uIndex, .uIndexOffset = ptScene->tLightingDrawable.uIndexOffset, .uTriangleCount = 2, .uBindGroup1 = ptView->tLightingBindGroup[ptGraphics->uCurrentFrameIndex].uIndex, - .uBindGroup2 = gptData->tNullSkinBindgroup.uIndex, + .uBindGroup2 = UINT32_MAX, .uDynamicBufferOffset = tLightingDynamicData.uByteOffset, .uInstanceStart = 0, .uInstanceCount = 1 @@ -2585,6 +2613,7 @@ pl_refr_render_scene(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle, uint .uIndexOffset = ptScene->tSkyboxDrawable.uIndexOffset, .uTriangleCount = ptScene->tSkyboxDrawable.uIndexCount / 3, .uBindGroup1 = ptScene->tSkyboxBindGroup.uIndex, + .uBindGroup2 = UINT32_MAX, .uDynamicBufferOffset = tSkyboxDynamicData.uByteOffset, .uInstanceStart = 0, .uInstanceCount = 1 @@ -2619,7 +2648,7 @@ pl_refr_render_scene(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle, uint .uIndexOffset = tDrawable.uIndexOffset, .uTriangleCount = tDrawable.uIndexCount == 0 ? tDrawable.uVertexCount / 3 : tDrawable.uIndexCount / 3, .uBindGroup1 = tDrawable.tMaterialBindGroup.uIndex, - .uBindGroup2 = tDrawable.uSkinIndex == UINT32_MAX ? 
gptData->tNullSkinBindgroup.uIndex : ptScene->sbtSkinData[tDrawable.uSkinIndex].tTempBindGroup.uIndex, + .uBindGroup2 = UINT32_MAX, .uDynamicBufferOffset = tDynamicBinding.uByteOffset, .uInstanceStart = 0, .uInstanceCount = 1 @@ -2715,6 +2744,193 @@ pl_add_drawable_objects_to_scene(uint32_t uSceneHandle, uint32_t uOpaqueCount, c // [SECTION] internal API implementation //----------------------------------------------------------------------------- +static void +pl__add_drawable_skin_data_to_global_buffer(plRefScene* ptScene, uint32_t uDrawableIndex, plDrawable* atDrawables) +{ + plEntity tEntity = atDrawables[uDrawableIndex].tEntity; + + // get actual components + plObjectComponent* ptObject = gptECS->get_component(&ptScene->tComponentLibrary, PL_COMPONENT_TYPE_OBJECT, tEntity); + plMeshComponent* ptMesh = gptECS->get_component(&ptScene->tComponentLibrary, PL_COMPONENT_TYPE_MESH, ptObject->tMesh); + + if(ptMesh->tSkinComponent.uIndex == UINT32_MAX) + return; + + const uint32_t uVertexDataStartIndex = pl_sb_size(ptScene->sbtSkinVertexDataBuffer); + const uint32_t uVertexCount = pl_sb_size(ptMesh->sbtVertexPositions); + + // stride within storage buffer + uint32_t uStride = 0; + + uint64_t ulVertexStreamMask = 0; + + // calculate vertex stream mask based on provided data + if(pl_sb_size(ptMesh->sbtVertexPositions) > 0) { uStride += 1; ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_POSITION; } + if(pl_sb_size(ptMesh->sbtVertexNormals) > 0) { uStride += 1; ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_NORMAL; } + if(pl_sb_size(ptMesh->sbtVertexTangents) > 0) { uStride += 1; ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_TANGENT; } + if(pl_sb_size(ptMesh->sbtVertexWeights[0]) > 0) { uStride += 1; ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_0; } + if(pl_sb_size(ptMesh->sbtVertexWeights[1]) > 0) { uStride += 1; ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_1; } + if(pl_sb_size(ptMesh->sbtVertexJoints[0]) > 0) { uStride += 1; ulVertexStreamMask |= 
PL_MESH_FORMAT_FLAG_HAS_JOINTS_0; } + if(pl_sb_size(ptMesh->sbtVertexJoints[1]) > 0) { uStride += 1; ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_JOINTS_1; } + + pl_sb_add_n(ptScene->sbtSkinVertexDataBuffer, uStride * uVertexCount); + + // current attribute offset + uint32_t uOffset = 0; + + // positions + const uint32_t uVertexPositionCount = pl_sb_size(ptMesh->sbtVertexPositions); + for(uint32_t i = 0; i < uVertexPositionCount; i++) + { + const plVec3* ptPosition = &ptMesh->sbtVertexPositions[i]; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride].x = ptPosition->x; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride].y = ptPosition->y; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride].z = ptPosition->z; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride].w = 1.0f; + } + + if(uVertexPositionCount > 0) + uOffset += 1; + + // normals + const uint32_t uVertexNormalCount = pl_sb_size(ptMesh->sbtVertexNormals); + for(uint32_t i = 0; i < uVertexNormalCount; i++) + { + ptMesh->sbtVertexNormals[i] = pl_norm_vec3(ptMesh->sbtVertexNormals[i]); + const plVec3* ptNormal = &ptMesh->sbtVertexNormals[i]; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].x = ptNormal->x; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].y = ptNormal->y; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].z = ptNormal->z; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].w = 0.0f; + } + + if(uVertexNormalCount > 0) + uOffset += 1; + + // tangents + const uint32_t uVertexTangentCount = pl_sb_size(ptMesh->sbtVertexTangents); + for(uint32_t i = 0; i < uVertexTangentCount; i++) + { + const plVec4* ptTangent = &ptMesh->sbtVertexTangents[i]; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].x = ptTangent->x; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + 
i * uStride + uOffset].y = ptTangent->y; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].z = ptTangent->z; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].w = ptTangent->w; + } + + if(uVertexTangentCount > 0) + uOffset += 1; + + // joints 0 + const uint32_t uVertexJoint0Count = pl_sb_size(ptMesh->sbtVertexJoints[0]); + for(uint32_t i = 0; i < uVertexJoint0Count; i++) + { + const plVec4* ptJoint = &ptMesh->sbtVertexJoints[0][i]; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].x = ptJoint->x; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].y = ptJoint->y; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].z = ptJoint->z; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].w = ptJoint->w; + } + + if(uVertexJoint0Count > 0) + uOffset += 1; + + // weights 0 + const uint32_t uVertexWeights0Count = pl_sb_size(ptMesh->sbtVertexWeights[0]); + for(uint32_t i = 0; i < uVertexWeights0Count; i++) + { + const plVec4* ptWeight = &ptMesh->sbtVertexWeights[0][i]; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].x = ptWeight->x; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].y = ptWeight->y; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].z = ptWeight->z; + ptScene->sbtSkinVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].w = ptWeight->w; + } + + if(uVertexWeights0Count > 0) + uOffset += 1; + + PL_ASSERT(uOffset == uStride && "sanity check"); + + // stride within storage buffer + uint32_t uDestStride = 0; + + // calculate vertex stream mask based on provided data + if(pl_sb_size(ptMesh->sbtVertexNormals) > 0) { uDestStride += 1; } + if(pl_sb_size(ptMesh->sbtVertexTangents) > 0) { uDestStride += 1; } + if(pl_sb_size(ptMesh->sbtVertexColors[0]) > 0) { uDestStride += 1; } + 
if(pl_sb_size(ptMesh->sbtVertexColors[1]) > 0) { uDestStride += 1; } + if(pl_sb_size(ptMesh->sbtVertexTextureCoordinates[0]) > 0) { uDestStride += 1; } + if(pl_sb_size(ptMesh->sbtVertexTextureCoordinates[1]) > 0) { uDestStride += 1; } + + plSkinData tSkinData = { + .tEntity = ptMesh->tSkinComponent, + .uVertexCount = uVertexCount, + .iSourceDataOffset = uVertexDataStartIndex, + .iDestDataOffset = atDrawables[uDrawableIndex].uDataOffset, + .iDestVertexOffset = atDrawables[uDrawableIndex].uVertexOffset, + }; + + plSkinComponent* ptSkinComponent = gptECS->get_component(&ptScene->tComponentLibrary, PL_COMPONENT_TYPE_SKIN, ptMesh->tSkinComponent); + unsigned int textureWidth = (unsigned int)ceilf(sqrtf((float)(pl_sb_size(ptSkinComponent->sbtJoints) * 8))); + pl_sb_resize(ptSkinComponent->sbtTextureData, textureWidth * textureWidth); + const plTextureDesc tSkinTextureDesc = { + .tDimensions = {(float)textureWidth, (float)textureWidth, 1}, + .tFormat = PL_FORMAT_R32G32B32A32_FLOAT, + .uLayers = 1, + .uMips = 1, + .tType = PL_TEXTURE_TYPE_2D, + .tUsage = PL_TEXTURE_USAGE_SAMPLED + }; + + for(uint32_t uFrameIndex = 0; uFrameIndex < PL_FRAMES_IN_FLIGHT; uFrameIndex++) + tSkinData.atDynamicTexture[uFrameIndex] = pl__refr_create_texture_with_data(&tSkinTextureDesc, "joint texture", uFrameIndex, ptSkinComponent->sbtTextureData, sizeof(float) * 4 * textureWidth * textureWidth); + + + + int aiSpecializationData[] = {(int)ulVertexStreamMask, (int)uStride, (int)ptMesh->ulVertexStreamMask, (int)uDestStride}; + const plComputeShaderDescription tComputeShaderDesc = { + #ifdef PL_METAL_BACKEND + .pcShader = "../shaders/metal/skinning.metal", + .pcShaderEntryFunc = "kernel_main", + #else + .pcShader = "skinning.comp.spv", + .pcShaderEntryFunc = "main", + #endif + .uConstantCount = 4, + .pTempConstantData = aiSpecializationData, + .atConstants = { + { .uID = 0, .uOffset = 0, .tType = PL_DATA_TYPE_INT}, + { .uID = 1, .uOffset = sizeof(int), .tType = PL_DATA_TYPE_INT}, + { .uID = 2, 
.uOffset = 2 * sizeof(int), .tType = PL_DATA_TYPE_INT}, + { .uID = 3, .uOffset = 3 * sizeof(int), .tType = PL_DATA_TYPE_INT} + }, + .uBindGroupLayoutCount = 2, + .atBindGroupLayouts = { + { + .uBufferBindingCount = 3, + .aBufferBindings = { + { .tType = PL_BUFFER_BINDING_TYPE_STORAGE, .uSlot = 0, .tStages = PL_STAGE_COMPUTE}, + { .tType = PL_BUFFER_BINDING_TYPE_STORAGE, .uSlot = 1, .tStages = PL_STAGE_COMPUTE}, + { .tType = PL_BUFFER_BINDING_TYPE_STORAGE, .uSlot = 2, .tStages = PL_STAGE_COMPUTE}, + }, + .uSamplerBindingCount = 1, + .atSamplerBindings = { + {.uSlot = 3, .tStages = PL_STAGE_COMPUTE} + } + }, + { + .uTextureBindingCount = 1, + .atTextureBindings = { + {.uSlot = 0, .tStages = PL_STAGE_ALL, .tType = PL_TEXTURE_BINDING_TYPE_SAMPLED} + } + } + } + }; + tSkinData.tShader = gptDevice->create_compute_shader(&gptData->tGraphics.tDevice, &tComputeShaderDesc); + atDrawables[uDrawableIndex].uSkinIndex = pl_sb_size(ptScene->sbtSkinData); + pl_sb_push(ptScene->sbtSkinData, tSkinData); +} + static void pl__add_drawable_data_to_global_buffer(plRefScene* ptScene, uint32_t uDrawableIndex, plDrawable* atDrawables) { @@ -2750,10 +2966,6 @@ pl__add_drawable_data_to_global_buffer(plRefScene* ptScene, uint32_t uDrawableIn if(pl_sb_size(ptMesh->sbtVertexTangents) > 0) { uStride += 1; ptMesh->ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_TANGENT; } if(pl_sb_size(ptMesh->sbtVertexColors[0]) > 0) { uStride += 1; ptMesh->ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_COLOR_0; } if(pl_sb_size(ptMesh->sbtVertexColors[1]) > 0) { uStride += 1; ptMesh->ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_COLOR_1; } - if(pl_sb_size(ptMesh->sbtVertexWeights[0]) > 0) { uStride += 1; ptMesh->ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_0; } - if(pl_sb_size(ptMesh->sbtVertexWeights[1]) > 0) { uStride += 1; ptMesh->ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_1; } - if(pl_sb_size(ptMesh->sbtVertexJoints[0]) > 0) { uStride += 1; ptMesh->ulVertexStreamMask |= 
PL_MESH_FORMAT_FLAG_HAS_JOINTS_0; } - if(pl_sb_size(ptMesh->sbtVertexJoints[1]) > 0) { uStride += 1; ptMesh->ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_JOINTS_1; } if(pl_sb_size(ptMesh->sbtVertexTextureCoordinates[0]) > 0) { uStride += 1; ptMesh->ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_0; } if(pl_sb_size(ptMesh->sbtVertexTextureCoordinates[1]) > 0) { uStride += 1; ptMesh->ulVertexStreamMask |= PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_1; } @@ -2820,34 +3032,6 @@ pl__add_drawable_data_to_global_buffer(plRefScene* ptScene, uint32_t uDrawableIn if(uVertexColorCount > 0) uOffset += 1; - // joints 0 - const uint32_t uVertexJoint0Count = pl_sb_size(ptMesh->sbtVertexJoints[0]); - for(uint32_t i = 0; i < uVertexJoint0Count; i++) - { - const plVec4* ptJoint = &ptMesh->sbtVertexJoints[0][i]; - ptScene->sbtVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].x = ptJoint->x; - ptScene->sbtVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].y = ptJoint->y; - ptScene->sbtVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].z = ptJoint->z; - ptScene->sbtVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].w = ptJoint->w; - } - - if(uVertexJoint0Count > 0) - uOffset += 1; - - // weights 0 - const uint32_t uVertexWeights0Count = pl_sb_size(ptMesh->sbtVertexWeights[0]); - for(uint32_t i = 0; i < uVertexWeights0Count; i++) - { - const plVec4* ptWeight = &ptMesh->sbtVertexWeights[0][i]; - ptScene->sbtVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].x = ptWeight->x; - ptScene->sbtVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].y = ptWeight->y; - ptScene->sbtVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].z = ptWeight->z; - ptScene->sbtVertexDataBuffer[uVertexDataStartIndex + i * uStride + uOffset].w = ptWeight->w; - } - - if(uVertexWeights0Count > 0) - uOffset += 1; - PL_ASSERT(uOffset == uStride && "sanity check"); plGPUMaterial tMaterial = { @@ -3558,7 +3742,8 @@ 
pl_load_ref_renderer_api(void) .get_graphics = pl_refr_get_graphics, .load_skybox_from_panorama = pl_refr_load_skybox_from_panorama, .finalize_scene = pl_refr_finalize_scene, - .update_scene = pl_refr_update_scene, + .update_skin_textures = pl_refr_update_skin_textures, + .perform_skinning = pl_refr_perform_skinning, .render_scene = pl_refr_render_scene, .get_view_texture_id = pl_refr_get_view_texture_id, .resize_view = pl_refr_resize_view, diff --git a/extensions/pl_ref_renderer_ext.h b/extensions/pl_ref_renderer_ext.h index 85d6b956..4358c756 100644 --- a/extensions/pl_ref_renderer_ext.h +++ b/extensions/pl_ref_renderer_ext.h @@ -81,7 +81,8 @@ typedef struct _plRefRendererI // per frame void (*run_ecs)(uint32_t uSceneHandle); - void (*update_scene)(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle); + void (*update_skin_textures)(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle); + void (*perform_skinning)(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle); void (*render_scene)(plCommandBuffer tCommandBuffer, uint32_t uSceneHandle, uint32_t uViewHandle, plViewOptions tOptions); // misc diff --git a/scripts/gen_build.py b/scripts/gen_build.py index 9d6cf9cf..12a8192b 100644 --- a/scripts/gen_build.py +++ b/scripts/gen_build.py @@ -157,6 +157,7 @@ def add_plugin_to_metal_app(name, reloadable, objc = False, binary_name = None): "draw_3d_line.vert", "panorama_to_cubemap.comp", "filter_environment.comp", + "skinning.comp" ] metal_shaders = [ @@ -167,7 +168,8 @@ def add_plugin_to_metal_app(name, reloadable, objc = False, binary_name = None): "lighting.metal", "skybox.metal", "filter_environment.metal", - "transparent.metal" + "transparent.metal", + "skinning.metal" ] with pl.target("app", pl.TargetType.DYNAMIC_LIBRARY, True): diff --git a/shaders/glsl/gbuffer_common.glsl b/shaders/glsl/gbuffer_common.glsl index 4158b487..1506dd7e 100644 --- a/shaders/glsl/gbuffer_common.glsl +++ b/shaders/glsl/gbuffer_common.glsl @@ -111,17 +111,11 @@ layout(set = 1, 
binding = 9) uniform texture2D tIridescenceThicknessTexture; layout(set = 1, binding = 10) uniform texture2D tSpecularTexture; layout(set = 1, binding = 11) uniform texture2D tSpecularColorTexture; -//----------------------------------------------------------------------------- -// [SECTION] bind group 2 -//----------------------------------------------------------------------------- - -layout(set = 2, binding = 0) uniform texture2D tSkinningSampler; - //----------------------------------------------------------------------------- // [SECTION] dynamic bind group //----------------------------------------------------------------------------- -layout(set = 3, binding = 0) uniform _plObjectInfo +layout(set = 2, binding = 0) uniform _plObjectInfo { int iDataOffset; int iVertexOffset; @@ -137,8 +131,7 @@ layout(constant_id = 0) const int iMeshVariantFlags = 0; layout(constant_id = 1) const int iDataStride = 0; layout(constant_id = 2) const int iTextureMappingFlags = 0; layout(constant_id = 3) const int iMaterialFlags = 0; -layout(constant_id = 4) const int iUseSkinning = 0; -layout(constant_id = 5) const int iRenderingFlags = 0; +layout(constant_id = 4) const int iRenderingFlags = 0; //----------------------------------------------------------------------------- // [SECTION] defines diff --git a/shaders/glsl/lighting.frag b/shaders/glsl/lighting.frag index 9ec85774..2bee466f 100644 --- a/shaders/glsl/lighting.frag +++ b/shaders/glsl/lighting.frag @@ -111,9 +111,7 @@ layout(input_attachment_index = 4, set = 1, binding = 3) uniform subpassInput t layout(input_attachment_index = 5, set = 1, binding = 4) uniform subpassInput tAOMetalRoughnessTexture; layout(input_attachment_index = 0, set = 1, binding = 5) uniform subpassInput tDepthSampler; -layout(set = 2, binding = 0) uniform texture2D tSkinningSampler; - -layout(set = 3, binding = 0) uniform _plObjectInfo +layout(set = 2, binding = 0) uniform _plObjectInfo { int iDataOffset; int iVertexOffset; diff --git 
a/shaders/glsl/lighting.vert b/shaders/glsl/lighting.vert index cb8d723d..8deaf455 100644 --- a/shaders/glsl/lighting.vert +++ b/shaders/glsl/lighting.vert @@ -103,9 +103,7 @@ layout (set = 0, binding = 5) uniform textureCube u_LambertianEnvSampler; layout (set = 0, binding = 6) uniform textureCube u_GGXEnvSampler; layout (set = 0, binding = 7) uniform texture2D u_GGXLUT; -layout(set = 2, binding = 0) uniform texture2D tSkinningSampler; - -layout(set = 3, binding = 0) uniform _plObjectInfo +layout(set = 2, binding = 0) uniform _plObjectInfo { int iDataOffset; int iVertexOffset; diff --git a/shaders/glsl/primitive.vert b/shaders/glsl/primitive.vert index a36de3b2..08b020eb 100644 --- a/shaders/glsl/primitive.vert +++ b/shaders/glsl/primitive.vert @@ -16,70 +16,6 @@ layout(location = 0) out struct plShaderOut { #include "gbuffer_common.glsl" -mat4 get_matrix_from_texture(texture2D s, int index) -{ - mat4 result = mat4(1); - int texSize = textureSize(sampler2D(s, tDefaultSampler), 0)[0]; - int pixelIndex = index * 4; - for (int i = 0; i < 4; ++i) - { - int x = (pixelIndex + i) % texSize; - //Rounding mode of integers is undefined: - //https://www.khronos.org/registry/OpenGL/specs/es/3.0/GLSL_ES_Specification_3.00.pdf (section 12.33) - int y = (pixelIndex + i - x) / texSize; - result[i] = texelFetch(sampler2D(s, tDefaultSampler), ivec2(x,y), 0); - } - return result; -} - -mat4 get_skinning_matrix(vec4 inJoints0, vec4 inWeights0) -{ - mat4 skin = mat4(0); - - skin += - inWeights0.x * get_matrix_from_texture(tSkinningSampler, int(inJoints0.x) * 2) + - inWeights0.y * get_matrix_from_texture(tSkinningSampler, int(inJoints0.y) * 2) + - inWeights0.z * get_matrix_from_texture(tSkinningSampler, int(inJoints0.z) * 2) + - inWeights0.w * get_matrix_from_texture(tSkinningSampler, int(inJoints0.w) * 2); - - if (skin == mat4(0)) { - return mat4(1); - } - return skin; -} - -vec4 get_position(vec4 inJoints0, vec4 inWeights0) -{ - vec4 pos = vec4(inPos, 1.0); - - if(bool(iUseSkinning)) 
- { - pos = get_skinning_matrix(inJoints0, inWeights0) * pos; - } - - return pos; -} - -vec3 get_normal(vec3 inNormal, vec4 inJoints0, vec4 inWeights0) -{ - vec3 tNormal = inNormal; - if(bool(iUseSkinning)) - { - tNormal = mat3(get_skinning_matrix(inJoints0, inWeights0)) * tNormal; - } - return normalize(tNormal); -} - -vec3 get_tangent(vec4 inTangent, vec4 inJoints0, vec4 inWeights0) -{ - vec3 tTangent = inTangent.xyz; - if(bool(iUseSkinning)) - { - tTangent = mat3(get_skinning_matrix(inJoints0, inWeights0)) * tTangent; - } - return normalize(tTangent); -} - void main() { @@ -91,10 +27,6 @@ main() vec2 inTexCoord1 = vec2(0.0, 0.0); vec4 inColor0 = vec4(1.0, 1.0, 1.0, 1.0); vec4 inColor1 = vec4(0.0, 0.0, 0.0, 0.0); - vec4 inJoints0 = vec4(0.0, 0.0, 0.0, 0.0); - vec4 inJoints1 = vec4(0.0, 0.0, 0.0, 0.0); - vec4 inWeights0 = vec4(0.0, 0.0, 0.0, 0.0); - vec4 inWeights1 = vec4(0.0, 0.0, 0.0, 0.0); int iCurrentAttribute = 0; // offset = offset into current mesh + offset into global buffer @@ -107,26 +39,22 @@ main() if(bool(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_1)){ inTexCoord1 = tVertexBuffer.atVertexData[iVertexDataOffset + iCurrentAttribute].xy; iCurrentAttribute++;} if(bool(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_COLOR_0)) { inColor0 = tVertexBuffer.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} if(bool(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_COLOR_1)) { inColor1 = tVertexBuffer.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(bool(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_JOINTS_0)) { inJoints0 = tVertexBuffer.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(bool(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_JOINTS_1)) { inJoints1 = tVertexBuffer.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(bool(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_0)) { inWeights0 = tVertexBuffer.atVertexData[iVertexDataOffset + 
iCurrentAttribute]; iCurrentAttribute++;} - if(bool(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_1)) { inWeights1 = tVertexBuffer.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - tShaderIn.tWorldNormal = mat3(tObjectInfo.tModel) * get_normal(inNormal, inJoints0, inWeights0); + tShaderIn.tWorldNormal = mat3(tObjectInfo.tModel) * normalize(inNormal); if(bool(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL)) { if(bool(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TANGENT)) { - vec3 tangent = get_tangent(inTangent, inJoints0, inWeights0); + vec3 tangent = normalize(inTangent.xyz); vec3 WorldTangent = mat3(tObjectInfo.tModel) * tangent; - vec3 WorldBitangent = cross(get_normal(inNormal, inJoints0, inWeights0), tangent) * inTangent.w; + vec3 WorldBitangent = cross(normalize(inNormal), tangent) * inTangent.w; WorldBitangent = mat3(tObjectInfo.tModel) * WorldBitangent; tShaderIn.tTBN = mat3(WorldTangent, WorldBitangent, tShaderIn.tWorldNormal); } } - vec4 pos = tObjectInfo.tModel * get_position(inJoints0, inWeights0); + vec4 pos = tObjectInfo.tModel * inPosition; tShaderIn.tPosition = pos.xyz / pos.w; gl_Position = tGlobalInfo.tCameraViewProjection * pos; tShaderIn.tUV[0] = inTexCoord0; diff --git a/shaders/glsl/skinning.comp b/shaders/glsl/skinning.comp new file mode 100644 index 00000000..64adc87f --- /dev/null +++ b/shaders/glsl/skinning.comp @@ -0,0 +1,146 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : enable + +//----------------------------------------------------------------------------- +// [SECTION] specialization constants +//----------------------------------------------------------------------------- + +layout(constant_id = 0) const int iSourceMeshVariantFlags = 0; +layout(constant_id = 1) const int iSourceDataStride = 0; +layout(constant_id = 2) const int iDestMeshVariantFlags = 0; +layout(constant_id = 3) const int iDestDataStride = 0; + 
+//----------------------------------------------------------------------------- +// [SECTION] bind group 0 +//----------------------------------------------------------------------------- + +layout(std140, set = 0, binding = 0) readonly buffer _tInputDataBuffer +{ + vec4 atVertexData[]; +} tInputDataBuffer; + +layout(std140, set = 0, binding = 1, scalar) buffer _tOutputPosBuffer +{ + vec3 atVertexData[]; +} tOutputPosBuffer; + +layout(std140, set = 0, binding = 2) buffer _tOutputDataBuffer +{ + vec4 atVertexData[]; +} tOutputDataBuffer; + + +layout(set = 0, binding = 3) uniform sampler tSampler; + +//----------------------------------------------------------------------------- +// [SECTION] bind group 1 +//----------------------------------------------------------------------------- + +layout(set = 1, binding = 0) uniform texture2D tSkinningTexture; + +//----------------------------------------------------------------------------- +// [SECTION] dynamic bind group +//----------------------------------------------------------------------------- + +layout(set = 2, binding = 0) uniform _plObjectInfo +{ + int iSourceDataOffset; + int iDestDataOffset; + int iDestVertexOffset; + int iUnused; +} tObjectInfo; + +//----------------------------------------------------------------------------- +// [SECTION] helpers +//----------------------------------------------------------------------------- + +// iMeshVariantFlags +const int PL_MESH_FORMAT_FLAG_NONE = 0; +const int PL_MESH_FORMAT_FLAG_HAS_POSITION = 1 << 0; +const int PL_MESH_FORMAT_FLAG_HAS_NORMAL = 1 << 1; +const int PL_MESH_FORMAT_FLAG_HAS_TANGENT = 1 << 2; +const int PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_0 = 1 << 3; +const int PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_1 = 1 << 4; +const int PL_MESH_FORMAT_FLAG_HAS_COLOR_0 = 1 << 5; +const int PL_MESH_FORMAT_FLAG_HAS_COLOR_1 = 1 << 6; +const int PL_MESH_FORMAT_FLAG_HAS_JOINTS_0 = 1 << 7; +const int PL_MESH_FORMAT_FLAG_HAS_JOINTS_1 = 1 << 8; +const int 
PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_0 = 1 << 9; +const int PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_1 = 1 << 10; + +mat4 +get_matrix_from_texture(int index) +{ + mat4 result = mat4(1); + int texSize = textureSize(sampler2D(tSkinningTexture, tSampler), 0)[0]; + int pixelIndex = index * 4; + for (int i = 0; i < 4; ++i) + { + int x = (pixelIndex + i) % texSize; + //Rounding mode of integers is undefined: + //https://www.khronos.org/registry/OpenGL/specs/es/3.0/GLSL_ES_Specification_3.00.pdf (section 12.33) + int y = (pixelIndex + i - x) / texSize; + result[i] = texelFetch(sampler2D(tSkinningTexture, tSampler), ivec2(x,y), 0); + } + return result; +} + +mat4 +get_skinning_matrix(vec4 inJoints0, vec4 inWeights0) +{ + mat4 skin = mat4(0); + + skin += + inWeights0.x * get_matrix_from_texture(int(inJoints0.x) * 2) + + inWeights0.y * get_matrix_from_texture(int(inJoints0.y) * 2) + + inWeights0.z * get_matrix_from_texture(int(inJoints0.z) * 2) + + inWeights0.w * get_matrix_from_texture(int(inJoints0.w) * 2); + + if (skin == mat4(0)) { + return mat4(1); + } + return skin; +} + +//----------------------------------------------------------------------------- +// [SECTION] entry +//----------------------------------------------------------------------------- + +layout (local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +void +main() +{ + // const float fXCoord = gl_WorkGroupID.x * 16 + gl_LocalInvocationID.x; + // const float tYCoord = gl_WorkGroupID.y * 16 + gl_LocalInvocationID.y; + // const int iFace = int(gl_WorkGroupID.z * 3 + gl_LocalInvocationID.z); + + const uint iVertexIndex = gl_WorkGroupID.x; + + vec4 inPosition = vec4(0.0, 0.0, 0.0, 1.0); + vec3 inNormal = vec3(0.0, 0.0, 0.0); + vec4 inTangent = vec4(0.0, 0.0, 0.0, 0.0); + vec4 inJoints0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 inWeights0 = vec4(0.0, 0.0, 0.0, 0.0); + + const uint iSourceVertexDataOffset = iSourceDataStride * iVertexIndex + tObjectInfo.iSourceDataOffset; + int iCurrentAttribute = 0; + 
if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_POSITION)) { inPosition.xyz = tInputDataBuffer.atVertexData[iSourceVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;} + if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL)) { inNormal = tInputDataBuffer.atVertexData[iSourceVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;} + if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TANGENT)) { inTangent = tInputDataBuffer.atVertexData[iSourceVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} + if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_JOINTS_0)) { inJoints0 = tInputDataBuffer.atVertexData[iSourceVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} + if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_0)) { inWeights0 = tInputDataBuffer.atVertexData[iSourceVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} + + mat4 skin = get_skinning_matrix(inJoints0, inWeights0); + + vec4 outPosition = skin * inPosition; + vec3 outNormal = normalize(mat3(skin) * inNormal); + vec3 outTangent = normalize(mat3(skin) * inTangent.xyz); + + const uint iDestVertexDataOffset = iDestDataStride * iVertexIndex + tObjectInfo.iDestDataOffset; + iCurrentAttribute = 0; + tOutputPosBuffer.atVertexData[iVertexIndex + tObjectInfo.iDestVertexOffset] = outPosition.xyz; + if(bool(iDestMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_POSITION)){ iCurrentAttribute++;} + if(bool(iDestMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL)) { tOutputDataBuffer.atVertexData[iDestVertexDataOffset + iCurrentAttribute].xyz = outNormal; iCurrentAttribute++;} + if(bool(iDestMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TANGENT)) { tOutputDataBuffer.atVertexData[iDestVertexDataOffset + iCurrentAttribute].xyz = outTangent; iCurrentAttribute++;} +} \ No newline at end of file diff --git a/shaders/metal/lighting.metal b/shaders/metal/lighting.metal index 236dd809..805b982c 100644 --- a/shaders/metal/lighting.metal +++ 
b/shaders/metal/lighting.metal @@ -122,15 +122,6 @@ struct BindGroup_1 texture2d tDepthTexture; }; -//----------------------------------------------------------------------------- -// [SECTION] bind group 2 -//----------------------------------------------------------------------------- - -struct BindGroup_2 -{ - texture2d tSkinningTexture; -}; - //----------------------------------------------------------------------------- // [SECTION] dynamic bind group //----------------------------------------------------------------------------- @@ -348,8 +339,7 @@ vertex VertexOut vertex_main( VertexIn in [[stage_in]], device const BindGroup_0& bg0 [[ buffer(1) ]], device const BindGroup_1& bg1 [[ buffer(2) ]], - device const BindGroup_2& bg2 [[ buffer(3) ]], - device const DynamicData& tObjectInfo [[ buffer(4) ]] + device const DynamicData& tObjectInfo [[ buffer(3) ]] ) { @@ -370,8 +360,7 @@ fragment float4 fragment_main( VertexOut in [[stage_in]], device const BindGroup_0& bg0 [[ buffer(1) ]], device const BindGroup_1& bg1 [[ buffer(2) ]], - device const BindGroup_2& bg2 [[ buffer(3) ]], - device const DynamicData& tObjectInfo [[ buffer(4) ]], + device const DynamicData& tObjectInfo [[ buffer(3) ]], bool front_facing [[front_facing]] ) { diff --git a/shaders/metal/primitive.metal b/shaders/metal/primitive.metal index fb104434..2126f1cc 100644 --- a/shaders/metal/primitive.metal +++ b/shaders/metal/primitive.metal @@ -11,8 +11,7 @@ constant int iMeshVariantFlags [[ function_constant(0) ]]; constant int iDataStride [[ function_constant(1) ]]; constant int iTextureMappingFlags [[ function_constant(2) ]]; constant int iMaterialFlags [[ function_constant(3) ]]; -constant int iUseSkinning [[ function_constant(4) ]]; -constant int iRenderingFlags [[ function_constant(5) ]]; +constant int iRenderingFlags [[ function_constant(4) ]]; //----------------------------------------------------------------------------- // [SECTION] defines & structs @@ -207,15 +206,6 @@ struct BindGroup_1 
texture2d tSpecularColorTexture; }; -//----------------------------------------------------------------------------- -// [SECTION] bind group 2 -//----------------------------------------------------------------------------- - -struct BindGroup_2 -{ - texture2d tSkinningTexture; -}; - //----------------------------------------------------------------------------- // [SECTION] dynamic bind group //----------------------------------------------------------------------------- @@ -266,73 +256,6 @@ struct plMultipleRenderTargets // [SECTION] helpers //----------------------------------------------------------------------------- -float4x4 -get_matrix_from_texture(device const texture2d& s, int index) -{ - float4x4 result = float4x4(1); - int texSize = s.get_width(); - int pixelIndex = index * 4; - for (int i = 0; i < 4; ++i) - { - int x = (pixelIndex + i) % texSize; - //Rounding mode of integers is undefined: - //https://www.khronos.org/registry/OpenGL/specs/es/3.0/GLSL_ES_Specification_3.00.pdf (section 12.33) - int y = (pixelIndex + i - x) / texSize; - result[i] = s.read(uint2(x,y)); - } - return result; -} - -float4x4 -get_skinning_matrix(device const texture2d& s, float4 inJoints0, float4 inWeights0) -{ - float4x4 skin = float4x4(0); - - skin += - inWeights0.x * get_matrix_from_texture(s, int(inJoints0.x) * 2) + - inWeights0.y * get_matrix_from_texture(s, int(inJoints0.y) * 2) + - inWeights0.z * get_matrix_from_texture(s, int(inJoints0.z) * 2) + - inWeights0.w * get_matrix_from_texture(s, int(inJoints0.w) * 2); - - // if (skin == float4x4(0)) { - // return float4x4(1); - // } - return skin; -} - -float4 -get_position(device const texture2d& s, float3 inPos, float4 inJoints0, float4 inWeights0) -{ - float4 pos = float4(inPos, 1.0); - if(bool(iUseSkinning)) - { - pos = get_skinning_matrix(s, inJoints0, inWeights0) * pos; - } - return pos; -} - -float4 -get_normal(device const texture2d& s, float3 inNormal, float4 inJoints0, float4 inWeights0) -{ - float4 tNormal = 
float4(inNormal, 0.0); - if(bool(iUseSkinning)) - { - tNormal = get_skinning_matrix(s, inJoints0, inWeights0) * tNormal; - } - return fast::normalize(tNormal); -} - -float4 -get_tangent(device const texture2d& s, float4 inTangent, float4 inJoints0, float4 inWeights0) -{ - float4 tTangent = float4(inTangent.xyz, 0.0); - if(bool(iUseSkinning)) - { - tTangent = get_skinning_matrix(s, inJoints0, inWeights0) * tTangent; - } - return fast::normalize(tTangent); -} - NormalInfo pl_get_normal_info(device const BindGroup_0& bg0, device const BindGroup_1& bg1, VertexOut tShaderIn, bool front_facing) { @@ -475,50 +398,41 @@ vertex VertexOut vertex_main( VertexIn in [[stage_in]], device const BindGroup_0& bg0 [[ buffer(1) ]], device const BindGroup_1& bg1 [[ buffer(2) ]], - device const BindGroup_2& bg2 [[ buffer(3) ]], - device const DynamicData& tObjectInfo [[ buffer(4) ]] + device const DynamicData& tObjectInfo [[ buffer(3) ]] ) { VertexOut tShaderOut; - float3 inPosition = in.tPosition; - float3 inNormal = float3(0.0, 0.0, 0.0); + float4 inPosition = float4(in.tPosition, 1.0); + float4 inNormal = float4(0.0, 0.0, 0.0, 0.0); float4 inTangent = float4(0.0, 0.0, 0.0, 0.0); float2 inTexCoord0 = float2(0.0, 0.0); float2 inTexCoord1 = float2(0.0, 0.0); float4 inColor0 = float4(1.0, 1.0, 1.0, 1.0); float4 inColor1 = float4(0.0, 0.0, 0.0, 0.0); - float4 inJoints0 = float4(0.0, 0.0, 0.0, 0.0); - float4 inJoints1 = float4(0.0, 0.0, 0.0, 0.0); - float4 inWeights0 = float4(0.0, 0.0, 0.0, 0.0); - float4 inWeights1 = float4(0.0, 0.0, 0.0, 0.0); int iCurrentAttribute = 0; const uint iVertexDataOffset = iDataStride * (vertexID - tObjectInfo.iVertexOffset) + tObjectInfo.iDataOffset; if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_POSITION) { inPosition.xyz = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL) { inNormal = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;} + 
if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL) { inNormal.xyz = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TANGENT) { inTangent = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_0){ inTexCoord0 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xy; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_1){ inTexCoord1 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xy; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_COLOR_0) { inColor0 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_COLOR_1) { inColor1 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_JOINTS_0) { inJoints0 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_JOINTS_1) { inJoints1 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_0) { inWeights0 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_1) { inWeights1 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - float4 tWorldNormal4 = tObjectInfo.tModel * get_normal(bg2.tSkinningTexture, inNormal, inJoints0, inWeights0); + float4 tWorldNormal4 = tObjectInfo.tModel * fast::normalize(inNormal); tShaderOut.tWorldNormal = tWorldNormal4.xyz; if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL) { if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TANGENT) { - float4 tangent = get_tangent(bg2.tSkinningTexture, inTangent, inJoints0, inWeights0); + float4 tangent = 
fast::normalize(inTangent); float4 WorldTangent = tObjectInfo.tModel * tangent; - float4 WorldBitangent = float4(cross(get_normal(bg2.tSkinningTexture, inNormal, inJoints0, inWeights0).xyz, tangent.xyz) * inTangent.w, 0.0); + float4 WorldBitangent = float4(cross(fast::normalize(inNormal).xyz, tangent.xyz) * inTangent.w, 0.0); WorldBitangent = tObjectInfo.tModel * WorldBitangent; tShaderOut.tTBN0 = WorldTangent.xyz; tShaderOut.tTBN1 = WorldBitangent.xyz; @@ -526,7 +440,7 @@ vertex VertexOut vertex_main( } } - float4 pos = tObjectInfo.tModel * get_position(bg2.tSkinningTexture, inPosition, inJoints0, inWeights0); + float4 pos = tObjectInfo.tModel * inPosition; tShaderOut.tPosition = pos.xyz / pos.w; tShaderOut.tPositionOut = bg0.data->tCameraViewProjection * pos; tShaderOut.tUV = inTexCoord0; @@ -541,8 +455,7 @@ fragment plMultipleRenderTargets fragment_main( VertexOut in [[stage_in]], device const BindGroup_0& bg0 [[ buffer(1) ]], device const BindGroup_1& bg1 [[ buffer(2) ]], - device const BindGroup_2& bg2 [[ buffer(3) ]], - device const DynamicData& tObjectInfo [[ buffer(4) ]], + device const DynamicData& tObjectInfo [[ buffer(3) ]], bool front_facing [[front_facing]] ) { diff --git a/shaders/metal/skinning.metal b/shaders/metal/skinning.metal new file mode 100644 index 00000000..152265a0 --- /dev/null +++ b/shaders/metal/skinning.metal @@ -0,0 +1,146 @@ +#include +#include + +using namespace metal; + +//----------------------------------------------------------------------------- +// [SECTION] specialization constants +//----------------------------------------------------------------------------- + +constant int iSourceMeshVariantFlags [[ function_constant(0) ]]; +constant int iSourceDataStride [[ function_constant(1) ]]; +constant int iDestMeshVariantFlags [[ function_constant(2) ]]; +constant int iDestDataStride [[ function_constant(3) ]]; + +//----------------------------------------------------------------------------- +// [SECTION] defines & structs 
+//-----------------------------------------------------------------------------
+
+// iMeshVariantFlags: one bit per vertex attribute (values parenthesized so the macros are safe inside any expression)
+#define PL_MESH_FORMAT_FLAG_NONE           0
+#define PL_MESH_FORMAT_FLAG_HAS_POSITION   (1 << 0)
+#define PL_MESH_FORMAT_FLAG_HAS_NORMAL     (1 << 1)
+#define PL_MESH_FORMAT_FLAG_HAS_TANGENT    (1 << 2)
+#define PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_0 (1 << 3)
+#define PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_1 (1 << 4)
+#define PL_MESH_FORMAT_FLAG_HAS_COLOR_0    (1 << 5)
+#define PL_MESH_FORMAT_FLAG_HAS_COLOR_1    (1 << 6)
+#define PL_MESH_FORMAT_FLAG_HAS_JOINTS_0   (1 << 7)
+#define PL_MESH_FORMAT_FLAG_HAS_JOINTS_1   (1 << 8)
+#define PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_0  (1 << 9)
+#define PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_1  (1 << 10)
+
+//-----------------------------------------------------------------------------
+// [SECTION] bind group 0
+//-----------------------------------------------------------------------------
+
+struct BindGroup_0
+{
+    device float4* atInputDataBuffer;        // source attribute stream read by kernel_main
+    device packed_float3* atOutputPosBuffer; // skinned positions, one packed_float3 per vertex
+    device float4* atOutputDataBuffer;       // destination attribute stream (normal/tangent xyz are overwritten)
+    sampler tSampler;                        // not referenced anywhere in this file
+};
+
+//-----------------------------------------------------------------------------
+// [SECTION] bind group 1
+//-----------------------------------------------------------------------------
+
+struct BindGroup_1
+{
+    texture2d<float> tSkinningTexture; // joint matrices, one float4 matrix column per texel
+};
+
+//-----------------------------------------------------------------------------
+// [SECTION] dynamic bind group
+//-----------------------------------------------------------------------------
+
+struct DynamicData
+{
+    int iSourceDataOffset; // added to iSourceDataStride * vertex index when reading atInputDataBuffer
+    int iDestDataOffset;   // added to iDestDataStride * vertex index when writing atOutputDataBuffer
+    int iDestVertexOffset; // added to the vertex index when writing atOutputPosBuffer
+    int iUnused;           // not referenced by this shader (NOTE(review): presumably padding - confirm)
+};
+
+
+//-----------------------------------------------------------------------------
+// [SECTION] helpers
+//-----------------------------------------------------------------------------
+// Reads matrix `index` from s: texels 4*index .. 4*index+3 (linear index wrapped by the texture width) become columns 0..3.
+float4x4
+get_matrix_from_texture(device const texture2d<float>& s, int index)
+{
+    float4x4 result = float4x4(1);
+    int texSize = s.get_width();
+    int pixelIndex = index * 4;
+    for (int i = 0; i < 4; ++i)
+    {
+        int x = (pixelIndex + i) % texSize;
+        //Rounding mode of integers is undefined:
+        //https://www.khronos.org/registry/OpenGL/specs/es/3.0/GLSL_ES_Specification_3.00.pdf (section 12.33)
+        int y = (pixelIndex + i - x) / texSize;
+        result[i] = s.read(uint2(x,y));
+    }
+    return result;
+}
+// Returns the sum of the four matrices selected by inJoints0, each scaled by the matching component of inWeights0.
+float4x4
+get_skinning_matrix(device const texture2d<float>& s, float4 inJoints0, float4 inWeights0)
+{
+    float4x4 skin = float4x4(0);
+    // joint j is fetched from matrix index 2*j (NOTE(review): presumably each joint owns two consecutive matrices - confirm)
+    skin +=
+        inWeights0.x * get_matrix_from_texture(s, int(inJoints0.x) * 2) +
+        inWeights0.y * get_matrix_from_texture(s, int(inJoints0.y) * 2) +
+        inWeights0.z * get_matrix_from_texture(s, int(inJoints0.z) * 2) +
+        inWeights0.w * get_matrix_from_texture(s, int(inJoints0.w) * 2);
+    // NOTE: the result stays all-zero when every weight is zero; the identity fallback below is intentionally left disabled
+    // if (skin == float4x4(0)) {
+    //     return float4x4(1);
+    // }
+    return skin;
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] entry
+//-----------------------------------------------------------------------------
+// Skins one vertex: writes the skinned position to atOutputPosBuffer and the skinned normal/tangent xyz to atOutputDataBuffer.
+kernel void kernel_main(
+    device const BindGroup_0& bg0 [[ buffer(0) ]],
+    device const BindGroup_1& bg1 [[ buffer(1) ]],
+    device const DynamicData& tObjectInfo [[ buffer(2) ]],
+    uint3 tWorkGroup [[threadgroup_position_in_grid]],
+    uint3 tLocalIndex [[thread_position_in_threadgroup]]
+    )
+{
+    // the vertex index is the threadgroup's x position; tLocalIndex is unused (NOTE(review): presumably one thread per threadgroup - confirm)
+    const uint iVertexIndex = tWorkGroup.x;
+
+    float4 inPosition = float4(0.0, 0.0, 0.0, 1.0);
+    float4 inNormal = float4(0.0, 0.0, 0.0, 0.0);
+    float4 inTangent = float4(0.0, 0.0, 0.0, 0.0);
+    float4 inJoints0 = float4(0.0, 0.0, 0.0, 0.0);
+    float4 inWeights0 = float4(0.0, 0.0, 0.0, 0.0);
+    // NOTE(review): TEXCOORD/COLOR/JOINTS_1 flags do not advance iCurrentAttribute, so the source stream presumably packs only these five attributes - confirm
+    const uint iSourceVertexDataOffset = iSourceDataStride * iVertexIndex + tObjectInfo.iSourceDataOffset;
+    int iCurrentAttribute = 0;
+    if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_POSITION))  { inPosition.xyz = bg0.atInputDataBuffer[iSourceVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;}
+    if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL))    { inNormal.xyz = bg0.atInputDataBuffer[iSourceVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;}
+    if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TANGENT))   { inTangent = bg0.atInputDataBuffer[iSourceVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;}
+    if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_JOINTS_0))  { inJoints0 = bg0.atInputDataBuffer[iSourceVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;}
+    if(bool(iSourceMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_0)) { inWeights0 = bg0.atInputDataBuffer[iSourceVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;}
+
+    float4x4 skin = get_skinning_matrix(bg1.tSkinningTexture, inJoints0, inWeights0);
+    // directions are skinned with w = 0 so joint translation cannot leak in; the tangent's stored w is never transformed
+    float4 outPosition = skin * inPosition;
+    float3 outNormal = fast::normalize(skin * inNormal).xyz;
+    float3 outTangent = fast::normalize(skin * float4(inTangent.xyz, 0.0)).xyz;
+
+    const uint iDestVertexDataOffset = iDestDataStride * iVertexIndex + tObjectInfo.iDestDataOffset;
+    iCurrentAttribute = 0;
+    bg0.atOutputPosBuffer[iVertexIndex + tObjectInfo.iDestVertexOffset] = outPosition.xyz;
+    if(bool(iDestMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_POSITION)){ iCurrentAttribute++;} // position slot in atOutputDataBuffer is skipped, not written
+    if(bool(iDestMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL))  { bg0.atOutputDataBuffer[iDestVertexDataOffset + iCurrentAttribute].xyz = outNormal; iCurrentAttribute++;}
+    if(bool(iDestMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TANGENT)) { bg0.atOutputDataBuffer[iDestVertexDataOffset + iCurrentAttribute].xyz = outTangent; iCurrentAttribute++;}
+}
\ No newline at end of file
diff --git a/shaders/metal/transparent.metal b/shaders/metal/transparent.metal
index 48137611..99e10e83 100644
--- a/shaders/metal/transparent.metal
+++ b/shaders/metal/transparent.metal
@@ -11,8 +11,7 @@ constant int iMeshVariantFlags [[ function_constant(0) ]];
 constant int iDataStride [[ function_constant(1) ]];
 constant int iTextureMappingFlags [[ function_constant(2) ]];
 constant int iMaterialFlags [[ function_constant(3) ]];
-constant int iUseSkinning [[ function_constant(4) ]]; -constant int iRenderingFlags [[ function_constant(5) ]]; +constant int iRenderingFlags [[ function_constant(4) ]]; //----------------------------------------------------------------------------- // [SECTION] defines & structs @@ -190,11 +189,6 @@ struct BindGroup_1 texture2d tSpecularColorTexture; }; -struct BindGroup_2 -{ - texture2d tSkinningTexture; -}; - struct VertexIn { float3 tPosition [[attribute(0)]]; }; @@ -220,118 +214,46 @@ struct DynamicData float4x4 tModel; }; -float4x4 -get_matrix_from_texture(device const texture2d& s, int index) -{ - float4x4 result = float4x4(1); - int texSize = s.get_width(); - int pixelIndex = index * 4; - for (int i = 0; i < 4; ++i) - { - int x = (pixelIndex + i) % texSize; - //Rounding mode of integers is undefined: - //https://www.khronos.org/registry/OpenGL/specs/es/3.0/GLSL_ES_Specification_3.00.pdf (section 12.33) - int y = (pixelIndex + i - x) / texSize; - result[i] = s.read(uint2(x,y)); - } - return result; -} - -float4x4 get_skinning_matrix(device const texture2d& s, float4 inJoints0, float4 inWeights0) -{ - float4x4 skin = float4x4(0); - - skin += - inWeights0.x * get_matrix_from_texture(s, int(inJoints0.x) * 2) + - inWeights0.y * get_matrix_from_texture(s, int(inJoints0.y) * 2) + - inWeights0.z * get_matrix_from_texture(s, int(inJoints0.z) * 2) + - inWeights0.w * get_matrix_from_texture(s, int(inJoints0.w) * 2); - - // if (skin == float4x4(0)) { - // return float4x4(1); - // } - return skin; -} - -float4 get_position(device const texture2d& s, float3 inPos, float4 inJoints0, float4 inWeights0) -{ - float4 pos = float4(inPos, 1.0); - if(bool(iUseSkinning)) - { - pos = get_skinning_matrix(s, inJoints0, inWeights0) * pos; - } - return pos; -} - -float4 get_normal(device const texture2d& s, float3 inNormal, float4 inJoints0, float4 inWeights0) -{ - float4 tNormal = float4(inNormal, 0.0); - if(bool(iUseSkinning)) - { - tNormal = get_skinning_matrix(s, inJoints0, 
inWeights0) * tNormal; - } - return normalize(tNormal); -} - -float4 get_tangent(device const texture2d& s, float4 inTangent, float4 inJoints0, float4 inWeights0) -{ - float4 tTangent = float4(inTangent.xyz, 0.0); - if(bool(iUseSkinning)) - { - tTangent = get_skinning_matrix(s, inJoints0, inWeights0) * tTangent; - } - return normalize(tTangent); -} - vertex VertexOut vertex_main( uint vertexID [[ vertex_id ]], VertexIn in [[stage_in]], device const BindGroup_0& bg0 [[ buffer(1) ]], device const BindGroup_1& bg1 [[ buffer(2) ]], - device const BindGroup_2& bg2 [[ buffer(3) ]], - device const DynamicData& tObjectInfo [[ buffer(4) ]] + device const DynamicData& tObjectInfo [[ buffer(3) ]] ) { VertexOut tShaderOut; - float3 inPosition = in.tPosition; - float3 inNormal = float3(0.0, 0.0, 0.0); + float4 inPosition = float4(in.tPosition, 1.0); + float4 inNormal = float4(0.0, 0.0, 0.0, 0.0); float4 inTangent = float4(0.0, 0.0, 0.0, 0.0); float2 inTexCoord0 = float2(0.0, 0.0); float2 inTexCoord1 = float2(0.0, 0.0); float4 inColor0 = float4(1.0, 1.0, 1.0, 1.0); float4 inColor1 = float4(0.0, 0.0, 0.0, 0.0); - float4 inJoints0 = float4(0.0, 0.0, 0.0, 0.0); - float4 inJoints1 = float4(0.0, 0.0, 0.0, 0.0); - float4 inWeights0 = float4(0.0, 0.0, 0.0, 0.0); - float4 inWeights1 = float4(0.0, 0.0, 0.0, 0.0); int iCurrentAttribute = 0; const uint iVertexDataOffset = iDataStride * (vertexID - tObjectInfo.iVertexOffset) + tObjectInfo.iDataOffset; if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_POSITION) { inPosition.xyz = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL) { inNormal = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;} + if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL) { inNormal.xyz = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xyz; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TANGENT) { inTangent = 
bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_0){ inTexCoord0 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xy; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TEXCOORD_1){ inTexCoord1 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute].xy; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_COLOR_0) { inColor0 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_COLOR_1) { inColor1 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_JOINTS_0) { inJoints0 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_JOINTS_1) { inJoints1 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_0) { inWeights0 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_WEIGHTS_1) { inWeights1 = bg0.atVertexData[iVertexDataOffset + iCurrentAttribute]; iCurrentAttribute++;} - float4 tWorldNormal4 = tObjectInfo.tModel * get_normal(bg2.tSkinningTexture, inNormal, inJoints0, inWeights0); + float4 tWorldNormal4 = tObjectInfo.tModel * fast::normalize(inNormal); tShaderOut.tWorldNormal = tWorldNormal4.xyz; if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_NORMAL) { if(iMeshVariantFlags & PL_MESH_FORMAT_FLAG_HAS_TANGENT) { - float4 tangent = get_tangent(bg2.tSkinningTexture, inTangent, inJoints0, inWeights0); + float4 tangent = fast::normalize(inTangent); float4 WorldTangent = tObjectInfo.tModel * tangent; - float4 WorldBitangent = float4(cross(get_normal(bg2.tSkinningTexture, inNormal, inJoints0, inWeights0).xyz, tangent.xyz) * inTangent.w, 0.0); + float4 WorldBitangent 
= float4(cross(fast::normalize(inNormal).xyz, tangent.xyz) * inTangent.w, 0.0); WorldBitangent = tObjectInfo.tModel * WorldBitangent; tShaderOut.tTBN0 = WorldTangent.xyz; tShaderOut.tTBN1 = WorldBitangent.xyz; @@ -339,7 +261,7 @@ vertex VertexOut vertex_main( } } - float4 pos = tObjectInfo.tModel * get_position(bg2.tSkinningTexture, inPosition, inJoints0, inWeights0); + float4 pos = tObjectInfo.tModel * inPosition; tShaderOut.tPosition = pos.xyz / pos.w; tShaderOut.tPositionOut = bg0.data->tCameraViewProjection * pos; tShaderOut.tUV = inTexCoord0; @@ -673,8 +595,7 @@ fragment plRenderTargets fragment_main( VertexOut in [[stage_in]], device const BindGroup_0& bg0 [[ buffer(1) ]], device const BindGroup_1& bg1 [[ buffer(2) ]], - device const BindGroup_2& bg2 [[ buffer(3) ]], - device const DynamicData& tObjectInfo [[ buffer(4) ]], + device const DynamicData& tObjectInfo [[ buffer(3) ]], bool front_facing [[front_facing]] ) {