From e1a378e733cc8f81df5d6f7325db6843aad733b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Leclercq?= Date: Wed, 1 Dec 2021 10:40:47 +0100 Subject: [PATCH] Graphics/[SpriteChain|Submesh]Renderer: Fix material pass not breaking batching --- .../Nazara/Graphics/RenderQueueRegistry.hpp | 4 ++++ .../Nazara/Graphics/RenderQueueRegistry.inl | 13 +++++++++++++ include/Nazara/Graphics/RenderSpriteChain.inl | 15 ++++++++++----- include/Nazara/Graphics/RenderSubmesh.inl | 15 ++++++++++----- src/Nazara/Graphics/SpriteChainRenderer.cpp | 19 +++++++++++++------ src/Nazara/Graphics/SubmeshRenderer.cpp | 11 +++++++++-- 6 files changed, 59 insertions(+), 18 deletions(-) diff --git a/include/Nazara/Graphics/RenderQueueRegistry.hpp b/include/Nazara/Graphics/RenderQueueRegistry.hpp index 2f2e230f5..7c383e10f 100644 --- a/include/Nazara/Graphics/RenderQueueRegistry.hpp +++ b/include/Nazara/Graphics/RenderQueueRegistry.hpp @@ -13,6 +13,7 @@ namespace Nz { class AbstractBuffer; + class MaterialPass; class RenderPipeline; class VertexDeclaration; @@ -25,17 +26,20 @@ namespace Nz inline void Clear(); inline std::size_t FetchLayerIndex(int renderLayer) const; + inline std::size_t FetchMaterialPassIndex(const MaterialPass* materialPass) const; inline std::size_t FetchPipelineIndex(const RenderPipeline* pipeline) const; inline std::size_t FetchVertexBuffer(const AbstractBuffer* vertexBuffer) const; inline std::size_t FetchVertexDeclaration(const VertexDeclaration* vertexDeclaration) const; inline void RegisterLayer(int renderLayer); + inline void RegisterMaterialPass(const MaterialPass* materialPass); inline void RegisterPipeline(const RenderPipeline* pipeline); inline void RegisterVertexBuffer(const AbstractBuffer* vertexBuffer); inline void RegisterVertexDeclaration(const VertexDeclaration* vertexDeclaration); private: robin_hood::unordered_map m_renderLayerRegistry; + robin_hood::unordered_map m_materialPassRegistry; robin_hood::unordered_map m_pipelineRegistry; robin_hood::unordered_map m_vertexBufferRegistry; robin_hood::unordered_map m_vertexDeclarationRegistry; diff --git a/include/Nazara/Graphics/RenderQueueRegistry.inl b/include/Nazara/Graphics/RenderQueueRegistry.inl index 3525068fe..fde43e418 100644 --- a/include/Nazara/Graphics/RenderQueueRegistry.inl +++ b/include/Nazara/Graphics/RenderQueueRegistry.inl @@ -22,6 +22,14 @@ namespace Nz return it->second; } + inline std::size_t RenderQueueRegistry::FetchMaterialPassIndex(const MaterialPass* materialPass) const + { + auto it = m_materialPassRegistry.find(materialPass); + assert(it != m_materialPassRegistry.end()); + + return it->second; + } + inline std::size_t RenderQueueRegistry::FetchPipelineIndex(const RenderPipeline* pipeline) const { auto it = m_pipelineRegistry.find(pipeline); @@ -51,6 +59,11 @@ namespace Nz m_renderLayerRegistry.try_emplace(renderLayer, m_renderLayerRegistry.size()); } + inline void RenderQueueRegistry::RegisterMaterialPass(const MaterialPass* materialPass) + { + m_materialPassRegistry.try_emplace(materialPass, m_materialPassRegistry.size()); + } + inline void RenderQueueRegistry::RegisterPipeline(const RenderPipeline* pipeline) { m_pipelineRegistry.try_emplace(pipeline, m_pipelineRegistry.size()); diff --git a/include/Nazara/Graphics/RenderSpriteChain.inl b/include/Nazara/Graphics/RenderSpriteChain.inl index 1353316af..b483074ab 100644 --- a/include/Nazara/Graphics/RenderSpriteChain.inl +++ b/include/Nazara/Graphics/RenderSpriteChain.inl @@ -24,9 +24,6 @@ namespace Nz inline UInt64 RenderSpriteChain::ComputeSortingScore(const Frustumf& frustum, const RenderQueueRegistry& registry) const { UInt64 layerIndex = registry.FetchLayerIndex(m_renderLayer); - UInt64 elementType = GetElementType(); - UInt64 pipelineIndex = registry.FetchPipelineIndex(m_renderPipeline.get()); - UInt64 vertexDeclarationIndex = registry.FetchVertexDeclaration(m_vertexDeclaration.get()); if (m_materialPass->IsFlagEnabled(MaterialPassFlag::Transparent)) { @@ -47,6 +44,11 @@ namespace Nz } else { + UInt64 elementType = GetElementType(); + UInt64 materialPassIndex = registry.FetchMaterialPassIndex(m_materialPass.get()); + UInt64 pipelineIndex = registry.FetchPipelineIndex(m_renderPipeline.get()); + UInt64 vertexDeclarationIndex = registry.FetchVertexDeclaration(m_vertexDeclaration.get()); + UInt64 matFlags = 0; // Opaque RQ index: @@ -54,14 +56,16 @@ namespace Nz // - Transparent flag (1bit) // - Element type (4bits) // - Pipeline (16bits) + // - MaterialPass (16bits) // - VertexDeclaration (8bits) - // - ?? (24bits) - Depth? + // - ?? (8bits) - Depth? return (layerIndex & 0xFF) << 60 | (matFlags) << 52 | (elementType & 0xF) << 51 | (pipelineIndex & 0xFFFF) << 35 | - (vertexDeclarationIndex & 0xFF) << 23; + (materialPassIndex & 0xFFFF) << 23 | + (vertexDeclarationIndex & 0xFF) << 7; } } @@ -103,6 +107,7 @@ namespace Nz inline void RenderSpriteChain::Register(RenderQueueRegistry& registry) const { registry.RegisterLayer(m_renderLayer); + registry.RegisterMaterialPass(m_materialPass.get()); registry.RegisterPipeline(m_renderPipeline.get()); registry.RegisterVertexDeclaration(m_vertexDeclaration.get()); } diff --git a/include/Nazara/Graphics/RenderSubmesh.inl b/include/Nazara/Graphics/RenderSubmesh.inl index 2a2bd39fa..2b9bbf4cc 100644 --- a/include/Nazara/Graphics/RenderSubmesh.inl +++ b/include/Nazara/Graphics/RenderSubmesh.inl @@ -24,9 +24,6 @@ namespace Nz inline UInt64 RenderSubmesh::ComputeSortingScore(const Frustumf& frustum, const RenderQueueRegistry& registry) const { UInt64 layerIndex = registry.FetchLayerIndex(m_renderLayer); - UInt64 elementType = GetElementType(); - UInt64 pipelineIndex = registry.FetchPipelineIndex(m_renderPipeline.get()); - UInt64 vertexBufferIndex = registry.FetchVertexBuffer(m_vertexBuffer.get()); if (m_materialPass->IsFlagEnabled(MaterialPassFlag::Transparent)) { @@ -46,6 +43,11 @@ namespace Nz } else { + UInt64 elementType = GetElementType(); + UInt64 materialPassIndex = registry.FetchMaterialPassIndex(m_materialPass.get()); + UInt64 pipelineIndex = registry.FetchPipelineIndex(m_renderPipeline.get()); + UInt64 vertexBufferIndex = registry.FetchVertexBuffer(m_vertexBuffer.get()); + UInt64 matFlags = 0; // Opaque RQ index: @@ -53,14 +55,16 @@ namespace Nz // - Transparent flag (1bit) // - Element type (4bits) // - Pipeline (16bits) + // - MaterialPass (16bits) // - VertexBuffer (8bits) - // - ?? (24bits) - Depth? + // - ?? (8bits) - Depth? return (layerIndex & 0xFF) << 60 | (matFlags) << 52 | (elementType & 0xF) << 51 | (pipelineIndex & 0xFFFF) << 35 | - (vertexBufferIndex & 0xFF) << 23; + (materialPassIndex & 0xFFFF) << 23 | + (vertexBufferIndex & 0xFF) << 7; } } @@ -97,6 +101,7 @@ namespace Nz inline void RenderSubmesh::Register(RenderQueueRegistry& registry) const { registry.RegisterLayer(m_renderLayer); + registry.RegisterMaterialPass(m_materialPass.get()); registry.RegisterPipeline(m_renderPipeline.get()); registry.RegisterVertexBuffer(m_vertexBuffer.get()); } diff --git a/src/Nazara/Graphics/SpriteChainRenderer.cpp b/src/Nazara/Graphics/SpriteChainRenderer.cpp index bcd7e1791..d21db2eda 100644 --- a/src/Nazara/Graphics/SpriteChainRenderer.cpp +++ b/src/Nazara/Graphics/SpriteChainRenderer.cpp @@ -65,6 +65,7 @@ namespace Nz UInt8* currentAllocationMemPtr = nullptr; const VertexDeclaration* currentVertexDeclaration = nullptr; AbstractBuffer* currentVertexBuffer = nullptr; + const MaterialPass* currentMaterialPass = nullptr; const RenderPipeline* currentPipeline = nullptr; const ShaderBinding* currentShaderBinding = nullptr; const Texture* currentTextureOverlay = nullptr; @@ -122,24 +123,30 @@ namespace Nz currentVertexDeclaration = vertexDeclaration; } - if (currentPipeline != &spriteChain.GetRenderPipeline()) + if (const RenderPipeline* pipeline = &spriteChain.GetRenderPipeline(); currentPipeline != pipeline) { FlushDrawCall(); - currentPipeline = &spriteChain.GetRenderPipeline(); + currentPipeline = pipeline; } - if (currentWorldInstance != &spriteChain.GetWorldInstance()) + if (const MaterialPass* materialPass = &spriteChain.GetMaterialPass(); currentMaterialPass != materialPass) + { + FlushDrawData(); + currentMaterialPass = materialPass; + } + + if (const WorldInstance* worldInstance = &spriteChain.GetWorldInstance(); currentWorldInstance != worldInstance) { // TODO: Flushing draw calls on instance binding means we can have e.g. 1000 sprites rendered using a draw call for each one // which is far from being efficient, using some bindless could help (or at least instancing?) FlushDrawData(); - currentWorldInstance = &spriteChain.GetWorldInstance(); + currentWorldInstance = worldInstance; } - if (currentTextureOverlay != spriteChain.GetTextureOverlay()) + if (const Texture* textureOverlay = spriteChain.GetTextureOverlay(); currentTextureOverlay != textureOverlay) { FlushDrawData(); - currentTextureOverlay = spriteChain.GetTextureOverlay(); + currentTextureOverlay = textureOverlay; } std::size_t remainingQuads = spriteChain.GetSpriteCount(); diff --git a/src/Nazara/Graphics/SubmeshRenderer.cpp b/src/Nazara/Graphics/SubmeshRenderer.cpp index fae79d6d4..638de6703 100644 --- a/src/Nazara/Graphics/SubmeshRenderer.cpp +++ b/src/Nazara/Graphics/SubmeshRenderer.cpp @@ -29,6 +29,7 @@ namespace Nz const AbstractBuffer* currentIndexBuffer = nullptr; const AbstractBuffer* currentVertexBuffer = nullptr; + const MaterialPass* currentMaterialPass = nullptr; const RenderPipeline* currentPipeline = nullptr; const ShaderBinding* currentShaderBinding = nullptr; const WorldInstance* currentWorldInstance = nullptr; @@ -59,6 +60,12 @@ namespace Nz currentPipeline = pipeline; } + if (const MaterialPass* materialPass = &submesh.GetMaterialPass(); currentMaterialPass != materialPass) + { + FlushDrawCall(); + currentMaterialPass = materialPass; + } + if (const AbstractBuffer* indexBuffer = submesh.GetIndexBuffer(); currentIndexBuffer != indexBuffer) { FlushDrawCall(); @@ -71,12 +78,12 @@ namespace Nz currentVertexBuffer = vertexBuffer; } - if (currentWorldInstance != &submesh.GetWorldInstance()) + if (const WorldInstance* worldInstance = &submesh.GetWorldInstance(); currentWorldInstance != worldInstance) { // TODO: Flushing draw calls on instance binding means we can have e.g. 1000 sprites rendered using a draw call for each one // which is far from being efficient, using some bindless could help (or at least instancing?) FlushDrawData(); - currentWorldInstance = &submesh.GetWorldInstance(); + currentWorldInstance = worldInstance; } if (!currentShaderBinding)