Graphics/[SpriteChain|Submesh]Renderer: Fix material pass not breaking batching

2021-12-01 10:40:47 +01:00
parent b256ddd06b
commit e1a378e733
6 changed files with 59 additions and 18 deletions
--- a/include/Nazara/Graphics/RenderQueueRegistry.hpp
+++ b/include/Nazara/Graphics/RenderQueueRegistry.hpp
@@ -13,6 +13,7 @@
 namespace Nz
 {
 	class AbstractBuffer;
+	class MaterialPass;
 	class RenderPipeline;
 	class VertexDeclaration;

@@ -25,17 +26,20 @@ namespace Nz
 			inline void Clear();

 			inline std::size_t FetchLayerIndex(int renderLayer) const;
+			inline std::size_t FetchMaterialPassIndex(const MaterialPass* materialPass) const;
 			inline std::size_t FetchPipelineIndex(const RenderPipeline* pipeline) const;
 			inline std::size_t FetchVertexBuffer(const AbstractBuffer* vertexBuffer) const;
 			inline std::size_t FetchVertexDeclaration(const VertexDeclaration* vertexDeclaration) const;

 			inline void RegisterLayer(int renderLayer);
+			inline void RegisterMaterialPass(const MaterialPass* materialPass);
 			inline void RegisterPipeline(const RenderPipeline* pipeline);
 			inline void RegisterVertexBuffer(const AbstractBuffer* vertexBuffer);
 			inline void RegisterVertexDeclaration(const VertexDeclaration* vertexDeclaration);

 		private:
 			robin_hood::unordered_map<int, std::size_t> m_renderLayerRegistry;
+			robin_hood::unordered_map<const MaterialPass*, std::size_t> m_materialPassRegistry;
 			robin_hood::unordered_map<const RenderPipeline*, std::size_t> m_pipelineRegistry;
 			robin_hood::unordered_map<const AbstractBuffer*, std::size_t> m_vertexBufferRegistry;
 			robin_hood::unordered_map<const VertexDeclaration*, std::size_t> m_vertexDeclarationRegistry;
--- a/include/Nazara/Graphics/RenderQueueRegistry.inl
+++ b/include/Nazara/Graphics/RenderQueueRegistry.inl
@@ -22,6 +22,14 @@ namespace Nz
 		return it->second;
 	}

+	inline std::size_t RenderQueueRegistry::FetchMaterialPassIndex(const MaterialPass* materialPass) const
+	{
+		auto it = m_materialPassRegistry.find(materialPass); // Look up the index stored for this pass by RegisterMaterialPass
+		assert(it != m_materialPassRegistry.end()); // Fetching a pass that was never registered is a caller error
+
+		return it->second;
+	}
+
 	inline std::size_t RenderQueueRegistry::FetchPipelineIndex(const RenderPipeline* pipeline) const
 	{
 		auto it = m_pipelineRegistry.find(pipeline);
@@ -51,6 +59,11 @@ namespace Nz
 		m_renderLayerRegistry.try_emplace(renderLayer, m_renderLayerRegistry.size());
 	}

+	inline void RenderQueueRegistry::RegisterMaterialPass(const MaterialPass* materialPass)
+	{
+		m_materialPassRegistry.try_emplace(materialPass, m_materialPassRegistry.size()); // A new pass gets the current map size as its index; an already-registered pass keeps its existing one
+	}
+
 	inline void RenderQueueRegistry::RegisterPipeline(const RenderPipeline* pipeline)
 	{
 		m_pipelineRegistry.try_emplace(pipeline, m_pipelineRegistry.size());
--- a/include/Nazara/Graphics/RenderSpriteChain.inl
+++ b/include/Nazara/Graphics/RenderSpriteChain.inl
@@ -24,9 +24,6 @@ namespace Nz
 	inline UInt64 RenderSpriteChain::ComputeSortingScore(const Frustumf& frustum, const RenderQueueRegistry& registry) const
 	{
 		UInt64 layerIndex = registry.FetchLayerIndex(m_renderLayer);
-		UInt64 elementType = GetElementType();
-		UInt64 pipelineIndex = registry.FetchPipelineIndex(m_renderPipeline.get());
-		UInt64 vertexDeclarationIndex = registry.FetchVertexDeclaration(m_vertexDeclaration.get());

 		if (m_materialPass->IsFlagEnabled(MaterialPassFlag::Transparent))
 		{
@@ -47,6 +44,11 @@ namespace Nz
 		}
 		else
 		{
+			UInt64 elementType = GetElementType();
+			UInt64 materialPassIndex = registry.FetchMaterialPassIndex(m_materialPass.get());
+			UInt64 pipelineIndex = registry.FetchPipelineIndex(m_renderPipeline.get());
+			UInt64 vertexDeclarationIndex = registry.FetchVertexDeclaration(m_vertexDeclaration.get());
+
 			UInt64 matFlags = 0;

 			// Opaque RQ index:
@@ -54,14 +56,16 @@ namespace Nz
 			// - Transparent flag (1bit)
 			// - Element type (4bits)
 			// - Pipeline (16bits)
+			// - MaterialPass (16bits)
 			// - VertexDeclaration (8bits)
-			// - ?? (24bits) - Depth?
+			// - ?? (11bits) - Depth?

 			return (layerIndex & 0xFF)             << 60 |
 			       (matFlags)                      << 52 |
 			       (elementType & 0xF)             << 51 |
 			       (pipelineIndex & 0xFFFF)        << 35 |
-			       (vertexDeclarationIndex & 0xFF) << 23;
+			       (materialPassIndex & 0xFFFF)    << 19 | // bits 19-34: kept below bit 35 so it cannot bleed into the pipeline index (bits 35-50)
+			       (vertexDeclarationIndex & 0xFF) << 11;  // bits 11-18: the low 11 bits stay unused
 		}
 	}

@@ -103,6 +107,7 @@ namespace Nz
 	inline void RenderSpriteChain::Register(RenderQueueRegistry& registry) const
 	{
 		registry.RegisterLayer(m_renderLayer);
+		registry.RegisterMaterialPass(m_materialPass.get()); // Needed so ComputeSortingScore can fetch this pass's index from the registry
 		registry.RegisterPipeline(m_renderPipeline.get());
 		registry.RegisterVertexDeclaration(m_vertexDeclaration.get());
 	}
--- a/include/Nazara/Graphics/RenderSubmesh.inl
+++ b/include/Nazara/Graphics/RenderSubmesh.inl
@@ -24,9 +24,6 @@ namespace Nz
 	inline UInt64 RenderSubmesh::ComputeSortingScore(const Frustumf& frustum, const RenderQueueRegistry& registry) const
 	{
 		UInt64 layerIndex = registry.FetchLayerIndex(m_renderLayer);
-		UInt64 elementType = GetElementType();
-		UInt64 pipelineIndex = registry.FetchPipelineIndex(m_renderPipeline.get());
-		UInt64 vertexBufferIndex = registry.FetchVertexBuffer(m_vertexBuffer.get());
 		
 		if (m_materialPass->IsFlagEnabled(MaterialPassFlag::Transparent))
 		{
@@ -46,6 +43,11 @@ namespace Nz
 		}
 		else
 		{
+			UInt64 elementType = GetElementType();
+			UInt64 materialPassIndex = registry.FetchMaterialPassIndex(m_materialPass.get());
+			UInt64 pipelineIndex = registry.FetchPipelineIndex(m_renderPipeline.get());
+			UInt64 vertexBufferIndex = registry.FetchVertexBuffer(m_vertexBuffer.get());
+
 			UInt64 matFlags = 0;

 			// Opaque RQ index:
@@ -53,14 +55,16 @@ namespace Nz
 			// - Transparent flag (1bit)
 			// - Element type (4bits)
 			// - Pipeline (16bits)
+			// - MaterialPass (16bits)
 			// - VertexBuffer (8bits)
-			// - ?? (24bits) - Depth?
+			// - ?? (11bits) - Depth?

 			return (layerIndex & 0xFF)             << 60 |
 			       (matFlags)                      << 52 |
 			       (elementType & 0xF)             << 51 |
 			       (pipelineIndex & 0xFFFF)        << 35 |
-			       (vertexBufferIndex & 0xFF)      << 23;
+			       (materialPassIndex & 0xFFFF)    << 19 | // bits 19-34: kept below bit 35 so it cannot bleed into the pipeline index (bits 35-50)
+			       (vertexBufferIndex & 0xFF)      << 11;  // bits 11-18: the low 11 bits stay unused
 		}
 	}

@@ -97,6 +101,7 @@ namespace Nz
 	inline void RenderSubmesh::Register(RenderQueueRegistry& registry) const
 	{
 		registry.RegisterLayer(m_renderLayer);
+		registry.RegisterMaterialPass(m_materialPass.get()); // Needed so ComputeSortingScore can fetch this pass's index from the registry
 		registry.RegisterPipeline(m_renderPipeline.get());
 		registry.RegisterVertexBuffer(m_vertexBuffer.get());
 	}
--- a/src/Nazara/Graphics/SpriteChainRenderer.cpp
+++ b/src/Nazara/Graphics/SpriteChainRenderer.cpp
@@ -65,6 +65,7 @@ namespace Nz
 		UInt8* currentAllocationMemPtr = nullptr;
 		const VertexDeclaration* currentVertexDeclaration = nullptr;
 		AbstractBuffer* currentVertexBuffer = nullptr;
+		const MaterialPass* currentMaterialPass = nullptr;
 		const RenderPipeline* currentPipeline = nullptr;
 		const ShaderBinding* currentShaderBinding = nullptr;
 		const Texture* currentTextureOverlay = nullptr;
@@ -122,24 +123,30 @@ namespace Nz
 				currentVertexDeclaration = vertexDeclaration;
 			}

-			if (currentPipeline != &spriteChain.GetRenderPipeline())
+			if (const RenderPipeline* pipeline = &spriteChain.GetRenderPipeline(); currentPipeline != pipeline)
 			{
 				FlushDrawCall();
-				currentPipeline = &spriteChain.GetRenderPipeline();
+				currentPipeline = pipeline;
 			}

-			if (currentWorldInstance != &spriteChain.GetWorldInstance())
+			if (const MaterialPass* materialPass = &spriteChain.GetMaterialPass(); currentMaterialPass != materialPass)
+			{
+				FlushDrawData();
+				currentMaterialPass = materialPass;
+			}
+
+			if (const WorldInstance* worldInstance = &spriteChain.GetWorldInstance(); currentWorldInstance != worldInstance)
 			{
 				// TODO: Flushing draw calls on instance binding means we can have e.g. 1000 sprites rendered using a draw call for each one
 				// which is far from being efficient, using some bindless could help (or at least instancing?)
 				FlushDrawData();
-				currentWorldInstance = &spriteChain.GetWorldInstance();
+				currentWorldInstance = worldInstance;
 			}

-			if (currentTextureOverlay != spriteChain.GetTextureOverlay())
+			if (const Texture* textureOverlay = spriteChain.GetTextureOverlay(); currentTextureOverlay != textureOverlay)
 			{
 				FlushDrawData();
-				currentTextureOverlay = spriteChain.GetTextureOverlay();
+				currentTextureOverlay = textureOverlay;
 			}

 			std::size_t remainingQuads = spriteChain.GetSpriteCount();
--- a/src/Nazara/Graphics/SubmeshRenderer.cpp
+++ b/src/Nazara/Graphics/SubmeshRenderer.cpp
@@ -29,6 +29,7 @@ namespace Nz

 		const AbstractBuffer* currentIndexBuffer = nullptr;
 		const AbstractBuffer* currentVertexBuffer = nullptr;
+		const MaterialPass* currentMaterialPass = nullptr;
 		const RenderPipeline* currentPipeline = nullptr;
 		const ShaderBinding* currentShaderBinding = nullptr;
 		const WorldInstance* currentWorldInstance = nullptr;
@@ -59,6 +60,12 @@ namespace Nz
 				currentPipeline = pipeline;
 			}

+			if (const MaterialPass* materialPass = &submesh.GetMaterialPass(); currentMaterialPass != materialPass)
+			{
+				FlushDrawCall();
+				currentMaterialPass = materialPass;
+			}
+
 			if (const AbstractBuffer* indexBuffer = submesh.GetIndexBuffer(); currentIndexBuffer != indexBuffer)
 			{
 				FlushDrawCall();
@@ -71,12 +78,12 @@ namespace Nz
 				currentVertexBuffer = vertexBuffer;
 			}

-			if (currentWorldInstance != &submesh.GetWorldInstance())
+			if (const WorldInstance* worldInstance = &submesh.GetWorldInstance(); currentWorldInstance != worldInstance)
 			{
 				// TODO: Flushing draw calls on instance binding means we can have e.g. 1000 sprites rendered using a draw call for each one
 				// which is far from being efficient, using some bindless could help (or at least instancing?)
 				FlushDrawData();
-				currentWorldInstance = &submesh.GetWorldInstance();
+				currentWorldInstance = worldInstance;
 			}

 			if (!currentShaderBinding)