OpenGLRenderer/CommandBuffer: Replace std::visit by a switch (to improve performance)

This commit is contained in:
SirLynix
2023-04-30 21:12:33 +02:00
parent 97f1c2c56c
commit 3957687a31
3 changed files with 339 additions and 296 deletions

View File

@@ -62,273 +62,18 @@ namespace Nz
{
const GL::Context* context = GL::Context::GetCurrentContext();
StackArray<std::size_t> colorIndexes = NazaraStackArrayNoInit(std::size_t, m_maxColorBufferCount);
for (const auto& commandVariant : m_commands)
{
std::visit([&](auto&& command)
switch (commandVariant.index())
{
using T = std::decay_t<decltype(command)>;
#define NAZARA_OPENGL_COMMAND_CALLBACK(Command) \
case TypeListFind<CommandList, Command>: \
Execute(context, std::get<TypeListFind<CommandList, Command>>(commandVariant)); \
break;
if constexpr (std::is_same_v<T, BeginDebugRegionData>)
{
if (context->glPushDebugGroup)
context->glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, GLsizei(command.regionName.size()), command.regionName.data());
}
else if constexpr (std::is_same_v<T, BlitTextureData>)
{
context->BlitTexture(*command.source, *command.target, command.sourceBox, command.targetBox, command.filter);
}
else if constexpr (std::is_same_v<T, CopyBufferData>)
{
context->BindBuffer(GL::BufferTarget::CopyRead, command.source);
context->BindBuffer(GL::BufferTarget::CopyWrite, command.target);
context->glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, command.sourceOffset, command.targetOffset, command.size);
}
else if constexpr (std::is_same_v<T, CopyBufferFromMemoryData>)
{
context->BindBuffer(GL::BufferTarget::CopyWrite, command.target);
context->glBufferSubData(GL_COPY_WRITE_BUFFER, command.targetOffset, command.size, command.memory);
}
else if constexpr (std::is_same_v<T, CopyTextureData>)
{
context->CopyTexture(*command.source, *command.target, command.sourceBox, command.targetPoint);
}
else if constexpr (std::is_same_v<T, DispatchData>)
{
if (!context->glDispatchCompute)
throw std::runtime_error("compute shaders are not supported on this device");
command.states.pipeline->Apply(*context);
ApplyBindings(*context, command.bindings);
context->glDispatchCompute(command.numGroupsX, command.numGroupsY, command.numGroupsZ);
}
else if constexpr (std::is_same_v<T, DrawData>)
{
ApplyStates(*context, command.states);
ApplyBindings(*context, command.bindings);
context->glDrawArraysInstanced(ToOpenGL(command.states.pipeline->GetPipelineInfo().primitiveMode), command.firstVertex, command.vertexCount, command.instanceCount);
}
else if constexpr (std::is_same_v<T, DrawIndexedData>)
{
const UInt8* origin = 0; //< For an easy way to cast an integer to a pointer
origin += command.states.indexBufferOffset;
switch (command.states.indexBufferType)
{
case IndexType::U8: origin += command.firstIndex * sizeof(UInt8); break;
case IndexType::U16: origin += command.firstIndex * sizeof(UInt16); break;
case IndexType::U32: origin += command.firstIndex * sizeof(UInt32); break;
}
ApplyStates(*context, command.states);
ApplyBindings(*context, command.bindings);
context->glDrawElementsInstanced(ToOpenGL(command.states.pipeline->GetPipelineInfo().primitiveMode), command.indexCount, ToOpenGL(command.states.indexBufferType), origin, command.instanceCount);
}
else if constexpr (std::is_same_v<T, EndDebugRegionData>)
{
if (context->glPopDebugGroup)
context->glPopDebugGroup();
}
else if constexpr (std::is_same_v<T, MemoryBarrier>)
{
if (context->glMemoryBarrier)
context->glMemoryBarrier(command.barriers);
}
else if constexpr (std::is_same_v<T, SetFrameBufferData>)
{
command.framebuffer->Activate();
context = GL::Context::GetCurrentContext();
std::size_t colorBufferCount = command.framebuffer->GetColorBufferCount();
assert(colorBufferCount <= colorIndexes.size());
colorIndexes.fill(0);
std::size_t colorIndex = 0;
GLbitfield clearFields = 0;
std::optional<std::size_t> depthStencilIndex;
std::size_t attachmentCount = command.renderpass->GetAttachmentCount();
for (std::size_t i = 0; i < attachmentCount; ++i)
{
const auto& attachmentInfo = command.renderpass->GetAttachment(i);
switch (PixelFormatInfo::GetContent(attachmentInfo.format))
{
case PixelFormatContent::Undefined:
break;
case PixelFormatContent::ColorRGBA:
colorIndexes[colorIndex++] = i;
break;
case PixelFormatContent::Depth:
if (!depthStencilIndex)
depthStencilIndex = i;
break;
case PixelFormatContent::DepthStencil:
if (!depthStencilIndex)
depthStencilIndex = i;
break;
case PixelFormatContent::Stencil:
//FIXME: I'm not sure stencil is properly handled here
if (!depthStencilIndex)
depthStencilIndex = i;
break;
}
}
StackVector<GLenum> invalidateAttachments;
if (command.framebuffer->GetType() == FramebufferType::Texture)
{
const OpenGLFboFramebuffer& fboFramebuffer = static_cast<const OpenGLFboFramebuffer&>(*command.framebuffer);
invalidateAttachments = NazaraStackVector(GLenum, colorBufferCount + 1);
for (std::size_t i = 0; i < colorBufferCount; ++i)
{
std::size_t attachmentIndex = colorIndexes[i];
Color color = command.clearValues[attachmentIndex].color;
std::array<GLfloat, 4> clearColor = { color.r, color.g, color.b, color.a };
const auto& attachmentInfo = command.renderpass->GetAttachment(attachmentIndex);
if (attachmentInfo.loadOp == AttachmentLoadOp::Clear)
{
context->ResetColorWriteMasks();
// Reset scissor as it affects clear commands if enabled (disabling it would work too but it seems more expansive)
const Vector2ui& attachmentSize = fboFramebuffer.GetAttachmentSize(i);
context->SetScissorBox(0, 0, attachmentSize.x, attachmentSize.y);
context->glClearBufferfv(GL_COLOR, GLint(i), clearColor.data());
}
else if (attachmentInfo.loadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + i));
}
if (depthStencilIndex)
{
std::size_t attachmentIndex = *depthStencilIndex;
const auto& clearValues = command.clearValues[attachmentIndex];
const auto& depthStencilAttachment = command.renderpass->GetAttachment(attachmentIndex);
// Reset scissor as it affects clear commands if enabled (disabling it would work too but it seems more expansive)
if (depthStencilAttachment.loadOp == AttachmentLoadOp::Clear || depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Clear)
{
const Vector2ui& attachmentSize = fboFramebuffer.GetAttachmentSize(attachmentIndex);
context->SetScissorBox(0, 0, attachmentSize.x, attachmentSize.y);
}
if (depthStencilAttachment.loadOp == AttachmentLoadOp::Clear && depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Clear)
{
context->ResetDepthWriteMasks();
context->ResetStencilWriteMasks();
context->glClearBufferfi(GL_DEPTH_STENCIL, 0, clearValues.depth, clearValues.stencil);
}
else if (depthStencilAttachment.loadOp == AttachmentLoadOp::Clear)
{
context->ResetDepthWriteMasks();
context->glClearBufferfv(GL_DEPTH, 0, &clearValues.depth);
if (depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_STENCIL_ATTACHMENT);
}
else if (depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Clear)
{
context->ResetStencilWriteMasks();
context->glClearBufferuiv(GL_STENCIL, 0, &clearValues.stencil);
if (depthStencilAttachment.loadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_DEPTH_ATTACHMENT);
}
else if (depthStencilAttachment.loadOp == AttachmentLoadOp::Discard && depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_DEPTH_STENCIL_ATTACHMENT);
}
}
else
{
assert(command.framebuffer->GetType() == FramebufferType::Window);
// glDrawBuffers doesn't accept GL_BACK on OpenGL non-ES, and glDrawBuffer must be used instead
if (context->GetParams().type != GL::ContextType::OpenGL_ES && context->glDrawBuffer)
context->glDrawBuffer(GL_BACK);
else
{
GLenum buffer = GL_BACK;
context->glDrawBuffers(1, &buffer);
}
invalidateAttachments = NazaraStackVector(GLenum, 3); //< color + depth + stencil
if (colorIndex > 0)
{
assert(colorBufferCount <= 1);
std::size_t colorAttachmentIndex = colorIndexes.front();
const auto& colorAttachment = command.renderpass->GetAttachment(colorAttachmentIndex);
if (colorAttachment.loadOp == AttachmentLoadOp::Clear)
{
context->ResetColorWriteMasks();
const Color& color = command.clearValues[colorAttachmentIndex].color;
context->glClearColor(color.r, color.g, color.b, color.a);
clearFields |= GL_COLOR_BUFFER_BIT;
}
else if (colorAttachment.loadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_COLOR);
}
if (depthStencilIndex)
{
std::size_t attachmentIndex = *depthStencilIndex;
const auto& clearValues = command.clearValues[attachmentIndex];
const auto& depthStencilAttachment = command.renderpass->GetAttachment(attachmentIndex);
if (depthStencilAttachment.loadOp == AttachmentLoadOp::Clear)
{
context->ResetDepthWriteMasks();
context->glClearDepthf(clearValues.depth);
clearFields |= GL_DEPTH_BUFFER_BIT;
}
else if (depthStencilAttachment.loadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_DEPTH);
if (depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Clear && PixelFormatInfo::GetContent(depthStencilAttachment.format) == PixelFormatContent::DepthStencil)
{
context->ResetStencilWriteMasks();
context->glClearStencil(clearValues.stencil);
clearFields |= GL_STENCIL_BUFFER_BIT;
}
else if (depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_STENCIL);
}
if (clearFields)
{
// Reset scissor as it affects clear commands if enabled (disabling it would work too but it seems more expansive)
const Vector2ui& size = command.framebuffer->GetSize();
context->SetScissorBox(0, 0, size.x, size.y);
context->glClear(clearFields);
}
}
if (!invalidateAttachments.empty())
context->glInvalidateFramebuffer(GL_FRAMEBUFFER, GLsizei(invalidateAttachments.size()), invalidateAttachments.data());
}
else
static_assert(AlwaysFalse<T>::value, "non-exhaustive visitor");
}, commandVariant);
NAZARA_OPENGL_FOREACH_COMMANDS(NAZARA_OPENGL_COMMAND_CALLBACK, NAZARA_OPENGL_COMMAND_CALLBACK)
#undef NAZARA_OPENGL_COMMAND_CALLBACK
}
}
}
@@ -389,6 +134,275 @@ namespace Nz
context.BindVertexArray(vao.GetObjectId());
}
// Executes a BeginDebugRegionCommand: pushes a debug group labelled with command.regionName.
// The command is silently ignored when the context does not expose glPushDebugGroup.
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const BeginDebugRegionCommand& command)
{
	if (!context->glPushDebugGroup)
		return; //< entry point not available on this context

	const auto& label = command.regionName;
	context->glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, static_cast<GLsizei>(label.size()), label.data());
}
// Executes a BlitTextureCommand: forwards the source/target textures, both boxes and the filter
// stored in the command to GL::Context::BlitTexture.
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const BlitTextureCommand& command)
{
	auto& sourceTexture = *command.source;
	auto& targetTexture = *command.target;

	context->BlitTexture(sourceTexture, targetTexture, command.sourceBox, command.targetBox, command.filter);
}
// Executes a CopyBufferCommand: binds the source buffer to the copy-read target and the
// destination buffer to the copy-write target, then copies command.size bytes between them.
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const CopyBufferCommand& command)
{
	const GL::Context& gl = *context;

	gl.BindBuffer(GL::BufferTarget::CopyRead, command.source);
	gl.BindBuffer(GL::BufferTarget::CopyWrite, command.target);

	gl.glCopyBufferSubData(
		GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
		command.sourceOffset, command.targetOffset,
		command.size);
}
// Executes a CopyBufferFromMemoryCommand: binds the destination buffer to the copy-write target
// and uploads command.size bytes from command.memory at command.targetOffset.
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const CopyBufferFromMemoryCommand& command)
{
	const GL::Context& gl = *context;

	gl.BindBuffer(GL::BufferTarget::CopyWrite, command.target);
	gl.glBufferSubData(GL_COPY_WRITE_BUFFER, command.targetOffset, command.size, command.memory);
}
// Executes a CopyTextureCommand: forwards the source/target textures, the source box and the
// target point stored in the command to GL::Context::CopyTexture.
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const CopyTextureCommand& command)
{
	auto& sourceTexture = *command.source;
	auto& targetTexture = *command.target;

	context->CopyTexture(sourceTexture, targetTexture, command.sourceBox, command.targetPoint);
}
// Executes a DispatchCommand: applies the compute pipeline and the bindings, then dispatches
// numGroupsX * numGroupsY * numGroupsZ work groups.
// Throws std::runtime_error when the context does not expose glDispatchCompute.
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const DispatchCommand& command)
{
	const GL::Context& gl = *context;

	const bool hasComputeSupport = !!gl.glDispatchCompute;
	if (!hasComputeSupport)
		throw std::runtime_error("compute shaders are not supported on this device");

	// State first, resources second, dispatch last
	command.states.pipeline->Apply(gl);
	ApplyBindings(gl, command.bindings);

	gl.glDispatchCompute(command.numGroupsX, command.numGroupsY, command.numGroupsZ);
}
// Executes a DrawCommand: applies the draw states and bindings, then issues a non-indexed
// instanced draw using the primitive mode of the bound pipeline.
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const DrawCommand& command)
{
	const GL::Context& gl = *context;

	ApplyStates(gl, command.states);
	ApplyBindings(gl, command.bindings);

	// The primitive mode is read from the pipeline only once states and bindings are applied
	const auto primitiveMode = ToOpenGL(command.states.pipeline->GetPipelineInfo().primitiveMode);
	gl.glDrawArraysInstanced(primitiveMode, command.firstVertex, command.vertexCount, command.instanceCount);
}
// Executes a DrawIndexedCommand: applies the draw states and bindings, then issues an indexed
// instanced draw using the primitive mode of the bound pipeline.
//
// The "indices" argument handed to glDrawElementsInstanced is a byte offset disguised as a pointer:
// command.states.indexBufferOffset plus command.firstIndex times the size of one index.
// The offset is computed with integer arithmetic and converted once, rather than by incrementing
// a null pointer (which is undefined behaviour).
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const DrawIndexedCommand& command)
{
	// Size in bytes of a single index; stays 0 for an unhandled index type, in which case only
	// the buffer offset is used
	std::size_t indexSize = 0;
	switch (command.states.indexBufferType)
	{
		case IndexType::U8:  indexSize = sizeof(UInt8);  break;
		case IndexType::U16: indexSize = sizeof(UInt16); break;
		case IndexType::U32: indexSize = sizeof(UInt32); break;
	}

	const std::size_t byteOffset = static_cast<std::size_t>(command.states.indexBufferOffset) + static_cast<std::size_t>(command.firstIndex) * indexSize;
	const void* origin = reinterpret_cast<const void*>(byteOffset); //< OpenGL expects the offset as a pointer value

	ApplyStates(*context, command.states);
	ApplyBindings(*context, command.bindings);
	context->glDrawElementsInstanced(ToOpenGL(command.states.pipeline->GetPipelineInfo().primitiveMode), command.indexCount, ToOpenGL(command.states.indexBufferType), origin, command.instanceCount);
}
// Executes an EndDebugRegionCommand: pops the current debug group.
// The command carries no data that is read here (hence the unnamed parameter) and is silently
// ignored when the context does not expose glPopDebugGroup.
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const EndDebugRegionCommand& /*command*/)
{
	if (!context->glPopDebugGroup)
		return; //< entry point not available on this context

	context->glPopDebugGroup();
}
// Executes a MemoryBarrier command: issues glMemoryBarrier with the barrier bits stored in the command.
// The command is silently ignored when the context does not expose glMemoryBarrier.
inline void OpenGLCommandBuffer::Execute(const GL::Context* context, const MemoryBarrier& command)
{
	if (!context->glMemoryBarrier)
		return; //< entry point not available on this context

	context->glMemoryBarrier(command.barriers);
}
// Executes a SetFrameBufferCommand: activates the target framebuffer, then applies the load
// operations of the renderpass attachments (clears for AttachmentLoadOp::Clear, invalidation for
// AttachmentLoadOp::Discard).
//
// context is a reference to the caller's context pointer: for window framebuffers it is
// reassigned to GL::Context::GetCurrentContext(), so the new value is visible to the caller.
// For texture (FBO) framebuffers the incoming context is used unchanged.
inline void OpenGLCommandBuffer::Execute(const GL::Context*& context, const SetFrameBufferCommand& command)
{
command.framebuffer->Activate();
// Scratch array: colorIndexes[n] is the renderpass attachment index of the n-th color attachment
StackArray<std::size_t> colorIndexes = NazaraStackArrayNoInit(std::size_t, m_maxColorBufferCount);
std::size_t colorBufferCount = command.framebuffer->GetColorBufferCount();
assert(colorBufferCount <= colorIndexes.size());
colorIndexes.fill(0);
std::size_t colorIndex = 0; //< number of color attachments found so far
GLbitfield clearFields = 0; //< GL_*_BUFFER_BIT flags accumulated for the window path
std::optional<std::size_t> depthStencilIndex; //< first depth and/or stencil attachment, if any
// Classify every renderpass attachment by the content of its pixel format
std::size_t attachmentCount = command.renderpass->GetAttachmentCount();
for (std::size_t i = 0; i < attachmentCount; ++i)
{
const auto& attachmentInfo = command.renderpass->GetAttachment(i);
switch (PixelFormatInfo::GetContent(attachmentInfo.format))
{
case PixelFormatContent::Undefined:
break;
case PixelFormatContent::ColorRGBA:
colorIndexes[colorIndex++] = i;
break;
// Depth, DepthStencil and Stencil all share a single slot: only the first one encountered is kept
case PixelFormatContent::Depth:
if (!depthStencilIndex)
depthStencilIndex = i;
break;
case PixelFormatContent::DepthStencil:
if (!depthStencilIndex)
depthStencilIndex = i;
break;
case PixelFormatContent::Stencil:
//FIXME: I'm not sure stencil is properly handled here
if (!depthStencilIndex)
depthStencilIndex = i;
break;
}
}
// Attachments whose load op is Discard are collected here and invalidated at the end
StackVector<GLenum> invalidateAttachments;
if (command.framebuffer->GetType() == FramebufferType::Texture)
{
// FBO path: every attachment is cleared individually through glClearBuffer*
const OpenGLFboFramebuffer& fboFramebuffer = static_cast<const OpenGLFboFramebuffer&>(*command.framebuffer);
// Capacity: one entry per color buffer plus a single depth/stencil entry
invalidateAttachments = NazaraStackVector(GLenum, colorBufferCount + 1);
for (std::size_t i = 0; i < colorBufferCount; ++i)
{
std::size_t attachmentIndex = colorIndexes[i];
Color color = command.clearValues[attachmentIndex].color;
std::array<GLfloat, 4> clearColor = { color.r, color.g, color.b, color.a };
const auto& attachmentInfo = command.renderpass->GetAttachment(attachmentIndex);
if (attachmentInfo.loadOp == AttachmentLoadOp::Clear)
{
context->ResetColorWriteMasks();
// Reset scissor as it affects clear commands if enabled (disabling it would work too but it seems more expensive)
// NOTE(review): the size is queried with the color buffer index i, whereas the depth/stencil path below
// uses the renderpass attachment index - confirm which index GetAttachmentSize expects
const Vector2ui& attachmentSize = fboFramebuffer.GetAttachmentSize(i);
context->SetScissorBox(0, 0, attachmentSize.x, attachmentSize.y);
context->glClearBufferfv(GL_COLOR, GLint(i), clearColor.data());
}
else if (attachmentInfo.loadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + i));
}
if (depthStencilIndex)
{
std::size_t attachmentIndex = *depthStencilIndex;
const auto& clearValues = command.clearValues[attachmentIndex];
const auto& depthStencilAttachment = command.renderpass->GetAttachment(attachmentIndex);
// Reset scissor as it affects clear commands if enabled (disabling it would work too but it seems more expensive)
if (depthStencilAttachment.loadOp == AttachmentLoadOp::Clear || depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Clear)
{
const Vector2ui& attachmentSize = fboFramebuffer.GetAttachmentSize(attachmentIndex);
context->SetScissorBox(0, 0, attachmentSize.x, attachmentSize.y);
}
// Four cases: clear both, clear depth only, clear stencil only, or discard both.
// When only one aspect is cleared, the other one is invalidated if its load op is Discard.
if (depthStencilAttachment.loadOp == AttachmentLoadOp::Clear && depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Clear)
{
context->ResetDepthWriteMasks();
context->ResetStencilWriteMasks();
context->glClearBufferfi(GL_DEPTH_STENCIL, 0, clearValues.depth, clearValues.stencil);
}
else if (depthStencilAttachment.loadOp == AttachmentLoadOp::Clear)
{
context->ResetDepthWriteMasks();
context->glClearBufferfv(GL_DEPTH, 0, &clearValues.depth);
if (depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_STENCIL_ATTACHMENT);
}
else if (depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Clear)
{
context->ResetStencilWriteMasks();
context->glClearBufferuiv(GL_STENCIL, 0, &clearValues.stencil);
if (depthStencilAttachment.loadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_DEPTH_ATTACHMENT);
}
else if (depthStencilAttachment.loadOp == AttachmentLoadOp::Discard && depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_DEPTH_STENCIL_ATTACHMENT);
}
}
else
{
// Window path: clear values are set up per aspect, accumulated in clearFields and issued by a single glClear
assert(command.framebuffer->GetType() == FramebufferType::Window);
// Window framebuffers have their own contexts
context = GL::Context::GetCurrentContext();
// glDrawBuffers doesn't accept GL_BACK on OpenGL non-ES, and glDrawBuffer must be used instead
if (context->GetParams().type != GL::ContextType::OpenGL_ES && context->glDrawBuffer)
context->glDrawBuffer(GL_BACK);
else
{
GLenum buffer = GL_BACK;
context->glDrawBuffers(1, &buffer);
}
invalidateAttachments = NazaraStackVector(GLenum, 3); //< color + depth + stencil
if (colorIndex > 0)
{
// Only the first color attachment is considered for a window framebuffer
assert(colorBufferCount <= 1);
std::size_t colorAttachmentIndex = colorIndexes.front();
const auto& colorAttachment = command.renderpass->GetAttachment(colorAttachmentIndex);
if (colorAttachment.loadOp == AttachmentLoadOp::Clear)
{
context->ResetColorWriteMasks();
const Color& color = command.clearValues[colorAttachmentIndex].color;
context->glClearColor(color.r, color.g, color.b, color.a);
clearFields |= GL_COLOR_BUFFER_BIT;
}
else if (colorAttachment.loadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_COLOR);
}
if (depthStencilIndex)
{
std::size_t attachmentIndex = *depthStencilIndex;
const auto& clearValues = command.clearValues[attachmentIndex];
const auto& depthStencilAttachment = command.renderpass->GetAttachment(attachmentIndex);
if (depthStencilAttachment.loadOp == AttachmentLoadOp::Clear)
{
context->ResetDepthWriteMasks();
context->glClearDepthf(clearValues.depth);
clearFields |= GL_DEPTH_BUFFER_BIT;
}
else if (depthStencilAttachment.loadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_DEPTH);
// Stencil is only cleared when the attachment format holds both depth and stencil
if (depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Clear && PixelFormatInfo::GetContent(depthStencilAttachment.format) == PixelFormatContent::DepthStencil)
{
context->ResetStencilWriteMasks();
context->glClearStencil(clearValues.stencil);
clearFields |= GL_STENCIL_BUFFER_BIT;
}
else if (depthStencilAttachment.stencilLoadOp == AttachmentLoadOp::Discard)
invalidateAttachments.push_back(GL_STENCIL);
}
if (clearFields)
{
// Reset scissor as it affects clear commands if enabled (disabling it would work too but it seems more expensive)
const Vector2ui& size = command.framebuffer->GetSize();
context->SetScissorBox(0, 0, size.x, size.y);
context->glClear(clearFields);
}
}
// Hand every discarded attachment to the driver in a single call
// NOTE(review): glInvalidateFramebuffer is called without an availability check, unlike glPushDebugGroup,
// glPopDebugGroup and glMemoryBarrier in the other Execute overloads - confirm it is always loaded
if (!invalidateAttachments.empty())
context->glInvalidateFramebuffer(GL_FRAMEBUFFER, GLsizei(invalidateAttachments.size()), invalidateAttachments.data());
}
void OpenGLCommandBuffer::Release()
{
assert(m_owner);