From 2d1f054c61f71656a72eedbd8bda7693e950ab94 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 5 Apr 2019 18:31:24 -0300
Subject: [PATCH 1/3] gl_rasterizer: Use ARB_multi_bind to update UBOs across
 stages

---
 .../renderer_opengl/gl_rasterizer.cpp         | 32 ++++++-------------
 .../renderer_opengl/gl_rasterizer.h           |  3 ++
 src/video_core/renderer_opengl/utils.cpp      | 25 +++++++++++++++
 src/video_core/renderer_opengl/utils.h        | 20 ++++++++++++
 4 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7ff1e67377..148692943b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -299,6 +299,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     BaseBindings base_bindings;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
 
+    // Prepare UBO bindings
+    bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
+
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = gpu.regs.shader_config[index];
         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -321,8 +324,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
 
         // Bind the emulation info buffer
-        glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
-                          static_cast<GLsizeiptr>(sizeof(ubo)));
+        bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
+                                 static_cast<GLsizeiptr>(sizeof(ubo)));
 
         Shader shader{shader_cache.GetStageProgram(program)};
         const auto [program_handle, next_bindings] =
@@ -366,6 +369,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         base_bindings = next_bindings;
     }
 
+    bind_ubo_pushbuffer.Bind();
+
     SyncClipEnabled(clip_distances);
 
     gpu.dirty_flags.shaders = false;
@@ -900,23 +905,14 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
     const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
     const auto& entries = shader->GetShaderEntries().const_buffers;
 
-    constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
-    std::array<GLuint, max_binds> bind_buffers;
-    std::array<GLintptr, max_binds> bind_offsets;
-    std::array<GLsizeiptr, max_binds> bind_sizes;
-
-    ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
-
     // Upload only the enabled buffers from the 16 constbuffers of each shader stage
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& used_buffer = entries[bindpoint];
         const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
 
         if (!buffer.enabled) {
-            // With disabled buffers set values as zero to unbind them
-            bind_buffers[bindpoint] = 0;
-            bind_offsets[bindpoint] = 0;
-            bind_sizes[bindpoint] = 0;
+            // Set values to zero to unbind buffers
+            bind_ubo_pushbuffer.Push(0, 0, 0);
             continue;
         }
 
@@ -944,16 +940,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
         const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
             buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
 
-        // Prepare values for multibind
-        bind_buffers[bindpoint] = buffer_cache.GetHandle();
-        bind_offsets[bindpoint] = const_buffer_offset;
-        bind_sizes[bindpoint] = size;
+        bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
     }
-
-    // The first binding is reserved for emulation values
-    const GLuint ubo_base_binding = base_bindings.cbuf + 1;
-    glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
-                       bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
 }
 
 void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 54fbf48aa5..72a399e3dc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -28,6 +28,7 @@
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/utils.h"
 
 namespace Core {
 class System;
@@ -229,6 +230,8 @@ private:
     PrimitiveAssembler primitive_assembler{buffer_cache};
     GLint uniform_buffer_alignment;
 
+    BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
+
     std::size_t CalculateVertexArraysSize() const;
 
     std::size_t CalculateIndexBufferSize() const;
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index d84634cb30..79bda54c1e 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -5,11 +5,36 @@
 #include <string>
 #include <fmt/format.h>
 #include <glad/glad.h>
+#include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/renderer_opengl/utils.h"
 
 namespace OpenGL {
 
+BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
+
+BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
+
+void BindBuffersRangePushBuffer::Setup(GLuint first_) {
+    first = first_; // Passed to glBindBuffersRange as the first binding index by Bind()
+    buffers.clear(); // Drop whatever was queued before this call; the three lists stay in sync
+    offsets.clear();
+    sizes.clear();
+}
+
+void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
+    buffers.push_back(buffer); // One entry is appended to each list, so Bind() sees equal sizes
+    offsets.push_back(offset);
+    sizes.push_back(size);
+}
+
+void BindBuffersRangePushBuffer::Bind() const {
+    const std::size_t count{buffers.size()};
+    DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
+    glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
+                       sizes.data());
+}
+
 void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
     if (!GLAD_GL_KHR_debug) {
         return; // We don't need to throw an error as this is just for debugging
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 1fcb6fc115..aef45c9dce 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -5,11 +5,31 @@
 #pragma once
 
 #include <string>
+#include <vector>
 #include <glad/glad.h>
 #include "common/common_types.h"
 
 namespace OpenGL {
 
+class BindBuffersRangePushBuffer { // Queues buffer ranges for one glBindBuffersRange call
+public:
+    explicit BindBuffersRangePushBuffer(GLenum target); // target is forwarded to the GL call
+    ~BindBuffersRangePushBuffer();
+    /// Clears the queued ranges and sets the binding index passed as "first" by Bind()
+    void Setup(GLuint first_);
+    /// Appends one buffer/offset/size triple to the queue
+    void Push(GLuint buffer, GLintptr offset, GLsizeiptr size);
+    /// Hands every queued triple to glBindBuffersRange in a single call
+    void Bind() const;
+
+private:
+    GLenum target;
+    GLuint first{}; // Zero-initialized so Bind() before Setup() never reads an indeterminate value
+    std::vector<GLuint> buffers;
+    std::vector<GLintptr> offsets;
+    std::vector<GLsizeiptr> sizes;
+};
+
 void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");
 
 } // namespace OpenGL
\ No newline at end of file

From b631c09e72a761a4aa3dc8183ec1661e95619939 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 5 Apr 2019 03:50:26 -0300
Subject: [PATCH 2/3] gl_rasterizer: Use ARB_multi_bind to update SSBOs

---
 .../renderer_opengl/gl_rasterizer.cpp           | 17 ++++++++---------
 src/video_core/renderer_opengl/gl_rasterizer.h  |  1 +
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 148692943b..d250d5cbb9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -299,8 +299,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     BaseBindings base_bindings;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
 
-    // Prepare UBO bindings
+    // Prepare packed bindings
     bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
+    bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
 
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = gpu.regs.shader_config[index];
@@ -370,6 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     }
 
     bind_ubo_pushbuffer.Bind();
+    bind_ssbo_pushbuffer.Bind();
 
     SyncClipEnabled(clip_distances);
 
@@ -947,15 +949,12 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
 void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
                                           const Shader& shader, GLenum primitive_mode,
                                           BaseBindings base_bindings) {
-    // TODO(Rodrigo): Use ARB_multi_bind here
     const auto& entries = shader->GetShaderEntries().global_memory_entries;
-
-    for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
-        const auto& entry = entries[bindpoint];
-        const u32 current_bindpoint = base_bindings.gmem + bindpoint;
-        const auto& region = global_cache.GetGlobalRegion(entry, stage);
-
-        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
+    // Queue every region from offset 0 for its full reported size; SetupShaders binds the batch
+    for (const auto& entry : entries) {
+        const auto& region{global_cache.GetGlobalRegion(entry, stage)};
+        bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
+                                  static_cast<GLsizeiptr>(region->GetSizeInBytes()));
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 72a399e3dc..e4c64ae71e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -231,6 +231,7 @@ private:
     GLint uniform_buffer_alignment;
 
     BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
+    BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
 
     std::size_t CalculateVertexArraysSize() const;
 

From 34c3e2c7868620d9c1c76fd810db3cb368fbd22b Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 5 Apr 2019 19:19:49 -0300
Subject: [PATCH 3/3] renderer_opengl/utils: Skip empty binds

---
 src/video_core/renderer_opengl/utils.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 79bda54c1e..84a9873715 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -31,6 +31,9 @@ void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr
 void BindBuffersRangePushBuffer::Bind() const {
     const std::size_t count{buffers.size()};
     DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
+    if (count == 0) {
+        return; // Nothing was pushed since Setup(), so there is no range to bind
+    }
     glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
                        sizes.data());
 }