From 8d694701bcd97c3766692dff2a9b4ec2f3a64ebd Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 6 Dec 2022 22:32:59 +0100
Subject: [PATCH] MacroHLE: Add OpenGL Support

---
 .../backend/glsl/emit_glsl.cpp                |   2 +-
 .../glsl/emit_glsl_context_get_set.cpp        |  12 ++
 .../renderer_opengl/gl_graphics_pipeline.h    |   1 +
 .../renderer_opengl/gl_rasterizer.cpp         | 124 ++++++++++++------
 .../renderer_opengl/gl_rasterizer.h           |   4 +
 .../renderer_opengl/gl_shader_cache.cpp       |   3 +-
 6 files changed, 107 insertions(+), 39 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
index e8a4390f69..d91e044460 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
@@ -219,7 +219,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR
     EmitContext ctx{program, bindings, profile, runtime_info};
     Precolor(program);
     EmitCode(ctx, program);
-    const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
+    const std::string version{fmt::format("#version 460{}\n", GlslVersionSpecifier(ctx))};
     ctx.header.insert(0, version);
     if (program.shared_memory_size > 0) {
         const auto requested_size{program.shared_memory_size};
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 39579cf5d4..25106da672 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -234,6 +234,12 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
     case IR::Attribute::FrontFace:
         ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
         break;
+    case IR::Attribute::BaseInstance:
+        ctx.AddF32("{}=itof(gl_BaseInstance);", inst);
+        break;
+    case IR::Attribute::BaseVertex:
+        ctx.AddF32("{}=itof(gl_BaseVertex);", inst);
+        break;
     default:
         throw NotImplementedException("Get attribute {}", attr);
     }
@@ -250,6 +256,12 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, s
     case IR::Attribute::VertexId:
         ctx.AddU32("{}=uint(gl_VertexID);", inst);
         break;
+    case IR::Attribute::BaseInstance:
+        ctx.AddU32("{}=uint(gl_BaseInstance);", inst);
+        break;
+    case IR::Attribute::BaseVertex:
+        ctx.AddU32("{}=uint(gl_BaseVertex);", inst);
+        break;
     default:
         throw NotImplementedException("Get U32 attribute {}", attr);
     }
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index ea53ddb460..1c06b36555 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -40,6 +40,7 @@ struct GraphicsPipelineKey {
         BitField<6, 2, Maxwell::Tessellation::DomainType> tessellation_primitive;
         BitField<8, 2, Maxwell::Tessellation::Spacing> tessellation_spacing;
         BitField<10, 1, u32> tessellation_clockwise;
+        BitField<11, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
     };
     std::array<u32, 3> padding;
     VideoCommon::TransformFeedbackState xfb_state;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a44b8c454b..0807d0b88a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -202,7 +202,8 @@ void RasterizerOpenGL::Clear(u32 layer_count) {
     ++num_queued_commands;
 }
 
-void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
+template <typename Func>
+void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
 
     SCOPE_EXIT({ gpu.TickWork(); });
@@ -226,48 +227,97 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
     const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology);
     BeginTransformFeedback(pipeline, primitive_mode);
 
-    const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
-    const GLsizei num_instances = static_cast<GLsizei>(instance_count);
-    if (is_indexed) {
-        const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
-        const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
-        const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
-        const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
-        if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
-            glDrawElements(primitive_mode, num_vertices, format, offset);
-        } else if (num_instances == 1 && base_instance == 0) {
-            glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
-        } else if (base_vertex == 0 && base_instance == 0) {
-            glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances);
-        } else if (base_vertex == 0) {
-            glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
-                                                num_instances, base_instance);
-        } else if (base_instance == 0) {
-            glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
-                                              num_instances, base_vertex);
-        } else {
-            glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
-                                                          offset, num_instances, base_vertex,
-                                                          base_instance);
-        }
-    } else {
-        const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first);
-        const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);
-        if (num_instances == 1 && base_instance == 0) {
-            glDrawArrays(primitive_mode, base_vertex, num_vertices);
-        } else if (base_instance == 0) {
-            glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
-        } else {
-            glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
-                                              num_instances, base_instance);
-        }
-    }
+    draw_func(primitive_mode);
+
     EndTransformFeedback();
 
     ++num_queued_commands;
     has_written_global_memory |= pipeline->WritesGlobalMemory();
 }
 
+void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) { // Direct (non-indirect) draw.
+    PrepareDraw(is_indexed, [this, is_indexed, instance_count](GLenum primitive_mode) { // PrepareDraw invokes this lambda with the GL primitive mode.
+        const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
+        const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
+        const GLsizei num_instances = static_cast<GLsizei>(instance_count);
+        if (is_indexed) { // Indexed path: glDrawElements* family.
+            const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
+            const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
+            const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
+            const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
+            if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { // Single instance, no base offsets.
+                glDrawElements(primitive_mode, num_vertices, format, offset);
+            } else if (num_instances == 1 && base_instance == 0) { // Single instance, non-zero base vertex.
+                glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
+            } else if (base_vertex == 0 && base_instance == 0) { // Instance count != 1, no base offsets.
+                glDrawElementsInstanced(primitive_mode, num_vertices, format, offset,
+                                        num_instances);
+            } else if (base_vertex == 0) { // Non-zero base instance only.
+                glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
+                                                    num_instances, base_instance);
+            } else if (base_instance == 0) { // Non-zero base vertex with instance count != 1.
+                glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
+                                                  num_instances, base_vertex);
+            } else { // Both base vertex and base instance are non-zero.
+                glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
+                                                              offset, num_instances, base_vertex,
+                                                              base_instance);
+            }
+        } else { // Non-indexed path: glDrawArrays* family.
+            const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first); // Passed as the `first` argument of glDrawArrays*.
+            const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);
+            if (num_instances == 1 && base_instance == 0) { // Single instance, no base instance.
+                glDrawArrays(primitive_mode, base_vertex, num_vertices);
+            } else if (base_instance == 0) { // Instance count != 1, no base instance.
+                glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
+            } else { // Non-zero base instance.
+                glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
+                                                  num_instances, base_instance);
+            }
+        }
+    });
+}
+
+void RasterizerOpenGL::DrawIndirect() { // Multi-draw-indirect using the draw manager's indirect params.
+    const auto& params = maxwell3d->draw_manager->GetIndirectParams();
+    buffer_cache.SetDrawIndirect(&params); // Handed to the buffer cache here; reset to nullptr after the draw.
+    PrepareDraw(params.is_indexed, [this, &params](GLenum primitive_mode) { // PrepareDraw invokes this lambda with the GL primitive mode.
+        const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer();
+        const GLvoid* const gl_offset =
+            reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(offset)); // GL takes the indirect-buffer offset encoded as a pointer.
+        glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buffer->Handle());
+        if (params.include_count) { // Count variants: the draw count is sourced from the GL_PARAMETER_BUFFER binding.
+            const auto [draw_buffer, offset_base] = buffer_cache.GetDrawIndirectCount();
+            glBindBuffer(GL_PARAMETER_BUFFER, draw_buffer->Handle());
+
+            if (params.is_indexed) {
+                const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
+                glMultiDrawElementsIndirectCount(primitive_mode, format, gl_offset,
+                                                 static_cast<GLintptr>(offset_base), // Offset of the count inside the parameter buffer.
+                                                 static_cast<GLsizei>(params.max_draw_counts),
+                                                 static_cast<GLsizei>(params.stride));
+            } else {
+                glMultiDrawArraysIndirectCount(primitive_mode, gl_offset,
+                                               static_cast<GLintptr>(offset_base), // Offset of the count inside the parameter buffer.
+                                               static_cast<GLsizei>(params.max_draw_counts),
+                                               static_cast<GLsizei>(params.stride));
+            }
+            return; // Count variant issued; skip the fixed-count path below.
+        }
+        if (params.is_indexed) { // Fixed-count variants: max_draw_counts is passed as the draw count.
+            const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
+            glMultiDrawElementsIndirect(primitive_mode, format, gl_offset,
+                                        static_cast<GLsizei>(params.max_draw_counts),
+                                        static_cast<GLsizei>(params.stride));
+        } else {
+            glMultiDrawArraysIndirect(primitive_mode, gl_offset,
+                                      static_cast<GLsizei>(params.max_draw_counts),
+                                      static_cast<GLsizei>(params.stride));
+        }
+    });
+    buffer_cache.SetDrawIndirect(nullptr); // Clear the indirect params set before PrepareDraw.
+}
+
 void RasterizerOpenGL::DispatchCompute() {
     ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
     if (!pipeline) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index fc183c3ca2..efd19f8807 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -69,6 +69,7 @@ public:
     ~RasterizerOpenGL() override;
 
     void Draw(bool is_indexed, u32 instance_count) override;
+    void DrawIndirect() override;
     void Clear(u32 layer_count) override;
     void DispatchCompute() override;
     void ResetCounter(VideoCore::QueryType type) override;
@@ -121,6 +122,9 @@ private:
     static constexpr size_t MAX_IMAGES = 48;
     static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
 
+    template <typename Func>
+    void PrepareDraw(bool is_indexed, Func&&);
+
     /// Syncs state to match guest's
     void SyncState();
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f8868a0121..bf991afee1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines;
 using VideoCommon::SerializePipeline;
 using Context = ShaderContext::Context;
 
-constexpr u32 CACHE_VERSION = 7;
+constexpr u32 CACHE_VERSION = 8;
 
 template <typename Container>
 auto MakeSpan(Container& container) {
@@ -350,6 +350,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
         regs.tessellation.params.output_primitives.Value() ==
         Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
     graphics_key.xfb_enabled.Assign(regs.transform_feedback_enabled != 0 ? 1 : 0);
+    graphics_key.app_stage.Assign(maxwell3d->engine_state);
     if (graphics_key.xfb_enabled) {
         SetXfbState(graphics_key.xfb_state, regs);
     }