From 2b58652f0897053d4da04deb586490220ab5a774 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 27 Jul 2019 19:40:10 -0300
Subject: [PATCH] maxwell_3d: Slow implementation of passed samples (query 21)

Implements the samples passed counter with GL_SAMPLES_PASSED. Each guest query
waits immediately for the host query result, hence the slow implementation.
---
 src/video_core/CMakeLists.txt                 |  2 +
 src/video_core/engines/maxwell_3d.cpp         | 39 ++++++++----
 src/video_core/engines/maxwell_3d.h           | 38 ++++++++++--
 src/video_core/rasterizer_interface.h         | 10 ++++
 .../renderer_opengl/gl_query_cache.cpp        | 59 +++++++++++++++++++
 .../renderer_opengl/gl_query_cache.h          | 41 +++++++++++++
 .../renderer_opengl/gl_rasterizer.cpp         | 24 ++++++++
 .../renderer_opengl/gl_rasterizer.h           |  5 ++
 8 files changed, 201 insertions(+), 17 deletions(-)
 create mode 100644 src/video_core/renderer_opengl/gl_query_cache.cpp
 create mode 100644 src/video_core/renderer_opengl/gl_query_cache.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index db9332d003..3208f4993d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -74,6 +74,8 @@ add_library(video_core STATIC
     renderer_opengl/gl_stream_buffer.h
     renderer_opengl/gl_texture_cache.cpp
     renderer_opengl/gl_texture_cache.h
+    renderer_opengl/gl_query_cache.cpp
+    renderer_opengl/gl_query_cache.h
     renderer_opengl/maxwell_to_gl.h
     renderer_opengl/renderer_opengl.cpp
     renderer_opengl/renderer_opengl.h
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 0b3e8749b8..fe91ff6a04 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -400,6 +400,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         ProcessQueryCondition();
         break;
     }
+    case MAXWELL3D_REG_INDEX(counter_reset): {
+        ProcessCounterReset();
+        break;
+    }
     case MAXWELL3D_REG_INDEX(sync_info): {
         ProcessSyncPoint();
         break;
@@ -544,23 +548,23 @@ void Maxwell3D::ProcessQueryGet() {
                "Units other than CROP are unimplemented");
 
     switch (regs.query.query_get.operation) {
-    case Regs::QueryOperation::Release: {
-        const u64 result = regs.query.query_sequence;
-        StampQueryResult(result, regs.query.query_get.short_query == 0);
+    case Regs::QueryOperation::Release:
+        StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
         break;
-    }
-    case Regs::QueryOperation::Acquire: {
-        // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
-        // to write a value that matches the current payload.
+    case Regs::QueryOperation::Acquire:
+        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
+        // matches the current payload.
         UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
         break;
-    }
     case Regs::QueryOperation::Counter: {
-        u64 result{};
+        u64 result;
         switch (regs.query.query_get.select) {
         case Regs::QuerySelect::Zero:
             result = 0;
             break;
+        case Regs::QuerySelect::SamplesPassed:
+            result = rasterizer.Query(VideoCore::QueryType::SamplesPassed);
+            break;
         default:
             result = 1;
             UNIMPLEMENTED_MSG("Unimplemented query select type {}",
@@ -569,15 +573,13 @@ void Maxwell3D::ProcessQueryGet() {
         StampQueryResult(result, regs.query.query_get.short_query == 0);
         break;
     }
-    case Regs::QueryOperation::Trap: {
+    case Regs::QueryOperation::Trap:
         UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
         break;
-    }
-    default: {
+    default:
         UNIMPLEMENTED_MSG("Unknown query operation");
         break;
     }
-    }
 }
 
 void Maxwell3D::ProcessQueryCondition() {
@@ -619,6 +621,17 @@ void Maxwell3D::ProcessQueryCondition() {
     }
 }
 
+void Maxwell3D::ProcessCounterReset() {
+    // Samples passed is the only counter that is forwarded to the rasterizer so far.
+    if (regs.counter_reset == Regs::CounterReset::SampleCnt) {
+        rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed);
+        return;
+    }
+
+    // Every other reset request is reported together with the raw register value.
+    UNIMPLEMENTED_MSG("counter_reset={}", static_cast<u32>(regs.counter_reset));
+}
+
 void Maxwell3D::ProcessSyncPoint() {
     const u32 sync_point = regs.sync_info.sync_point.Value();
     const u32 increment = regs.sync_info.increment.Value();
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0a2af54e50..d21f678ed0 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -409,6 +409,27 @@ public:
             Linear = 1,
         };
 
+        enum class CounterReset : u32 { // Values written to the counter_reset register.
+            SampleCnt = 0x01, // Resets the samples passed counter.
+            Unk02 = 0x02,
+            Unk03 = 0x03,
+            Unk04 = 0x04,
+            EmittedPrimitives = 0x10, // Not tested
+            Unk11 = 0x11,
+            Unk12 = 0x12,
+            Unk13 = 0x13,
+            Unk15 = 0x15,
+            Unk16 = 0x16,
+            Unk17 = 0x17,
+            Unk18 = 0x18,
+            Unk1A = 0x1A,
+            Unk1B = 0x1B,
+            Unk1C = 0x1C,
+            Unk1D = 0x1D,
+            Unk1E = 0x1E,
+            GeneratedPrimitives = 0x1F,
+        };
+
         struct Cull {
             enum class FrontFace : u32 {
                 ClockWise = 0x0900,
@@ -857,7 +878,7 @@ public:
                     BitField<7, 1, u32> c7;
                 } clip_distance_enabled;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                u32 samplecnt_enable;
 
                 float point_size;
 
@@ -865,7 +886,11 @@ public:
 
                 u32 point_sprite_enable;
 
-                INSERT_UNION_PADDING_WORDS(0x5);
+                INSERT_UNION_PADDING_WORDS(0x3);
+
+                CounterReset counter_reset;
+
+                INSERT_UNION_PADDING_WORDS(0x1);
 
                 u32 zeta_enable;
 
@@ -1412,12 +1437,15 @@ private:
     /// Handles a write to the QUERY_GET register.
     void ProcessQueryGet();
 
-    // Writes the query result accordingly
+    /// Writes the query result accordingly.
     void StampQueryResult(u64 payload, bool long_query);
 
-    // Handles Conditional Rendering
+    /// Handles conditional rendering.
     void ProcessQueryCondition();
 
+    /// Handles counter resets.
+    void ProcessCounterReset();
+
     /// Handles writes to syncing register.
     void ProcessSyncPoint();
 
@@ -1499,8 +1527,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
 ASSERT_REG_POSITION(vb_element_base, 0x50D);
 ASSERT_REG_POSITION(vb_base_instance, 0x50E);
 ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
+ASSERT_REG_POSITION(samplecnt_enable, 0x545);
 ASSERT_REG_POSITION(point_size, 0x546);
 ASSERT_REG_POSITION(point_sprite_enable, 0x548);
+ASSERT_REG_POSITION(counter_reset, 0x54C);
 ASSERT_REG_POSITION(zeta_enable, 0x54E);
 ASSERT_REG_POSITION(multisample_control, 0x54F);
 ASSERT_REG_POSITION(condition, 0x554);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index c586cd6fec..2fc6275398 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -17,6 +17,10 @@ class MemoryManager;
 
 namespace VideoCore {
 
+enum class QueryType { // Kinds of queries a rasterizer can reset (ResetCounter) or read (Query).
+    SamplesPassed,
+};
+
 enum class LoadCallbackStage {
     Prepare,
     Decompile,
@@ -41,6 +45,12 @@ public:
     /// Dispatches a compute shader invocation
     virtual void DispatchCompute(GPUVAddr code_addr) = 0;
 
+    /// Resets the counter of a query
+    virtual void ResetCounter(QueryType type) = 0;
+
+    /// Returns the value of a GPU query
+    virtual u64 Query(QueryType type) = 0;
+
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
 
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
new file mode 100644
index 0000000000..1c7dc999ad
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -0,0 +1,74 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_query_cache.h"
+
+namespace OpenGL {
+
+/// Creates the host query object for the given query target.
+/// @note Enable, Disable and Reset always begin/end GL_SAMPLES_PASSED, so that is the only
+/// target that works correctly for now.
+HostCounter::HostCounter(GLenum target) {
+    query.Create(target);
+}
+
+HostCounter::~HostCounter() = default;
+
+/// Begins or ends the host query so that it matches the requested state.
+void HostCounter::UpdateState(bool enabled) {
+    if (enabled) {
+        Enable();
+    } else {
+        Disable();
+    }
+}
+
+/// Zeroes the accumulated value and leaves the counter disabled.
+void HostCounter::Reset() {
+    if (is_beginned) {
+        // The result is discarded anyway: end the query without a blocking read-back.
+        glEndQuery(GL_SAMPLES_PASSED);
+        is_beginned = false;
+    }
+    counter = 0;
+}
+
+/// Returns the accumulated value, including the run in progress (if any).
+u64 HostCounter::Query() {
+    if (is_beginned) {
+        // A result can only be read from an ended query, so end the current run (Disable
+        // accumulates its result) and begin a new one to keep counting.
+        Disable();
+        Enable();
+    }
+    return counter;
+}
+
+/// Begins the host query; does nothing when it is already active.
+void HostCounter::Enable() {
+    if (is_beginned) {
+        return;
+    }
+    is_beginned = true;
+    glBeginQuery(GL_SAMPLES_PASSED, query.handle);
+}
+
+/// Ends the host query and accumulates its result; does nothing when it is not active.
+void HostCounter::Disable() {
+    if (!is_beginned) {
+        return;
+    }
+    glEndQuery(GL_SAMPLES_PASSED);
+    is_beginned = false;
+
+    // The next glBeginQuery resets the query object, so the result has to be read back now or
+    // the samples counted during this run would be lost. GL_QUERY_RESULT waits for the result.
+    GLuint64 value{};
+    glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
+    counter += value;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
new file mode 100644
index 0000000000..52c6546bf9
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -0,0 +1,41 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace OpenGL {
+
+class HostCounter final { // Accumulating wrapper around a single OpenGL query object.
+public:
+    explicit HostCounter(GLenum target); ///< Creates the OpenGL query for the given target.
+    ~HostCounter();
+
+    /// Starts counting when enabled is true, stops counting otherwise.
+    void UpdateState(bool enabled);
+
+    /// Zeroes the accumulated value and stops the counter if it was running.
+    void Reset();
+
+    /// Returns the accumulated value of the query, including the run in progress (if any).
+    /// @note It may harm precision of future queries if the counter is not disabled.
+    u64 Query();
+
+private:
+    /// Begins the OpenGL query; does nothing when it is already active.
+    void Enable();
+
+    /// Ends the OpenGL query; does nothing when it is not active.
+    void Disable();
+
+    OGLQuery query;     ///< OpenGL query.
+    u64 counter{};      ///< Sum of the query results read back so far.
+    bool is_beginned{}; ///< True while the OpenGL query is active (begun and not yet ended).
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index b0eb14c8b5..8d132732a1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -547,6 +547,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
     auto& gpu = system.GPU().Maxwell3D();
 
+    const auto& regs = gpu.regs;
+    samples_passed.UpdateState(regs.samplecnt_enable);
+
     SyncRasterizeEnable(state);
     SyncColorMask();
     SyncFragmentColorClampState();
@@ -709,6 +712,27 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
 }
 
+void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
+    // Samples passed is the only query type backed by a host counter.
+    if (type == VideoCore::QueryType::SamplesPassed) {
+        samples_passed.Reset();
+        return;
+    }
+
+    // Anything else is reported with its raw enumerator value.
+    UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
+}
+
+u64 RasterizerOpenGL::Query(VideoCore::QueryType type) {
+    if (type == VideoCore::QueryType::SamplesPassed) {
+        return samples_passed.Query();
+    }
+
+    // Unsupported query types are reported and answer with a placeholder value of 1.
+    UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
+    return 1;
+}
+
 void RasterizerOpenGL::FlushAll() {}
 
 void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 0501f38289..32bcaf8c2e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -24,6 +24,7 @@
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_framebuffer_cache.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_sampler_cache.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -61,6 +62,8 @@ public:
     bool DrawMultiBatch(bool is_indexed) override;
     void Clear() override;
     void DispatchCompute(GPUVAddr code_addr) override;
+    void ResetCounter(VideoCore::QueryType type) override;
+    u64 Query(VideoCore::QueryType type) override;
     void FlushAll() override;
     void FlushRegion(CacheAddr addr, u64 size) override;
     void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -221,6 +224,8 @@ private:
     GLintptr SetupIndexBuffer();
 
     void SetupShaders(GLenum primitive_mode);
+
+    HostCounter samples_passed{GL_SAMPLES_PASSED};
 };
 
 } // namespace OpenGL