From 0cb3bcfbb7081456dbe8bbe262350f85c7ebf3f7 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 27 Jan 2020 22:48:15 -0400
Subject: [PATCH 1/3] Maxwell3D: Correct query reporting.

---
 src/video_core/engines/maxwell_3d.cpp | 99 ++++++++++++++-------------
 src/video_core/engines/maxwell_3d.h   | 16 +++--
 2 files changed, 61 insertions(+), 54 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7cea146f01..2a58557952 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -519,61 +519,66 @@ void Maxwell3D::ProcessFirmwareCall4() {
     regs.reg_array[0xd00] = 1;
 }
 
-void Maxwell3D::ProcessQueryGet() {
-    const GPUVAddr sequence_address{regs.query.QueryAddress()};
-    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
-    // VAddr before writing.
-
-    // TODO(Subv): Support the other query units.
-    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
-               "Units other than CROP are unimplemented");
-
-    u64 result = 0;
-
-    // TODO(Subv): Support the other query variables
-    switch (regs.query.query_get.select) {
-    case Regs::QuerySelect::Zero:
-        // This seems to actually write the query sequence to the query address.
-        result = regs.query.query_sequence;
-        break;
-    default:
-        result = 1;
-        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
-                          static_cast<u32>(regs.query.query_get.select.Value()));
-    }
-
-    // TODO(Subv): Research and implement how query sync conditions work.
-
+void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
     struct LongQueryResult {
         u64_le value;
         u64_le timestamp;
     };
     static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
+    const GPUVAddr sequence_address{regs.query.QueryAddress()};
+    if (long_query) {
+        // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
+        // GPU, this command may actually take a while to complete in real hardware due to GPU
+        // wait queues.
+        LongQueryResult query_result{};
+        query_result.value = payload;
+        // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
+        query_result.timestamp = system.CoreTiming().GetTicks();
+        memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+    } else {
+        memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
+    }
+}
 
-    switch (regs.query.query_get.mode) {
-    case Regs::QueryMode::Write:
-    case Regs::QueryMode::Write2: {
-        u32 sequence = regs.query.query_sequence;
-        if (regs.query.query_get.short_query) {
-            // Write the current query sequence to the sequence address.
-            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
-            // query.
-            memory_manager.Write<u32>(sequence_address, sequence);
-        } else {
-            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
-            // GPU, this command may actually take a while to complete in real hardware due to GPU
-            // wait queues.
-            LongQueryResult query_result{};
-            query_result.value = result;
-            // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
-            query_result.timestamp = system.CoreTiming().GetTicks();
-            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
-        }
+void Maxwell3D::ProcessQueryGet() {
+    // TODO(Subv): Support the other query units.
+    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
+               "Units other than CROP are unimplemented");
+
+    switch (regs.query.query_get.operation) {
+    case Regs::QueryOperation::Release: {
+        u64 result = regs.query.query_sequence;
+        StampQueryResult(result, regs.query.query_get.short_query == 0);
+        break;
+    }
+    case Regs::QueryOperation::Acquire: {
+        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU
+        // to write a value that matches the current payload.
+        UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
+        break;
+    }
+    case Regs::QueryOperation::Counter: {
+        u64 result{};
+        switch (regs.query.query_get.select) {
+        case Regs::QuerySelect::Zero:
+            result = 0;
+            break;
+        default:
+            result = 1;
+            UNIMPLEMENTED_MSG("Unimplemented query select type {}",
+                              static_cast<u32>(regs.query.query_get.select.Value()));
+        }
+        StampQueryResult(result, regs.query.query_get.short_query == 0);
+        break;
+    }
+    case Regs::QueryOperation::Trap: {
+        UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
+        break;
+    }
+    default: {
+        UNIMPLEMENTED_MSG("Unknown query operation");
         break;
     }
-    default:
-        UNIMPLEMENTED_MSG("Query mode {} not implemented",
-                          static_cast<u32>(regs.query.query_get.mode.Value()));
     }
 }
 
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index e437bacb7b..78e0557653 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -71,12 +71,11 @@ public:
         static constexpr std::size_t MaxConstBuffers = 18;
         static constexpr std::size_t MaxConstBufferSize = 0x10000;
 
-        enum class QueryMode : u32 {
-            Write = 0,
-            Sync = 1,
-            // TODO(Subv): It is currently unknown what the difference between method 2 and method 0
-            // is.
-            Write2 = 2,
+        enum class QueryOperation : u32 {
+            Release = 0,
+            Acquire = 1,
+            Counter = 2,
+            Trap = 3,
         };
 
         enum class QueryUnit : u32 {
@@ -1077,7 +1076,7 @@ public:
                     u32 query_sequence;
                     union {
                         u32 raw;
-                        BitField<0, 2, QueryMode> mode;
+                        BitField<0, 2, QueryOperation> operation;
                         BitField<4, 1, u32> fence;
                         BitField<12, 4, QueryUnit> unit;
                         BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1409,6 +1408,9 @@ private:
     /// Handles a write to the QUERY_GET register.
     void ProcessQueryGet();
 
+    /// Writes the query payload to the query address; long queries also include a timestamp.
+    void StampQueryResult(u64 payload, bool long_query);
+
     // Handles Conditional Rendering
     void ProcessQueryCondition();
 

From 8e9a4944dbbb4a22d149bb989faf32db0a979766 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 10:32:51 -0400
Subject: [PATCH 2/3] GPU: Implement GPU Clock correctly.

---
 src/video_core/engines/maxwell_3d.cpp |  3 ++-
 src/video_core/gpu.cpp                | 14 +++++++++++++-
 src/video_core/gpu.h                  |  2 ++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2a58557952..a7e1dee040 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
 #include "core/core_timing.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/textures/texture.h"
@@ -533,7 +534,7 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
         LongQueryResult query_result{};
         query_result.value = payload;
         // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
-        query_result.timestamp = system.CoreTiming().GetTicks();
+        query_result.timestamp = system.GPU().GetTicks();
         memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
     } else {
         memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 062ca83b81..4aca39faf1 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,6 +6,7 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -122,6 +123,17 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
     return true;
 }
 
+// This values were reversed engineered by fincs from NVN
+// The gpu clock is reported in units of 385/625 nanoseconds
+constexpr u64 gpu_ticks_num = 384;
+constexpr u64 gpu_ticks_den = 625;
+
+u64 GPU::GetTicks() const {
+    const u64 cpu_ticks = system.CoreTiming().GetTicks();
+    const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+    return (nanoseconds * gpu_ticks_num) / gpu_ticks_den;
+}
+
 void GPU::FlushCommands() {
     renderer.Rasterizer().FlushCommands();
 }
@@ -340,7 +352,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
         block.sequence = regs.semaphore_sequence;
         // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
         // CoreTiming
-        block.timestamp = system.CoreTiming().GetTicks();
+        block.timestamp = GetTicks();
         memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
                                    sizeof(block));
     } else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b648317bbe..07727210c1 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -192,6 +192,8 @@ public:
 
     bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
 
+    u64 GetTicks() const;
+
     std::unique_lock<std::mutex> LockSync() {
         return std::unique_lock{sync_mutex};
     }

From d6ed31b9faeb2b1ee7c04098e614a73ca11869af Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 13 Feb 2020 18:16:07 -0400
Subject: [PATCH 3/3] GPU: Address Feedback.

---
 src/video_core/engines/maxwell_3d.cpp |  7 ++-----
 src/video_core/gpu.cpp                | 14 ++++++++------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a7e1dee040..0b3e8749b8 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -531,10 +531,7 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
         // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
         // GPU, this command may actually take a while to complete in real hardware due to GPU
         // wait queues.
-        LongQueryResult query_result{};
-        query_result.value = payload;
-        // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
-        query_result.timestamp = system.GPU().GetTicks();
+        LongQueryResult query_result{payload, system.GPU().GetTicks()};
         memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
     } else {
         memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
@@ -548,7 +545,7 @@ void Maxwell3D::ProcessQueryGet() {
 
     switch (regs.query.query_get.operation) {
     case Regs::QueryOperation::Release: {
-        u64 result = regs.query.query_sequence;
+        const u64 result = regs.query.query_sequence;
         StampQueryResult(result, regs.query.query_get.short_query == 0);
         break;
     }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4aca39faf1..4419ab7353 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -123,15 +123,17 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
     return true;
 }
 
-// This values were reversed engineered by fincs from NVN
-// The gpu clock is reported in units of 385/625 nanoseconds
-constexpr u64 gpu_ticks_num = 384;
-constexpr u64 gpu_ticks_den = 625;
-
 u64 GPU::GetTicks() const {
+    // These values were reverse engineered by fincs from NVN.
+    // The GPU clock is reported as nanoseconds * 384/625 (384/625 ticks per nanosecond).
+    constexpr u64 gpu_ticks_num = 384;
+    constexpr u64 gpu_ticks_den = 625;
+
     const u64 cpu_ticks = system.CoreTiming().GetTicks();
     const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
-    return (nanoseconds * gpu_ticks_num) / gpu_ticks_den;
+    const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
+    const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
+    return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
 }
 
 void GPU::FlushCommands() {