From 94aa9da562457e1fed4911d1cda770c3e42bd419 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 27 Jul 2014 17:34:11 +0200
Subject: [PATCH] Pica: Add triangle clipper.

---
 src/video_core/CMakeLists.txt             |   6 +-
 src/video_core/clipper.cpp                | 178 ++++++++++++++++++++++
 src/video_core/clipper.h                  |  21 +++
 src/video_core/pica.h                     |  22 ++-
 src/video_core/primitive_assembly.cpp     |   7 +-
 src/video_core/video_core.vcxproj         |   2 +
 src/video_core/video_core.vcxproj.filters |   2 +
 7 files changed, 230 insertions(+), 8 deletions(-)
 create mode 100644 src/video_core/clipper.cpp
 create mode 100644 src/video_core/clipper.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index b06f14db00..828bf30fc3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,11 +1,13 @@
-set(SRCS    command_processor.cpp
+set(SRCS    clipper.cpp
+            command_processor.cpp
             primitive_assembly.cpp
             utils.cpp
             vertex_shader.cpp
             video_core.cpp
             renderer_opengl/renderer_opengl.cpp)
 
-set(HEADERS command_processor.h
+set(HEADERS clipper.h
+            command_processor.h
             math.h
             primitive_assembly.h
             utils.h
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
new file mode 100644
index 0000000000..e9ab6242c2
--- /dev/null
+++ b/src/video_core/clipper.cpp
@@ -0,0 +1,178 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "clipper.h"
+#include "pica.h"
+#include "vertex_shader.h"
+
+namespace Pica {
+
+namespace Clipper {
+
+// One clipping plane in homogeneous clip space: the selected coordinate compared against pos * w.
+struct ClippingEdge {
+    enum Type {
+        POS_X = 0,
+        NEG_X = 1,
+        POS_Y = 2,
+        NEG_Y = 3,
+        POS_Z = 4,
+        NEG_Z = 5,
+    };
+
+    ClippingEdge(Type type, float24 position) : type(type), pos(position) {}
+
+    // True when the vertex is on the kept side of the plane; vertices exactly on it count as inside.
+    bool IsInside(const OutputVertex& vertex) const {
+        switch (type) {
+        case POS_X: return vertex.pos.x <= pos * vertex.pos.w;
+        case NEG_X: return vertex.pos.x >= pos * vertex.pos.w;
+        case POS_Y: return vertex.pos.y <= pos * vertex.pos.w;
+        case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w;
+        // TODO: Check z compares ... should be 0..1 instead?
+        case POS_Z: return vertex.pos.z <= pos * vertex.pos.w;
+        default:
+        case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w;
+        }
+    }
+
+    bool IsOutSide(const OutputVertex& vertex) const { return !IsInside(vertex); }
+
+    // Returns the vertex on the segment v0-v1 that lies on this plane. Meaningful only
+    // when v0 and v1 are on opposite sides; otherwise the divisor below can be zero.
+    OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
+        // Plane equation evaluated at a vertex; it is zero exactly on the plane.
+        // NOTE: the planes are hard-coded at +/-w here, i.e. pos is assumed to be +/-1.
+        auto dotpr = [this](const OutputVertex& vtx) {
+            switch (type) {
+            case POS_X: return vtx.pos.x - vtx.pos.w;
+            case NEG_X: return -vtx.pos.x - vtx.pos.w;
+            case POS_Y: return vtx.pos.y - vtx.pos.w;
+            case NEG_Y: return -vtx.pos.y - vtx.pos.w;
+
+            // TODO: Verify z clipping
+            case POS_Z: return vtx.pos.z - vtx.pos.w;
+            // Must vanish on z == -w, the plane IsInside tests for NEG_Z.
+            default:
+            case NEG_Z: return -vtx.pos.z - vtx.pos.w;
+            }
+        };
+
+        float24 dp = dotpr(v0);
+        float24 dp_prev = dotpr(v1);
+        float24 factor = dp_prev / (dp_prev - dp);
+        return OutputVertex::Lerp(factor, v0, v1);
+    }
+
+private:
+    Type type;
+    float24 pos;
+};
+
+static void InitScreenCoordinates(OutputVertex& vtx)
+{
+    // Viewport transform parameters, decoded from the Pica registers.
+    const float24 half_w      = float24::FromRawFloat24(registers.viewport_size_x);
+    const float24 half_h      = float24::FromRawFloat24(registers.viewport_size_y);
+    const float24 corner_x    = float24::FromFloat32(registers.viewport_corner.x);
+    const float24 corner_y    = float24::FromFloat32(registers.viewport_corner.y);
+    const float24 depth_scale = float24::FromRawFloat24(registers.viewport_depth_range);
+    const float24 depth_far   = float24::FromRawFloat24(registers.viewport_depth_far_plane);
+
+    const float24 one = float24::FromFloat32(1.0);
+    const float24 two = float24::FromFloat32(2.0);
+
+    // Divide the clip-space position by w before applying the viewport scale and offset.
+    const float24 ndc_x = vtx.pos.x / vtx.pos.w;
+    const float24 ndc_y = vtx.pos.y / vtx.pos.w;
+    const float24 ndc_z = vtx.pos.z / vtx.pos.w;
+
+    // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
+    vtx.screenpos[0] = (ndc_x + one) * half_w / two + corner_x;
+    vtx.screenpos[1] = (ndc_y + one) * half_h + corner_y;
+    vtx.screenpos[2] = depth_far - ndc_z * depth_scale;
+}
+
+/// Clips the triangle (v0, v1, v2) against the six clip planes constructed below and fills in
+/// screenpos for every vertex of the resulting polygon. Returns early, without doing so, once
+/// fewer than three vertices survive a plane. The vertices passed in may be modified in place.
+void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
+    // TODO (neobrain):
+    // The list of output vertices has some fixed maximum size,
+    // however I haven't taken the time to figure out what it is exactly.
+    // For now, we hence just assume a maximal size of 1000 vertices.
+    const size_t max_vertices = 1000;
+    std::vector<OutputVertex> buffer_vertices;
+    std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 };
+
+    // Make sure to reserve space for all vertices.
+    // Without this, buffer reallocation would invalidate references.
+    buffer_vertices.reserve(max_vertices);
+
+    // Simple implementation of the Sutherland-Hodgman clipping algorithm.
+    // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
+    for (const auto& edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)),
+                              ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)),
+                              ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)),
+                              ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)),
+                              ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
+                              ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {
+        const std::vector<OutputVertex*> input_list = output_list;
+        output_list.clear();
+
+        const OutputVertex* reference_vertex = input_list.back();
+
+        for (const auto& vertex : input_list) {
+            // NOTE: This algorithm changes vertex order in some cases!
+            if (edge.IsInside(*vertex)) {
+                if (edge.IsOutSide(*reference_vertex)) {
+                    buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex));
+                    output_list.push_back(&(buffer_vertices.back()));
+                }
+
+                output_list.push_back(vertex);
+            } else if (edge.IsInside(*reference_vertex)) {
+                buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex));
+                output_list.push_back(&(buffer_vertices.back()));
+            }
+
+            reference_vertex = vertex;
+        }
+
+        // Need to have at least a full triangle to continue...
+        if (output_list.size() < 3)
+            return;
+    }
+
+    InitScreenCoordinates(*(output_list[0]));
+    InitScreenCoordinates(*(output_list[1]));
+
+    // Walk the clipped polygon as a triangle fan anchored at output_list[0].
+    for (size_t i = 0; i + 2 < output_list.size(); ++i) {
+        OutputVertex& vtx0 = *(output_list[0]);
+        OutputVertex& vtx1 = *(output_list[i+1]);
+        OutputVertex& vtx2 = *(output_list[i+2]);
+        InitScreenCoordinates(vtx2);
+        // size_t counters are narrowed to int so that they match the %d conversions.
+        DEBUG_LOG(GPU,
+                  "Triangle %d/%d (%d buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), "
+                  "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
+                  "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)",
+                  static_cast<int>(i), static_cast<int>(output_list.size()), static_cast<int>(buffer_vertices.size()),
+                  vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(),
+                  vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
+                  vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(),
+                  vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(),
+                  vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(),
+                  vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32());
+        // TODO: Send triangle to rasterizer
+    }
+}
+
+
+} // namespace
+
+} // namespace
diff --git a/src/video_core/clipper.h b/src/video_core/clipper.h
new file mode 100644
index 0000000000..14d31ca1eb
--- /dev/null
+++ b/src/video_core/clipper.h
@@ -0,0 +1,21 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace Pica {
+
+namespace VertexShader {
+    struct OutputVertex;
+}
+
+namespace Clipper {
+
+using VertexShader::OutputVertex;
+/// Clips the triangle against the clip planes and sets screenpos on the surviving vertices; the arguments may be modified.
+void ProcessTriangle(OutputVertex& v0, OutputVertex& v1, OutputVertex& v2);
+
+} // namespace
+
+} // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 6bbd3ce33d..1ced0d3230 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -50,7 +50,12 @@ struct Regs {
     INSERT_PADDING_WORDS(0x1);
     BitField<0, 24, u32> viewport_size_y;
 
-    INSERT_PADDING_WORDS(0xc);
+    INSERT_PADDING_WORDS(0x9);
+
+    BitField<0, 24, u32> viewport_depth_range; // float24
+    BitField<0, 24, u32> viewport_depth_far_plane; // float24
+
+    INSERT_PADDING_WORDS(0x1);
 
     union {
         // Maps components of output vertex attributes to semantics
@@ -82,7 +87,14 @@ struct Regs {
         BitField<24, 5, Semantic> map_w;
     } vs_output_attributes[7];
 
-    INSERT_PADDING_WORDS(0x1a9);
+    INSERT_PADDING_WORDS(0x11);
+
+    union {
+        BitField< 0, 16, u32> x;
+        BitField<16, 16, u32> y;
+    } viewport_corner;
+
+    INSERT_PADDING_WORDS(0x197);
 
     struct {
         enum class Format : u64 {
@@ -340,6 +352,9 @@ struct Regs {
 
         ADD_FIELD(viewport_size_x);
         ADD_FIELD(viewport_size_y);
+        ADD_FIELD(viewport_depth_range);
+        ADD_FIELD(viewport_depth_far_plane);
+        ADD_FIELD(viewport_corner);
         ADD_FIELD(vertex_attributes);
         ADD_FIELD(index_array);
         ADD_FIELD(num_vertices);
@@ -391,8 +406,11 @@ private:
 
 ASSERT_REG_POSITION(viewport_size_x, 0x41);
 ASSERT_REG_POSITION(viewport_size_y, 0x43);
+ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
+ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e);
 ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
 ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
+ASSERT_REG_POSITION(viewport_corner, 0x68);
 ASSERT_REG_POSITION(vertex_attributes, 0x200);
 ASSERT_REG_POSITION(index_array, 0x227);
 ASSERT_REG_POSITION(num_vertices, 0x228);
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index b2196d13cf..2354ffb99e 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2
 // Refer to the license.txt file included.
 
+#include "clipper.h"
 #include "pica.h"
 #include "primitive_assembly.h"
 #include "vertex_shader.h"
@@ -23,8 +24,7 @@ void SubmitVertex(OutputVertex& vtx)
             } else {
                 buffer_index = 0;
 
-                // TODO
-                // Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
+                Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
             }
             break;
 
@@ -32,8 +32,7 @@ void SubmitVertex(OutputVertex& vtx)
             if (buffer_index == 2) {
                 buffer_index = 0;
 
-                // TODO
-                // Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
+                Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
 
                 buffer[1] = vtx;
             } else {
diff --git a/src/video_core/video_core.vcxproj b/src/video_core/video_core.vcxproj
index 9cf3b0858e..99ab63dce1 100644
--- a/src/video_core/video_core.vcxproj
+++ b/src/video_core/video_core.vcxproj
@@ -20,6 +20,7 @@
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="renderer_opengl\renderer_opengl.cpp" />
+    <ClCompile Include="clipper.cpp" />
     <ClCompile Include="command_processor.cpp" />
     <ClCompile Include="primitive_assembly.cpp" />
     <ClCompile Include="utils.cpp" />
@@ -27,6 +28,7 @@
     <ClCompile Include="video_core.cpp" />
   </ItemGroup>
   <ItemGroup>
+    <ClInclude Include="clipper.h" />
     <ClInclude Include="command_processor.h" />
     <ClInclude Include="gpu_debugger.h" />
     <ClInclude Include="math.h" />
diff --git a/src/video_core/video_core.vcxproj.filters b/src/video_core/video_core.vcxproj.filters
index 9da20b2849..5222f2fa0a 100644
--- a/src/video_core/video_core.vcxproj.filters
+++ b/src/video_core/video_core.vcxproj.filters
@@ -9,6 +9,7 @@
     <ClCompile Include="renderer_opengl\renderer_opengl.cpp">
       <Filter>renderer_opengl</Filter>
     </ClCompile>
+    <ClCompile Include="clipper.cpp" />
     <ClCompile Include="command_processor.cpp" />
     <ClCompile Include="primitive_assembly.cpp" />
     <ClCompile Include="utils.cpp" />
@@ -19,6 +20,7 @@
     <ClInclude Include="renderer_opengl\renderer_opengl.h">
       <Filter>renderer_opengl</Filter>
     </ClInclude>
+    <ClInclude Include="clipper.h" />
     <ClInclude Include="command_processor.h" />
     <ClInclude Include="gpu_debugger.h" />
     <ClInclude Include="math.h" />