From 38c7b20475cb2c718b2d126acf07dd480c9b5038 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 9 Sep 2015 18:30:03 -0400
Subject: [PATCH 01/32] pica: Add pica_types module and move float24
 definition.

---
 src/video_core/CMakeLists.txt |   1 +
 src/video_core/pica.h         | 114 +------------------------------
 src/video_core/pica_types.h   | 124 ++++++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+), 112 deletions(-)
 create mode 100644 src/video_core/pica_types.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c3d7294d58..4b5d298f36 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -33,6 +33,7 @@ set(HEADERS
             command_processor.h
             gpu_debugger.h
             pica.h
+            pica_types.h
             primitive_assembly.h
             rasterizer.h
             rasterizer_interface.h
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 2f1b2dec4f..b8db7869a7 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -16,6 +16,8 @@
 #include "common/vector_math.h"
 #include "common/logging/log.h"
 
+#include "pica_types.h"
+
 namespace Pica {
 
 // Returns index corresponding to the Regs member labeled by field_name
@@ -1026,118 +1028,6 @@ static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig st
 static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
 static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
 
-struct float24 {
-    static float24 FromFloat32(float val) {
-        float24 ret;
-        ret.value = val;
-        return ret;
-    }
-
-    // 16 bit mantissa, 7 bit exponent, 1 bit sign
-    // TODO: No idea if this works as intended
-    static float24 FromRawFloat24(u32 hex) {
-        float24 ret;
-        if ((hex & 0xFFFFFF) == 0) {
-            ret.value = 0;
-        } else {
-            u32 mantissa = hex & 0xFFFF;
-            u32 exponent = (hex >> 16) & 0x7F;
-            u32 sign = hex >> 23;
-            ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f));
-            if (sign)
-                ret.value = -ret.value;
-        }
-        return ret;
-    }
-
-    static float24 Zero() {
-        return FromFloat32(0.f);
-    }
-
-    // Not recommended for anything but logging
-    float ToFloat32() const {
-        return value;
-    }
-
-    float24 operator * (const float24& flt) const {
-        if ((this->value == 0.f && !std::isnan(flt.value)) ||
-            (flt.value == 0.f && !std::isnan(this->value)))
-            // PICA gives 0 instead of NaN when multiplying by inf
-            return Zero();
-        return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
-    }
-
-    float24 operator / (const float24& flt) const {
-        return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
-    }
-
-    float24 operator + (const float24& flt) const {
-        return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
-    }
-
-    float24 operator - (const float24& flt) const {
-        return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
-    }
-
-    float24& operator *= (const float24& flt) {
-        if ((this->value == 0.f && !std::isnan(flt.value)) ||
-            (flt.value == 0.f && !std::isnan(this->value)))
-            // PICA gives 0 instead of NaN when multiplying by inf
-            *this = Zero();
-        else value *= flt.ToFloat32();
-        return *this;
-    }
-
-    float24& operator /= (const float24& flt) {
-        value /= flt.ToFloat32();
-        return *this;
-    }
-
-    float24& operator += (const float24& flt) {
-        value += flt.ToFloat32();
-        return *this;
-    }
-
-    float24& operator -= (const float24& flt) {
-        value -= flt.ToFloat32();
-        return *this;
-    }
-
-    float24 operator - () const {
-        return float24::FromFloat32(-ToFloat32());
-    }
-
-    bool operator < (const float24& flt) const {
-        return ToFloat32() < flt.ToFloat32();
-    }
-
-    bool operator > (const float24& flt) const {
-        return ToFloat32() > flt.ToFloat32();
-    }
-
-    bool operator >= (const float24& flt) const {
-        return ToFloat32() >= flt.ToFloat32();
-    }
-
-    bool operator <= (const float24& flt) const {
-        return ToFloat32() <= flt.ToFloat32();
-    }
-
-    bool operator == (const float24& flt) const {
-        return ToFloat32() == flt.ToFloat32();
-    }
-
-    bool operator != (const float24& flt) const {
-        return ToFloat32() != flt.ToFloat32();
-    }
-
-private:
-    // Stored as a regular float, merely for convenience
-    // TODO: Perform proper arithmetic on this!
-    float value;
-};
-static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float");
-
 /// Struct used to describe current Pica state
 struct State {
     /// Pica registers
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
new file mode 100644
index 0000000000..de798aa817
--- /dev/null
+++ b/src/video_core/pica_types.h
@@ -0,0 +1,124 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Pica {
+
+struct float24 {
+    static float24 FromFloat32(float val) {
+        float24 ret;
+        ret.value = val;
+        return ret;
+    }
+
+    // Decodes the low 24 bits of `hex`: 16 bit mantissa, 7 bit exponent (bias 63), 1 bit sign
+    // TODO: Unverified. Only all-zero is special-cased; other patterns assume an implicit leading 1
+    static float24 FromRawFloat24(u32 hex) {
+        float24 ret;
+        if ((hex & 0xFFFFFF) == 0) {
+            ret.value = 0;
+        } else {
+            u32 mantissa = hex & 0xFFFF;
+            u32 exponent = (hex >> 16) & 0x7F;
+            u32 sign = (hex >> 23) & 1; // Mask, so that bits above the 24-bit encoding are ignored
+            ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f));
+            if (sign)
+                ret.value = -ret.value;
+        }
+        return ret;
+    }
+
+    static float24 Zero() {
+        return FromFloat32(0.f);
+    }
+
+    // Not recommended for anything but logging
+    float ToFloat32() const {
+        return value;
+    }
+
+    float24 operator * (const float24& flt) const {
+        if ((this->value == 0.f && !std::isnan(flt.value)) ||
+            (flt.value == 0.f && !std::isnan(this->value)))
+            // PICA gives 0 instead of NaN when multiplying by inf
+            return Zero();
+        return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
+    }
+
+    float24 operator / (const float24& flt) const {
+        return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
+    }
+
+    float24 operator + (const float24& flt) const {
+        return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
+    }
+
+    float24 operator - (const float24& flt) const {
+        return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
+    }
+
+    float24& operator *= (const float24& flt) {
+        if ((this->value == 0.f && !std::isnan(flt.value)) ||
+            (flt.value == 0.f && !std::isnan(this->value)))
+            // PICA gives 0 instead of NaN when multiplying by inf
+            *this = Zero();
+        else value *= flt.ToFloat32();
+        return *this;
+    }
+
+    float24& operator /= (const float24& flt) {
+        value /= flt.ToFloat32();
+        return *this;
+    }
+
+    float24& operator += (const float24& flt) {
+        value += flt.ToFloat32();
+        return *this;
+    }
+
+    float24& operator -= (const float24& flt) {
+        value -= flt.ToFloat32();
+        return *this;
+    }
+
+    float24 operator - () const {
+        return float24::FromFloat32(-ToFloat32());
+    }
+
+    bool operator < (const float24& flt) const {
+        return ToFloat32() < flt.ToFloat32();
+    }
+
+    bool operator > (const float24& flt) const {
+        return ToFloat32() > flt.ToFloat32();
+    }
+
+    bool operator >= (const float24& flt) const {
+        return ToFloat32() >= flt.ToFloat32();
+    }
+
+    bool operator <= (const float24& flt) const {
+        return ToFloat32() <= flt.ToFloat32();
+    }
+
+    bool operator == (const float24& flt) const {
+        return ToFloat32() == flt.ToFloat32();
+    }
+
+    bool operator != (const float24& flt) const {
+        return ToFloat32() != flt.ToFloat32();
+    }
+
+private:
+    // Stored as a regular float, merely for convenience
+    // TODO: Perform proper arithmetic on this!
+    float value;
+};
+
+static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float");
+
+} // namespace Pica

From 4369767c721e9633fc6cd9c49b6142ff9b2fa8ea Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 12 Sep 2015 18:47:15 -0400
Subject: [PATCH 02/32] pica: Add decodings for distance attenuation and LUT
 registers.

---
 src/video_core/pica.h | 105 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 104 insertions(+), 1 deletion(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b8db7869a7..81a568e885 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -643,7 +643,110 @@ struct Regs {
         }
     }
 
-    INSERT_PADDING_WORDS(0xe0);
+    INSERT_PADDING_WORDS(0x20);
+
+    struct {
+        union LightColor {
+            BitField< 0, 10, u32> b;
+            BitField<10, 10, u32> g;
+            BitField<20, 10, u32> r;
+
+            Math::Vec3f ToVec3f() const {
+                return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
+            }
+        };
+
+        struct LightSrc {
+            LightColor specular_0;  // material.specular_0 * light.specular_0
+            LightColor specular_1;  // material.specular_1 * light.specular_1
+            LightColor diffuse;     // material.diffuse * light.diffuse
+            LightColor ambient;     // material.ambient * light.ambient
+
+            struct {
+                // Encoded as 16-bit floating point
+                u16 x;
+                u16 y;
+                u16 z;
+                u16 unk;
+
+                INSERT_PADDING_WORDS(0x3);
+
+                // 1.f if 0, otherwise 0.f
+                BitField<0, 1, u32> w;
+            } position;
+
+
+            BitField<0, 20, u32> dist_atten_bias;
+            BitField<0, 20, u32> dist_atten_scale;
+
+            INSERT_PADDING_WORDS(0x4);
+        };
+        static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words");
+
+        LightSrc light[8];
+        LightColor global_ambient; // emission + (material.ambient * lighting.ambient)
+        INSERT_PADDING_WORDS(0x1);
+        BitField<0, 3, u32> src_num; // number of enabled lights - 1
+        INSERT_PADDING_WORDS(0x1);
+
+        union {
+            // One bit per light (bits 24-31) controlling distance attenuation. Note the inverted
+            // encoding used by IsEnabled(): a cleared bit means enabled, a set bit means disabled
+
+            BitField<24, 1, u32> light_0;
+            BitField<25, 1, u32> light_1;
+            BitField<26, 1, u32> light_2;
+            BitField<27, 1, u32> light_3;
+            BitField<28, 1, u32> light_4;
+            BitField<29, 1, u32> light_5;
+            BitField<30, 1, u32> light_6;
+            BitField<31, 1, u32> light_7;
+
+            bool IsEnabled(unsigned index) const { // index must be in the range 0-7 (not checked)
+                const unsigned enable[] = { light_0, light_1, light_2, light_3, light_4, light_5, light_6, light_7 };
+                return enable[index] == 0;
+            }
+        } dist_atten_enable;
+
+        union {
+            BitField<0, 8, u32> index;      ///< Index at which to set data in the LUT
+            BitField<8, 5, u32> type;       ///< Type of LUT for which to set data
+        } lut_config;
+
+        BitField<0, 1, u32> disable;
+        INSERT_PADDING_WORDS(0x1);
+
+        // When data is written to any of these registers, it gets written to the lookup table of
+        // the selected type at the selected index, specified above in the `lut_config` register.
+        // With each write, `lut_config.index` is incremented. It does not matter which of these
+        // registers is written to, the behavior will be the same.
+        u32 lut_data[8];
+
+        INSERT_PADDING_WORDS(0x9);
+
+        union {
+            // There are 8 light enable "slots", corresponding to the total number of lights
+            // supported by Pica. For N enabled lights (specified by register 0x1c2, or 'src_num'
+            // above), the first N slots below will be set to integers within the range of 0-7,
+            // corresponding to the actual light that is enabled for each slot.
+
+            BitField< 0, 3, u32> slot_0;
+            BitField< 4, 3, u32> slot_1;
+            BitField< 8, 3, u32> slot_2;
+            BitField<12, 3, u32> slot_3;
+            BitField<16, 3, u32> slot_4;
+            BitField<20, 3, u32> slot_5;
+            BitField<24, 3, u32> slot_6;
+            BitField<28, 3, u32> slot_7;
+
+            unsigned GetNum(unsigned index) const {
+                const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 };
+                return enable_slots[index];
+            }
+        } light_enable;
+    } lighting;
+
+    INSERT_PADDING_WORDS(0x26);
 
     enum class VertexAttributeFormat : u64 {
         BYTE = 0,

From 281bc90ad2afe16853178a56e0127cff8b53eb14 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 12 Sep 2015 18:56:12 -0400
Subject: [PATCH 03/32] pica: Implement fragment lighting LUTs.

---
 src/video_core/command_processor.cpp | 15 +++++++++++++++
 src/video_core/pica.h                | 19 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 59c75042c7..7409534b6f 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -464,6 +464,21 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
             break;
         }
 
+        case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): // All eight registers behave the same
+        case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
+        case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
+        case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
+        case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
+        case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
+        case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
+        case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
+        {
+            auto& lut_config = regs.lighting.lut_config; // type is a 5-bit field, but only 24 LUTs exist
+            if (lut_config.type < 24) g_state.lighting.luts[lut_config.type][lut_config.index].raw = value;
+            lut_config.index = lut_config.index + 1; // Auto-increment, even if the write was dropped
+            break;
+        }
+
         default:
             break;
     }
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 81a568e885..b09484de43 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -1156,6 +1156,25 @@ struct State {
     ShaderSetup vs;
     ShaderSetup gs;
 
+    struct {
+        union LutEntry {
+            // Used for raw access
+            u32 raw;
+
+            // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
+            BitField< 0, 12, u32> value;
+
+            // Used by HW for efficient interpolation, Citra does not use these
+            BitField<12, 12, u32> difference;
+
+            float ToFloat() const { // Scales `value` so that 0 maps to 0.f and 0xFFF maps to 1.f
+                return static_cast<float>(value) / 4095.f;
+            }
+        };
+
+        std::array<LutEntry, 256> luts[24]; // Indexed as luts[lut_config.type][lut_config.index]
+    } lighting;
+
     /// Current Pica command list
     struct {
         const u32* head_ptr;

From b0030755708849eb27fe2bf1cc481c5ab905468e Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 9 Sep 2015 22:39:43 -0400
Subject: [PATCH 04/32] pica: Implement decoding of basic fragment lighting
 components. - Diffuse - Distance attenuation - float16/float20 types - Vertex
 Shader 'view' output

---
 src/video_core/clipper.cpp       |  2 +
 src/video_core/pica.h            | 63 +++++++++++++++++++++++++++-----
 src/video_core/pica_types.h      | 56 ++++++++++++++++++++++++++++
 src/video_core/shader/shader.cpp |  6 ++-
 src/video_core/shader/shader.h   |  8 ++--
 5 files changed, 120 insertions(+), 15 deletions(-)

diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 5d609da065..3a09d62f4e 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -68,6 +68,8 @@ static void InitScreenCoordinates(OutputVertex& vtx)
 
     float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
     vtx.color *= inv_w;
+    vtx.view *= inv_w;
+    vtx.quat *= inv_w;
     vtx.tc0 *= inv_w;
     vtx.tc1 *= inv_w;
     vtx.tc2 *= inv_w;
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b09484de43..178a4b83fa 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -241,7 +241,8 @@ struct Regs {
     TextureConfig texture0;
     INSERT_PADDING_WORDS(0x8);
     BitField<0, 4, TextureFormat> texture0_format;
-    INSERT_PADDING_WORDS(0x2);
+    BitField<0, 1, u32> fragment_lighting_enable;
+    INSERT_PADDING_WORDS(0x1);
     TextureConfig texture1;
     BitField<0, 4, TextureFormat> texture1_format;
     INSERT_PADDING_WORDS(0x2);
@@ -645,6 +646,22 @@ struct Regs {
 
     INSERT_PADDING_WORDS(0x20);
 
+    enum class LightingSampler {
+        Distribution0 = 0,
+        Distribution1 = 1,
+        Fresnel = 3,
+        Blue = 4,
+        Green = 5,
+        Red = 6,
+        SpotlightAttenuation = 8,
+        DistanceAttenuation = 16,
+    };
+
+    enum class LightingLutInput {
+        NH = 0, // Cosine of the angle between the normal and half-angle vectors
+        LN = 3, // Cosine of the angle between the light and the normal vectors
+    };
+
     struct {
         union LightColor {
             BitField< 0, 10, u32> b;
@@ -664,17 +681,21 @@ struct Regs {
 
             struct {
                 // Encoded as 16-bit floating point
-                u16 x;
-                u16 y;
-                u16 z;
-                u16 unk;
+                union {
+                    BitField< 0, 16, u32> x;
+                    BitField<16, 16, u32> y;
+                };
+                union {
+                    BitField< 0, 16, u32> z;
+                };
 
                 INSERT_PADDING_WORDS(0x3);
 
-                // 1.f if 0, otherwise 0.f
-                BitField<0, 1, u32> w;
-            } position;
-
+                union {
+                    BitField<0, 1, u32> w; // 1.f if 0, otherwise 0.f
+                    BitField<1, 1, u32> two_sided_diffuse; // when disabled, clamp dot-product to 0
+                };
+            };
 
             BitField<0, 20, u32> dist_atten_bias;
             BitField<0, 20, u32> dist_atten_scale;
@@ -722,7 +743,27 @@ struct Regs {
         // registers is written to, the behavior will be the same.
         u32 lut_data[8];
 
-        INSERT_PADDING_WORDS(0x9);
+        union {
+            BitField< 1, 1, u32> d0;
+            BitField< 5, 1, u32> d1;
+            BitField< 9, 1, u32> sp;
+            BitField<13, 1, u32> fr;
+            BitField<17, 1, u32> rb;
+            BitField<21, 1, u32> rg;
+            BitField<25, 1, u32> rr;
+        } abs_lut_input;
+
+        union {
+            BitField< 0, 3, u32> d0;
+            BitField< 4, 3, u32> d1;
+            BitField< 8, 3, u32> sp;
+            BitField<12, 3, u32> fr;
+            BitField<16, 3, u32> rb;
+            BitField<20, 3, u32> rg;
+            BitField<24, 3, u32> rr;
+        } lut_input;
+
+        INSERT_PADDING_WORDS(0x7);
 
         union {
             // There are 8 light enable "slots", corresponding to the total number of lights
@@ -1095,6 +1136,7 @@ ASSERT_REG_POSITION(viewport_corner, 0x68);
 ASSERT_REG_POSITION(texture0_enable, 0x80);
 ASSERT_REG_POSITION(texture0, 0x81);
 ASSERT_REG_POSITION(texture0_format, 0x8e);
+ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f);
 ASSERT_REG_POSITION(texture1, 0x91);
 ASSERT_REG_POSITION(texture1_format, 0x96);
 ASSERT_REG_POSITION(texture2, 0x99);
@@ -1109,6 +1151,7 @@ ASSERT_REG_POSITION(tev_stage5, 0xf8);
 ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
 ASSERT_REG_POSITION(output_merger, 0x100);
 ASSERT_REG_POSITION(framebuffer, 0x110);
+ASSERT_REG_POSITION(lighting, 0x140);
 ASSERT_REG_POSITION(vertex_attributes, 0x200);
 ASSERT_REG_POSITION(index_array, 0x227);
 ASSERT_REG_POSITION(num_vertices, 0x228);
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index de798aa817..a34421c5d5 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -121,4 +121,60 @@ private:
 
 static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float");
 
+struct float16 {
+    // Decodes the low 16 bits of `hex`: 10 bit mantissa, 5 bit exponent (bias 15), 1 bit sign
+    // TODO: Unverified. Only all-zero is special-cased; other patterns assume an implicit leading 1
+    static float16 FromRawFloat16(u32 hex) {
+        float16 ret;
+        if ((hex & 0xFFFF) == 0) {
+            ret.value = 0;
+        } else {
+            u32 mantissa = hex & 0x3FF; // bits 0-9
+            u32 exponent = (hex >> 10) & 0x1F; // bits 10-14
+            u32 sign = (hex >> 15) & 1; // bit 15
+            ret.value = std::pow(2.0f, (float)exponent - 15.0f) * (1.0f + mantissa * std::pow(2.0f, -10.f));
+            if (sign)
+                ret.value = -ret.value;
+        }
+        return ret;
+    }
+
+    float ToFloat32() const { // Returns the decoded value as a host float
+        return value;
+    }
+
+private:
+    // Stored as a regular float, merely for convenience
+    // TODO: Perform proper arithmetic on this!
+    float value;
+};
+
+struct float20 {
+    // Decodes the low 20 bits of `hex`: 12 bit mantissa, 7 bit exponent (bias 63), 1 bit sign
+    // TODO: Unverified. Only all-zero is special-cased; other patterns assume an implicit leading 1
+    static float20 FromRawFloat20(u32 hex) {
+        float20 ret;
+        if ((hex & 0xFFFFF) == 0) {
+            ret.value = 0;
+        } else {
+            u32 mantissa = hex & 0xFFF; // bits 0-11
+            u32 exponent = (hex >> 12) & 0x7F; // bits 12-18
+            u32 sign = (hex >> 19) & 1; // bit 19
+            ret.value = std::pow(2.0f, (float)exponent - 63.0f) * (1.0f + mantissa * std::pow(2.0f, -12.f));
+            if (sign)
+                ret.value = -ret.value;
+        }
+        return ret;
+    }
+
+    float ToFloat32() const { // Returns the decoded value as a host float
+        return value;
+    }
+
+private:
+    // Stored as a regular float, merely for convenience
+    // TODO: Perform proper arithmetic on this!
+    float value;
+};
+
 } // namespace Pica
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 59f54236b9..44c234ed86 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -134,11 +134,13 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
             std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
     }
 
-    LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
+    LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
+        "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
         ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
         ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
-        ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
+        ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
+        ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
 
     return ret;
 }
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 1c6fa592c9..f068cd93f2 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -37,17 +37,19 @@ struct OutputVertex {
     Math::Vec4<float24> color;
     Math::Vec2<float24> tc0;
     Math::Vec2<float24> tc1;
-    float24 pad[6];
+    INSERT_PADDING_WORDS(2);
+    Math::Vec3<float24> view;
+    INSERT_PADDING_WORDS(1);
     Math::Vec2<float24> tc2;
 
     // Padding for optimal alignment
-    float24 pad2[4];
+    INSERT_PADDING_WORDS(4);
 
     // Attributes used to store intermediate results
 
     // position after perspective divide
     Math::Vec3<float24> screenpos;
-    float24 pad3;
+    INSERT_PADDING_WORDS(1);
 
     // Linear interpolation
     // factor: 0=this, 1=vtx

From afbef525163af1b28e5b7493e58383d442762228 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 12 Nov 2015 17:33:21 -0500
Subject: [PATCH 05/32] renderer_opengl: Implement diffuse component of HW
 fragment lighting.

---
 src/video_core/pica.h                         |  21 +--
 .../renderer_opengl/gl_rasterizer.cpp         | 141 ++++++++++++++++++
 .../renderer_opengl/gl_rasterizer.h           |  61 +++++++-
 .../renderer_opengl/gl_shader_gen.cpp         |  53 ++++++-
 .../renderer_opengl/gl_shader_util.h          |   2 +
 src/video_core/renderer_opengl/pica_to_gl.h   |   7 +
 6 files changed, 270 insertions(+), 15 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 178a4b83fa..b82ecf68aa 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -662,17 +662,18 @@ struct Regs {
         LN = 3, // Cosine of the angle between the light and the normal vectors
     };
 
+    union LightColor {
+        BitField< 0, 10, u32> b;
+        BitField<10, 10, u32> g;
+        BitField<20, 10, u32> r;
+
+        Math::Vec3f ToVec3f() const {
+            // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component
+            return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
+        }
+    };
+
     struct {
-        union LightColor {
-            BitField< 0, 10, u32> b;
-            BitField<10, 10, u32> g;
-            BitField<20, 10, u32> r;
-
-            Math::Vec3f ToVec3f() const {
-                return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
-            }
-        };
-
         struct LightSrc {
             LightColor specular_0;  // material.specular_0 * light.specular_0
             LightColor specular_1;  // material.specular_1 * light.specular_1
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6441e2586b..1e51a76550 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -75,6 +75,12 @@ void RasterizerOpenGL::InitObjects() {
     glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
     glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
 
+    glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
+    glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
+
+    glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
+    glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
+
     SetShader();
 
     // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
@@ -283,6 +289,98 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
     case PICA_REG_INDEX(tev_combiner_buffer_color):
         SyncCombinerColor();
         break;
+
+    // Fragment lighting diffuse color
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10):
+        SyncLightDiffuse(0);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[1].diffuse, 0x142 + 1 * 0x10):
+        SyncLightDiffuse(1);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[2].diffuse, 0x142 + 2 * 0x10):
+        SyncLightDiffuse(2);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[3].diffuse, 0x142 + 3 * 0x10):
+        SyncLightDiffuse(3);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[4].diffuse, 0x142 + 4 * 0x10):
+        SyncLightDiffuse(4);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[5].diffuse, 0x142 + 5 * 0x10):
+        SyncLightDiffuse(5);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[6].diffuse, 0x142 + 6 * 0x10):
+        SyncLightDiffuse(6);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[7].diffuse, 0x142 + 7 * 0x10):
+        SyncLightDiffuse(7);
+        break;
+
+    // Fragment lighting ambient color
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[0].ambient, 0x143 + 0 * 0x10):
+        SyncLightAmbient(0);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[1].ambient, 0x143 + 1 * 0x10):
+        SyncLightAmbient(1);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[2].ambient, 0x143 + 2 * 0x10):
+        SyncLightAmbient(2);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[3].ambient, 0x143 + 3 * 0x10):
+        SyncLightAmbient(3);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[4].ambient, 0x143 + 4 * 0x10):
+        SyncLightAmbient(4);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[5].ambient, 0x143 + 5 * 0x10):
+        SyncLightAmbient(5);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[6].ambient, 0x143 + 6 * 0x10):
+        SyncLightAmbient(6);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[7].ambient, 0x143 + 7 * 0x10):
+        SyncLightAmbient(7);
+        break;
+
+    // Fragment lighting position (x and y share one register word, z lives in the next one)
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10):
+        SyncLightPosition(0);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[1].x, 0x144 + 1 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[1].z, 0x145 + 1 * 0x10):
+        SyncLightPosition(1);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[2].x, 0x144 + 2 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[2].z, 0x145 + 2 * 0x10):
+        SyncLightPosition(2);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[3].x, 0x144 + 3 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[3].z, 0x145 + 3 * 0x10):
+        SyncLightPosition(3);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[4].x, 0x144 + 4 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[4].z, 0x145 + 4 * 0x10):
+        SyncLightPosition(4);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[5].x, 0x144 + 5 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[5].z, 0x145 + 5 * 0x10):
+        SyncLightPosition(5);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[6].x, 0x144 + 6 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[6].z, 0x145 + 6 * 0x10):
+        SyncLightPosition(6);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[7].x, 0x144 + 7 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[7].z, 0x145 + 7 * 0x10):
+        SyncLightPosition(7);
+        break;
+
+    // Fragment lighting global ambient color (emission + ambient * ambient)
+    case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0):
+        SyncGlobalAmbient();
+        break;
+
     }
 }
 
@@ -503,6 +601,13 @@ void RasterizerOpenGL::SetShader() {
     auto& tev_stages = Pica::g_state.regs.GetTevStages();
     for (int index = 0; index < tev_stages.size(); ++index)
         SyncTevConstColor(index, tev_stages[index]);
+
+    SyncGlobalAmbient();
+    for (int light_index = 0; light_index < 8; light_index++) {
+        SyncLightDiffuse(light_index);
+        SyncLightAmbient(light_index);
+        SyncLightPosition(light_index);
+    }
 }
 
 void RasterizerOpenGL::SyncFramebuffer() {
@@ -683,6 +788,42 @@ void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevS
     }
 }
 
+void RasterizerOpenGL::SyncGlobalAmbient() {
+    const auto ambient = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient);
+    if (uniform_block_data.data.lighting_global_ambient != ambient) { // ignore no-op register writes
+        uniform_block_data.dirty = true; // uniform copy changed, so flag it as modified
+        uniform_block_data.data.lighting_global_ambient = ambient;
+    }
+}
+
+void RasterizerOpenGL::SyncLightDiffuse(int light_index) {
+    auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
+    if (color != uniform_block_data.data.light_src[light_index].diffuse) {
+        uniform_block_data.data.light_src[light_index].diffuse = color; // refresh the cached uniform value
+        uniform_block_data.dirty = true; // only set when the converted color actually changed
+    }
+}
+
+void RasterizerOpenGL::SyncLightAmbient(int light_index) {
+    const auto ambient = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
+    if (uniform_block_data.data.light_src[light_index].ambient != ambient) { // ignore no-op register writes
+        uniform_block_data.dirty = true; // uniform copy changed, so flag it as modified
+        uniform_block_data.data.light_src[light_index].ambient = ambient;
+    }
+}
+
+void RasterizerOpenGL::SyncLightPosition(int light_index) {
+    const auto& light = Pica::g_state.regs.lighting.light[light_index]; // registers hold raw float16 fields
+    auto& cached = uniform_block_data.data.light_src[light_index].position;
+    const std::array<GLfloat, 3> decoded = { Pica::float16::FromRawFloat16(light.x).ToFloat32(),
+                                             Pica::float16::FromRawFloat16(light.y).ToFloat32(),
+                                             Pica::float16::FromRawFloat16(light.z).ToFloat32() };
+    if (cached != decoded) { // ignore no-op register writes
+        cached = decoded;
+        uniform_block_data.dirty = true;
+    }
+}
+
 void RasterizerOpenGL::SyncDrawState() {
     const auto& regs = Pica::g_state.regs;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 569beaa5c3..698ca5c4cc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -71,6 +71,18 @@ struct PicaShaderConfig {
             regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
             regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
 
+        // Fragment lighting
+
+        res.lighting_enabled = !regs.lighting.disable;
+        res.num_lights = regs.lighting.src_num + 1;
+
+        for (unsigned light_index = 0; light_index < res.num_lights; ++light_index) {
+            unsigned num = regs.lighting.light_enable.GetNum(light_index);
+            res.light_src[light_index].num = num;
+            res.light_src[light_index].directional = regs.lighting.light[num].w;
+            res.light_src[light_index].two_sided_diffuse = regs.lighting.light[num].two_sided_diffuse;
+        }
+
         return res;
     }
 
@@ -89,6 +101,16 @@ struct PicaShaderConfig {
     Pica::Regs::CompareFunc alpha_test_func;
     std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
     u8 combiner_buffer_input;
+
+    struct {
+        unsigned num;
+        bool directional;
+        bool two_sided_diffuse;
+        bool dist_atten_enabled;
+    } light_src[8];
+
+    bool lighting_enabled;
+    unsigned num_lights;
 };
 
 namespace std {
@@ -182,6 +204,13 @@ private:
             tex_coord1[1] = v.tc1.y.ToFloat32();
             tex_coord2[0] = v.tc2.x.ToFloat32();
             tex_coord2[1] = v.tc2.y.ToFloat32();
+            normquat[0] = v.quat.x.ToFloat32();
+            normquat[1] = v.quat.y.ToFloat32();
+            normquat[2] = v.quat.z.ToFloat32();
+            normquat[3] = v.quat.w.ToFloat32();
+            view[0] = v.view.x.ToFloat32();
+            view[1] = v.view.y.ToFloat32();
+            view[2] = v.view.z.ToFloat32();
         }
 
         GLfloat position[4];
@@ -189,6 +218,17 @@ private:
         GLfloat tex_coord0[2];
         GLfloat tex_coord1[2];
         GLfloat tex_coord2[2];
+        GLfloat normquat[4];
+        GLfloat view[3];
+    };
+
+    struct LightSrc {
+        std::array<GLfloat, 3> diffuse;
+        INSERT_PADDING_WORDS(1);
+        std::array<GLfloat, 3> ambient;
+        INSERT_PADDING_WORDS(1);
+        std::array<GLfloat, 3> position;
+        INSERT_PADDING_WORDS(1);
     };
 
     /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned
@@ -198,11 +238,14 @@ private:
         std::array<GLfloat, 4> tev_combiner_buffer_color;
         GLint alphatest_ref;
         GLfloat depth_offset;
-        INSERT_PADDING_BYTES(8);
+        INSERT_PADDING_WORDS(2);
+        std::array<GLfloat, 3> lighting_global_ambient;
+        INSERT_PADDING_WORDS(1);
+        LightSrc light_src[8];
     };
 
-    static_assert(sizeof(UniformData) == 0x80, "The size of the UniformData structure has changed, update the structure in the shader");
-    static_assert(sizeof(UniformData) < 16000, "UniformData structure must be less than 16kb as per the OpenGL spec");
+    static_assert(sizeof(UniformData) == 0x210, "The size of the UniformData structure has changed, update the structure in the shader");
+    static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
 
     /// Reconfigure the OpenGL color texture to use the given format and dimensions
     void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
@@ -249,6 +292,18 @@ private:
     /// Syncs the TEV combiner color buffer to match the PICA register
     void SyncCombinerColor();
 
+    /// Syncs the lighting global ambient color to match the PICA register
+    void SyncGlobalAmbient();
+
+    /// Syncs the specified light's diffuse color to match the PICA register
+    void SyncLightDiffuse(int light_index);
+
+    /// Syncs the specified light's ambient color to match the PICA register
+    void SyncLightAmbient(int light_index);
+
+    /// Syncs the specified light's position to match the PICA register
+    void SyncLightPosition(int light_index);
+
     /// Syncs the remaining OpenGL drawing state to match the current PICA state
     void SyncDrawState();
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 22022f7f4f..5bc588b0b2 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -32,8 +32,7 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
         out += "primary_color";
         break;
     case Source::PrimaryFragmentColor:
-        // HACK: Until we implement fragment lighting, use primary_color
-        out += "primary_color";
+        out += "primary_fragment_color";
         break;
     case Source::SecondaryFragmentColor:
         // HACK: Until we implement fragment lighting, use zero
@@ -324,24 +323,67 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
     std::string out = R"(
 #version 330 core
 #define NUM_TEV_STAGES 6
+#define NUM_LIGHTS 8
 
 in vec4 primary_color;
 in vec2 texcoord[3];
+in vec4 normquat;
+in vec3 view;
 
 out vec4 color;
 
+struct LightSrc {
+    vec3 diffuse;
+    vec3 ambient;
+    vec3 position;
+};
+
 layout (std140) uniform shader_data {
     vec4 const_color[NUM_TEV_STAGES];
     vec4 tev_combiner_buffer_color;
     int alphatest_ref;
     float depth_offset;
+    vec3 lighting_global_ambient;
+    LightSrc light_src[NUM_LIGHTS];
 };
 
 uniform sampler2D tex[3];
 
 void main() {
+vec4 primary_fragment_color = vec4(0.0);
 )";
 
+    if (config.lighting_enabled) {
+        out += "vec3 normal = normalize(vec3(\n";
+        out += "          2.f*(normquat.x*normquat.z + normquat.y*normquat.w),\n";
+        out += "          2.f*(normquat.y*normquat.z + normquat.x*normquat.w),\n";
+        out += "    1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n";
+        out += "vec4 secondary_color = vec4(0.0);\n";
+        out += "vec3 diffuse_sum = vec3(0.0);\n";
+        out += "vec3 fragment_position = -view;\n";
+
+        for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) {
+            unsigned num = config.light_src[light_index].num;
+
+            std::string light_vector;
+            if (config.light_src[light_index].directional)
+                light_vector = "normalize(-light_src[" + std::to_string(num) + "].position)";
+            else
+                light_vector = "normalize(light_src[" + std::to_string(num) + "].position - fragment_position)";
+
+            std::string dot_product;
+            if (config.light_src[light_index].two_sided_diffuse)
+                dot_product = "abs(dot(" + light_vector + ", normal))";
+            else
+                dot_product = "max(dot(" + light_vector + ", normal), 0.0)";
+
+            out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * 1.0;\n";
+        }
+
+        out += "diffuse_sum += lighting_global_ambient;\n";
+        out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
+    }
+
     // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
     if (config.alpha_test_func == Regs::CompareFunc::Never) {
         out += "discard; }";
@@ -369,21 +411,28 @@ void main() {
 
 std::string GenerateVertexShader() {
     std::string out = "#version 330 core\n";
+
     out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION)  + ") in vec4 vert_position;\n";
     out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR)     + ") in vec4 vert_color;\n";
     out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
     out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
     out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
+    out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT)  + ") in vec4 vert_normquat;\n";
+    out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW)      + ") in vec3 vert_view;\n";
 
     out += R"(
 out vec4 primary_color;
 out vec2 texcoord[3];
+out vec4 normquat;
+out vec3 view;
 
 void main() {
     primary_color = vert_color;
     texcoord[0] = vert_texcoord0;
     texcoord[1] = vert_texcoord1;
     texcoord[2] = vert_texcoord2;
+    normquat = vert_normquat;
+    view = vert_view;
     gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
 }
 )";
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 046aae14f6..097242f6fd 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -14,6 +14,8 @@ enum Attributes {
     ATTRIBUTE_TEXCOORD0,
     ATTRIBUTE_TEXCOORD1,
     ATTRIBUTE_TEXCOORD2,
+    ATTRIBUTE_NORMQUAT,
+    ATTRIBUTE_VIEW,
 };
 
 /**
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 04c1d1a347..346c9391da 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -183,4 +183,11 @@ inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) {
            } };
 }
 
+inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) {
+    const GLfloat red = color.r / 255.0f; // each channel is divided by 255 to get its float value
+    const GLfloat green = color.g / 255.0f;
+    const GLfloat blue = color.b / 255.0f;
+    return { { red, green, blue } };
+}
+
 } // namespace

From e9af70eaf3e9d190b2c75c039b004beb71f0e436 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 13 Nov 2015 22:52:20 -0500
Subject: [PATCH 06/32] renderer_opengl: Implement HW fragment lighting LUTs
 within our default UBO.

---
 src/video_core/pica.h                         |  2 +-
 .../renderer_opengl/gl_rasterizer.cpp         | 63 +++++++++++++++----
 .../renderer_opengl/gl_rasterizer.h           |  9 ++-
 .../renderer_opengl/gl_shader_gen.cpp         |  7 +++
 4 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b82ecf68aa..aad9effdc3 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -1216,7 +1216,7 @@ struct State {
             }
         };
 
-        std::array<LutEntry, 256> luts[24];
+        std::array<std::array<LutEntry, 256>, 24> luts;
     } lighting;
 
     /// Current Pica command list
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 1e51a76550..80693fa292 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -162,6 +162,13 @@ void RasterizerOpenGL::DrawTriangles() {
         state.draw.shader_dirty = false;
     }
 
+    for (unsigned index = 0; index < Pica::g_state.lighting.luts.size(); index++) {
+        if (uniform_block_data.lut_dirty[index]) {
+            SyncLightingLUT(index);
+            uniform_block_data.lut_dirty[index] = false;
+        }
+    }
+
     if (uniform_block_data.dirty) {
         glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
         uniform_block_data.dirty = false;
@@ -381,6 +388,21 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
         SyncGlobalAmbient();
         break;
 
+    // Fragment lighting lookup tables
+    case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8):
+    case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
+    case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
+    case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
+    case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
+    case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
+    case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
+    case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
+    {
+        auto& lut_config = regs.lighting.lut_config;
+        uniform_block_data.lut_dirty[lut_config.type] = true;
+        break;
+    }
+
     }
 }
 
@@ -593,20 +615,23 @@ void RasterizerOpenGL::SetShader() {
 
         unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
         glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
-    }
 
-    // Update uniforms
-    SyncAlphaTest();
-    SyncCombinerColor();
-    auto& tev_stages = Pica::g_state.regs.GetTevStages();
-    for (int index = 0; index < tev_stages.size(); ++index)
-        SyncTevConstColor(index, tev_stages[index]);
+        // Update uniforms
+        SyncAlphaTest();
+        SyncCombinerColor();
+        auto& tev_stages = Pica::g_state.regs.GetTevStages();
+        for (int index = 0; index < tev_stages.size(); ++index)
+            SyncTevConstColor(index, tev_stages[index]);
 
-    SyncGlobalAmbient();
-    for (int light_index = 0; light_index < 8; light_index++) {
-        SyncLightDiffuse(light_index);
-        SyncLightAmbient(light_index);
-        SyncLightPosition(light_index);
+        for (unsigned index = 0; index < Pica::g_state.lighting.luts.size(); ++index)
+            SyncLightingLUT(index);
+
+        SyncGlobalAmbient();
+        for (int light_index = 0; light_index < 8; light_index++) {
+            SyncLightDiffuse(light_index);
+            SyncLightAmbient(light_index);
+            SyncLightPosition(light_index);
+        }
     }
 }
 
@@ -796,6 +821,20 @@ void RasterizerOpenGL::SyncGlobalAmbient() {
     }
 }
 
+void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
+    auto& lut = uniform_block_data.data.lighting_lut[lut_index / 4]; // four LUTs are packed per vec4 table
+    std::array<std::array<GLfloat, 4>, 256> new_lut = lut; // seed from current data: keeps the other 3 channels
+
+    for (unsigned offset = 0; offset < new_lut.size(); ++offset) { // rewrite only this LUT's channel
+        new_lut[offset][lut_index & 3] = Pica::g_state.lighting.luts[lut_index][offset].ToFloat();
+    }
+
+    if (new_lut != lut) {
+        lut = new_lut;
+        uniform_block_data.dirty = true;
+    }
+}
+
 void RasterizerOpenGL::SyncLightDiffuse(int light_index) {
     auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
     if (color != uniform_block_data.data.light_src[light_index].diffuse) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 698ca5c4cc..fa4a78cb17 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -242,10 +242,11 @@ private:
         std::array<GLfloat, 3> lighting_global_ambient;
         INSERT_PADDING_WORDS(1);
         LightSrc light_src[8];
+        std::array<std::array<std::array<GLfloat, 4>, 256>, 6> lighting_lut;
     };
 
-    static_assert(sizeof(UniformData) == 0x210, "The size of the UniformData structure has changed, update the structure in the shader");
-    static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
+    static_assert(sizeof(UniformData) == 0x6210, "The size of the UniformData structure has changed, update the structure in the shader");
+    static_assert(sizeof(UniformData) < 32768, "UniformData structure must be less than 32kb");
 
     /// Reconfigure the OpenGL color texture to use the given format and dimensions
     void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
@@ -295,6 +296,9 @@ private:
     /// Syncs the lighting global ambient color to match the PICA register
     void SyncGlobalAmbient();
 
+    /// Syncs the lighting lookup tables
+    void SyncLightingLUT(unsigned index);
+
     /// Syncs the specified light's diffuse color to match the PICA register
     void SyncLightDiffuse(int light_index);
 
@@ -346,6 +350,7 @@ private:
 
     struct {
         UniformData data;
+        bool lut_dirty[24];
         bool dirty;
     } uniform_block_data;
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 5bc588b0b2..4e02671ddb 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -324,6 +324,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
 #version 330 core
 #define NUM_TEV_STAGES 6
 #define NUM_LIGHTS 8
+#define LIGHTING_LUT_SIZE 256
 
 in vec4 primary_color;
 in vec2 texcoord[3];
@@ -345,6 +346,12 @@ layout (std140) uniform shader_data {
     float depth_offset;
     vec3 lighting_global_ambient;
     LightSrc light_src[NUM_LIGHTS];
+    vec4 lighting_lut_0[LIGHTING_LUT_SIZE];
+    vec4 lighting_lut_1[LIGHTING_LUT_SIZE];
+    vec4 lighting_lut_2[LIGHTING_LUT_SIZE];
+    vec4 lighting_lut_3[LIGHTING_LUT_SIZE];
+    vec4 lighting_lut_4[LIGHTING_LUT_SIZE];
+    vec4 lighting_lut_5[LIGHTING_LUT_SIZE];
 };
 
 uniform sampler2D tex[3];

From e34fa6365ff87af247b0ae8ed880c4032bcb2ed0 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 13 Nov 2015 23:04:19 -0500
Subject: [PATCH 07/32] renderer_opengl: Implement HW fragment lighting
 distance attenuation.

---
 .../renderer_opengl/gl_rasterizer.h           | 34 ++++++++++++-------
 .../renderer_opengl/gl_shader_gen.cpp         | 19 +++++++++--
 2 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index fa4a78cb17..ba0b058024 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -78,9 +78,13 @@ struct PicaShaderConfig {
 
         for (unsigned light_index = 0; light_index < res.num_lights; ++light_index) {
             unsigned num = regs.lighting.light_enable.GetNum(light_index);
+            const auto& light = regs.lighting.light[num];
             res.light_src[light_index].num = num;
-            res.light_src[light_index].directional = regs.lighting.light[num].w;
-            res.light_src[light_index].two_sided_diffuse = regs.lighting.light[num].two_sided_diffuse;
+            res.light_src[light_index].directional = light.w;
+            res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse;
+            res.light_src[light_index].dist_atten_enabled = regs.lighting.dist_atten_enable.IsEnabled(num);
+            res.light_src[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32();
+            res.light_src[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32();
         }
 
         return res;
@@ -98,19 +102,23 @@ struct PicaShaderConfig {
         return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0;
     };
 
-    Pica::Regs::CompareFunc alpha_test_func;
-    std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
-    u8 combiner_buffer_input;
-
     struct {
-        unsigned num;
-        bool directional;
-        bool two_sided_diffuse;
-        bool dist_atten_enabled;
-    } light_src[8];
+        Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never;
+        std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
+        u8 combiner_buffer_input = 0;
 
-    bool lighting_enabled;
-    unsigned num_lights;
+        struct {
+            unsigned num = 0;
+            bool directional = false;
+            bool two_sided_diffuse = false;
+            bool dist_atten_enabled = false;
+            GLfloat dist_atten_scale = 0.0f;
+            GLfloat dist_atten_bias = 0.0f;
+        } light_src[8];
+
+        bool lighting_enabled = false;
+        unsigned num_lights = 0;
+    };
 };
 
 namespace std {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 4e02671ddb..cf99cff76c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -371,12 +371,13 @@ vec4 primary_fragment_color = vec4(0.0);
 
         for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) {
             unsigned num = config.light_src[light_index].num;
+            std::string light_src = "light_src[" + std::to_string(num) + "]";
 
             std::string light_vector;
             if (config.light_src[light_index].directional)
-                light_vector = "normalize(-light_src[" + std::to_string(num) + "].position)";
+                light_vector = "normalize(-" + light_src + ".position)";
             else
-                light_vector = "normalize(light_src[" + std::to_string(num) + "].position - fragment_position)";
+                light_vector = "normalize(" + light_src + ".position - fragment_position)";
 
             std::string dot_product;
             if (config.light_src[light_index].two_sided_diffuse)
@@ -384,7 +385,19 @@ vec4 primary_fragment_color = vec4(0.0);
             else
                 dot_product = "max(dot(" + light_vector + ", normal), 0.0)";
 
-            out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * 1.0;\n";
+            std::string dist_atten = "1.0";
+            if (config.light_src[light_index].dist_atten_enabled) {
+                std::string scale = std::to_string(config.light_src[light_index].dist_atten_scale);
+                std::string bias = std::to_string(config.light_src[light_index].dist_atten_bias);
+                std::string lut_index = "(" + scale + " * length(fragment_position - " + light_src + ".position) + " + bias + ")";
+                std::string clamped_lut_index = "((clamp(int(" + lut_index + " * 256.0), 0, 255)))";
+
+                unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num);
+
+                dist_atten = "lighting_lut_" + std::to_string(lut_num /4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "]";
+            }
+
+            out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * " + dist_atten + ";\n";
         }
 
         out += "diffuse_sum += lighting_global_ambient;\n";

From bf89870437ebb0d983cfc20c3ac0490169f59f44 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 14 Nov 2015 23:23:08 -0500
Subject: [PATCH 08/32] renderer_opengl: Initial implementation of basic
 specular lighting.

---
 src/video_core/pica.h                         |  8 +-
 .../renderer_opengl/gl_rasterizer.cpp         | 68 +++++++++++++++
 .../renderer_opengl/gl_rasterizer.h           | 20 +++++
 .../renderer_opengl/gl_shader_gen.cpp         | 82 ++++++++++++++++---
 4 files changed, 165 insertions(+), 13 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index aad9effdc3..c63d87a36b 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -659,6 +659,8 @@ struct Regs {
 
     enum class LightingLutInput {
         NH = 0, // Cosine of the angle between the normal and half-angle vectors
+        VH = 1, // Cosine of the angle between the view and half-angle vectors
+        NV = 2, // Cosine of the angle between the normal and the view vector
         LN = 3, // Cosine of the angle between the light and the normal vectors
     };
 
@@ -709,7 +711,11 @@ struct Regs {
         LightColor global_ambient; // emission + (material.ambient * lighting.ambient)
         INSERT_PADDING_WORDS(0x1);
         BitField<0, 3, u32> src_num; // number of enabled lights - 1
-        INSERT_PADDING_WORDS(0x1);
+
+        union {
+            BitField< 4, 4, u32> config;
+            BitField<27, 1, u32> clamp_highlights;
+        } light_env;
 
         union {
             // Each bit specifies whether distance attenuation should be applied for the
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 80693fa292..c6fb37c53c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -297,6 +297,58 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
         SyncCombinerColor();
         break;
 
+    // Fragment lighting specular 0 color
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10):
+        SyncLightSpecular0(0);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_0, 0x140 + 1 * 0x10):
+        SyncLightSpecular0(1);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_0, 0x140 + 2 * 0x10):
+        SyncLightSpecular0(2);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_0, 0x140 + 3 * 0x10):
+        SyncLightSpecular0(3);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_0, 0x140 + 4 * 0x10):
+        SyncLightSpecular0(4);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_0, 0x140 + 5 * 0x10):
+        SyncLightSpecular0(5);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_0, 0x140 + 6 * 0x10):
+        SyncLightSpecular0(6);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_0, 0x140 + 7 * 0x10):
+        SyncLightSpecular0(7);
+        break;
+
+    // Fragment lighting specular 1 color
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_1, 0x141 + 0 * 0x10):
+        SyncLightSpecular1(0);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_1, 0x141 + 1 * 0x10):
+        SyncLightSpecular1(1);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_1, 0x141 + 2 * 0x10):
+        SyncLightSpecular1(2);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_1, 0x141 + 3 * 0x10):
+        SyncLightSpecular1(3);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_1, 0x141 + 4 * 0x10):
+        SyncLightSpecular1(4);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_1, 0x141 + 5 * 0x10):
+        SyncLightSpecular1(5);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_1, 0x141 + 6 * 0x10):
+        SyncLightSpecular1(6);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_1, 0x141 + 7 * 0x10):
+        SyncLightSpecular1(7);
+        break;
+
     // Fragment lighting diffuse color
     case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10):
         SyncLightDiffuse(0);
@@ -835,6 +887,22 @@ void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
     }
 }
 
+void RasterizerOpenGL::SyncLightSpecular0(int light_index) {
+    const auto specular = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
+    if (uniform_block_data.data.light_src[light_index].specular_0 != specular) { // ignore no-op writes
+        uniform_block_data.dirty = true; // DrawTriangles re-uploads the uniform block while this is set
+        uniform_block_data.data.light_src[light_index].specular_0 = specular;
+    }
+}
+
+void RasterizerOpenGL::SyncLightSpecular1(int light_index) {
+    const auto specular = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
+    if (uniform_block_data.data.light_src[light_index].specular_1 != specular) { // ignore no-op writes
+        uniform_block_data.dirty = true; // DrawTriangles re-uploads the uniform block while this is set
+        uniform_block_data.data.light_src[light_index].specular_1 = specular;
+    }
+}
+
 void RasterizerOpenGL::SyncLightDiffuse(int light_index) {
     auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
     if (color != uniform_block_data.data.light_src[light_index].diffuse) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ba0b058024..9e93b8b2fb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -87,6 +87,10 @@ struct PicaShaderConfig {
             res.light_src[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32();
         }
 
+        res.lighting_lut.d0_abs = (regs.lighting.abs_lut_input.d0 == 0);
+        res.lighting_lut.d0_type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value();
+        res.clamp_highlights = regs.lighting.light_env.clamp_highlights;
+
         return res;
     }
 
@@ -118,6 +122,12 @@ struct PicaShaderConfig {
 
         bool lighting_enabled = false;
         unsigned num_lights = 0;
+        bool clamp_highlights = false;
+
+        struct {
+            bool d0_abs = false;
+            Pica::Regs::LightingLutInput d0_type = Pica::Regs::LightingLutInput::NH;
+        } lighting_lut;
     };
 };
 
@@ -231,6 +241,10 @@ private:
     };
 
     struct LightSrc {
+        std::array<GLfloat, 3> specular_0;
+        INSERT_PADDING_WORDS(1);
+        std::array<GLfloat, 3> specular_1;
+        INSERT_PADDING_WORDS(1);
         std::array<GLfloat, 3> diffuse;
         INSERT_PADDING_WORDS(1);
         std::array<GLfloat, 3> ambient;
@@ -316,6 +330,12 @@ private:
     /// Syncs the specified light's position to match the PICA register
     void SyncLightPosition(int light_index);
 
+    /// Syncs the specified light's specular 0 color to match the PICA register
+    void SyncLightSpecular0(int light_index);
+
+    /// Syncs the specified light's specular 1 color to match the PICA register
+    void SyncLightSpecular1(int light_index);
+
     /// Syncs the remaining OpenGL drawing state to match the current PICA state
     void SyncDrawState();
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index cf99cff76c..abcc89f1d3 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -35,8 +35,7 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
         out += "primary_fragment_color";
         break;
     case Source::SecondaryFragmentColor:
-        // HACK: Until we implement fragment lighting, use zero
-        out += "vec4(0.0)";
+        out += "secondary_fragment_color";
         break;
     case Source::Texture0:
         out += "texture(tex[0], texcoord[0])";
@@ -334,6 +333,8 @@ in vec3 view;
 out vec4 color;
 
 struct LightSrc {
+    vec3 specular_0;
+    vec3 specular_1;
     vec3 diffuse;
     vec3 ambient;
     vec3 position;
@@ -358,6 +359,7 @@ uniform sampler2D tex[3];
 
 void main() {
 vec4 primary_fragment_color = vec4(0.0);
+vec4 secondary_fragment_color = vec4(0.0);
 )";
 
     if (config.lighting_enabled) {
@@ -367,41 +369,97 @@ vec4 primary_fragment_color = vec4(0.0);
         out += "    1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n";
         out += "vec4 secondary_color = vec4(0.0);\n";
         out += "vec3 diffuse_sum = vec3(0.0);\n";
+        out += "vec3 specular_sum = vec3(0.0);\n";
         out += "vec3 fragment_position = -view;\n";
+        out += "vec3 light_vector = vec3(0.0);\n";
+        out += "float dist_atten = 1.0;\n";
+
+        // Gets the index into the specified lookup table for specular lighting
+        auto GetLutIndex = [&](unsigned light_num, Regs::LightingLutInput input, bool abs) {
+            const std::string half_angle = "normalize(view + light_vector)";
+            std::string index;
+            switch (input) {
+            case Regs::LightingLutInput::NH:
+                index  = "dot(normal, " + half_angle + ")";
+                break;
+
+            case Regs::LightingLutInput::VH:
+                index = std::string("dot(view, " + half_angle + ")");
+                break;
+
+            case Regs::LightingLutInput::NV:
+                index = std::string("dot(normal, view)");
+                break;
+
+            case Regs::LightingLutInput::LN:
+                index  = std::string("dot(light_vector, normal)");
+                break;
+
+            default:
+                LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input);
+                UNIMPLEMENTED();
+                break;
+            }
+
+            if (abs) {
+                // In the range of [ 0.f, 1.f]
+                index = config.light_src[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
+                return "clamp(int(" + index + " * 256.0), 0, 255)";
+            } else {
+                // In the range of [-1.f, 1.f]
+                index = "clamp(" + index + ", -1.0, 1.0)";
+                return std::string("uint(int(" + index + " * 127.f) & 0xff)");
+            }
+
+            return std::string();
+        };
 
         for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) {
             unsigned num = config.light_src[light_index].num;
             std::string light_src = "light_src[" + std::to_string(num) + "]";
 
-            std::string light_vector;
             if (config.light_src[light_index].directional)
-                light_vector = "normalize(-" + light_src + ".position)";
+                out += "light_vector = normalize(-" + light_src + ".position);\n";
             else
-                light_vector = "normalize(" + light_src + ".position - fragment_position)";
+                out += "light_vector = normalize(" + light_src + ".position - fragment_position);\n";
 
             std::string dot_product;
             if (config.light_src[light_index].two_sided_diffuse)
-                dot_product = "abs(dot(" + light_vector + ", normal))";
+                dot_product = "abs(dot(light_vector, normal))";
             else
-                dot_product = "max(dot(" + light_vector + ", normal), 0.0)";
+                dot_product = "max(dot(light_vector, normal), 0.0)";
 
-            std::string dist_atten = "1.0";
+            // Compute distance attenuation value
+            out += "dist_atten = 1.0;\n";
             if (config.light_src[light_index].dist_atten_enabled) {
                 std::string scale = std::to_string(config.light_src[light_index].dist_atten_scale);
                 std::string bias = std::to_string(config.light_src[light_index].dist_atten_bias);
                 std::string lut_index = "(" + scale + " * length(fragment_position - " + light_src + ".position) + " + bias + ")";
                 std::string clamped_lut_index = "((clamp(int(" + lut_index + " * 256.0), 0, 255)))";
 
-                unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num);
-
-                dist_atten = "lighting_lut_" + std::to_string(lut_num /4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "]";
+                const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num);
+                out += "dist_atten = lighting_lut_" + std::to_string(lut_num / 4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "];\n";
             }
 
-            out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * " + dist_atten + ";\n";
+            // Compute primary fragment color (diffuse lighting) function
+            out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * dist_atten;\n";
+
+            // Compute secondary fragment color (specular lighting) function
+            std::string clamped_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs);
+            const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0;
+            std::string lut_lookup = "lighting_lut_" + std::to_string(lut_num / 4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "]";
+
+            out += "specular_sum += (" + lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten);\n";
+        }
+
+        out += "float clamp_highlights = 1.0;\n";
+        if (config.clamp_highlights) {
+            out += "if (dot(light_vector, normal) <= 0.0) clamp_highlights = 0.0;\n";
         }
 
         out += "diffuse_sum += lighting_global_ambient;\n";
         out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
+        out += "secondary_fragment_color = vec4(clamp(clamp_highlights * specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
     }
 
     // Do not do any sort of processing if it's obvious we're not going to pass the alpha test

From 021cb0bced1d8045f04b85024b97a07a4d0df12f Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 15 Nov 2015 17:43:01 -0500
Subject: [PATCH 09/32] renderer_opengl: Use textures for fragment shader LUTs
 instead of UBOs.

- Gets us LUT interpolation for free.
- Some older Intel GPU drivers did not support the big UBOs needed to store the LUTs.
---
 .../renderer_opengl/gl_rasterizer.cpp         | 51 ++++++++++++++-----
 .../renderer_opengl/gl_rasterizer.h           | 10 ++--
 .../renderer_opengl/gl_shader_gen.cpp         | 18 +++----
 src/video_core/renderer_opengl/gl_state.cpp   |  8 +++
 src/video_core/renderer_opengl/gl_state.h     |  4 ++
 5 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c6fb37c53c..6e7d6a40d9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -126,6 +126,19 @@ void RasterizerOpenGL::InitObjects() {
     glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0);
     glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
 
+    for (size_t i = 0; i < lighting_lut.size(); ++i) {
+        lighting_lut[i].Create();
+        state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
+
+        glActiveTexture(GL_TEXTURE3 + i);
+        glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
+
+        glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
+        glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    }
+    state.Apply();
+
     ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE,
                "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER));
 }
@@ -162,7 +175,7 @@ void RasterizerOpenGL::DrawTriangles() {
         state.draw.shader_dirty = false;
     }
 
-    for (unsigned index = 0; index < Pica::g_state.lighting.luts.size(); index++) {
+    for (unsigned index = 0; index < lighting_lut.size(); index++) {
         if (uniform_block_data.lut_dirty[index]) {
             SyncLightingLUT(index);
             uniform_block_data.lut_dirty[index] = false;
@@ -451,7 +464,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
     case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
     {
         auto& lut_config = regs.lighting.lut_config;
-        uniform_block_data.lut_dirty[lut_config.type] = true;
+        uniform_block_data.lut_dirty[lut_config.type / 4] = true;
         break;
     }
 
@@ -663,6 +676,20 @@ void RasterizerOpenGL::SetShader() {
         uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
         if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); }
 
+        // Set the texture samplers to correspond to different lookup table texture units
+        GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]");
+        if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); }
+        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]");
+        if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); }
+        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]");
+        if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); }
+        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]");
+        if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); }
+        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]");
+        if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); }
+        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
+        if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); }
+
         current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
 
         unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
@@ -675,9 +702,6 @@ void RasterizerOpenGL::SetShader() {
         for (int index = 0; index < tev_stages.size(); ++index)
             SyncTevConstColor(index, tev_stages[index]);
 
-        for (unsigned index = 0; index < Pica::g_state.lighting.luts.size(); ++index)
-            SyncLightingLUT(index);
-
         SyncGlobalAmbient();
         for (int light_index = 0; light_index < 8; light_index++) {
             SyncLightDiffuse(light_index);
@@ -874,16 +898,19 @@ void RasterizerOpenGL::SyncGlobalAmbient() {
 }
 
 void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
-    auto& lut = uniform_block_data.data.lighting_lut[lut_index / 4];
-    std::array<std::array<GLfloat, 4>, 256> new_lut;
+    std::array<std::array<GLfloat, 4>, 256> new_data;
 
-    for (int offset = 0; offset < new_lut.size(); ++offset) {
-        new_lut[offset][lut_index & 3] = Pica::g_state.lighting.luts[lut_index][offset].ToFloat();
+    for (unsigned offset = 0; offset < new_data.size(); ++offset) {
+        new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat();
+        new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat();
+        new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat();
+        new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat();
     }
 
-    if (new_lut != lut) {
-        lut = new_lut;
-        uniform_block_data.dirty = true;
+    if (new_data != lighting_lut_data[lut_index]) {
+        lighting_lut_data[lut_index] = new_data;
+        glActiveTexture(GL_TEXTURE3 + lut_index);
+        glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data());
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9e93b8b2fb..b50542701a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -264,11 +264,10 @@ private:
         std::array<GLfloat, 3> lighting_global_ambient;
         INSERT_PADDING_WORDS(1);
         LightSrc light_src[8];
-        std::array<std::array<std::array<GLfloat, 4>, 256>, 6> lighting_lut;
     };
 
-    static_assert(sizeof(UniformData) == 0x6210, "The size of the UniformData structure has changed, update the structure in the shader");
-    static_assert(sizeof(UniformData) < 32768, "UniformData structure must be less than 32kb");
+    static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
+    static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
 
     /// Reconfigure the OpenGL color texture to use the given format and dimensions
     void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
@@ -378,7 +377,7 @@ private:
 
     struct {
         UniformData data;
-        bool lut_dirty[24];
+        bool lut_dirty[6];
         bool dirty;
     } uniform_block_data;
 
@@ -386,4 +385,7 @@ private:
     OGLBuffer vertex_buffer;
     OGLBuffer uniform_buffer;
     OGLFramebuffer framebuffer;
+
+    std::array<OGLTexture, 6> lighting_lut;
+    std::array<std::array<std::array<GLfloat, 4>, 256>, 6> lighting_lut_data;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index abcc89f1d3..cb570c4d26 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -324,6 +324,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
 #define NUM_TEV_STAGES 6
 #define NUM_LIGHTS 8
 #define LIGHTING_LUT_SIZE 256
+#define FLOAT_255 0.99609375
 
 in vec4 primary_color;
 in vec2 texcoord[3];
@@ -347,15 +348,10 @@ layout (std140) uniform shader_data {
     float depth_offset;
     vec3 lighting_global_ambient;
     LightSrc light_src[NUM_LIGHTS];
-    vec4 lighting_lut_0[LIGHTING_LUT_SIZE];
-    vec4 lighting_lut_1[LIGHTING_LUT_SIZE];
-    vec4 lighting_lut_2[LIGHTING_LUT_SIZE];
-    vec4 lighting_lut_3[LIGHTING_LUT_SIZE];
-    vec4 lighting_lut_4[LIGHTING_LUT_SIZE];
-    vec4 lighting_lut_5[LIGHTING_LUT_SIZE];
 };
 
 uniform sampler2D tex[3];
+uniform sampler1D lut[6];
 
 void main() {
 vec4 primary_fragment_color = vec4(0.0);
@@ -404,11 +400,11 @@ vec4 secondary_fragment_color = vec4(0.0);
             if (abs) {
                 // In the range of [ 0.f, 1.f]
                 index = config.light_src[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
-                return "clamp(int(" + index + " * 256.0), 0, 255)";
+                return "clamp(" + index + ", 0.0, FLOAT_255)";
             } else {
                 // In the range of [-1.f, 1.f]
                 index = "clamp(" + index + ", -1.0, 1.0)";
-                return std::string("uint(int(" + index + " * 127.f) & 0xff)");
+                return "clamp(((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0, 0.0, FLOAT_255)";
             }
 
             return std::string();
@@ -435,10 +431,10 @@ vec4 secondary_fragment_color = vec4(0.0);
                 std::string scale = std::to_string(config.light_src[light_index].dist_atten_scale);
                 std::string bias = std::to_string(config.light_src[light_index].dist_atten_bias);
                 std::string lut_index = "(" + scale + " * length(fragment_position - " + light_src + ".position) + " + bias + ")";
-                std::string clamped_lut_index = "((clamp(int(" + lut_index + " * 256.0), 0, 255)))";
+                std::string clamped_lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))";
 
                 const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num);
-                out += "dist_atten = lighting_lut_" + std::to_string(lut_num / 4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "];\n";
+                out += "dist_atten = texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "];\n";
             }
 
             // Compute primary fragment color (diffuse lighting) function
@@ -447,7 +443,7 @@ vec4 secondary_fragment_color = vec4(0.0);
             // Compute secondary fragment color (specular lighting) function
             std::string clamped_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs);
             const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0;
-            std::string lut_lookup = "lighting_lut_" + std::to_string(lut_num / 4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "]";
+            std::string lut_lookup = "texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "]";
 
             out += "specular_sum += (" + lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten);\n";
         }
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index a823729956..ab4b6c7b12 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -170,6 +170,14 @@ void OpenGLState::Apply() {
         }
     }
 
+    // Lighting LUTs
+    for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) {
+        if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) {
+            glActiveTexture(GL_TEXTURE3 + i);
+            glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d);
+        }
+    }
+
     // Framebuffer
     if (draw.framebuffer != cur_state.draw.framebuffer) {
         glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b8ab45bb8d..e848058d77 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -61,6 +61,10 @@ public:
         GLuint sampler; // GL_SAMPLER_BINDING
     } texture_units[3];
 
+    struct {
+        GLuint texture_1d; // GL_TEXTURE_BINDING_1D
+    } lighting_lut[6];
+
     struct {
         GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
         GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING

From 603b619cbe81ba1fc4dda83dfd88d99e53c95270 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 16 Nov 2015 20:56:28 -0500
Subject: [PATCH 10/32] gl_shader_gen: View vector needs to be normalized when
 computing half angle vector.

---
 src/video_core/renderer_opengl/gl_shader_gen.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index cb570c4d26..73de94457a 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -368,19 +368,19 @@ vec4 secondary_fragment_color = vec4(0.0);
         out += "vec3 specular_sum = vec3(0.0);\n";
         out += "vec3 fragment_position = -view;\n";
         out += "vec3 light_vector = vec3(0.0);\n";
+        out += "vec3 half_angle_vector = vec3(0.0);\n";
         out += "float dist_atten = 1.0;\n";
 
         // Gets the index into the specified lookup table for specular lighting
         auto GetLutIndex = [&](unsigned light_num, Regs::LightingLutInput input, bool abs) {
-            const std::string half_angle = "normalize(view + light_vector)";
             std::string index;
             switch (input) {
             case Regs::LightingLutInput::NH:
-                index  = "dot(normal, " + half_angle + ")";
+                index  = "dot(normal, half_angle_vector)";
                 break;
 
             case Regs::LightingLutInput::VH:
-                index = std::string("dot(view, " + half_angle + ")");
+                index = std::string("dot(view, half_angle_vector)");
                 break;
 
             case Regs::LightingLutInput::NV:
@@ -441,6 +441,7 @@ vec4 secondary_fragment_color = vec4(0.0);
             out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * dist_atten;\n";
 
             // Compute secondary fragment color (specular lighting) function
+            out += "half_angle_vector = normalize(normalize(view) + light_vector);\n";
             std::string clamped_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs);
             const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0;
             std::string lut_lookup = "texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "]";

From bdc72d090458ab8af288304463ea75e975f1327d Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 18 Nov 2015 21:14:50 -0500
Subject: [PATCH 11/32] gl_shader_gen: Fix bug with lighting where clamp
 highlights was only applied to last light.

---
 src/video_core/renderer_opengl/gl_shader_gen.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 73de94457a..7821170dbb 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -369,6 +369,7 @@ vec4 secondary_fragment_color = vec4(0.0);
         out += "vec3 fragment_position = -view;\n";
         out += "vec3 light_vector = vec3(0.0);\n";
         out += "vec3 half_angle_vector = vec3(0.0);\n";
+        out += "float clamp_highlights = 1.0;\n";
         out += "float dist_atten = 1.0;\n";
 
         // Gets the index into the specified lookup table for specular lighting
@@ -446,17 +447,16 @@ vec4 secondary_fragment_color = vec4(0.0);
             const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0;
             std::string lut_lookup = "texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "]";
 
-            out += "specular_sum += (" + lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten);\n";
-        }
+            if (config.clamp_highlights) {
+                out += "clamp_highlights = (dot(light_vector, normal) <= 0.0) ? 0.0 : 1.0;\n";
+            }
 
-        out += "float clamp_highlights = 1.0;\n";
-        if (config.clamp_highlights) {
-            out += "if (dot(light_vector, normal) <= 0.0) clamp_highlights = 0.0;\n";
+            out += "specular_sum += clamp_highlights * " + lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten;\n";
         }
 
         out += "diffuse_sum += lighting_global_ambient;\n";
         out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
-        out += "secondary_fragment_color = vec4(clamp(clamp_highlights * specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
+        out += "secondary_fragment_color = vec4(clamp(specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
     }
 
     // Do not do any sort of processing if it's obvious we're not going to pass the alpha test

From 5f3bad8fb19004eebc1aec7df295d9c807a64fef Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 18 Nov 2015 21:16:06 -0500
Subject: [PATCH 12/32] gl_shader_gen: Fix directional lights.

---
 src/video_core/renderer_opengl/gl_shader_gen.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 7821170dbb..8bc8e2b362 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -416,7 +416,7 @@ vec4 secondary_fragment_color = vec4(0.0);
             std::string light_src = "light_src[" + std::to_string(num) + "]";
 
             if (config.light_src[light_index].directional)
-                out += "light_vector = normalize(-" + light_src + ".position);\n";
+                out += "light_vector = normalize(" + light_src + ".position);\n";
             else
                 out += "light_vector = normalize(" + light_src + ".position - fragment_position);\n";
 

From 76f303538b8fd5c4bed1f5878058fb4c18fb045f Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 18 Nov 2015 22:36:01 -0500
Subject: [PATCH 13/32] gl_shader_gen: Reorganize and cleanup lighting code.

- No functional difference.
---
 .../renderer_opengl/gl_shader_gen.cpp         | 207 +++++++++---------
 1 file changed, 107 insertions(+), 100 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 8bc8e2b362..10cb2d0652 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -318,6 +318,111 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
         out += "next_combiner_buffer.a = last_tex_env_out.a;\n";
 }
 
+/// Writes the code to emulate fragment lighting
+static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
+    // Define lighting globals
+    out += "vec3 diffuse_sum = vec3(0.0);\n";
+    out += "vec3 specular_sum = vec3(0.0);\n";
+    out += "vec3 light_vector = vec3(0.0);\n";
+
+    // Convert interpolated quaternion to a GL fragment normal
+    out += "vec3 normal = normalize(vec3(\n";
+    out += "          2.f*(normquat.x*normquat.z + normquat.y*normquat.w),\n";
+    out += "          2.f*(normquat.y*normquat.z + normquat.x*normquat.w),\n";
+    out += "    1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n";
+
+    // Gets the index into the specified lookup table for specular lighting
+    auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) {
+        const std::string half_angle = "normalize(normalize(view) + light_vector)";
+        std::string index;
+        switch (input) {
+        case Regs::LightingLutInput::NH:
+            index = "dot(normal, " + half_angle + ")";
+            break;
+
+        case Regs::LightingLutInput::VH:
+            index = std::string("dot(view, " + half_angle + ")");
+            break;
+
+        case Regs::LightingLutInput::NV:
+            index = std::string("dot(normal, view)");
+            break;
+
+        case Regs::LightingLutInput::LN:
+            index = std::string("dot(light_vector, normal)");
+            break;
+
+        default:
+            LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input);
+            UNIMPLEMENTED();
+            break;
+        }
+
+        if (abs) {
+            // LUT index is in the range of (0.0, 1.0)
+            index = config.light_src[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
+            return "clamp(" + index + ", 0.0, FLOAT_255)";
+        } else {
+            // LUT index is in the range of (-1.0, 1.0)
+            index = "clamp(" + index + ", -1.0, 1.0)";
+            return "clamp(((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0, 0.0, FLOAT_255)";
+        }
+
+        return std::string();
+    };
+
+    // Gets the lighting lookup table value given the specified sampler and index
+    auto GetLutValue = [](Regs::LightingSampler sampler, std::string lut_index) {
+        return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " +
+                           lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]");
+    };
+
+    // Write the code to emulate each enabled light
+    for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) {
+        unsigned num = config.light_src[light_index].num;
+        const auto& light_config = config.light_src[light_index];
+        std::string light_src = "light_src[" + std::to_string(num) + "]";
+
+        // Compute light vector (directional or positional)
+        if (light_config.directional)
+            out += "light_vector = normalize(" + light_src + ".position);\n";
+        else
+            out += "light_vector = normalize(" + light_src + ".position + view);\n";
+
+        // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided
+        std::string dot_product = light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)";
+
+        // If enabled, compute distance attenuation value
+        std::string dist_atten = "1.0";
+        if (light_config.dist_atten_enabled) {
+            std::string scale = std::to_string(light_config.dist_atten_scale);
+            std::string bias = std::to_string(light_config.dist_atten_bias);
+            std::string lut_index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")";
+            lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))";
+            const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num);
+            dist_atten = GetLutValue((Regs::LightingSampler)lut_num, lut_index);
+        }
+
+        // Compute primary fragment color (diffuse lighting) function
+        out += "diffuse_sum += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n";
+
+        // If enabled, clamp specular component if lighting result is negative
+        std::string clamp_highlights = config.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
+
+        // Lookup specular distribution 0 LUT value
+        std::string d0_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs);
+        std::string d0_lut_value = GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index);
+
+        // Compute secondary fragment color (specular lighting) function
+        out += "specular_sum += " + clamp_highlights + " * " + d0_lut_value + " * " + light_src + ".specular_0 * " + dist_atten + ";\n";
+    }
+
+    // Sum final lighting result
+    out += "diffuse_sum += lighting_global_ambient;\n";
+    out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
+    out += "secondary_fragment_color = vec4(clamp(specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
+}
+
 std::string GenerateFragmentShader(const PicaShaderConfig& config) {
     std::string out = R"(
 #version 330 core
@@ -358,106 +463,8 @@ vec4 primary_fragment_color = vec4(0.0);
 vec4 secondary_fragment_color = vec4(0.0);
 )";
 
-    if (config.lighting_enabled) {
-        out += "vec3 normal = normalize(vec3(\n";
-        out += "          2.f*(normquat.x*normquat.z + normquat.y*normquat.w),\n";
-        out += "          2.f*(normquat.y*normquat.z + normquat.x*normquat.w),\n";
-        out += "    1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n";
-        out += "vec4 secondary_color = vec4(0.0);\n";
-        out += "vec3 diffuse_sum = vec3(0.0);\n";
-        out += "vec3 specular_sum = vec3(0.0);\n";
-        out += "vec3 fragment_position = -view;\n";
-        out += "vec3 light_vector = vec3(0.0);\n";
-        out += "vec3 half_angle_vector = vec3(0.0);\n";
-        out += "float clamp_highlights = 1.0;\n";
-        out += "float dist_atten = 1.0;\n";
-
-        // Gets the index into the specified lookup table for specular lighting
-        auto GetLutIndex = [&](unsigned light_num, Regs::LightingLutInput input, bool abs) {
-            std::string index;
-            switch (input) {
-            case Regs::LightingLutInput::NH:
-                index  = "dot(normal, half_angle_vector)";
-                break;
-
-            case Regs::LightingLutInput::VH:
-                index = std::string("dot(view, half_angle_vector)");
-                break;
-
-            case Regs::LightingLutInput::NV:
-                index = std::string("dot(normal, view)");
-                break;
-
-            case Regs::LightingLutInput::LN:
-                index  = std::string("dot(light_vector, normal)");
-                break;
-
-            default:
-                LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input);
-                UNIMPLEMENTED();
-                break;
-            }
-
-            if (abs) {
-                // In the range of [ 0.f, 1.f]
-                index = config.light_src[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
-                return "clamp(" + index + ", 0.0, FLOAT_255)";
-            } else {
-                // In the range of [-1.f, 1.f]
-                index = "clamp(" + index + ", -1.0, 1.0)";
-                return "clamp(((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0, 0.0, FLOAT_255)";
-            }
-
-            return std::string();
-        };
-
-        for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) {
-            unsigned num = config.light_src[light_index].num;
-            std::string light_src = "light_src[" + std::to_string(num) + "]";
-
-            if (config.light_src[light_index].directional)
-                out += "light_vector = normalize(" + light_src + ".position);\n";
-            else
-                out += "light_vector = normalize(" + light_src + ".position - fragment_position);\n";
-
-            std::string dot_product;
-            if (config.light_src[light_index].two_sided_diffuse)
-                dot_product = "abs(dot(light_vector, normal))";
-            else
-                dot_product = "max(dot(light_vector, normal), 0.0)";
-
-            // Compute distance attenuation value
-            out += "dist_atten = 1.0;\n";
-            if (config.light_src[light_index].dist_atten_enabled) {
-                std::string scale = std::to_string(config.light_src[light_index].dist_atten_scale);
-                std::string bias = std::to_string(config.light_src[light_index].dist_atten_bias);
-                std::string lut_index = "(" + scale + " * length(fragment_position - " + light_src + ".position) + " + bias + ")";
-                std::string clamped_lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))";
-
-                const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num);
-                out += "dist_atten = texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "];\n";
-            }
-
-            // Compute primary fragment color (diffuse lighting) function
-            out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * dist_atten;\n";
-
-            // Compute secondary fragment color (specular lighting) function
-            out += "half_angle_vector = normalize(normalize(view) + light_vector);\n";
-            std::string clamped_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs);
-            const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0;
-            std::string lut_lookup = "texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "]";
-
-            if (config.clamp_highlights) {
-                out += "clamp_highlights = (dot(light_vector, normal) <= 0.0) ? 0.0 : 1.0;\n";
-            }
-
-            out += "specular_sum += clamp_highlights * " + lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten;\n";
-        }
-
-        out += "diffuse_sum += lighting_global_ambient;\n";
-        out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
-        out += "secondary_fragment_color = vec4(clamp(specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
-    }
+    if (config.lighting_enabled)
+        WriteLighting(out, config);
 
     // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
     if (config.alpha_test_func == Regs::CompareFunc::Never) {

From 6878ba7608b14b6508f8de8f0070acdba6bb1837 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 18 Nov 2015 22:55:24 -0500
Subject: [PATCH 14/32] gl_rasterizer: Minor refactor of Pica register
 naming.

---
 src/video_core/pica.h                         | 31 ++++++++++---------
 .../renderer_opengl/gl_rasterizer.h           | 10 +++---
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index c63d87a36b..1808d4396c 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -715,26 +715,29 @@ struct Regs {
         union {
             BitField< 4, 4, u32> config;
             BitField<27, 1, u32> clamp_highlights;
-        } light_env;
+        };
 
         union {
             // Each bit specifies whether distance attenuation should be applied for the
             // corresponding light
 
-            BitField<24, 1, u32> light_0;
-            BitField<25, 1, u32> light_1;
-            BitField<26, 1, u32> light_2;
-            BitField<27, 1, u32> light_3;
-            BitField<28, 1, u32> light_4;
-            BitField<29, 1, u32> light_5;
-            BitField<30, 1, u32> light_6;
-            BitField<31, 1, u32> light_7;
+            BitField<24, 1, u32> dist_atten_enable_light_0;
+            BitField<25, 1, u32> dist_atten_enable_light_1;
+            BitField<26, 1, u32> dist_atten_enable_light_2;
+            BitField<27, 1, u32> dist_atten_enable_light_3;
+            BitField<28, 1, u32> dist_atten_enable_light_4;
+            BitField<29, 1, u32> dist_atten_enable_light_5;
+            BitField<30, 1, u32> dist_atten_enable_light_6;
+            BitField<31, 1, u32> dist_atten_enable_light_7;
+        };
 
-            bool IsEnabled(unsigned index) const {
-                const unsigned enable[] = { light_0, light_1, light_2, light_3, light_4, light_5, light_6, light_7 };
-                return enable[index] == 0;
-            }
-        } dist_atten_enable;
+        bool IsDistAttenEnabled(unsigned index) const {
+            const unsigned enable[] = { dist_atten_enable_light_0, dist_atten_enable_light_1,
+                                        dist_atten_enable_light_2, dist_atten_enable_light_3,
+                                        dist_atten_enable_light_4, dist_atten_enable_light_5,
+                                        dist_atten_enable_light_6, dist_atten_enable_light_7 };
+            return enable[index] == 0;
+        }
 
         union {
             BitField<0, 8, u32> index;      ///< Index at which to set data in the LUT
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b50542701a..17bda2d1d6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,16 +80,16 @@ struct PicaShaderConfig {
             unsigned num = regs.lighting.light_enable.GetNum(light_index);
             const auto& light = regs.lighting.light[num];
             res.light_src[light_index].num = num;
-            res.light_src[light_index].directional = light.w;
-            res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse;
-            res.light_src[light_index].dist_atten_enabled = regs.lighting.dist_atten_enable.IsEnabled(num);
+            res.light_src[light_index].directional = light.w != 0;
+            res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
+            res.light_src[light_index].dist_atten_enabled = regs.lighting.IsDistAttenEnabled(num);
             res.light_src[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32();
             res.light_src[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32();
         }
 
-        res.lighting_lut.d0_abs = (regs.lighting.abs_lut_input.d0 == 0);
+        res.lighting_lut.d0_abs = regs.lighting.abs_lut_input.d0 == 0;
         res.lighting_lut.d0_type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value();
-        res.clamp_highlights = regs.lighting.light_env.clamp_highlights;
+        res.clamp_highlights = regs.lighting.clamp_highlights != 0;
 
         return res;
     }

From 6307999116d250a9805c0d7ae2c131407772fc3f Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 18 Nov 2015 23:17:25 -0500
Subject: [PATCH 15/32] pica: Clean up and add some comments to lighting
 registers.

---
 src/video_core/pica.h                         | 36 +++++++++----------
 .../renderer_opengl/gl_rasterizer.h           |  2 +-
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 1808d4396c..48854dda21 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -695,8 +695,8 @@ struct Regs {
                 INSERT_PADDING_WORDS(0x3);
 
                 union {
-                    BitField<0, 1, u32> w; // 1.f if 0, otherwise 0.f
-                    BitField<1, 1, u32> two_sided_diffuse; // when disabled, clamp dot-product to 0
+                    BitField<0, 1, u32> directional;
+                    BitField<1, 1, u32> two_sided_diffuse; // 1: GL_TRUE, 0: GL_FALSE; when disabled, clamp dot-product to 0
                 };
             };
 
@@ -714,21 +714,21 @@ struct Regs {
 
         union {
             BitField< 4, 4, u32> config;
-            BitField<27, 1, u32> clamp_highlights;
+            BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE
         };
 
         union {
             // Each bit specifies whether distance attenuation should be applied for the
             // corresponding light
 
-            BitField<24, 1, u32> dist_atten_enable_light_0;
-            BitField<25, 1, u32> dist_atten_enable_light_1;
-            BitField<26, 1, u32> dist_atten_enable_light_2;
-            BitField<27, 1, u32> dist_atten_enable_light_3;
-            BitField<28, 1, u32> dist_atten_enable_light_4;
-            BitField<29, 1, u32> dist_atten_enable_light_5;
-            BitField<30, 1, u32> dist_atten_enable_light_6;
-            BitField<31, 1, u32> dist_atten_enable_light_7;
+            BitField<24, 1, u32> dist_atten_enable_light_0; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<25, 1, u32> dist_atten_enable_light_1; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<26, 1, u32> dist_atten_enable_light_2; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<27, 1, u32> dist_atten_enable_light_3; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<28, 1, u32> dist_atten_enable_light_4; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<29, 1, u32> dist_atten_enable_light_5; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<30, 1, u32> dist_atten_enable_light_6; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<31, 1, u32> dist_atten_enable_light_7; // 0: GL_TRUE, 1: GL_FALSE
         };
 
         bool IsDistAttenEnabled(unsigned index) const {
@@ -754,13 +754,13 @@ struct Regs {
         u32 lut_data[8];
 
         union {
-            BitField< 1, 1, u32> d0;
-            BitField< 5, 1, u32> d1;
-            BitField< 9, 1, u32> sp;
-            BitField<13, 1, u32> fr;
-            BitField<17, 1, u32> rb;
-            BitField<21, 1, u32> rg;
-            BitField<25, 1, u32> rr;
+            BitField< 1, 1, u32> d0; // 0: GL_TRUE, 1: GL_FALSE
+            BitField< 5, 1, u32> d1; // 0: GL_TRUE, 1: GL_FALSE
+            BitField< 9, 1, u32> sp; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<13, 1, u32> fr; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<17, 1, u32> rb; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<21, 1, u32> rg; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<25, 1, u32> rr; // 0: GL_TRUE, 1: GL_FALSE
         } abs_lut_input;
 
         union {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 17bda2d1d6..6be161efd4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,7 +80,7 @@ struct PicaShaderConfig {
             unsigned num = regs.lighting.light_enable.GetNum(light_index);
             const auto& light = regs.lighting.light[num];
             res.light_src[light_index].num = num;
-            res.light_src[light_index].directional = light.w != 0;
+            res.light_src[light_index].directional = light.directional != 0;
             res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
             res.light_src[light_index].dist_atten_enabled = regs.lighting.IsDistAttenEnabled(num);
             res.light_src[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32();

From 3d89dacd56064c3c49cd1ae9482a0221f1912f56 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 18 Nov 2015 23:40:18 -0500
Subject: [PATCH 16/32] gl_shader_gen: Refactor lighting config to match Pica
 register naming.

- Also implement D0 LUT enable.
---
 src/video_core/pica.h                         |  2 +
 .../renderer_opengl/gl_rasterizer.h           | 56 ++++++++++---------
 .../renderer_opengl/gl_shader_gen.cpp         | 30 +++++-----
 3 files changed, 48 insertions(+), 40 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 48854dda21..b1cf072f11 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -718,6 +718,8 @@ struct Regs {
         };
 
         union {
+            BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE
+
             // Each bit specifies whether distance attenuation should be applied for the
             // corresponding light
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 6be161efd4..2042be786f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -73,23 +73,24 @@ struct PicaShaderConfig {
 
         // Fragment lighting
 
-        res.lighting_enabled = !regs.lighting.disable;
-        res.num_lights = regs.lighting.src_num + 1;
+        res.lighting.enable = !regs.lighting.disable;
+        res.lighting.src_num = regs.lighting.src_num + 1;
 
-        for (unsigned light_index = 0; light_index < res.num_lights; ++light_index) {
+        for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) {
             unsigned num = regs.lighting.light_enable.GetNum(light_index);
             const auto& light = regs.lighting.light[num];
-            res.light_src[light_index].num = num;
-            res.light_src[light_index].directional = light.directional != 0;
-            res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
-            res.light_src[light_index].dist_atten_enabled = regs.lighting.IsDistAttenEnabled(num);
-            res.light_src[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32();
-            res.light_src[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32();
+            res.lighting.light[light_index].num = num;
+            res.lighting.light[light_index].directional = light.directional != 0;
+            res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
+            res.lighting.light[light_index].dist_atten_enable = regs.lighting.IsDistAttenEnabled(num);
+            res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32();
+            res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32();
         }
 
-        res.lighting_lut.d0_abs = regs.lighting.abs_lut_input.d0 == 0;
-        res.lighting_lut.d0_type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value();
-        res.clamp_highlights = regs.lighting.clamp_highlights != 0;
+        res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0;
+        res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0;
+        res.lighting.lut_d0.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value();
+        res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
 
         return res;
     }
@@ -112,22 +113,25 @@ struct PicaShaderConfig {
         u8 combiner_buffer_input = 0;
 
         struct {
-            unsigned num = 0;
-            bool directional = false;
-            bool two_sided_diffuse = false;
-            bool dist_atten_enabled = false;
-            GLfloat dist_atten_scale = 0.0f;
-            GLfloat dist_atten_bias = 0.0f;
-        } light_src[8];
+            struct {
+                unsigned num = 0;
+                bool directional = false;
+                bool two_sided_diffuse = false;
+                bool dist_atten_enable = false;
+                GLfloat dist_atten_scale = 0.0f;
+                GLfloat dist_atten_bias = 0.0f;
+            } light[8];
 
-        bool lighting_enabled = false;
-        unsigned num_lights = 0;
-        bool clamp_highlights = false;
+            bool enable = false;
+            unsigned src_num = 0;
+            bool clamp_highlights = false;
 
-        struct {
-            bool d0_abs = false;
-            Pica::Regs::LightingLutInput d0_type = Pica::Regs::LightingLutInput::NH;
-        } lighting_lut;
+            struct {
+                bool enable = false;
+                bool abs_input = false;
+                Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH;
+            } lut_d0;
+        } lighting;
     };
 };
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 10cb2d0652..a2770cc6e0 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -360,7 +360,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
 
         if (abs) {
             // LUT index is in the range of (0.0, 1.0)
-            index = config.light_src[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
+            index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
             return "clamp(" + index + ", 0.0, FLOAT_255)";
         } else {
             // LUT index is in the range of (-1.0, 1.0)
@@ -378,10 +378,9 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
     };
 
     // Write the code to emulate each enabled light
-    for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) {
-        unsigned num = config.light_src[light_index].num;
-        const auto& light_config = config.light_src[light_index];
-        std::string light_src = "light_src[" + std::to_string(num) + "]";
+    for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) {
+        const auto& light_config = config.lighting.light[light_index];
+        std::string light_src = "light_src[" + std::to_string(light_config.num) + "]";
 
         // Compute light vector (directional or positional)
         if (light_config.directional)
@@ -394,12 +393,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
 
         // If enabled, compute distance attenuation value
         std::string dist_atten = "1.0";
-        if (light_config.dist_atten_enabled) {
+        if (light_config.dist_atten_enable) {
             std::string scale = std::to_string(light_config.dist_atten_scale);
             std::string bias = std::to_string(light_config.dist_atten_bias);
             std::string lut_index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")";
             lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))";
-            const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num);
+            const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num);
             dist_atten = GetLutValue((Regs::LightingSampler)lut_num, lut_index);
         }
 
@@ -407,11 +406,14 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         out += "diffuse_sum += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n";
 
         // If enabled, clamp specular component if lighting result is negative
-        std::string clamp_highlights = config.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
+        std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
 
-        // Lookup specular distribution 0 LUT value
-        std::string d0_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs);
-        std::string d0_lut_value = GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index);
+        // Lookup specular "distribution 0" LUT value
+        std::string d0_lut_value = "1.0";
+        if (config.lighting.lut_d0.enable) {
+            std::string d0_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input);
+            d0_lut_value = GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index);
+        }
 
         // Compute secondary fragment color (specular lighting) function
         out += "specular_sum += " + clamp_highlights + " * " + d0_lut_value + " * " + light_src + ".specular_0 * " + dist_atten + ";\n";
@@ -463,15 +465,15 @@ vec4 primary_fragment_color = vec4(0.0);
 vec4 secondary_fragment_color = vec4(0.0);
 )";
 
-    if (config.lighting_enabled)
-        WriteLighting(out, config);
-
     // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
     if (config.alpha_test_func == Regs::CompareFunc::Never) {
         out += "discard; }";
         return out;
     }
 
+    if (config.lighting.enable)
+        WriteLighting(out, config);
+
     out += "vec4 combiner_buffer = vec4(0.0);\n";
     out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
     out += "vec4 last_tex_env_out = vec4(0.0);\n";

From 781b0465795fb80404e2790be2d10bfb1f7149aa Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 19 Nov 2015 19:00:42 -0500
Subject: [PATCH 17/32] gl_shader_gen: Add support for D0 LUT scaling.

---
 src/video_core/pica.h                         | 68 ++++++++++++++++++-
 .../renderer_opengl/gl_rasterizer.h           |  4 +-
 .../renderer_opengl/gl_shader_gen.cpp         |  2 +-
 3 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b1cf072f11..5d27da5d14 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -657,6 +657,44 @@ struct Regs {
         DistanceAttenuation = 16,
     };
 
+    /**
+     * Pica fragment lighting supports using different LUTs for each lighting component:
+     * Reflectance R, G, and B channels, distribution function for specular components 0 and 1,
+     * fresnel factor, and spotlight attenuation. Furthermore, which LUTs are used for each channel
+     * (or whether a channel is enabled at all) is specified by various pre-defined lighting
+     * configurations. With configurations that require more LUTs, more cycles are required on HW to
+     * perform lighting computations.
+     */
+    enum class LightingConfig {
+        Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight
+        Config1 = 1, ///< Reflect Red, Fresnel, Spotlight
+        Config2 = 2, ///< Reflect Red, Distribution 0/1
+        Config3 = 3, ///< Distribution 0/1, Fresnel
+        Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight
+        Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight
+        Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight
+        Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight
+                     ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration
+    };
+
+    /// Selects which lighting components are affected by fresnel
+    enum class LightingFresnelSelector {
+        None = 0,                             ///< Fresnel is disabled
+        PrimaryAlpha = 1,                     ///< Primary (diffuse) lighting alpha is affected by fresnel
+        SecondaryAlpha = 2,                   ///< Secondary (specular) lighting alpha is affected by fresnel
+        Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel
+    };
+
+    /// Factor used to scale the output of a lighting LUT
+    enum class LightingScale {
+        Scale1 = 0,   ///< Scale is 1x
+        Scale2 = 1,   ///< Scale is 2x
+        Scale4 = 2,   ///< Scale is 4x
+        Scale8 = 3,   ///< Scale is 8x
+        Scale1_4 = 6, ///< Scale is 0.25x
+        Scale1_2 = 7, ///< Scale is 0.5x
+    };
+
     enum class LightingLutInput {
         NH = 0, // Cosine of the angle between the normal and half-angle vectors
         VH = 1, // Cosine of the angle between the view and half-angle vectors
@@ -775,7 +813,35 @@ struct Regs {
             BitField<24, 3, u32> rr;
         } lut_input;
 
-        INSERT_PADDING_WORDS(0x7);
+        union {
+            BitField< 0, 3, LightingScale> d0;
+            BitField< 4, 3, LightingScale> d1;
+            BitField< 8, 3, LightingScale> sp;
+            BitField<12, 3, LightingScale> fr;
+            BitField<16, 3, LightingScale> rb;
+            BitField<20, 3, LightingScale> rg;
+            BitField<24, 3, LightingScale> rr;
+
+            static float GetScale(LightingScale scale) {
+                switch (scale) {
+                case LightingScale::Scale1:
+                    return 1.0f;
+                case LightingScale::Scale2:
+                    return 2.0f;
+                case LightingScale::Scale4:
+                    return 4.0f;
+                case LightingScale::Scale8:
+                    return 8.0f;
+                case LightingScale::Scale1_4:
+                    return 0.25f;
+                case LightingScale::Scale1_2:
+                    return 0.5f;
+                }
+                return 0.0f;
+            }
+        } lut_scale;
+
+        INSERT_PADDING_WORDS(0x6);
 
         union {
             // There are 8 light enable "slots", corresponding to the total number of lights
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 2042be786f..72ded8f22e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -90,6 +90,7 @@ struct PicaShaderConfig {
         res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0;
         res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0;
         res.lighting.lut_d0.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value();
+        res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
         res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
 
         return res;
@@ -130,7 +131,8 @@ struct PicaShaderConfig {
                 bool enable = false;
                 bool abs_input = false;
                 Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH;
-            } lut_d0;
+                float scale = 1.0f;
+            } lut_d0, lut_d1, lut_fr;
         } lighting;
     };
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index a2770cc6e0..9044a38137 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -412,7 +412,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         std::string d0_lut_value = "1.0";
         if (config.lighting.lut_d0.enable) {
             std::string d0_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input);
-            d0_lut_value = GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index);
+            d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index) + ")";
         }
 
         // Compute secondary fragment color (specular lighting) function

From 0e67c21c9e5bb0e213d3b13bdd7592ff2a44a31c Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 19 Nov 2015 22:42:06 -0500
Subject: [PATCH 18/32] gl_shader_gen: Implement fragment lighting specular 1
 component.

---
 src/video_core/pica.h                         | 27 +++++++++++++------
 .../renderer_opengl/gl_rasterizer.h           |  8 ++++++
 .../renderer_opengl/gl_shader_gen.cpp         | 17 +++++++++---
 3 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 5d27da5d14..83af6a1271 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -713,6 +713,16 @@ struct Regs {
         }
     };
 
+    static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
+        switch (sampler) {
+        case LightingSampler::Distribution0:
+            return (config != LightingConfig::Config1);
+        case LightingSampler::Distribution1:
+            return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5);
+        }
+        return false;
+    }
+
     struct {
         struct LightSrc {
             LightColor specular_0;  // material.specular_0 * light.specular_0
@@ -751,12 +761,13 @@ struct Regs {
         BitField<0, 3, u32> src_num; // number of enabled lights - 1
 
         union {
-            BitField< 4, 4, u32> config;
+            BitField< 4, 4, LightingConfig> config;
             BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE
         };
 
         union {
             BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<17, 1, u32> lut_enable_d1; // 0: GL_TRUE, 1: GL_FALSE
 
             // Each bit specifies whether distance attenuation should be applied for the
             // corresponding light
@@ -804,13 +815,13 @@ struct Regs {
         } abs_lut_input;
 
         union {
-            BitField< 0, 3, u32> d0;
-            BitField< 4, 3, u32> d1;
-            BitField< 8, 3, u32> sp;
-            BitField<12, 3, u32> fr;
-            BitField<16, 3, u32> rb;
-            BitField<20, 3, u32> rg;
-            BitField<24, 3, u32> rr;
+            BitField< 0, 3, LightingLutInput> d0;
+            BitField< 4, 3, LightingLutInput> d1;
+            BitField< 8, 3, LightingLutInput> sp;
+            BitField<12, 3, LightingLutInput> fr;
+            BitField<16, 3, LightingLutInput> rb;
+            BitField<20, 3, LightingLutInput> rg;
+            BitField<24, 3, LightingLutInput> rr;
         } lut_input;
 
         union {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 72ded8f22e..788618ed21 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -91,6 +91,13 @@ struct PicaShaderConfig {
         res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0;
         res.lighting.lut_d0.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value();
         res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
+
+        res.lighting.lut_d1.enable = regs.lighting.lut_enable_d1 == 0;
+        res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.d1 == 0;
+        res.lighting.lut_d1.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d1.Value();
+        res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
+
+        res.lighting.config = regs.lighting.config;
         res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
 
         return res;
@@ -126,6 +133,7 @@ struct PicaShaderConfig {
             bool enable = false;
             unsigned src_num = 0;
             bool clamp_highlights = false;
+            Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
 
             struct {
                 bool enable = false;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 9044a38137..4f8b675bf8 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -408,15 +408,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         // If enabled, clamp specular component if lighting result is negative
         std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
 
-        // Lookup specular "distribution 0" LUT value
+        // Specular 0 component
         std::string d0_lut_value = "1.0";
-        if (config.lighting.lut_d0.enable) {
+        if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
+            // Lookup specular "distribution 0" LUT value
             std::string d0_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input);
             d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index) + ")";
         }
+        std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
+
+        // Specular 1 component
+        std::string d1_lut_value = "1.0";
+        if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
+            // Lookup specular "distribution 1" LUT value
+            std::string d1_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input);
+            d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, d1_lut_index) + ")";
+        }
+        std::string specular_1 = "(" + d1_lut_value + " * " + light_src + ".specular_1)";
 
         // Compute secondary fragment color (specular lighting) function
-        out += "specular_sum += " + clamp_highlights + " * " + d0_lut_value + " * " + light_src + ".specular_0 * " + dist_atten + ";\n";
+        out += "specular_sum += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n";
     }
 
     // Sum final lighting result

From c37de30cfc21cd6d742eed27a996a273f5ec2ca1 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 23 Nov 2015 20:26:09 -0500
Subject: [PATCH 19/32] gl_shader_gen: Implement fragment lighting fresnel
 effect.

---
 src/video_core/pica.h                         |  5 +++
 .../renderer_opengl/gl_rasterizer.h           |  7 ++++
 .../renderer_opengl/gl_shader_gen.cpp         | 35 ++++++++++++++-----
 3 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 83af6a1271..76db510388 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -713,12 +713,15 @@ struct Regs {
         }
     };
 
+    /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration
     static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
         switch (sampler) {
         case LightingSampler::Distribution0:
             return (config != LightingConfig::Config1);
         case LightingSampler::Distribution1:
             return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5);
+        case LightingSampler::Fresnel:
+            return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4);
         }
         return false;
     }
@@ -761,6 +764,7 @@ struct Regs {
         BitField<0, 3, u32> src_num; // number of enabled lights - 1
 
         union {
+            BitField< 2, 2, LightingFresnelSelector> fresnel_selector;
             BitField< 4, 4, LightingConfig> config;
             BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE
         };
@@ -768,6 +772,7 @@ struct Regs {
         union {
             BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE
             BitField<17, 1, u32> lut_enable_d1; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<19, 1, u32> lut_enable_fr; // 0: GL_TRUE, 1: GL_FALSE
 
             // Each bit specifies whether distance attenuation should be applied for the
             // corresponding light
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 788618ed21..1d4d73ae19 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -97,7 +97,13 @@ struct PicaShaderConfig {
         res.lighting.lut_d1.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d1.Value();
         res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
 
+        res.lighting.lut_fr.enable = regs.lighting.lut_enable_fr == 0;
+        res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.fr == 0;
+        res.lighting.lut_fr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.fr.Value();
+        res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
+
         res.lighting.config = regs.lighting.config;
+        res.lighting.fresnel_selector = regs.lighting.fresnel_selector;
         res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
 
         return res;
@@ -134,6 +140,7 @@ struct PicaShaderConfig {
             unsigned src_num = 0;
             bool clamp_highlights = false;
             Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
+            Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
 
             struct {
                 bool enable = false;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 4f8b675bf8..6487172b49 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -321,8 +321,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
 /// Writes the code to emulate fragment lighting
 static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
     // Define lighting globals
-    out += "vec3 diffuse_sum = vec3(0.0);\n";
-    out += "vec3 specular_sum = vec3(0.0);\n";
+    out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n";
+    out += "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n";
     out += "vec3 light_vector = vec3(0.0);\n";
 
     // Convert interpolated quaternion to a GL fragment normal
@@ -402,9 +402,6 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
             dist_atten = GetLutValue((Regs::LightingSampler)lut_num, lut_index);
         }
 
-        // Compute primary fragment color (diffuse lighting) function
-        out += "diffuse_sum += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n";
-
         // If enabled, clamp specular component if lighting result is negative
         std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
 
@@ -426,14 +423,34 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         }
         std::string specular_1 = "(" + d1_lut_value + " * " + light_src + ".specular_1)";
 
+        // Fresnel
+        if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
+            // Lookup fresnel LUT value
+            std::string fr_lut_index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input);
+            std::string fr_lut_value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, fr_lut_index) + ")";
+
+            // Enabled for difffuse lighting alpha component
+            if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
+                config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::BothAlpha)
+                out += "diffuse_sum.a  *= " + fr_lut_value + ";\n";
+
+            // Enabled for the specular lighting alpha component
+            if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
+                config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::BothAlpha)
+                out += "specular_sum.a *= " + fr_lut_value + ";\n";
+        }
+
+        // Compute primary fragment color (diffuse lighting) function
+        out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n";
+
         // Compute secondary fragment color (specular lighting) function
-        out += "specular_sum += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n";
+        out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n";
     }
 
     // Sum final lighting result
-    out += "diffuse_sum += lighting_global_ambient;\n";
-    out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
-    out += "secondary_fragment_color = vec4(clamp(specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n";
+    out += "diffuse_sum.rgb += lighting_global_ambient;\n";
+    out += "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n";
+    out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n";
 }
 
 std::string GenerateFragmentShader(const PicaShaderConfig& config) {

From 01b407638cdee1b2435018d730a698aa1c65d6a4 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 24 Nov 2015 22:59:14 -0500
Subject: [PATCH 20/32] gl_shader_gen: View should be normalized.

---
 src/video_core/renderer_opengl/gl_shader_gen.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 6487172b49..4f87c58461 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -341,11 +341,11 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
             break;
 
         case Regs::LightingLutInput::VH:
-            index = std::string("dot(view, " + half_angle + ")");
+            index = std::string("dot(normalize(view), " + half_angle + ")");
             break;
 
         case Regs::LightingLutInput::NV:
-            index = std::string("dot(normal, view)");
+            index = std::string("dot(normal, normalize(view))");
             break;
 
         case Regs::LightingLutInput::LN:

From 348c9c9ff32a493a005da72dba1832da118e4b0b Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 25 Nov 2015 20:25:02 -0500
Subject: [PATCH 21/32] gl_shader_gen: Implement lighting red, green, and blue
 reflection.

---
 src/video_core/pica.h                         | 18 +++++-
 .../renderer_opengl/gl_rasterizer.h           | 18 +++++-
 .../renderer_opengl/gl_shader_gen.cpp         | 62 ++++++++++++++-----
 3 files changed, 77 insertions(+), 21 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 76db510388..267070e457 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -650,9 +650,9 @@ struct Regs {
         Distribution0 = 0,
         Distribution1 = 1,
         Fresnel = 3,
-        Blue = 4,
-        Green = 5,
-        Red = 6,
+        ReflectBlue = 4,
+        ReflectGreen = 5,
+        ReflectRed = 6,
         SpotlightAttenuation = 8,
         DistanceAttenuation = 16,
     };
@@ -718,10 +718,19 @@ struct Regs {
         switch (sampler) {
         case LightingSampler::Distribution0:
             return (config != LightingConfig::Config1);
+
         case LightingSampler::Distribution1:
             return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5);
+
         case LightingSampler::Fresnel:
             return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4);
+
+        case LightingSampler::ReflectRed:
+            return (config != LightingConfig::Config3);
+
+        case LightingSampler::ReflectGreen:
+        case LightingSampler::ReflectBlue:
+            return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7);
         }
         return false;
     }
@@ -773,6 +782,9 @@ struct Regs {
             BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE
             BitField<17, 1, u32> lut_enable_d1; // 0: GL_TRUE, 1: GL_FALSE
             BitField<19, 1, u32> lut_enable_fr; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<20, 1, u32> lut_enable_rr; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<21, 1, u32> lut_enable_rg; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<22, 1, u32> lut_enable_rb; // 0: GL_TRUE, 1: GL_FALSE
 
             // Each bit specifies whether distance attenuation should be applied for the
             // corresponding light
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 1d4d73ae19..62a4d89534 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -102,6 +102,21 @@ struct PicaShaderConfig {
         res.lighting.lut_fr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.fr.Value();
         res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
 
+        res.lighting.lut_rr.enable = regs.lighting.lut_enable_rr == 0;
+        res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.rr == 0;
+        res.lighting.lut_rr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rr.Value();
+        res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
+
+        res.lighting.lut_rg.enable = regs.lighting.lut_enable_rg == 0;
+        res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.rg == 0;
+        res.lighting.lut_rg.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rg.Value();
+        res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
+
+        res.lighting.lut_rb.enable = regs.lighting.lut_enable_rb == 0;
+        res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.rb == 0;
+        res.lighting.lut_rb.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rb.Value();
+        res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
+
         res.lighting.config = regs.lighting.config;
         res.lighting.fresnel_selector = regs.lighting.fresnel_selector;
         res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
@@ -139,6 +154,7 @@ struct PicaShaderConfig {
             bool enable = false;
             unsigned src_num = 0;
             bool clamp_highlights = false;
+
             Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
             Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
 
@@ -147,7 +163,7 @@ struct PicaShaderConfig {
                 bool abs_input = false;
                 Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH;
                 float scale = 1.0f;
-            } lut_d0, lut_d1, lut_fr;
+            } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
         } lighting;
     };
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 4f87c58461..984aef5860 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -321,9 +321,10 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
 /// Writes the code to emulate fragment lighting
 static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
     // Define lighting globals
-    out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n";
-    out += "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n";
-    out += "vec3 light_vector = vec3(0.0);\n";
+    out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
+           "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
+           "vec3 light_vector = vec3(0.0);\n"
+           "vec3 refl_value = vec3(0.0);\n";
 
     // Convert interpolated quaternion to a GL fragment normal
     out += "vec3 normal = normalize(vec3(\n";
@@ -396,10 +397,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         if (light_config.dist_atten_enable) {
             std::string scale = std::to_string(light_config.dist_atten_scale);
             std::string bias = std::to_string(light_config.dist_atten_bias);
-            std::string lut_index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")";
-            lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))";
+            std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")";
+            index = "((clamp(" + index + ", 0.0, FLOAT_255)))";
             const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num);
-            dist_atten = GetLutValue((Regs::LightingSampler)lut_num, lut_index);
+            dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index);
         }
 
         // If enabled, clamp specular component if lighting result is negative
@@ -409,35 +410,62 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         std::string d0_lut_value = "1.0";
         if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
             // Lookup specular "distribution 0" LUT value
-            std::string d0_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input);
-            d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index) + ")";
+            std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input);
+            d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
         }
         std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
 
+        // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
+        if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
+            std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input);
+            std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
+            out += "refl_value.r = " + value + ";\n";
+        } else {
+            out += "refl_value.r = 1.0;\n";
+        }
+
+        // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
+        if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
+            std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input);
+            std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
+            out += "refl_value.g = " + value + ";\n";
+        } else {
+            out += "refl_value.g = refl_value.r;\n";
+        }
+
+        // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
+        if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
+            std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input);
+            std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
+            out += "refl_value.b = " + value + ";\n";
+        } else {
+            out += "refl_value.b = refl_value.r;\n";
+        }
+
         // Specular 1 component
         std::string d1_lut_value = "1.0";
         if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
             // Lookup specular "distribution 1" LUT value
-            std::string d1_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input);
-            d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, d1_lut_index) + ")";
+            std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input);
+            d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
         }
-        std::string specular_1 = "(" + d1_lut_value + " * " + light_src + ".specular_1)";
+        std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
 
         // Fresnel
         if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
             // Lookup fresnel LUT value
-            std::string fr_lut_index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input);
-            std::string fr_lut_value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, fr_lut_index) + ")";
+            std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input);
+            std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
 
             // Enabled for difffuse lighting alpha component
             if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
-                config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::BothAlpha)
-                out += "diffuse_sum.a  *= " + fr_lut_value + ";\n";
+                config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
+                out += "diffuse_sum.a  *= " + value + ";\n";
 
             // Enabled for the specular lighting alpha component
             if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
-                config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::BothAlpha)
-                out += "specular_sum.a *= " + fr_lut_value + ";\n";
+                config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
+                out += "specular_sum.a *= " + value + ";\n";
         }
 
         // Compute primary fragment color (diffuse lighting) function

From 449902b5583d6a2dbb1e4aea9802da5ad2493981 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 25 Nov 2015 20:30:27 -0500
Subject: [PATCH 22/32] gl_shader_gen: Fix bug in LUT range (should be within
 range [0, 255] not [0, 256]).

---
 src/video_core/renderer_opengl/gl_shader_gen.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 984aef5860..d59f2054b5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -362,11 +362,11 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         if (abs) {
             // LUT index is in the range of (0.0, 1.0)
             index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
-            return "clamp(" + index + ", 0.0, FLOAT_255)";
+            return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
         } else {
             // LUT index is in the range of (-1.0, 1.0)
             index = "clamp(" + index + ", -1.0, 1.0)";
-            return "clamp(((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0, 0.0, FLOAT_255)";
+            return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)";
         }
 
         return std::string();
@@ -487,7 +487,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
 #define NUM_TEV_STAGES 6
 #define NUM_LIGHTS 8
 #define LIGHTING_LUT_SIZE 256
-#define FLOAT_255 0.99609375
+#define FLOAT_255 (255.0 / 256.0)
 
 in vec4 primary_color;
 in vec2 texcoord[3];

From 9dfb223d26a7d700e38a4c0eec9d32d78c42f91d Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 25 Nov 2015 20:49:48 -0500
Subject: [PATCH 23/32] gl_rasterizer: Initial implementation of bump mapping.

---
 src/video_core/pica.h                         |  9 ++++++
 .../renderer_opengl/gl_rasterizer.h           |  6 ++++
 .../renderer_opengl/gl_shader_gen.cpp         | 32 ++++++++++++++++---
 3 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 267070e457..809b16d2b2 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -702,6 +702,12 @@ struct Regs {
         LN = 3, // Cosine of the angle between the light and the normal vectors
     };
 
+    enum class LightingBumpMode : u32 {
+        None = 0,
+        NormalMap = 1,
+        TangentMap = 2,
+    };
+
     union LightColor {
         BitField< 0, 10, u32> b;
         BitField<10, 10, u32> g;
@@ -775,7 +781,10 @@ struct Regs {
         union {
             BitField< 2, 2, LightingFresnelSelector> fresnel_selector;
             BitField< 4, 4, LightingConfig> config;
+            BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
             BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE
+            BitField<28, 2, LightingBumpMode> bump_mode; // 1: GL_TRUE, 0: GL_FALSE
+            BitField<30, 1, u32> bump_renorm; // 0: GL_TRUE, 1: GL_FALSE
         };
 
         union {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 62a4d89534..d7eac52138 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -119,6 +119,9 @@ struct PicaShaderConfig {
 
         res.lighting.config = regs.lighting.config;
         res.lighting.fresnel_selector = regs.lighting.fresnel_selector;
+        res.lighting.bump_mode = regs.lighting.bump_mode;
+        res.lighting.bump_selector = regs.lighting.bump_selector;
+        res.lighting.bump_renorm = regs.lighting.bump_renorm == 0;
         res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
 
         return res;
@@ -153,6 +156,9 @@ struct PicaShaderConfig {
 
             bool enable = false;
             unsigned src_num = 0;
+            Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
+            unsigned bump_selector = 0;
+            bool bump_renorm = false;
             bool clamp_highlights = false;
 
             Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index d59f2054b5..ee4b54ab99 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -326,11 +326,28 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
            "vec3 light_vector = vec3(0.0);\n"
            "vec3 refl_value = vec3(0.0);\n";
 
-    // Convert interpolated quaternion to a GL fragment normal
-    out += "vec3 normal = normalize(vec3(\n";
-    out += "          2.f*(normquat.x*normquat.z + normquat.y*normquat.w),\n";
-    out += "          2.f*(normquat.y*normquat.z + normquat.x*normquat.w),\n";
-    out += "    1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n";
+    // Compute fragment normals
+    if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
+        // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture
+        std::string bump_selector = std::to_string(config.lighting.bump_selector);
+        out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n";
+
+        // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result
+        if (config.lighting.bump_renorm) {
+            std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
+            out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
+        }
+    } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
+        // Bump mapping is enabled using a tangent map
+        LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
+        UNIMPLEMENTED();
+    } else {
+        // No bump mapping - surface local normal is just a unit normal
+        out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
+    }
+
+    // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace
+    out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
 
     // Gets the index into the specified lookup table for specular lighting
     auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) {
@@ -516,6 +533,11 @@ layout (std140) uniform shader_data {
 uniform sampler2D tex[3];
 uniform sampler1D lut[6];
 
+// Rotate the vector v by the quaternion q
+vec3 quaternion_rotate(vec4 q, vec3 v) {
+    return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
+}
+
 void main() {
 vec4 primary_fragment_color = vec4(0.0);
 vec4 secondary_fragment_color = vec4(0.0);

From c229503f4a01b390f348d9f6c742921e7fc1ed48 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 14 Dec 2015 21:14:54 -0500
Subject: [PATCH 24/32] gl_rasterizer: Fix PicaShaderConfig on GCC.

---
 .../renderer_opengl/gl_rasterizer.h           | 56 +++++++++----------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d7eac52138..111448b706 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -139,39 +139,37 @@ struct PicaShaderConfig {
         return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0;
     };
 
+    Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never;
+    std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
+    u8 combiner_buffer_input = 0;
+
     struct {
-        Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never;
-        std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
-        u8 combiner_buffer_input = 0;
+        struct {
+            unsigned num = 0;
+            bool directional = false;
+            bool two_sided_diffuse = false;
+            bool dist_atten_enable = false;
+            GLfloat dist_atten_scale = 0.0f;
+            GLfloat dist_atten_bias = 0.0f;
+        } light[8];
+
+        bool enable = false;
+        unsigned src_num = 0;
+        Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
+        unsigned bump_selector = 0;
+        bool bump_renorm = false;
+        bool clamp_highlights = false;
+
+        Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
+        Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
 
         struct {
-            struct {
-                unsigned num = 0;
-                bool directional = false;
-                bool two_sided_diffuse = false;
-                bool dist_atten_enable = false;
-                GLfloat dist_atten_scale = 0.0f;
-                GLfloat dist_atten_bias = 0.0f;
-            } light[8];
-
             bool enable = false;
-            unsigned src_num = 0;
-            Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
-            unsigned bump_selector = 0;
-            bool bump_renorm = false;
-            bool clamp_highlights = false;
-
-            Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
-            Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
-
-            struct {
-                bool enable = false;
-                bool abs_input = false;
-                Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH;
-                float scale = 1.0f;
-            } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
-        } lighting;
-    };
+            bool abs_input = false;
+            Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH;
+            float scale = 1.0f;
+        } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
+    } lighting;
 };
 
 namespace std {

From 310a1c30ca430013621df77cf3e6a1a6d4513b98 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 14 Dec 2015 22:14:29 -0500
Subject: [PATCH 25/32] gl_rasterizer: Remove unnecessary casts.

---
 src/video_core/renderer_opengl/gl_rasterizer.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 111448b706..4e681f9ead 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -89,32 +89,32 @@ struct PicaShaderConfig {
 
         res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0;
         res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0;
-        res.lighting.lut_d0.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value();
+        res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
         res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
 
         res.lighting.lut_d1.enable = regs.lighting.lut_enable_d1 == 0;
         res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.d1 == 0;
-        res.lighting.lut_d1.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d1.Value();
+        res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
         res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
 
         res.lighting.lut_fr.enable = regs.lighting.lut_enable_fr == 0;
         res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.fr == 0;
-        res.lighting.lut_fr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.fr.Value();
+        res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
         res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
 
         res.lighting.lut_rr.enable = regs.lighting.lut_enable_rr == 0;
         res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.rr == 0;
-        res.lighting.lut_rr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rr.Value();
+        res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
         res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
 
         res.lighting.lut_rg.enable = regs.lighting.lut_enable_rg == 0;
         res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.rg == 0;
-        res.lighting.lut_rg.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rg.Value();
+        res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
         res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
 
         res.lighting.lut_rb.enable = regs.lighting.lut_enable_rb == 0;
         res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.rb == 0;
-        res.lighting.lut_rb.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rb.Value();
+        res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
         res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
 
         res.lighting.config = regs.lighting.config;

From d171822dcecc7b234d63147270d21307605a6347 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 16 Dec 2015 18:49:20 -0500
Subject: [PATCH 26/32] command_processor: Add an assertion to ensure LUTs are
 not written past their boundaries.

---
 src/video_core/command_processor.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 7409534b6f..6540ccb264 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -474,6 +474,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
         case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
         {
             auto& lut_config = regs.lighting.lut_config;
+
+            ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!");
+
             g_state.lighting.luts[lut_config.type][lut_config.index].raw = value;
             lut_config.index = lut_config.index + 1;
             break;

From a949fd5f2560b94dc8e8571497d0cfbebdb6bed7 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 16 Dec 2015 23:23:50 -0500
Subject: [PATCH 27/32] pica_types: Replace float24/20/16 with a template
 class.

---
 src/video_core/clipper.cpp                    |   8 +-
 src/video_core/command_processor.cpp          |  16 +-
 src/video_core/pica_types.h                   | 158 +++++++-----------
 .../renderer_opengl/gl_rasterizer.cpp         |  14 +-
 .../renderer_opengl/gl_rasterizer.h           |   4 +-
 5 files changed, 83 insertions(+), 117 deletions(-)

diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 3a09d62f4e..a385589d2d 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -59,12 +59,12 @@ static void InitScreenCoordinates(OutputVertex& vtx)
     } viewport;
 
     const auto& regs = g_state.regs;
-    viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x);
-    viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y);
+    viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x);
+    viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
     viewport.offset_x   = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
     viewport.offset_y   = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
-    viewport.zscale     = float24::FromRawFloat24(regs.viewport_depth_range);
-    viewport.offset_z   = float24::FromRawFloat24(regs.viewport_depth_far_plane);
+    viewport.zscale     = float24::FromRaw(regs.viewport_depth_range);
+    viewport.offset_z   = float24::FromRaw(regs.viewport_depth_far_plane);
 
     float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
     vtx.color *= inv_w;
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 6540ccb264..5dfedfe311 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -98,10 +98,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                 Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index];
 
                 // NOTE: The destination component order indeed is "backwards"
-                attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8);
-                attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
-                attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF));
-                attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF);
+                attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
+                attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
+                attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF));
+                attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
 
                 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
                           attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
@@ -418,10 +418,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                         uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
                 } else {
                     // TODO: Untested
-                    uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8);
-                    uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
-                    uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
-                    uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF);
+                    uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
+                    uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
+                    uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
+                    uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
                 }
 
                 LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index a34421c5d5..53f61f2871 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -4,35 +4,51 @@
 
 #pragma once
 
+#include <cstring>
+
 #include "common/common_types.h"
 
 namespace Pica {
 
-struct float24 {
-    static float24 FromFloat32(float val) {
-        float24 ret;
+/**
+ * Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision
+ * floating point.
+ *
+ * When decoding, format is as follows:
+ *  - The first `M` bits are the mantissa
+ *  - The next `E` bits are the exponent
+ *  - The last bit is the sign bit
+ *
+ * @todo Verify on HW if this conversion is sufficently accurate.
+ */
+template<unsigned M, unsigned E>
+struct Float {
+public:
+    static Float<M, E> FromFloat32(float val) {
+        Float<M, E> ret;
         ret.value = val;
         return ret;
     }
 
-    // 16 bit mantissa, 7 bit exponent, 1 bit sign
-    // TODO: No idea if this works as intended
-    static float24 FromRawFloat24(u32 hex) {
-        float24 ret;
-        if ((hex & 0xFFFFFF) == 0) {
-            ret.value = 0;
-        } else {
-            u32 mantissa = hex & 0xFFFF;
-            u32 exponent = (hex >> 16) & 0x7F;
-            u32 sign = hex >> 23;
-            ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f));
-            if (sign)
-                ret.value = -ret.value;
-        }
-        return ret;
+    static Float<M, E> FromRaw(u32 hex) {
+        Float<M, E> res;
+
+        const int width = M + E + 1;
+        const int bias = 128 - (1 << (E - 1));
+        const int exponent = (hex >> M) & ((1 << E) - 1);
+        const unsigned mantissa = hex & ((1 << M) - 1);
+
+        if (hex & ((1 << (width - 1)) - 1))
+            hex = ((hex >> (E + M)) << 31) | (mantissa << (23 - M)) | ((exponent + bias) << 23);
+        else
+            hex = ((hex >> (E + M)) << 31);
+
+        std::memcpy(&res.value, &hex, sizeof(float));
+
+        return res;
     }
 
-    static float24 Zero() {
+    static Float<M, E> Zero() {
         return FromFloat32(0.f);
     }
 
@@ -41,27 +57,27 @@ struct float24 {
         return value;
     }
 
-    float24 operator * (const float24& flt) const {
+    Float<M, E> operator * (const Float<M, E>& flt) const {
         if ((this->value == 0.f && !std::isnan(flt.value)) ||
             (flt.value == 0.f && !std::isnan(this->value)))
             // PICA gives 0 instead of NaN when multiplying by inf
             return Zero();
-        return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
+        return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32());
     }
 
-    float24 operator / (const float24& flt) const {
-        return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
+    Float<M, E> operator / (const Float<M, E>& flt) const {
+        return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32());
     }
 
-    float24 operator + (const float24& flt) const {
-        return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
+    Float<M, E> operator + (const Float<M, E>& flt) const {
+        return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32());
     }
 
-    float24 operator - (const float24& flt) const {
-        return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
+    Float<M, E> operator - (const Float<M, E>& flt) const {
+        return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32());
     }
 
-    float24& operator *= (const float24& flt) {
+    Float<M, E>& operator *= (const Float<M, E>& flt) {
         if ((this->value == 0.f && !std::isnan(flt.value)) ||
             (flt.value == 0.f && !std::isnan(this->value)))
             // PICA gives 0 instead of NaN when multiplying by inf
@@ -70,111 +86,61 @@ struct float24 {
         return *this;
     }
 
-    float24& operator /= (const float24& flt) {
+    Float<M, E>& operator /= (const Float<M, E>& flt) {
         value /= flt.ToFloat32();
         return *this;
     }
 
-    float24& operator += (const float24& flt) {
+    Float<M, E>& operator += (const Float<M, E>& flt) {
         value += flt.ToFloat32();
         return *this;
     }
 
-    float24& operator -= (const float24& flt) {
+    Float<M, E>& operator -= (const Float<M, E>& flt) {
         value -= flt.ToFloat32();
         return *this;
     }
 
-    float24 operator - () const {
-        return float24::FromFloat32(-ToFloat32());
+    Float<M, E> operator - () const {
+        return Float<M, E>::FromFloat32(-ToFloat32());
     }
 
-    bool operator < (const float24& flt) const {
+    bool operator < (const Float<M, E>& flt) const {
         return ToFloat32() < flt.ToFloat32();
     }
 
-    bool operator > (const float24& flt) const {
+    bool operator > (const Float<M, E>& flt) const {
         return ToFloat32() > flt.ToFloat32();
     }
 
-    bool operator >= (const float24& flt) const {
+    bool operator >= (const Float<M, E>& flt) const {
         return ToFloat32() >= flt.ToFloat32();
     }
 
-    bool operator <= (const float24& flt) const {
+    bool operator <= (const Float<M, E>& flt) const {
         return ToFloat32() <= flt.ToFloat32();
     }
 
-    bool operator == (const float24& flt) const {
+    bool operator == (const Float<M, E>& flt) const {
         return ToFloat32() == flt.ToFloat32();
     }
 
-    bool operator != (const float24& flt) const {
+    bool operator != (const Float<M, E>& flt) const {
         return ToFloat32() != flt.ToFloat32();
     }
 
 private:
+    static const unsigned MASK = (1 << (M + E + 1)) - 1;
+    static const unsigned MANTISSA_MASK = (1 << M) - 1;
+    static const unsigned EXPONENT_MASK = (1 << E) - 1;
+
     // Stored as a regular float, merely for convenience
     // TODO: Perform proper arithmetic on this!
     float value;
 };
 
-static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float");
-
-struct float16 {
-    // 10 bit mantissa, 5 bit exponent, 1 bit sign
-    // TODO: No idea if this works as intended
-    static float16 FromRawFloat16(u32 hex) {
-        float16 ret;
-        if ((hex & 0xFFFF) == 0) {
-            ret.value = 0;
-        } else {
-            u32 mantissa = hex & 0x3FF;
-            u32 exponent = (hex >> 10) & 0x1F;
-            u32 sign = (hex >> 15) & 1;
-            ret.value = std::pow(2.0f, (float)exponent - 15.0f) * (1.0f + mantissa * std::pow(2.0f, -10.f));
-            if (sign)
-                ret.value = -ret.value;
-        }
-        return ret;
-    }
-
-    float ToFloat32() const {
-        return value;
-    }
-
-private:
-    // Stored as a regular float, merely for convenience
-    // TODO: Perform proper arithmetic on this!
-    float value;
-};
-
-struct float20 {
-    // 12 bit mantissa, 7 bit exponent, 1 bit sign
-    // TODO: No idea if this works as intended
-    static float20 FromRawFloat20(u32 hex) {
-        float20 ret;
-        if ((hex & 0xFFFFF) == 0) {
-            ret.value = 0;
-        } else {
-            u32 mantissa = hex & 0xFFF;
-            u32 exponent = (hex >> 12) & 0x7F;
-            u32 sign = (hex >> 19) & 1;
-            ret.value = std::pow(2.0f, (float)exponent - 63.0f) * (1.0f + mantissa * std::pow(2.0f, -12.f));
-            if (sign)
-                ret.value = -ret.value;
-        }
-        return ret;
-    }
-
-    float ToFloat32() const {
-        return value;
-    }
-
-private:
-    // Stored as a regular float, merely for convenience
-    // TODO: Perform proper arithmetic on this!
-    float value;
-};
+using float24 = Float<16, 7>;
+using float20 = Float<12, 7>;
+using float16 = Float<10, 5>;
 
 } // namespace Pica
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6e7d6a40d9..d70d62eded 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -810,8 +810,8 @@ void RasterizerOpenGL::SyncCullMode() {
 }
 
 void RasterizerOpenGL::SyncDepthModifiers() {
-    float depth_scale = -Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_range).ToFloat32();
-    float depth_offset = Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f;
+    float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
+    float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f;
 
     // TODO: Implement scale modifier
     uniform_block_data.data.depth_offset = depth_offset;
@@ -948,9 +948,9 @@ void RasterizerOpenGL::SyncLightAmbient(int light_index) {
 
 void RasterizerOpenGL::SyncLightPosition(int light_index) {
     std::array<GLfloat, 3> position = {
-        Pica::float16::FromRawFloat16(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
-        Pica::float16::FromRawFloat16(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
-        Pica::float16::FromRawFloat16(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() };
+        Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
+        Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
+        Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() };
 
     if (position != uniform_block_data.data.light_src[light_index].position) {
         uniform_block_data.data.light_src[light_index].position = position;
@@ -962,8 +962,8 @@ void RasterizerOpenGL::SyncDrawState() {
     const auto& regs = Pica::g_state.regs;
 
     // Sync the viewport
-    GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2;
-    GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2;
+    GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
+    GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
 
     // OpenGL uses different y coordinates, so negate corner offset and flip origin
     // TODO: Ensure viewport_corner.x should not be negated or origin flipped
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4e681f9ead..b9c1d61bd4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -83,8 +83,8 @@ struct PicaShaderConfig {
             res.lighting.light[light_index].directional = light.directional != 0;
             res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
             res.lighting.light[light_index].dist_atten_enable = regs.lighting.IsDistAttenEnabled(num);
-            res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32();
-            res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32();
+            res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
+            res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
         }
 
         res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0;

From b694423d09b618f245306d069b60cec44958565d Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 28 Dec 2015 21:03:53 -0500
Subject: [PATCH 28/32] pica_types: Fix typo in docstring.

---
 src/video_core/pica_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index 53f61f2871..ecf45654bc 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -19,7 +19,7 @@ namespace Pica {
  *  - The next `E` bits are the exponent
  *  - The last bit is the sign bit
  *
- * @todo Verify on HW if this conversion is sufficently accurate.
+ * @todo Verify on HW if this conversion is sufficiently accurate.
  */
 template<unsigned M, unsigned E>
 struct Float {

From 8e9318f20a6fbcd511cf0f1b06b041ea1663467f Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 28 Jan 2016 23:29:33 -0500
Subject: [PATCH 29/32] gl_rasterizer: Fix issue with interpolation of opposite
 quaternions.

---
 .../renderer_opengl/gl_rasterizer.cpp         | 28 +++++++++++++++++--
 .../renderer_opengl/gl_rasterizer.h           |  8 +++++-
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d70d62eded..6ed67efebd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -158,12 +158,34 @@ void RasterizerOpenGL::Reset() {
     res_cache.InvalidateAll();
 }
 
+/**
+ * This is a helper function to resolve an issue with opposite quaternions being interpolated by
+ * OpenGL. See below for a detailed description of this issue (yuriks):
+ *
+ * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
+ * interpolate two quaternions that are opposite, instead of going from one rotation to another
+ * using the shortest path, you'll go around the longest path. You can test if two quaternions are
+ * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
+ * making Dot(-Q1, Q2) positive.
+ *
+ * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This
+ * should be correct for nearly all cases, however a more correct implementation (but less trivial
+ * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions
+ * manually using two Lerps, and doing this correction before each Lerp.
+ */
+static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) {
+    Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() };
+    Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() };
+
+    return (Math::Dot(a, b) < 0.f);
+}
+
 void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
                                    const Pica::Shader::OutputVertex& v1,
                                    const Pica::Shader::OutputVertex& v2) {
-    vertex_batch.emplace_back(v0);
-    vertex_batch.emplace_back(v1);
-    vertex_batch.emplace_back(v2);
+    vertex_batch.emplace_back(v0, false);
+    vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
+    vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
 }
 
 void RasterizerOpenGL::DrawTriangles() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b9c1d61bd4..99266854cd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -248,7 +248,7 @@ private:
 
     /// Structure that the hardware rendered vertices are composed of
     struct HardwareVertex {
-        HardwareVertex(const Pica::Shader::OutputVertex& v) {
+        HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
             position[0] = v.pos.x.ToFloat32();
             position[1] = v.pos.y.ToFloat32();
             position[2] = v.pos.z.ToFloat32();
@@ -270,6 +270,12 @@ private:
             view[0] = v.view.x.ToFloat32();
             view[1] = v.view.y.ToFloat32();
             view[2] = v.view.z.ToFloat32();
+
+            if (flip_quaternion) {
+                for (float& x : normquat) {
+                    x = -x;
+                }
+            }
         }
 
         GLfloat position[4];

From aaa7beeda8be312294a32e620a172c33cb231866 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 4 Feb 2016 00:03:20 -0500
Subject: [PATCH 30/32] renderer_opengl: Use GLvec3/GLvec4 aliases for commonly
 used types.

---
 .../renderer_opengl/gl_rasterizer.cpp         |  4 ++--
 .../renderer_opengl/gl_rasterizer.h           | 19 ++++++++++---------
 src/video_core/renderer_opengl/pica_to_gl.h   |  5 ++++-
 .../renderer_opengl/renderer_opengl.cpp       |  4 ++--
 4 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6ed67efebd..b7d19bf943 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -920,7 +920,7 @@ void RasterizerOpenGL::SyncGlobalAmbient() {
 }
 
 void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
-    std::array<std::array<GLfloat, 4>, 256> new_data;
+    std::array<GLvec4, 256> new_data;
 
     for (unsigned offset = 0; offset < new_data.size(); ++offset) {
         new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat();
@@ -969,7 +969,7 @@ void RasterizerOpenGL::SyncLightAmbient(int light_index) {
 }
 
 void RasterizerOpenGL::SyncLightPosition(int light_index) {
-    std::array<GLfloat, 3> position = {
+    GLvec3 position = {
         Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
         Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
         Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() };
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 99266854cd..e7fec30cfe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -17,6 +17,7 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/pica_to_gl.h"
 #include "video_core/shader/shader_interpreter.h"
 
 /**
@@ -288,27 +289,27 @@ private:
     };
 
     struct LightSrc {
-        std::array<GLfloat, 3> specular_0;
+        GLvec3 specular_0;
         INSERT_PADDING_WORDS(1);
-        std::array<GLfloat, 3> specular_1;
+        GLvec3 specular_1;
         INSERT_PADDING_WORDS(1);
-        std::array<GLfloat, 3> diffuse;
+        GLvec3 diffuse;
         INSERT_PADDING_WORDS(1);
-        std::array<GLfloat, 3> ambient;
+        GLvec3 ambient;
         INSERT_PADDING_WORDS(1);
-        std::array<GLfloat, 3> position;
+        GLvec3 position;
         INSERT_PADDING_WORDS(1);
     };
 
     /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned
     struct UniformData {
         // A vec4 color for each of the six tev stages
-        std::array<GLfloat, 4> const_color[6];
-        std::array<GLfloat, 4> tev_combiner_buffer_color;
+        GLvec4 const_color[6];
+        GLvec4 tev_combiner_buffer_color;
         GLint alphatest_ref;
         GLfloat depth_offset;
         INSERT_PADDING_WORDS(2);
-        std::array<GLfloat, 3> lighting_global_ambient;
+        GLvec3 lighting_global_ambient;
         INSERT_PADDING_WORDS(1);
         LightSrc light_src[8];
     };
@@ -434,5 +435,5 @@ private:
     OGLFramebuffer framebuffer;
 
     std::array<OGLTexture, 6> lighting_lut;
-    std::array<std::array<std::array<GLfloat, 4>, 256>, 6> lighting_lut_data;
+    std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
 };
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 346c9391da..3d6c4e9e53 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -10,6 +10,9 @@
 
 #include "video_core/pica.h"
 
+using GLvec3 = std::array<GLfloat, 3>;
+using GLvec4 = std::array<GLfloat, 4>;
+
 namespace PicaToGL {
 
 inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) {
@@ -175,7 +178,7 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) {
     return stencil_op_table[(unsigned)action];
 }
 
-inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) {
+inline GLvec4 ColorRGBA8(const u32 color) {
     return { { (color >>  0 & 0xFF) / 255.0f,
                (color >>  8 & 0xFF) / 255.0f,
                (color >> 16 & 0xFF) / 255.0f,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a6a38f0afc..ca3a6a6b49 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -81,8 +81,8 @@ struct ScreenRectVertex {
  * The projection part of the matrix is trivial, hence these operations are represented
  * by a 3x2 matrix.
  */
-static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const float height) {
-    std::array<GLfloat, 3*2> matrix;
+static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
+    std::array<GLfloat, 3 * 2> matrix;
 
     matrix[0] = 2.f / width; matrix[2] = 0.f;           matrix[4] = -1.f;
     matrix[1] = 0.f;         matrix[3] = -2.f / height; matrix[5] = 1.f;

From c4d318f6915702e09866442f78d78747251779cb Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 4 Feb 2016 00:13:17 -0500
Subject: [PATCH 31/32] gl_rasterizer: Use alignas(16) instead of explicit
 padding.

---
 .../renderer_opengl/gl_rasterizer.h           | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index e7fec30cfe..208a7bcb62 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -289,16 +289,11 @@ private:
     };
 
     struct LightSrc {
-        GLvec3 specular_0;
-        INSERT_PADDING_WORDS(1);
-        GLvec3 specular_1;
-        INSERT_PADDING_WORDS(1);
-        GLvec3 diffuse;
-        INSERT_PADDING_WORDS(1);
-        GLvec3 ambient;
-        INSERT_PADDING_WORDS(1);
-        GLvec3 position;
-        INSERT_PADDING_WORDS(1);
+        alignas(16) GLvec3 specular_0;
+        alignas(16) GLvec3 specular_1;
+        alignas(16) GLvec3 diffuse;
+        alignas(16) GLvec3 ambient;
+        alignas(16) GLvec3 position;
     };
 
     /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned
@@ -308,9 +303,7 @@ private:
         GLvec4 tev_combiner_buffer_color;
         GLint alphatest_ref;
         GLfloat depth_offset;
-        INSERT_PADDING_WORDS(2);
-        GLvec3 lighting_global_ambient;
-        INSERT_PADDING_WORDS(1);
+        alignas(16) GLvec3 lighting_global_ambient;
         LightSrc light_src[8];
     };
 

From 19557aaab3434a9a6e9b6730a76923de053084cd Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 4 Feb 2016 21:51:56 -0500
Subject: [PATCH 32/32] pica: Cleanup lighting register definitions and
 documentation.

---
 src/video_core/pica.h                         | 69 ++++++++++---------
 .../renderer_opengl/gl_rasterizer.h           | 30 ++++----
 2 files changed, 51 insertions(+), 48 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 809b16d2b2..9077b1725c 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -762,7 +762,7 @@ struct Regs {
 
                 union {
                     BitField<0, 1, u32> directional;
-                    BitField<1, 1, u32> two_sided_diffuse; // 1: GL_TRUE, 0: GL_FALSE; when disabled, clamp dot-product to 0
+                    BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
                 };
             };
 
@@ -774,46 +774,46 @@ struct Regs {
         static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words");
 
         LightSrc light[8];
-        LightColor global_ambient; // emission + (material.ambient * lighting.ambient)
+        LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
         INSERT_PADDING_WORDS(0x1);
-        BitField<0, 3, u32> src_num; // number of enabled lights - 1
+        BitField<0, 3, u32> num_lights; // Number of enabled lights - 1
 
         union {
             BitField< 2, 2, LightingFresnelSelector> fresnel_selector;
             BitField< 4, 4, LightingConfig> config;
             BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
-            BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE
-            BitField<28, 2, LightingBumpMode> bump_mode; // 1: GL_TRUE, 0: GL_FALSE
-            BitField<30, 1, u32> bump_renorm; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<27, 1, u32> clamp_highlights;
+            BitField<28, 2, LightingBumpMode> bump_mode;
+            BitField<30, 1, u32> disable_bump_renorm;
         };
 
         union {
-            BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<17, 1, u32> lut_enable_d1; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<19, 1, u32> lut_enable_fr; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<20, 1, u32> lut_enable_rr; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<21, 1, u32> lut_enable_rg; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<22, 1, u32> lut_enable_rb; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<16, 1, u32> disable_lut_d0;
+            BitField<17, 1, u32> disable_lut_d1;
+            BitField<19, 1, u32> disable_lut_fr;
+            BitField<20, 1, u32> disable_lut_rr;
+            BitField<21, 1, u32> disable_lut_rg;
+            BitField<22, 1, u32> disable_lut_rb;
 
             // Each bit specifies whether distance attenuation should be applied for the
             // corresponding light
 
-            BitField<24, 1, u32> dist_atten_enable_light_0; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<25, 1, u32> dist_atten_enable_light_1; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<26, 1, u32> dist_atten_enable_light_2; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<27, 1, u32> dist_atten_enable_light_3; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<28, 1, u32> dist_atten_enable_light_4; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<29, 1, u32> dist_atten_enable_light_5; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<30, 1, u32> dist_atten_enable_light_6; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<31, 1, u32> dist_atten_enable_light_7; // 0: GL_TRUE, 1: GL_FALSE
+            BitField<24, 1, u32> disable_dist_atten_light_0;
+            BitField<25, 1, u32> disable_dist_atten_light_1;
+            BitField<26, 1, u32> disable_dist_atten_light_2;
+            BitField<27, 1, u32> disable_dist_atten_light_3;
+            BitField<28, 1, u32> disable_dist_atten_light_4;
+            BitField<29, 1, u32> disable_dist_atten_light_5;
+            BitField<30, 1, u32> disable_dist_atten_light_6;
+            BitField<31, 1, u32> disable_dist_atten_light_7;
         };
 
-        bool IsDistAttenEnabled(unsigned index) const {
-            const unsigned enable[] = { dist_atten_enable_light_0, dist_atten_enable_light_1,
-                                        dist_atten_enable_light_2, dist_atten_enable_light_3,
-                                        dist_atten_enable_light_4, dist_atten_enable_light_5,
-                                        dist_atten_enable_light_6, dist_atten_enable_light_7 };
-            return enable[index] == 0;
+        bool IsDistAttenDisabled(unsigned index) const {
+            const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1,
+                                         disable_dist_atten_light_2, disable_dist_atten_light_3,
+                                         disable_dist_atten_light_4, disable_dist_atten_light_5,
+                                         disable_dist_atten_light_6, disable_dist_atten_light_7 };
+            return disable[index] != 0;
         }
 
         union {
@@ -830,14 +830,17 @@ struct Regs {
         // registers is written to, the behavior will be the same.
         u32 lut_data[8];
 
+        // Each bit selects whether the absolute value (abs) of the input is used to index the
+        // corresponding LUT; a set bit disables abs mode. With abs mode disabled, LUT indexes are
+        // in the range of (-1.0, 1.0). Otherwise, they are in the range of (0.0, 1.0).
         union {
-            BitField< 1, 1, u32> d0; // 0: GL_TRUE, 1: GL_FALSE
-            BitField< 5, 1, u32> d1; // 0: GL_TRUE, 1: GL_FALSE
-            BitField< 9, 1, u32> sp; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<13, 1, u32> fr; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<17, 1, u32> rb; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<21, 1, u32> rg; // 0: GL_TRUE, 1: GL_FALSE
-            BitField<25, 1, u32> rr; // 0: GL_TRUE, 1: GL_FALSE
+            BitField< 1, 1, u32> disable_d0;
+            BitField< 5, 1, u32> disable_d1;
+            BitField< 9, 1, u32> disable_sp;
+            BitField<13, 1, u32> disable_fr;
+            BitField<17, 1, u32> disable_rb;
+            BitField<21, 1, u32> disable_rg;
+            BitField<25, 1, u32> disable_rr;
         } abs_lut_input;
 
         union {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 208a7bcb62..fef5f53315 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -75,7 +75,7 @@ struct PicaShaderConfig {
         // Fragment lighting
 
         res.lighting.enable = !regs.lighting.disable;
-        res.lighting.src_num = regs.lighting.src_num + 1;
+        res.lighting.src_num = regs.lighting.num_lights + 1;
 
         for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) {
             unsigned num = regs.lighting.light_enable.GetNum(light_index);
@@ -83,38 +83,38 @@ struct PicaShaderConfig {
             res.lighting.light[light_index].num = num;
             res.lighting.light[light_index].directional = light.directional != 0;
             res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
-            res.lighting.light[light_index].dist_atten_enable = regs.lighting.IsDistAttenEnabled(num);
+            res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
             res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
             res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
         }
 
-        res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0;
-        res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0;
+        res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
+        res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
         res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
         res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
 
-        res.lighting.lut_d1.enable = regs.lighting.lut_enable_d1 == 0;
-        res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.d1 == 0;
+        res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
+        res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
         res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
         res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
 
-        res.lighting.lut_fr.enable = regs.lighting.lut_enable_fr == 0;
-        res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.fr == 0;
+        res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
+        res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
         res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
         res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
 
-        res.lighting.lut_rr.enable = regs.lighting.lut_enable_rr == 0;
-        res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.rr == 0;
+        res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
+        res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
         res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
         res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
 
-        res.lighting.lut_rg.enable = regs.lighting.lut_enable_rg == 0;
-        res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.rg == 0;
+        res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
+        res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
         res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
         res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
 
-        res.lighting.lut_rb.enable = regs.lighting.lut_enable_rb == 0;
-        res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.rb == 0;
+        res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
+        res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
         res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
         res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
 
@@ -122,7 +122,7 @@ struct PicaShaderConfig {
         res.lighting.fresnel_selector = regs.lighting.fresnel_selector;
         res.lighting.bump_mode = regs.lighting.bump_mode;
         res.lighting.bump_selector = regs.lighting.bump_selector;
-        res.lighting.bump_renorm = regs.lighting.bump_renorm == 0;
+        res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
         res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
 
         return res;