diff --git a/src/common/color.h b/src/common/color.h
index 422fdc8af8..9dafdca0ca 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -208,7 +208,32 @@ inline void EncodeD24(u32 value, u8* bytes) {
  * @param bytes Pointer where to store the encoded value
  */
 inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
-    *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth;
+    // Depth occupies the first three bytes, least significant byte first.
+    for (unsigned byte_index = 0; byte_index < 3; ++byte_index)
+        bytes[byte_index] = (depth >> (8 * byte_index)) & 0xFF;
+    bytes[3] = stencil; // the fourth byte carries the stencil value
+}
+
+/**
+ * Write the 24 depth bits of a D24X8 pixel (32 bits per pixel, 8 of them unused)
+ * @param depth Source depth value; its low 24 bits are stored
+ * @param bytes Destination pixel; bytes[0..2] receive the depth, least significant byte first
+ * @note bytes[3] is left untouched
+ */
+inline void EncodeD24X8(u32 depth, u8* bytes) {
+    for (unsigned byte_index = 0; byte_index < 3; ++byte_index) {
+        bytes[byte_index] = (depth >> (8 * byte_index)) & 0xFF;
+    }
+}
+
+/**
+ * Write the stencil byte of an X24S8 pixel (32 bits per pixel, 24 of them unused)
+ * @param stencil Stencil value to store
+ * @param bytes Destination pixel; only bytes[3] is written, bytes[0..2] stay untouched
+ */
+inline void EncodeX24S8(u8 stencil, u8* bytes) {
+    u8* const stencil_byte = bytes + 3; // the last byte of the pixel holds the stencil
+    *stencil_byte = stencil;
 }
 
 } // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index feb20214ac..46a7b21dc9 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -420,6 +420,11 @@ struct Regs {
         GreaterThanOrEqual = 7,
     };
 
+    enum class StencilAction : u32 { // Ways the stencil value can be updated after a stencil/depth test
+        Keep = 0, // Leave the stored stencil value as it is
+        Xor  = 5, // XOR the stored stencil value with the replacement value
+    };
+
     struct {
         union {
             // If false, logic blending is used
@@ -454,15 +459,35 @@ struct Regs {
             BitField< 8, 8, u32> ref;
         } alpha_test;
 
-        union {
-            BitField< 0, 1, u32> stencil_test_enable;
-            BitField< 4, 3, CompareFunc> stencil_test_func;
-            BitField< 8, 8, u32> stencil_replacement_value;
-            BitField<16, 8, u32> stencil_reference_value;
-            BitField<24, 8, u32> stencil_mask;
-        } stencil_test;
+        struct { // Stencil configuration: test parameters followed by the per-outcome update actions
+            union {
+                // If true, enable stencil testing
+                BitField< 0, 1, u32> enable;
 
-        INSERT_PADDING_WORDS(0x1);
+                // Comparison operation for stencil testing
+                BitField< 4, 3, CompareFunc> func;
+
+                // Operand handed to the stencil action when computing the new stencil value
+                BitField< 8, 8, u32> replacement_value;
+
+                // Value to compare against for stencil testing
+                BitField<16, 8, u32> reference_value;
+
+                // Mask applied to both the stored stencil value and the reference value before comparing
+                BitField<24, 8, u32> mask;
+            };
+
+            union {
+                // Action to perform when the stencil test fails
+                BitField< 0, 3, StencilAction> action_stencil_fail;
+
+                // Action to perform when the stencil test passes but the depth test fails
+                BitField< 4, 3, StencilAction> action_depth_fail;
+
+                // Action to perform when both stencil and depth testing pass
+                BitField< 8, 3, StencilAction> action_depth_pass;
+            };
+        } stencil_test;
 
         union {
             BitField< 0, 1, u32> depth_test_enable;
@@ -512,7 +537,7 @@ struct Regs {
     struct {
         INSERT_PADDING_WORDS(0x6);
 
-        DepthFormat depth_format;
+        DepthFormat depth_format; // TODO: Should be a BitField!
         BitField<16, 3, ColorFormat> color_format;
 
         INSERT_PADDING_WORDS(0x4);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 70b115744d..c381c2bd94 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) {
     }
 }
 
+// Reads the stencil byte of pixel (x, y) from the depth buffer; logs a warning and returns 0 for formats without one.
+static u8 GetStencil(int x, int y) {
+    const auto& framebuffer = g_state.regs.framebuffer;
+    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+    // The row index is mirrored against the framebuffer height before the pixel is addressed.
+    y = framebuffer.height - y;
+
+    const u32 coarse_y = y & ~7;
+    const u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
+    const u32 stride = framebuffer.width * bytes_per_pixel;
+    const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* src_pixel = depth_buffer + src_offset;
+
+    switch (framebuffer.depth_format) {
+        case Regs::DepthFormat::D24S8:
+            return Color::DecodeD24S8(src_pixel).y; // the .y component is used as the stencil value
+
+        default:
+            // The enum is cast explicitly so the argument matches the %u conversion.
+            LOG_WARNING(HW_GPU, "GetStencil called for format which doesn't have a stencil component (format %u)", static_cast<u32>(framebuffer.depth_format));
+            return 0;
+    }
+}
+
 static void SetDepth(int x, int y, u32 value) {
     const auto& framebuffer = g_state.regs.framebuffer;
     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
@@ -144,13 +168,15 @@ static void SetDepth(int x, int y, u32 value) {
         case Regs::DepthFormat::D16:
             Color::EncodeD16(value, dst_pixel);
             break;
+
         case Regs::DepthFormat::D24:
             Color::EncodeD24(value, dst_pixel);
             break;
+
         case Regs::DepthFormat::D24S8:
-            // TODO(Subv): Implement the stencil buffer
-            Color::EncodeD24S8(value, 0, dst_pixel);
+            Color::EncodeD24X8(value, dst_pixel);
             break;
+
         default:
             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
             UNIMPLEMENTED();
@@ -158,6 +184,53 @@ static void SetDepth(int x, int y, u32 value) {
     }
 }
 
+// Stores `value` as the stencil byte of pixel (x, y) in the depth buffer; D16 and D24 buffers are left unmodified.
+static void SetStencil(int x, int y, u8 value) {
+    const auto& fb = g_state.regs.framebuffer;
+    const PAddr depth_addr = fb.GetDepthBufferPhysicalAddress();
+    u8* const buffer_base = Memory::GetPhysicalPointer(depth_addr);
+
+    y = fb.height - y;
+
+    const u32 pixel_size = Pica::Regs::BytesPerDepthPixel(fb.depth_format);
+    const u32 row_pitch = fb.width * pixel_size;
+    const u32 band_start = y & ~7;
+    const u32 pixel_offset = VideoCore::GetMortonOffset(x, y, pixel_size) + band_start * row_pitch;
+    u8* const target = buffer_base + pixel_offset;
+
+    switch (fb.depth_format) {
+        case Pica::Regs::DepthFormat::D24S8:
+            Color::EncodeX24S8(value, target);
+            break;
+
+        case Pica::Regs::DepthFormat::D16:
+        case Pica::Regs::DepthFormat::D24:
+            // Nothing is written for these formats
+            break;
+
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", fb.depth_format);
+            UNIMPLEMENTED();
+            break;
+    }
+}
+
+// Computes the stencil value that results from applying `action` to the stored value `dest` with operand `ref`.
+// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not!
+static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) {
+    if (action == Regs::StencilAction::Keep) {
+        return dest; // the stored value is passed through unchanged
+    }
+    if (action == Regs::StencilAction::Xor) {
+        return dest ^ ref;
+    }
+
+    // Every other action is reported as unknown.
+    LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", static_cast<int>(action));
+    UNIMPLEMENTED();
+    return 0;
+}
+
 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 struct Fix12P4 {
     Fix12P4() {}
@@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
     auto textures = regs.GetTextures();
     auto tev_stages = regs.GetTevStages();
 
+    bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8;
+    const auto stencil_test = g_state.regs.output_merger.stencil_test;
+
     // Enter rasterization loop, starting at the center of the topleft bounding box corner.
     // TODO: Not sure if looping through x first might be faster
     for (u16 y = min_y + 8; y < max_y; y += 0x10) {
@@ -647,6 +723,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
             }
 
             const auto& output_merger = regs.output_merger;
+            // TODO: Does alpha testing happen before or after stencil?
             if (output_merger.alpha_test.enable) {
                 bool pass = false;
 
@@ -688,6 +765,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                     continue;
             }
 
+            u8 old_stencil = 0;
+            if (stencil_action_enable) {
+                old_stencil = GetStencil(x >> 4, y >> 4);
+                u8 dest = old_stencil & stencil_test.mask;
+                u8 ref = stencil_test.reference_value & stencil_test.mask;
+
+                bool pass = false;
+                switch (stencil_test.func) {
+                case Regs::CompareFunc::Never:
+                    pass = false;
+                    break;
+
+                case Regs::CompareFunc::Always:
+                    pass = true;
+                    break;
+
+                case Regs::CompareFunc::Equal:
+                    pass = (ref == dest);
+                    break;
+
+                case Regs::CompareFunc::NotEqual:
+                    pass = (ref != dest);
+                    break;
+
+                case Regs::CompareFunc::LessThan:
+                    pass = (ref < dest);
+                    break;
+
+                case Regs::CompareFunc::LessThanOrEqual:
+                    pass = (ref <= dest);
+                    break;
+
+                case Regs::CompareFunc::GreaterThan:
+                    pass = (ref > dest);
+                    break;
+
+                case Regs::CompareFunc::GreaterThanOrEqual:
+                    pass = (ref >= dest);
+                    break;
+                }
+
+                if (!pass) {
+                    u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value);
+                    SetStencil(x >> 4, y >> 4, new_stencil);
+                    continue;
+                }
+            }
+
             // TODO: Does depth indeed only get written even if depth testing is enabled?
             if (output_merger.depth_test_enable) {
                 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
@@ -732,11 +857,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                     break;
                 }
 
-                if (!pass)
+                if (!pass) {
+                    if (stencil_action_enable) {
+                        u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value);
+                        SetStencil(x >> 4, y >> 4, new_stencil);
+                    }
                     continue;
+                }
 
                 if (output_merger.depth_write_enable)
                     SetDepth(x >> 4, y >> 4, z);
+
+                if (stencil_action_enable) {
+                    // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway?
+                    u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value);
+                    SetStencil(x >> 4, y >> 4, new_stencil);
+                }
             }
 
             auto dest = GetPixel(x >> 4, y >> 4);