From 98317f2b7799360ee4e5e55cbbb123bce72fbf2c Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 3 Apr 2022 02:18:03 +0200
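Subject: [PATCH] Decoders: Improve overall speed.

Compute the stride in SwizzleTexture/UnswizzleTexture and pass it down to
SwizzleImpl in place of the stride alignment. With the stride fixed up
front, the callers then re-express each row as wider power-of-two pixels
(up to 16 bytes) whenever the row's byte width is a multiple of that size.

---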
 src/video_core/textures/decoders.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index fcc636e0b9..52d067a2dc 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -35,7 +35,7 @@ void incrpdep(u32& value) {
 
 template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
 void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
-                 u32 block_height, u32 block_depth, u32 stride_alignment) {
+                 u32 block_height, u32 block_depth, u32 stride) {
     // The origin of the transformation can be configured here, leave it as zero as the current API
     // doesn't expose it.
     static constexpr u32 origin_x = 0;
@@ -45,7 +45,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
     // We can configure here a custom pitch
     // As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
     const u32 pitch = width * BYTES_PER_PIXEL;
-    const u32 stride = Common::AlignUpLog2(width, stride_alignment) * BYTES_PER_PIXEL;
 
     const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
     const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
@@ -179,15 +178,23 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
 void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
                       u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
                       u32 stride_alignment) {
+    const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
+    const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
+    width = (width * bytes_per_pixel) >> new_bpp; // Row length in (1 << new_bpp)-byte units.
+    bytes_per_pixel = 1U << new_bpp; // At most 16; stride above used the old values.
     Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
-                   stride_alignment);
+                   stride);
 }
 
 void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
                     u32 height, u32 depth, u32 block_height, u32 block_depth,
                     u32 stride_alignment) {
+    const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
+    const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
+    width = (width * bytes_per_pixel) >> new_bpp; // Row length in (1 << new_bpp)-byte units.
+    bytes_per_pixel = 1U << new_bpp; // At most 16; stride above used the old values.
     Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
-                  stride_alignment);
+                  stride);
 }
 
 void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,