From 05ee37a1f0641a2e1a15e9052371a5ce0cd20058 Mon Sep 17 00:00:00 2001
From: Ameer J <52414509+ameerj@users.noreply.github.com>
Date: Tue, 1 Aug 2023 19:48:19 -0400
Subject: [PATCH] astc_decoder: AMD optimizations (global arrays, vector clamp, bitfieldExtract)

---
 src/video_core/host_shaders/astc_decoder.comp | 29 +++++++++----------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index e8801b0ff4..ca93dc2a20 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -154,7 +154,7 @@ uint FastReplicateTo8(uint value, uint num_bits) {
         return 0;
     }
     const uint array_index = value / 4;
-    const uint vector_index = value % 4;
+    const uint vector_index = bitfieldExtract(value, 0, 2);
     switch (num_bits) {
     case 1:
         return 255;
@@ -213,7 +213,7 @@ uint FastReplicateTo6(uint value, uint num_bits) {
         return 0;
     }
     const uint array_index = value / 4;
-    const uint vector_index = value % 4;
+    const uint vector_index = bitfieldExtract(value, 0, 2);
     switch (num_bits) {
     case 1:
         return 63;
@@ -536,8 +536,8 @@ void DecodeIntegerSequence(uint max_range, uint num_values) {
     }
 }
 
-void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits,
-                       out uvec4 color_values[8]) {
+uvec4 color_values[8];
+void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) {
     uint num_values = 0;
     for (uint i = 0; i < num_partitions; i++) {
         num_values += ((modes[i] >> 2) + 1) << 1;
@@ -664,10 +664,7 @@ ivec2 BitTransferSigned(int a, int b) {
 }
 
 uvec4 ClampByte(ivec4 color) {
-    for (uint i = 0; i < 4; ++i) {
-        color[i] = clamp(color[i], 0, 255);
-    }
-    return uvec4(color);
+    return uvec4(clamp(color, 0, 255));
 }
 
 ivec4 BlueContract(int a, int r, int g, int b) {
@@ -675,7 +672,7 @@ ivec4 BlueContract(int a, int r, int g, int b) {
 }
 
 void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode,
-                      in uvec4 color_values[8], inout uint colvals_index) {
+                      inout uint colvals_index) {
 #define READ_UINT_VALUES(N)                                                                        \
     uint v[N];                                                                                     \
     for (uint i = 0; i < N; i++) {                                                                 \
@@ -887,8 +884,9 @@ uint UnquantizeTexelWeight(EncodingData val) {
     return result;
 }
 
-void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane,
-                            out uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]) {
+uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
+
+void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
     const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
     const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
     const uint num_planes = is_dual_plane ? 2 : 1;
@@ -1205,11 +1203,11 @@ void DecompressBlock(ivec3 coord) {
         // This decode phase should at most push 32 elements into the vector
         result_vector_max_index = 32;
 
-        uvec4 color_values[8];
+        // color_values is a global: written by DecodeColorValues, read by ComputeEndpoints
         uint colvals_index = 0;
-        DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits, color_values);
+        DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits);
         for (uint i = 0; i < num_partitions; i++) {
-            ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], color_values,
+            ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i],
                              colvals_index);
         }
     }
@@ -1239,8 +1237,7 @@ void DecompressBlock(ivec3 coord) {
     }
     DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
 
-    uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
-    UnquantizeTexelWeights(params.size, params.dual_plane, unquantized_texel_weights);
+    UnquantizeTexelWeights(params.size, params.dual_plane);
     for (uint j = 0; j < block_dims.y; j++) {
         for (uint i = 0; i < block_dims.x; i++) {
             uint local_partition = 0;