From b8c906f9d1acbd59c264bd3ad335c6d75f92da5c Mon Sep 17 00:00:00 2001
From: Morph <39850852+Morph1984@users.noreply.github.com>
Date: Sat, 24 Jun 2023 19:58:45 -0400
Subject: [PATCH 1/5] scratch_buffer: Add member types to ScratchBuffer

Allows for implicit conversion to std::span<T>.
---
 src/common/scratch_buffer.h | 46 +++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h
index 6fe9079531..d5961b0208 100644
--- a/src/common/scratch_buffer.h
+++ b/src/common/scratch_buffer.h
@@ -5,7 +5,6 @@
 
 #include <iterator>
 
-#include "common/concepts.h"
 #include "common/make_unique_for_overwrite.h"
 
 namespace Common {
@@ -19,15 +18,22 @@ namespace Common {
 template <typename T>
 class ScratchBuffer {
 public:
-    using iterator = T*;
-    using const_iterator = const T*;
-    using value_type = T;
     using element_type = T;
-    using iterator_category = std::contiguous_iterator_tag;
+    using value_type = T;
+    using size_type = size_t;
+    using difference_type = std::ptrdiff_t;
+    using pointer = T*;
+    using const_pointer = const T*;
+    using reference = T&;
+    using const_reference = const T&;
+    using iterator = pointer;
+    using const_iterator = const_pointer;
+    using iterator_category = std::random_access_iterator_tag;
+    using iterator_concept = std::contiguous_iterator_tag;
 
     ScratchBuffer() = default;
 
-    explicit ScratchBuffer(size_t initial_capacity)
+    explicit ScratchBuffer(size_type initial_capacity)
         : last_requested_size{initial_capacity}, buffer_capacity{initial_capacity},
           buffer{Common::make_unique_for_overwrite<T[]>(initial_capacity)} {}
 
@@ -39,7 +45,7 @@ public:
 
     /// This will only grow the buffer's capacity if size is greater than the current capacity.
     /// The previously held data will remain intact.
-    void resize(size_t size) {
+    void resize(size_type size) {
         if (size > buffer_capacity) {
             auto new_buffer = Common::make_unique_for_overwrite<T[]>(size);
             std::move(buffer.get(), buffer.get() + buffer_capacity, new_buffer.get());
@@ -51,7 +57,7 @@ public:
 
     /// This will only grow the buffer's capacity if size is greater than the current capacity.
     /// The previously held data will be destroyed if a reallocation occurs.
-    void resize_destructive(size_t size) {
+    void resize_destructive(size_type size) {
         if (size > buffer_capacity) {
             buffer_capacity = size;
             buffer = Common::make_unique_for_overwrite<T[]>(buffer_capacity);
@@ -59,43 +65,43 @@ public:
         last_requested_size = size;
     }
 
-    [[nodiscard]] T* data() noexcept {
+    [[nodiscard]] pointer data() noexcept {
         return buffer.get();
     }
 
-    [[nodiscard]] const T* data() const noexcept {
+    [[nodiscard]] const_pointer data() const noexcept {
         return buffer.get();
     }
 
-    [[nodiscard]] T* begin() noexcept {
+    [[nodiscard]] iterator begin() noexcept {
         return data();
     }
 
-    [[nodiscard]] const T* begin() const noexcept {
+    [[nodiscard]] const_iterator begin() const noexcept {
         return data();
     }
 
-    [[nodiscard]] T* end() noexcept {
+    [[nodiscard]] iterator end() noexcept {
         return data() + last_requested_size;
     }
 
-    [[nodiscard]] const T* end() const noexcept {
+    [[nodiscard]] const_iterator end() const noexcept {
         return data() + last_requested_size;
     }
 
-    [[nodiscard]] T& operator[](size_t i) {
+    [[nodiscard]] reference operator[](size_type i) {
         return buffer[i];
     }
 
-    [[nodiscard]] const T& operator[](size_t i) const {
+    [[nodiscard]] const_reference operator[](size_type i) const {
         return buffer[i];
     }
 
-    [[nodiscard]] size_t size() const noexcept {
+    [[nodiscard]] size_type size() const noexcept {
         return last_requested_size;
     }
 
-    [[nodiscard]] size_t capacity() const noexcept {
+    [[nodiscard]] size_type capacity() const noexcept {
         return buffer_capacity;
     }
 
@@ -106,8 +112,8 @@ public:
     }
 
 private:
-    size_t last_requested_size{};
-    size_t buffer_capacity{};
+    size_type last_requested_size{};
+    size_type buffer_capacity{};
     std::unique_ptr<T[]> buffer{};
 };
 

From fbd85417ffdfe23dd4b4d3d13518244bd00be361 Mon Sep 17 00:00:00 2001
From: Morph <39850852+Morph1984@users.noreply.github.com>
Date: Sat, 24 Jun 2023 20:31:20 -0400
Subject: [PATCH 2/5] ring_buffer: Fix const usage on std::span

---
 src/common/ring_buffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h
index 416680d445..5c961b202f 100644
--- a/src/common/ring_buffer.h
+++ b/src/common/ring_buffer.h
@@ -54,7 +54,7 @@ public:
         return push_count;
     }
 
-    std::size_t Push(const std::span<T> input) {
+    std::size_t Push(std::span<const T> input) {
         return Push(input.data(), input.size());
     }
 

From 310b6cf4af940fa07666400426bbcca815c5375c Mon Sep 17 00:00:00 2001
From: Morph <39850852+Morph1984@users.noreply.github.com>
Date: Sat, 24 Jun 2023 21:58:23 -0400
Subject: [PATCH 3/5] general: Use ScratchBuffer where possible

---
 src/core/hle/service/audio/audin_u.cpp        | 16 ++++++------
 src/core/hle/service/audio/audout_u.cpp       | 20 ++++++++-------
 src/core/hle/service/audio/audren_u.cpp       | 23 +++++++++--------
 src/core/hle/service/audio/hwopus.cpp         |  9 ++++---
 .../hle/service/nvdrv/nvdrv_interface.cpp     | 25 ++++++++++---------
 src/core/hle/service/nvdrv/nvdrv_interface.h  |  5 ++--
 src/video_core/host1x/codecs/codec.cpp        |  2 +-
 src/video_core/host1x/codecs/h264.cpp         | 14 +++++------
 src/video_core/host1x/codecs/h264.h           | 12 ++++++---
 src/video_core/host1x/codecs/vp8.cpp          |  2 +-
 src/video_core/host1x/codecs/vp8.h            |  7 +++---
 src/video_core/host1x/codecs/vp9.cpp          |  1 +
 src/video_core/host1x/codecs/vp9.h            |  8 +++---
 src/video_core/host1x/codecs/vp9_types.h      |  1 +
 14 files changed, 81 insertions(+), 64 deletions(-)

diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index c8d574993b..526a391307 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -5,7 +5,7 @@
 #include "audio_core/renderer/audio_device.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
-#include "common/settings.h"
+#include "common/scratch_buffer.h"
 #include "common/string_util.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_event.h"
@@ -124,12 +124,15 @@ private:
 
     void GetReleasedAudioInBuffer(HLERequestContext& ctx) {
         const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
-        tmp_buffer.resize_destructive(write_buffer_size);
-        tmp_buffer[0] = 0;
+        released_buffer.resize_destructive(write_buffer_size);
+        released_buffer[0] = 0;
 
-        const auto count = impl->GetReleasedBuffers(tmp_buffer);
+        const auto count = impl->GetReleasedBuffers(released_buffer);
 
-        ctx.WriteBuffer(tmp_buffer);
+        LOG_TRACE(Service_Audio, "called. Session {} released {} buffers",
+                  impl->GetSystem().GetSessionId(), count);
+
+        ctx.WriteBuffer(released_buffer);
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(ResultSuccess);
@@ -155,7 +158,6 @@ private:
         LOG_DEBUG(Service_Audio, "called. Buffer count={}", buffer_count);
 
         IPC::ResponseBuilder rb{ctx, 3};
-
         rb.Push(ResultSuccess);
         rb.Push(buffer_count);
     }
@@ -195,7 +197,7 @@ private:
     KernelHelpers::ServiceContext service_context;
     Kernel::KEvent* event;
     std::shared_ptr<AudioCore::AudioIn::In> impl;
-    Common::ScratchBuffer<u64> tmp_buffer;
+    Common::ScratchBuffer<u64> released_buffer;
 };
 
 AudInU::AudInU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 032c8c11f5..23f84a29f7 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -9,6 +9,7 @@
 #include "audio_core/renderer/audio_device.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
+#include "common/scratch_buffer.h"
 #include "common/string_util.h"
 #include "common/swap.h"
 #include "core/core.h"
@@ -102,8 +103,8 @@ private:
         AudioOutBuffer buffer{};
         std::memcpy(&buffer, in_buffer.data(), sizeof(AudioOutBuffer));
 
-        [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()};
-        LOG_TRACE(Service_Audio, "called. Session {} Appending buffer {:08X}", sessionid, tag);
+        LOG_TRACE(Service_Audio, "called. Session {} Appending buffer {:08X}",
+                  impl->GetSystem().GetSessionId(), tag);
 
         auto result = impl->AppendBuffer(buffer, tag);
 
@@ -123,12 +124,15 @@ private:
 
     void GetReleasedAudioOutBuffers(HLERequestContext& ctx) {
         const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
-        tmp_buffer.resize_destructive(write_buffer_size);
-        tmp_buffer[0] = 0;
+        released_buffer.resize_destructive(write_buffer_size);
+        released_buffer[0] = 0;
 
-        const auto count = impl->GetReleasedBuffers(tmp_buffer);
+        const auto count = impl->GetReleasedBuffers(released_buffer);
 
-        ctx.WriteBuffer(tmp_buffer);
+        ctx.WriteBuffer(released_buffer);
+
+        LOG_TRACE(Service_Audio, "called. Session {} released {} buffers",
+                  impl->GetSystem().GetSessionId(), count);
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(ResultSuccess);
@@ -154,7 +158,6 @@ private:
         LOG_DEBUG(Service_Audio, "called. Buffer count={}", buffer_count);
 
         IPC::ResponseBuilder rb{ctx, 3};
-
         rb.Push(ResultSuccess);
         rb.Push(buffer_count);
     }
@@ -165,7 +168,6 @@ private:
         LOG_DEBUG(Service_Audio, "called. Played samples={}", samples_played);
 
         IPC::ResponseBuilder rb{ctx, 4};
-
         rb.Push(ResultSuccess);
         rb.Push(samples_played);
     }
@@ -205,7 +207,7 @@ private:
     KernelHelpers::ServiceContext service_context;
     Kernel::KEvent* event;
     std::shared_ptr<AudioCore::AudioOut::Out> impl;
-    Common::ScratchBuffer<u64> tmp_buffer;
+    Common::ScratchBuffer<u64> released_buffer;
 };
 
 AudOutU::AudOutU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 12845c23a3..003870176b 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -15,6 +15,7 @@
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
 #include "common/polyfill_ranges.h"
+#include "common/scratch_buffer.h"
 #include "common/string_util.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_event.h"
@@ -119,23 +120,23 @@ private:
         auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0};
         if (is_buffer_b) {
             const auto buffersB{ctx.BufferDescriptorB()};
-            tmp_output.resize_destructive(buffersB[0].Size());
-            tmp_performance.resize_destructive(buffersB[1].Size());
+            output_buffer.resize_destructive(buffersB[0].Size());
+            performance_buffer.resize_destructive(buffersB[1].Size());
         } else {
             const auto buffersC{ctx.BufferDescriptorC()};
-            tmp_output.resize_destructive(buffersC[0].Size());
-            tmp_performance.resize_destructive(buffersC[1].Size());
+            output_buffer.resize_destructive(buffersC[0].Size());
+            performance_buffer.resize_destructive(buffersC[1].Size());
         }
 
-        auto result = impl->RequestUpdate(input, tmp_performance, tmp_output);
+        auto result = impl->RequestUpdate(input, performance_buffer, output_buffer);
 
         if (result.IsSuccess()) {
             if (is_buffer_b) {
-                ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0);
-                ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1);
+                ctx.WriteBufferB(output_buffer.data(), output_buffer.size(), 0);
+                ctx.WriteBufferB(performance_buffer.data(), performance_buffer.size(), 1);
             } else {
-                ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0);
-                ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1);
+                ctx.WriteBufferC(output_buffer.data(), output_buffer.size(), 0);
+                ctx.WriteBufferC(performance_buffer.data(), performance_buffer.size(), 1);
             }
         } else {
             LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description);
@@ -233,8 +234,8 @@ private:
     Kernel::KEvent* rendered_event;
     Manager& manager;
     std::unique_ptr<Renderer> impl;
-    Common::ScratchBuffer<u8> tmp_output;
-    Common::ScratchBuffer<u8> tmp_performance;
+    Common::ScratchBuffer<u8> output_buffer;
+    Common::ScratchBuffer<u8> performance_buffer;
 };
 
 class IAudioDevice final : public ServiceFramework<IAudioDevice> {
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index c835f6cb76..fa77007f37 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -11,6 +11,7 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/scratch_buffer.h"
 #include "core/hle/service/audio/hwopus.h"
 #include "core/hle/service/ipc_helpers.h"
 
@@ -68,13 +69,13 @@ private:
                                  ExtraBehavior extra_behavior) {
         u32 consumed = 0;
         u32 sample_count = 0;
-        tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>());
+        samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>());
 
         if (extra_behavior == ExtraBehavior::ResetContext) {
             ResetDecoderContext();
         }
 
-        if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) {
+        if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
             LOG_ERROR(Audio, "Failed to decode opus data");
             IPC::ResponseBuilder rb{ctx, 2};
             // TODO(ogniK): Use correct error code
@@ -90,7 +91,7 @@ private:
         if (performance) {
             rb.Push<u64>(*performance);
         }
-        ctx.WriteBuffer(tmp_samples);
+        ctx.WriteBuffer(samples);
     }
 
     bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input,
@@ -154,7 +155,7 @@ private:
     OpusDecoderPtr decoder;
     u32 sample_rate;
     u32 channel_count;
-    Common::ScratchBuffer<opus_int16> tmp_samples;
+    Common::ScratchBuffer<opus_int16> samples;
 };
 
 class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index 348207e250..c8a880e841 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -2,7 +2,6 @@
 // SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
 // SPDX-License-Identifier: GPL-3.0-or-later
 
-#include <cinttypes>
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_event.h"
@@ -63,12 +62,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) {
     }
 
     // Check device
-    tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
+    output_buffer.resize_destructive(ctx.GetWriteBufferSize(0));
     const auto input_buffer = ctx.ReadBuffer(0);
 
-    const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output);
+    const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer);
     if (command.is_out != 0) {
-        ctx.WriteBuffer(tmp_output);
+        ctx.WriteBuffer(output_buffer);
     }
 
     IPC::ResponseBuilder rb{ctx, 3};
@@ -90,12 +89,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) {
 
     const auto input_buffer = ctx.ReadBuffer(0);
     const auto input_inlined_buffer = ctx.ReadBuffer(1);
-    tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
+    output_buffer.resize_destructive(ctx.GetWriteBufferSize(0));
 
     const auto nv_result =
-        nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output);
+        nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer);
     if (command.is_out != 0) {
-        ctx.WriteBuffer(tmp_output);
+        ctx.WriteBuffer(output_buffer);
     }
 
     IPC::ResponseBuilder rb{ctx, 3};
@@ -116,12 +115,14 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) {
     }
 
     const auto input_buffer = ctx.ReadBuffer(0);
-    tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
-    tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1));
-    const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline);
+    output_buffer.resize_destructive(ctx.GetWriteBufferSize(0));
+    inline_output_buffer.resize_destructive(ctx.GetWriteBufferSize(1));
+
+    const auto nv_result =
+        nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, inline_output_buffer);
     if (command.is_out != 0) {
-        ctx.WriteBuffer(tmp_output, 0);
-        ctx.WriteBuffer(tmp_output_inline, 1);
+        ctx.WriteBuffer(output_buffer, 0);
+        ctx.WriteBuffer(inline_output_buffer, 1);
     }
 
     IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h
index 4b593ff90d..6e98115dcd 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.h
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <memory>
+
 #include "common/scratch_buffer.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/service.h"
@@ -34,8 +35,8 @@ private:
 
     u64 pid{};
     bool is_initialized{};
-    Common::ScratchBuffer<u8> tmp_output;
-    Common::ScratchBuffer<u8> tmp_output_inline;
+    Common::ScratchBuffer<u8> output_buffer;
+    Common::ScratchBuffer<u8> inline_output_buffer;
 };
 
 } // namespace Service::Nvidia
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index cd6a3a9b83..da07a556f4 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -290,7 +290,7 @@ void Codec::Decode() {
             return vp9_decoder->GetFrameBytes();
         default:
             ASSERT(false);
-            return std::vector<u8>{};
+            return std::span<const u8>{};
         }
     }();
     AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index ce827eb6c2..862904e397 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -29,15 +29,15 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
 
 H264::~H264() = default;
 
-const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
-                                          bool is_first_frame) {
+std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
+                                       bool is_first_frame) {
     H264DecoderContext context;
     host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
                                      sizeof(H264DecoderContext));
 
     const s64 frame_number = context.h264_parameter_set.frame_number.Value();
     if (!is_first_frame && frame_number != 0) {
-        frame.resize(context.stream_len);
+        frame.resize_destructive(context.stream_len);
         host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
         return frame;
     }
@@ -135,14 +135,14 @@ const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegist
     for (s32 index = 0; index < 6; index++) {
         writer.WriteBit(true);
         std::span<const u8> matrix{context.weight_scale};
-        writer.WriteScalingList(matrix, index * 16, 16);
+        writer.WriteScalingList(scan, matrix, index * 16, 16);
     }
 
     if (context.h264_parameter_set.transform_8x8_mode_flag) {
         for (s32 index = 0; index < 2; index++) {
             writer.WriteBit(true);
             std::span<const u8> matrix{context.weight_scale_8x8};
-            writer.WriteScalingList(matrix, index * 64, 64);
+            writer.WriteScalingList(scan, matrix, index * 64, 64);
         }
     }
 
@@ -188,8 +188,8 @@ void H264BitWriter::WriteBit(bool state) {
     WriteBits(state ? 1 : 0, 1);
 }
 
-void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
-    static Common::ScratchBuffer<u8> scan{};
+void H264BitWriter::WriteScalingList(Common::ScratchBuffer<u8>& scan, std::span<const u8> list,
+                                     s32 start, s32 count) {
     scan.resize_destructive(count);
     if (count == 16) {
         std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h
index 5cc86454e4..d6b5563223 100644
--- a/src/video_core/host1x/codecs/h264.h
+++ b/src/video_core/host1x/codecs/h264.h
@@ -5,9 +5,11 @@
 
 #include <span>
 #include <vector>
+
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "common/scratch_buffer.h"
 #include "video_core/host1x/nvdec_common.h"
 
 namespace Tegra {
@@ -37,7 +39,8 @@ public:
 
     /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
     /// Writes the scaling matrices of the sream
-    void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
+    void WriteScalingList(Common::ScratchBuffer<u8>& scan, std::span<const u8> list, s32 start,
+                          s32 count);
 
     /// Return the bitstream as a vector.
     [[nodiscard]] std::vector<u8>& GetByteArray();
@@ -63,11 +66,12 @@ public:
     ~H264();
 
     /// Compose the H264 frame for FFmpeg decoding
-    [[nodiscard]] const std::vector<u8>& ComposeFrame(
-        const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
+    [[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
+                                                   bool is_first_frame = false);
 
 private:
-    std::vector<u8> frame;
+    Common::ScratchBuffer<u8> frame;
+    Common::ScratchBuffer<u8> scan;
     Host1x::Host1x& host1x;
 
     struct H264ParameterSet {
diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
index 28fb12cb8e..ee6392ff9c 100644
--- a/src/video_core/host1x/codecs/vp8.cpp
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -12,7 +12,7 @@ VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {}
 
 VP8::~VP8() = default;
 
-const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
+std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
     VP8PictureInfo info;
     host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
 
diff --git a/src/video_core/host1x/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h
index 5bf07ecab1..7926b73f31 100644
--- a/src/video_core/host1x/codecs/vp8.h
+++ b/src/video_core/host1x/codecs/vp8.h
@@ -4,10 +4,11 @@
 #pragma once
 
 #include <array>
-#include <vector>
+#include <span>
 
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "common/scratch_buffer.h"
 #include "video_core/host1x/nvdec_common.h"
 
 namespace Tegra {
@@ -24,11 +25,11 @@ public:
     ~VP8();
 
     /// Compose the VP8 frame for FFmpeg decoding
-    [[nodiscard]] const std::vector<u8>& ComposeFrame(
+    [[nodiscard]] std::span<const u8> ComposeFrame(
         const Host1x::NvdecCommon::NvdecRegisters& state);
 
 private:
-    std::vector<u8> frame;
+    Common::ScratchBuffer<u8> frame;
     Host1x::Host1x& host1x;
 
     struct VP8PictureInfo {
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
index cf40c90121..306c3d0e8c 100644
--- a/src/video_core/host1x/codecs/vp9.cpp
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -3,6 +3,7 @@
 
 #include <algorithm> // for std::copy
 #include <numeric>
+
 #include "common/assert.h"
 #include "video_core/host1x/codecs/vp9.h"
 #include "video_core/host1x/host1x.h"
diff --git a/src/video_core/host1x/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h
index d4083e8d3e..f1ed195081 100644
--- a/src/video_core/host1x/codecs/vp9.h
+++ b/src/video_core/host1x/codecs/vp9.h
@@ -4,9 +4,11 @@
 #pragma once
 
 #include <array>
+#include <span>
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/scratch_buffer.h"
 #include "common/stream.h"
 #include "video_core/host1x/codecs/vp9_types.h"
 #include "video_core/host1x/nvdec_common.h"
@@ -128,8 +130,8 @@ public:
         return !current_frame_info.show_frame;
     }
 
-    /// Returns a const reference to the composed frame data.
-    [[nodiscard]] const std::vector<u8>& GetFrameBytes() const {
+    /// Returns a const span to the composed frame data.
+    [[nodiscard]] std::span<const u8> GetFrameBytes() const {
         return frame;
     }
 
@@ -181,7 +183,7 @@ private:
     [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
 
     Host1x::Host1x& host1x;
-    std::vector<u8> frame;
+    Common::ScratchBuffer<u8> frame;
 
     std::array<s8, 4> loop_filter_ref_deltas{};
     std::array<s8, 2> loop_filter_mode_deltas{};
diff --git a/src/video_core/host1x/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h
index adad8ed7ed..cc9b256907 100644
--- a/src/video_core/host1x/codecs/vp9_types.h
+++ b/src/video_core/host1x/codecs/vp9_types.h
@@ -5,6 +5,7 @@
 
 #include <array>
 #include <vector>
+
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 

From 5a09fa50122af7c56ca3a05b18a1d2ab1e6b0e8b Mon Sep 17 00:00:00 2001
From: Morph <39850852+Morph1984@users.noreply.github.com>
Date: Sat, 24 Jun 2023 22:25:35 -0400
Subject: [PATCH 4/5] maxwell_dma: Specify dst_operand.pitch instead of a temp
 var

---
 src/video_core/engines/maxwell_dma.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a290d6ea7e..f8598fd980 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -174,8 +174,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
     src_operand.address = regs.offset_in;
 
     DMA::BufferOperand dst_operand;
-    u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
-    dst_operand.pitch = abs_pitch_out;
+    dst_operand.pitch = static_cast<u32>(std::abs(regs.pitch_out));
     dst_operand.width = regs.line_length_in;
     dst_operand.height = regs.line_count;
     dst_operand.address = regs.offset_out;
@@ -222,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
     const size_t src_size =
         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
 
-    const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
+    const size_t dst_size = static_cast<size_t>(dst_operand.pitch) * regs.line_count;
     read_buffer.resize_destructive(src_size);
     write_buffer.resize_destructive(dst_size);
 
@@ -231,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
 
     UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
                      src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
-                     abs_pitch_out);
+                     dst_operand.pitch);
 
     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
 }

From 1a46823ec59b0ad4556e494c0488d5a8160aa19b Mon Sep 17 00:00:00 2001
From: Morph <39850852+Morph1984@users.noreply.github.com>
Date: Fri, 30 Jun 2023 21:50:44 -0400
Subject: [PATCH 5/5] parcel: Optimize small_vector sizes

---
 src/core/hle/service/nvnflinger/parcel.h | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/core/hle/service/nvnflinger/parcel.h b/src/core/hle/service/nvnflinger/parcel.h
index 23ba315a05..e2c9bbd50f 100644
--- a/src/core/hle/service/nvnflinger/parcel.h
+++ b/src/core/hle/service/nvnflinger/parcel.h
@@ -6,6 +6,7 @@
 #include <memory>
 #include <span>
 #include <vector>
+
 #include <boost/container/small_vector.hpp>
 
 #include "common/alignment.h"
@@ -148,9 +149,9 @@ public:
         this->WriteImpl(0U, m_object_buffer);
     }
 
-    std::vector<u8> Serialize() const {
-        std::vector<u8> output_buffer(sizeof(ParcelHeader) + m_data_buffer.size() +
-                                      m_object_buffer.size());
+    std::span<u8> Serialize() {
+        m_output_buffer.resize(sizeof(ParcelHeader) + m_data_buffer.size() +
+                               m_object_buffer.size());
 
         ParcelHeader header{};
         header.data_size = static_cast<u32>(m_data_buffer.size());
@@ -158,17 +159,17 @@ public:
         header.objects_size = static_cast<u32>(m_object_buffer.size());
         header.objects_offset = header.data_offset + header.data_size;
 
-        std::memcpy(output_buffer.data(), &header, sizeof(header));
-        std::ranges::copy(m_data_buffer, output_buffer.data() + header.data_offset);
-        std::ranges::copy(m_object_buffer, output_buffer.data() + header.objects_offset);
+        std::memcpy(m_output_buffer.data(), &header, sizeof(ParcelHeader));
+        std::ranges::copy(m_data_buffer, m_output_buffer.data() + header.data_offset);
+        std::ranges::copy(m_object_buffer, m_output_buffer.data() + header.objects_offset);
 
-        return output_buffer;
+        return m_output_buffer;
     }
 
 private:
-    template <typename T>
+    template <typename T, size_t BufferSize>
         requires(std::is_trivially_copyable_v<T>)
-    void WriteImpl(const T& val, boost::container::small_vector<u8, 0x200>& buffer) {
+    void WriteImpl(const T& val, boost::container::small_vector<u8, BufferSize>& buffer) {
         const size_t aligned_size = Common::AlignUp(sizeof(T), 4);
         const size_t old_size = buffer.size();
         buffer.resize(old_size + aligned_size);
@@ -177,8 +178,9 @@ private:
     }
 
 private:
-    boost::container::small_vector<u8, 0x200> m_data_buffer;
-    boost::container::small_vector<u8, 0x200> m_object_buffer;
+    boost::container::small_vector<u8, 0x1B0> m_data_buffer;
+    boost::container::small_vector<u8, 0x40> m_object_buffer;
+    boost::container::small_vector<u8, 0x200> m_output_buffer;
 };
 
 } // namespace Service::android