From 1e8cee2ddfeb87d4501f66197625a31c09b57e48 Mon Sep 17 00:00:00 2001
From: Feng Chen <vonchenplus@gmail.com>
Date: Thu, 5 Jan 2023 12:27:41 +0800
Subject: [PATCH 1/3] video_core: Implement maxwell3d draw texture method

---
 src/video_core/engines/draw_manager.cpp       | 31 ++++++++++
 src/video_core/engines/draw_manager.h         | 20 +++++++
 src/video_core/engines/maxwell_3d.cpp         |  1 +
 src/video_core/engines/maxwell_3d.h           | 16 ++++-
 .../host_shaders/blit_color_float.frag        | 13 ++++
 src/video_core/renderer_opengl/blit_image.cpp | 59 +++++++++++++++++++
 src/video_core/renderer_opengl/blit_image.h   | 38 ++++++++++++
 7 files changed, 177 insertions(+), 1 deletion(-)
 create mode 100644 src/video_core/host_shaders/blit_color_float.frag
 create mode 100644 src/video_core/renderer_opengl/blit_image.cpp
 create mode 100644 src/video_core/renderer_opengl/blit_image.h

diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index 2437121ce8..2685481f95 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
         LOG_WARNING(HW_GPU, "(STUBBED) called");
         break;
     }
+    case MAXWELL3D_REG_INDEX(draw_texture.src_y0): {
+        DrawTexture();
+        break;
+    }
     default:
         break;
     }
@@ -179,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) {
     ProcessDraw(true, 1);
 }
 
+void DrawManager::DrawTexture() {
+    // Positions and sizes are divided by 4096, the dx_du/dy_dv steps by 2^32 (4294967296).
+    const auto& params{maxwell3d->regs.draw_texture};
+    auto& state{draw_texture_state};
+    const auto dst_width = static_cast<float>(params.dst_width) / 4096.f;
+    const auto dst_height = static_cast<float>(params.dst_height) / 4096.f;
+    state.dst_x0 = static_cast<float>(params.dst_x0) / 4096.f;
+    state.dst_y0 = static_cast<float>(params.dst_y0) / 4096.f;
+    // For every window origin other than UpperLeft, dst_y0 is reduced by the rectangle height.
+    if (maxwell3d->regs.window_origin.mode != Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft) {
+        state.dst_y0 -= dst_height;
+    }
+    state.dst_x1 = state.dst_x0 + dst_width;
+    state.dst_y1 = state.dst_y0 + dst_height;
+    state.src_x0 = static_cast<float>(params.src_x0) / 4096.f;
+    state.src_y0 = static_cast<float>(params.src_y0) / 4096.f;
+    // 2^32; 4294967295 is not representable as a float and would round to this value anyway.
+    const float src_step_x = static_cast<float>(params.dx_du) / 4294967296.f;
+    const float src_step_y = static_cast<float>(params.dy_dv) / 4294967296.f;
+    state.src_x1 = src_step_x * dst_width + state.src_x0;
+    state.src_y1 = src_step_y * dst_height + state.src_y0;
+    state.src_sampler = params.src_sampler;
+    state.src_texture = params.src_texture;
+    // Backends fetch the values computed above through GetDrawTextureState().
+    maxwell3d->rasterizer->DrawTexture();
+}
+
 void DrawManager::UpdateTopology() {
     const auto& regs{maxwell3d->regs};
     switch (regs.primitive_topology_control) {
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 58d1b2d59f..7c22c49f12 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -32,6 +32,19 @@ public:
         std::vector<u8> inline_index_draw_indexes;
     };
 
+    struct DrawTextureState {
+        f32 dst_x0;      ///< Register dst_x0 divided by 4096
+        f32 dst_y0;      ///< Register dst_y0 divided by 4096, adjusted for the window origin
+        f32 dst_x1;      ///< dst_x0 plus the destination width (register value / 4096)
+        f32 dst_y1;      ///< dst_y0 plus the destination height (register value / 4096)
+        f32 src_x0;      ///< Register src_x0 divided by 4096
+        f32 src_y0;      ///< Register src_y0 divided by 4096
+        f32 src_x1;      ///< src_x0 plus the scaled dx_du register times the destination width
+        f32 src_y1;      ///< src_y0 plus the scaled dy_dv register times the destination height
+        u32 src_sampler; ///< Copied unchanged from the src_sampler register
+        u32 src_texture; ///< Copied unchanged from the src_texture register
+    };
+
     struct IndirectParams {
         bool is_indexed;
         bool include_count;
@@ -64,6 +77,10 @@ public:
         return draw_state;
     }
 
+    const DrawTextureState& GetDrawTextureState() const {
+        return draw_texture_state;
+    }
+
     IndirectParams& GetIndirectParams() {
         return indirect_state;
     }
@@ -81,6 +98,8 @@ private:
 
     void DrawIndexSmall(u32 argument);
 
+    void DrawTexture();
+
     void UpdateTopology();
 
     void ProcessDraw(bool draw_indexed, u32 instance_count);
@@ -89,6 +108,7 @@ private:
 
     Maxwell3D* maxwell3d{};
     State draw_state{};
+    DrawTextureState draw_texture_state{};
     IndirectParams indirect_state{};
 };
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index fbfd1ddd24..a0555ef3fd 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -149,6 +149,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) {
     case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
     case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
     case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
+    case MAXWELL3D_REG_INDEX(draw_texture.src_y0):
     case MAXWELL3D_REG_INDEX(wait_for_idle):
     case MAXWELL3D_REG_INDEX(shadow_ram_control):
     case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0b2fd29289..c89969bb45 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1599,6 +1599,20 @@ public:
         };
         static_assert(sizeof(TIRModulationCoeff) == 0x4);
 
+        struct DrawTexture {
+            s32 dst_x0;      // Divided by 4096 in DrawManager::DrawTexture
+            s32 dst_y0;      // Divided by 4096 in DrawManager::DrawTexture
+            s32 dst_width;   // Divided by 4096 in DrawManager::DrawTexture
+            s32 dst_height;  // Divided by 4096 in DrawManager::DrawTexture
+            s64 dx_du;       // Divided by 2^32, then multiplied by the destination width
+            s64 dy_dv;       // Divided by 2^32, then multiplied by the destination height
+            u32 src_sampler; // Copied unchanged into DrawManager::DrawTextureState
+            u32 src_texture; // Copied unchanged into DrawManager::DrawTextureState
+            s32 src_x0;      // Divided by 4096 in DrawManager::DrawTexture
+            s32 src_y0;      // Divided by 4096; a write to this register triggers the draw
+        };
+        static_assert(sizeof(DrawTexture) == 0x30); // Same size as the padding it replaces
+
         struct ReduceColorThreshold {
             union {
                 BitField<0, 8, u32> all_hit_once;
@@ -2751,7 +2765,7 @@ public:
                 u32 reserved_sw_method2;                                               ///< 0x102C
                 std::array<TIRModulationCoeff, 5> tir_modulation_coeff;                ///< 0x1030
                 std::array<u32, 15> spare_nop;                                         ///< 0x1044
-                INSERT_PADDING_BYTES_NOINIT(0x30);
+                DrawTexture draw_texture;                                              ///< 0x1080
                 std::array<u32, 7> reserved_sw_method3_to_7;                           ///< 0x10B0
                 ReduceColorThreshold reduce_color_thresholds_unorm8;                   ///< 0x10CC
                 std::array<u32, 4> reserved_sw_method10_to_13;                         ///< 0x10D0
diff --git a/src/video_core/host_shaders/blit_color_float.frag b/src/video_core/host_shaders/blit_color_float.frag
new file mode 100644
index 0000000000..c0c8322969
--- /dev/null
+++ b/src/video_core/host_shaders/blit_color_float.frag
@@ -0,0 +1,13 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 450
+
+layout(binding = 0) uniform sampler2D tex;
+
+layout(location = 0) in vec2 texcoord;
+layout(location = 0) out vec4 color;
+
+void main() {
+    color = textureLod(tex, texcoord, 0);
+}
diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp
new file mode 100644
index 0000000000..9a560a73b8
--- /dev/null
+++ b/src/video_core/renderer_opengl/blit_image.cpp
@@ -0,0 +1,60 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+#include <cstdlib>
+
+#include "video_core/host_shaders/blit_color_float_frag.h"
+#include "video_core/host_shaders/full_screen_triangle_vert.h"
+#include "video_core/renderer_opengl/blit_image.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+
+BlitImageHelper::BlitImageHelper(ProgramManager& program_manager_)
+    : program_manager(program_manager_),
+      full_screen_vert(CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER)),
+      blit_color_to_color_frag(
+          CreateProgram(HostShaders::BLIT_COLOR_FLOAT_FRAG, GL_FRAGMENT_SHADER)) {}
+
+BlitImageHelper::~BlitImageHelper() = default;
+
+void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler,
+                                const Region2D& dst_region, const Region2D& src_region,
+                                const Extent3D& src_size) { // Samples src_region into dst_region
+    glEnable(GL_CULL_FACE); // State set from here on is not restored before returning
+    glDisable(GL_COLOR_LOGIC_OP);
+    glDisable(GL_DEPTH_TEST);
+    glDisable(GL_STENCIL_TEST);
+    glDisable(GL_POLYGON_OFFSET_FILL);
+    glDisable(GL_RASTERIZER_DISCARD);
+    glDisable(GL_ALPHA_TEST);
+    glDisablei(GL_BLEND, 0);
+    glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+    glCullFace(GL_BACK);
+    glFrontFace(GL_CW);
+    glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+    glDepthRangeIndexed(0, 0.0, 0.0);
+
+    program_manager.BindPresentPrograms(full_screen_vert.handle, blit_color_to_color_frag.handle);
+    glProgramUniform2f(full_screen_vert.handle, 0, // Location 0: source extent / texture size
+                       static_cast<float>(src_region.end.x - src_region.start.x) /
+                           static_cast<float>(src_size.width),
+                       static_cast<float>(src_region.end.y - src_region.start.y) /
+                           static_cast<float>(src_size.height));
+    glProgramUniform2f(full_screen_vert.handle, 1, // Location 1: source origin / texture size
+                       static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width),
+                       static_cast<float>(src_region.start.y) /
+                           static_cast<float>(src_size.height));
+    glViewport(std::min(dst_region.start.x, dst_region.end.x), // Valid even when start > end
+               std::min(dst_region.start.y, dst_region.end.y),
+               std::abs(dst_region.end.x - dst_region.start.x),
+               std::abs(dst_region.end.y - dst_region.start.y));
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer);
+    glBindSampler(0, src_sampler);
+    glBindTextureUnit(0, src_image_view);
+    glClear(GL_COLOR_BUFFER_BIT); // NOTE(review): clear before the draw; confirm intended
+    glDrawArrays(GL_TRIANGLES, 0, 3); // Single triangle; positions come from the vertex id
+}
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/blit_image.h b/src/video_core/renderer_opengl/blit_image.h
new file mode 100644
index 0000000000..5a2b12d16f
--- /dev/null
+++ b/src/video_core/renderer_opengl/blit_image.h
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <glad/glad.h>
+
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/texture_cache/types.h"
+
+namespace OpenGL {
+
+using VideoCommon::Extent3D;
+using VideoCommon::Offset2D;
+using VideoCommon::Region2D;
+
+class ProgramManager;
+class Framebuffer;
+class ImageView;
+
+class BlitImageHelper {
+public:
+    explicit BlitImageHelper(ProgramManager& program_manager);
+    ~BlitImageHelper();
+
+    void BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler,
+                   const Region2D& dst_region, const Region2D& src_region,
+                   const Extent3D& src_size);
+
+private:
+    ProgramManager& program_manager;
+
+    OGLProgram full_screen_vert;
+    OGLProgram blit_color_to_color_frag;
+};
+
+} // namespace OpenGL

From 013b6891531b37e0f882b8b88d404feb63370617 Mon Sep 17 00:00:00 2001
From: Feng Chen <vonchenplus@gmail.com>
Date: Thu, 5 Jan 2023 12:28:48 +0800
Subject: [PATCH 2/3] video_core: Implement opengl/vulkan draw_texture

---
 src/video_core/CMakeLists.txt                 |   2 +
 src/video_core/host_shaders/CMakeLists.txt    |   2 +-
 .../host_shaders/full_screen_triangle.vert    |  15 ++-
 .../host_shaders/vulkan_blit_color_float.frag |  13 --
 src/video_core/rasterizer_interface.h         |   3 +
 .../renderer_null/null_rasterizer.cpp         |   1 +
 .../renderer_null/null_rasterizer.h           |   1 +
 src/video_core/renderer_opengl/gl_device.cpp  |   1 +
 src/video_core/renderer_opengl/gl_device.h    |   7 +
 .../renderer_opengl/gl_rasterizer.cpp         |  44 ++++++-
 .../renderer_opengl/gl_rasterizer.h           |   4 +
 .../renderer_opengl/gl_shader_manager.cpp     | 121 ++++++++++++++++++
 .../renderer_opengl/gl_shader_manager.h       | 115 ++---------------
 src/video_core/renderer_vulkan/blit_image.cpp |  41 +++++-
 src/video_core/renderer_vulkan/blit_image.h   |   6 +
 .../renderer_vulkan/vk_rasterizer.cpp         |  28 ++++
 .../renderer_vulkan/vk_rasterizer.h           |   1 +
 src/video_core/texture_cache/texture_cache.h  |   7 +
 .../texture_cache/texture_cache_base.h        |   3 +
 19 files changed, 284 insertions(+), 131 deletions(-)
 delete mode 100644 src/video_core/host_shaders/vulkan_blit_color_float.frag

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index aa271a3770..f9001104c6 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -99,6 +99,8 @@ add_library(video_core STATIC
     renderer_null/null_rasterizer.h
     renderer_null/renderer_null.cpp
     renderer_null/renderer_null.h
+    renderer_opengl/blit_image.cpp
+    renderer_opengl/blit_image.h
     renderer_opengl/gl_buffer_cache.cpp
     renderer_opengl/gl_buffer_cache.h
     renderer_opengl/gl_compute_pipeline.cpp
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index e6dc24f220..1a7961cb9c 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -11,6 +11,7 @@ set(GLSL_INCLUDES
 
 set(SHADER_FILES
     astc_decoder.comp
+    blit_color_float.frag
     block_linear_unswizzle_2d.comp
     block_linear_unswizzle_3d.comp
     convert_abgr8_to_d24s8.frag
@@ -36,7 +37,6 @@ set(SHADER_FILES
     smaa_blending_weight_calculation.frag
     smaa_neighborhood_blending.vert
     smaa_neighborhood_blending.frag
-    vulkan_blit_color_float.frag
     vulkan_blit_depth_stencil.frag
     vulkan_fidelityfx_fsr_easu_fp16.comp
     vulkan_fidelityfx_fsr_easu_fp32.comp
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert
index 2c976b19f6..8ac936efdf 100644
--- a/src/video_core/host_shaders/full_screen_triangle.vert
+++ b/src/video_core/host_shaders/full_screen_triangle.vert
@@ -4,13 +4,20 @@
 #version 450
 
 #ifdef VULKAN
+#define VERTEX_ID gl_VertexIndex
 #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
 #define END_PUSH_CONSTANTS };
 #define UNIFORM(n)
+#define FLIPY 1
 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+#define VERTEX_ID gl_VertexID
 #define BEGIN_PUSH_CONSTANTS
 #define END_PUSH_CONSTANTS
+#define FLIPY -1
 #define UNIFORM(n) layout (location = n) uniform
+out gl_PerVertex {
+    vec4 gl_Position;
+};
 #endif
 
 BEGIN_PUSH_CONSTANTS
@@ -21,8 +28,8 @@ END_PUSH_CONSTANTS
 layout(location = 0) out vec2 texcoord;
 
 void main() {
-    float x = float((gl_VertexIndex & 1) << 2);
-    float y = float((gl_VertexIndex & 2) << 1);
-    gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
+    float x = float((VERTEX_ID & 1) << 2); // 0, 4, 0 for vertex ids 0, 1, 2
+    float y = float((VERTEX_ID & 2) << 1); // 0, 0, 4 for vertex ids 0, 1, 2
+    gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0); // FLIPY: 1 on Vulkan, -1 on OpenGL
     texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
 }
diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag
deleted file mode 100644
index c0c8322969..0000000000
--- a/src/video_core/host_shaders/vulkan_blit_color_float.frag
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#version 450
-
-layout(binding = 0) uniform sampler2D tex;
-
-layout(location = 0) in vec2 texcoord;
-layout(location = 0) out vec4 color;
-
-void main() {
-    color = textureLod(tex, texcoord, 0);
-}
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f44c7df506..f980b12c61 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -46,6 +46,9 @@ public:
     /// Dispatches an indirect draw invocation
     virtual void DrawIndirect() {}
 
+    /// Dispatches a draw texture invocation
+    virtual void DrawTexture() = 0;
+
     /// Clear the current framebuffer
     virtual void Clear(u32 layer_count) = 0;
 
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index 2c11345d7b..2b5c7defa6 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -21,6 +21,7 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp
 RasterizerNull::~RasterizerNull() = default;
 
 void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {}
+void RasterizerNull::DrawTexture() {}
 void RasterizerNull::Clear(u32 layer_count) {}
 void RasterizerNull::DispatchCompute() {}
 void RasterizerNull::ResetCounter(VideoCore::QueryType type) {}
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index 2112aa70e1..51f896e43c 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -31,6 +31,7 @@ public:
     ~RasterizerNull() override;
 
     void Draw(bool is_indexed, u32 instance_count) override;
+    void DrawTexture() override;
     void Clear(u32 layer_count) override;
     void DispatchCompute() override;
     void ResetCounter(VideoCore::QueryType type) override;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index cee5c32470..22ed16ebfb 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
     has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
     has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
     has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
+    has_draw_texture = GLAD_GL_NV_draw_texture;
     warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
     need_fastmath_off = is_nvidia;
     can_report_memory = GLAD_GL_NVX_gpu_memory_info;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2a72d84be0..3ff8cad83b 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -4,6 +4,8 @@
 #pragma once
 
 #include <cstddef>
+#include <string>
+
 #include "common/common_types.h"
 #include "core/frontend/emu_window.h"
 #include "shader_recompiler/stage.h"
@@ -146,6 +148,10 @@ public:
         return has_sparse_texture_2;
     }
 
+    bool HasDrawTexture() const {
+        return has_draw_texture;
+    }
+
     bool IsWarpSizePotentiallyLargerThanGuest() const {
         return warp_size_potentially_larger_than_guest;
     }
@@ -216,6 +222,7 @@ private:
     bool has_shader_int64{};
     bool has_amd_shader_half_float{};
     bool has_sparse_texture_2{};
+    bool has_draw_texture{};
     bool warp_size_potentially_larger_than_guest{};
     bool need_fastmath_off{};
     bool has_cbuf_ftou_bug{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7d48af8e17..651608a069 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
       shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
                    state_tracker, gpu.ShaderNotify()),
       query_cache(*this), accelerate_dma(buffer_cache),
-      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
+      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+      blit_image(program_manager_) {}
 
 RasterizerOpenGL::~RasterizerOpenGL() = default;
 
@@ -318,6 +319,47 @@ void RasterizerOpenGL::DrawIndirect() {
     buffer_cache.SetDrawIndirect(nullptr);
 }
 
+void RasterizerOpenGL::DrawTexture() { // Executes the draw described by DrawTextureState
+    MICROPROFILE_SCOPE(OpenGL_Drawing);
+
+    SCOPE_EXIT({ gpu.TickWork(); });
+    query_cache.UpdateCounters();
+
+    texture_cache.SynchronizeGraphicsDescriptors();
+    texture_cache.UpdateRenderTargets(false);
+
+    SyncState();
+
+    const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
+    const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
+    const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
+
+    if (device.HasDrawTexture()) { // True when glad reports GL_NV_draw_texture
+        state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+
+        glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0,
+                        draw_texture_state.dst_y0, draw_texture_state.dst_x1,
+                        draw_texture_state.dst_y1, 0,
+                        draw_texture_state.src_x0 / static_cast<float>(texture.size.width),
+                        draw_texture_state.src_y0 / static_cast<float>(texture.size.height),
+                        draw_texture_state.src_x1 / static_cast<float>(texture.size.width),
+                        draw_texture_state.src_y1 / static_cast<float>(texture.size.height));
+    } else { // Fallback blit; the float rectangle coordinates are truncated to s32
+        Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
+                                        .y = static_cast<s32>(draw_texture_state.dst_y0)},
+                               Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
+                                        .y = static_cast<s32>(draw_texture_state.dst_y1)}};
+        Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
+                                        .y = static_cast<s32>(draw_texture_state.src_y0)},
+                               Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
+                                        .y = static_cast<s32>(draw_texture_state.src_y1)}};
+        blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(),
+                             sampler->Handle(), dst_region, src_region, texture.size);
+    }
+
+    ++num_queued_commands;
+}
+
 void RasterizerOpenGL::DispatchCompute() {
     ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
     if (!pipeline) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index be4f76c18d..0c45832ae1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -16,6 +16,7 @@
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/rasterizer_accelerated.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/blit_image.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_fence_manager.h"
@@ -70,6 +71,7 @@ public:
 
     void Draw(bool is_indexed, u32 instance_count) override;
     void DrawIndirect() override;
+    void DrawTexture() override;
     void Clear(u32 layer_count) override;
     void DispatchCompute() override;
     void ResetCounter(VideoCore::QueryType type) override;
@@ -224,6 +226,8 @@ private:
     AccelerateDMA accelerate_dma;
     FenceManagerOpenGL fence_manager;
 
+    BlitImageHelper blit_image;
+
     boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
     std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
     boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index d9c29d8b79..98841ae65e 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,2 +1,123 @@
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+
+namespace OpenGL {
+
+static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
+    GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+    GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+};
+
+ProgramManager::ProgramManager(const Device& device) {
+    glCreateProgramPipelines(1, &pipeline.handle);
+    if (device.UseAssemblyShaders()) {
+        glEnable(GL_COMPUTE_PROGRAM_NV);
+    }
+}
+
+void ProgramManager::BindComputeProgram(GLuint program) {
+    glUseProgram(program);
+    is_compute_bound = true;
+}
+
+void ProgramManager::BindComputeAssemblyProgram(GLuint program) {
+    if (current_assembly_compute_program != program) {
+        current_assembly_compute_program = program;
+        glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+    }
+    UnbindPipeline();
+}
+
+void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
+    static constexpr std::array<GLenum, 5> stage_enums{
+        GL_VERTEX_SHADER_BIT,   GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
+        GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
+    };
+    for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+        if (current_programs[stage] != programs[stage].handle) {
+            current_programs[stage] = programs[stage].handle;
+            glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
+        }
+    }
+    BindPipeline();
+}
+
+void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) {
+    if (current_programs[0] != vertex) { // Slot 0 holds the vertex stage program
+        current_programs[0] = vertex;
+        glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
+    }
+    if (current_programs[4] != fragment) { // Slot 4 holds the fragment stage program
+        current_programs[4] = fragment;
+        glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
+    }
+    glUseProgramStages(
+        pipeline.handle,
+        GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
+    current_programs[1] = 0; // Slots 1-3 (tessellation and geometry) were just detached above
+    current_programs[2] = 0;
+    current_programs[3] = 0;
+
+    if (current_stage_mask != 0) { // Some assembly program targets are still enabled
+        current_stage_mask = 0;
+        for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
+            glDisable(program_type);
+        }
+    }
+    BindPipeline();
+}
+
+void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
+                                          u32 stage_mask) {
+    const u32 changed_mask = current_stage_mask ^ stage_mask;
+    current_stage_mask = stage_mask;
+
+    if (changed_mask != 0) {
+        for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+            if (((changed_mask >> stage) & 1) != 0) {
+                if (((stage_mask >> stage) & 1) != 0) {
+                    glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+                } else {
+                    glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+                }
+            }
+        }
+    }
+    for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+        if (current_programs[stage] != programs[stage].handle) {
+            current_programs[stage] = programs[stage].handle;
+            glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
+        }
+    }
+    UnbindPipeline();
+}
+
+void ProgramManager::RestoreGuestCompute() {}
+
+void ProgramManager::BindPipeline() {
+    if (!is_pipeline_bound) {
+        is_pipeline_bound = true;
+        glBindProgramPipeline(pipeline.handle);
+    }
+    UnbindCompute();
+}
+
+void ProgramManager::UnbindPipeline() {
+    if (is_pipeline_bound) {
+        is_pipeline_bound = false;
+        glBindProgramPipeline(0);
+    }
+    UnbindCompute();
+}
+
+void ProgramManager::UnbindCompute() {
+    if (is_compute_bound) {
+        is_compute_bound = false;
+        glUseProgram(0);
+    }
+}
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index a84f5aeb3d..07ffab77f1 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -6,8 +6,6 @@
 #include <array>
 #include <span>
 
-#include <glad/glad.h>
-
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 
@@ -16,121 +14,28 @@ namespace OpenGL {
 class ProgramManager {
     static constexpr size_t NUM_STAGES = 5;
 
-    static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
-        GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
-        GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
-    };
-
 public:
-    explicit ProgramManager(const Device& device) {
-        glCreateProgramPipelines(1, &pipeline.handle);
-        if (device.UseAssemblyShaders()) {
-            glEnable(GL_COMPUTE_PROGRAM_NV);
-        }
-    }
+    explicit ProgramManager(const Device& device);
 
-    void BindComputeProgram(GLuint program) {
-        glUseProgram(program);
-        is_compute_bound = true;
-    }
+    void BindComputeProgram(GLuint program);
 
-    void BindComputeAssemblyProgram(GLuint program) {
-        if (current_assembly_compute_program != program) {
-            current_assembly_compute_program = program;
-            glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
-        }
-        UnbindPipeline();
-    }
+    void BindComputeAssemblyProgram(GLuint program);
 
-    void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
-        static constexpr std::array<GLenum, 5> stage_enums{
-            GL_VERTEX_SHADER_BIT,   GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
-            GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
-        };
-        for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
-            if (current_programs[stage] != programs[stage].handle) {
-                current_programs[stage] = programs[stage].handle;
-                glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
-            }
-        }
-        BindPipeline();
-    }
+    void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs);
 
-    void BindPresentPrograms(GLuint vertex, GLuint fragment) {
-        if (current_programs[0] != vertex) {
-            current_programs[0] = vertex;
-            glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
-        }
-        if (current_programs[4] != fragment) {
-            current_programs[4] = fragment;
-            glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
-        }
-        glUseProgramStages(
-            pipeline.handle,
-            GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
-        current_programs[1] = 0;
-        current_programs[2] = 0;
-        current_programs[3] = 0;
-
-        if (current_stage_mask != 0) {
-            current_stage_mask = 0;
-            for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
-                glDisable(program_type);
-            }
-        }
-        BindPipeline();
-    }
+    void BindPresentPrograms(GLuint vertex, GLuint fragment);
 
     void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
-                              u32 stage_mask) {
-        const u32 changed_mask = current_stage_mask ^ stage_mask;
-        current_stage_mask = stage_mask;
+                              u32 stage_mask);
 
-        if (changed_mask != 0) {
-            for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
-                if (((changed_mask >> stage) & 1) != 0) {
-                    if (((stage_mask >> stage) & 1) != 0) {
-                        glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
-                    } else {
-                        glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
-                    }
-                }
-            }
-        }
-        for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
-            if (current_programs[stage] != programs[stage].handle) {
-                current_programs[stage] = programs[stage].handle;
-                glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
-            }
-        }
-        UnbindPipeline();
-    }
-
-    void RestoreGuestCompute() {}
+    void RestoreGuestCompute();
 
 private:
-    void BindPipeline() {
-        if (!is_pipeline_bound) {
-            is_pipeline_bound = true;
-            glBindProgramPipeline(pipeline.handle);
-        }
-        UnbindCompute();
-    }
+    void BindPipeline();
 
-    void UnbindPipeline() {
-        if (is_pipeline_bound) {
-            is_pipeline_bound = false;
-            glBindProgramPipeline(0);
-        }
-        UnbindCompute();
-    }
+    void UnbindPipeline();
 
-    void UnbindCompute() {
-        if (is_compute_bound) {
-            is_compute_bound = false;
-            glUseProgram(0);
-        }
-    }
+    void UnbindCompute();
 
     OGLPipeline pipeline;
     bool is_pipeline_bound{};
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 3f2b139e0c..d728e5c6c9 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -4,13 +4,13 @@
 #include <algorithm>
 
 #include "common/settings.h"
+#include "video_core/host_shaders/blit_color_float_frag_spv.h"
 #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
 #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
 #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
 #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
 #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
 #include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
-#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
 #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
 #include "video_core/renderer_vulkan/blit_image.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
@@ -303,7 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri
 }
 
 void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
-                   const Region2D& src_region) {
+                   const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) {
     const VkOffset2D offset{
         .x = std::min(dst_region.start.x, dst_region.end.x),
         .y = std::min(dst_region.start.y, dst_region.end.y),
@@ -325,12 +325,15 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
         .offset = offset,
         .extent = extent,
     };
-    const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x);
-    const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y);
+    const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) /
+                          static_cast<float>(src_size.width);
+    const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) /
+                          static_cast<float>(src_size.height);
     const PushConstants push_constants{
         .tex_scale = {scale_x, scale_y},
-        .tex_offset = {static_cast<float>(src_region.start.x),
-                       static_cast<float>(src_region.start.y)},
+        .tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width),
+                       static_cast<float>(src_region.start.y) /
+                           static_cast<float>(src_size.height)},
     };
     cmdbuf.SetViewport(0, viewport);
     cmdbuf.SetScissor(0, scissor);
@@ -365,7 +368,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
       two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
           PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
       full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
-      blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
+      blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
       blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
       convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
       convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
@@ -404,6 +407,30 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
     scheduler.InvalidateState();
 }
 
+void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
+                                VkSampler src_sampler, const Region2D& dst_region,
+                                const Region2D& src_region, const Extent3D& src_size) {
+    const BlitImagePipelineKey key{
+        .renderpass = dst_framebuffer->RenderPass(),
+        .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy,
+    };
+    const VkPipelineLayout layout = *one_texture_pipeline_layout;
+    const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
+    scheduler.RequestRenderpass(dst_framebuffer);
+    scheduler.Record([this, dst_region, src_region, src_size, pipeline, layout, src_sampler,
+                      src_image_view](vk::CommandBuffer cmdbuf) {
+        // TODO: Barriers
+        const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
+        UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view);
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
+                                  nullptr);
+        BindBlitState(cmdbuf, layout, dst_region, src_region, src_size);
+        cmdbuf.Draw(3, 1, 0, 0);
+    });
+    scheduler.InvalidateState();
+}
+
 void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
                                        VkImageView src_depth_view, VkImageView src_stencil_view,
                                        const Region2D& dst_region, const Region2D& src_region,
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 5df679fb43..e2db33f563 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -10,6 +10,8 @@
 
 namespace Vulkan {
 
+using VideoCommon::Extent3D;
+using VideoCommon::Offset2D;
 using VideoCommon::Region2D;
 
 class Device;
@@ -36,6 +38,10 @@ public:
                    Tegra::Engines::Fermi2D::Filter filter,
                    Tegra::Engines::Fermi2D::Operation operation);
 
+    void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
+                   VkSampler src_sampler, const Region2D& dst_region, const Region2D& src_region,
+                   const Extent3D& src_size);
+
     void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
                           VkImageView src_stencil_view, const Region2D& dst_region,
                           const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 242bf9602a..153096fa40 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -265,6 +265,34 @@ void RasterizerVulkan::DrawIndirect() {
     buffer_cache.SetDrawIndirect(nullptr);
 }
 
+void RasterizerVulkan::DrawTexture() {
+    MICROPROFILE_SCOPE(Vulkan_Drawing);
+    // Handles a draw texture request by blitting the source image view to the framebuffer
+    SCOPE_EXIT({ gpu.TickWork(); });
+    FlushWork();
+
+    query_cache.UpdateCounters();
+
+    texture_cache.SynchronizeGraphicsDescriptors();
+    texture_cache.UpdateRenderTargets(false);
+
+    UpdateDynamicStates();
+    // Look up the sampler and image view through the indices stored in the draw texture state
+    const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
+    const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
+    const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
+    Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
+                                    .y = static_cast<s32>(draw_texture_state.dst_y0)},
+                           Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
+                                    .y = static_cast<s32>(draw_texture_state.dst_y1)}};
+    Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
+                                    .y = static_cast<s32>(draw_texture_state.src_y0)},
+                           Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
+                                    .y = static_cast<s32>(draw_texture_state.src_y1)}};
+    blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), sampler->Handle(),
+                         dst_region, src_region, texture.size);
+}
+
 void RasterizerVulkan::Clear(u32 layer_count) {
     MICROPROFILE_SCOPE(Vulkan_Clearing);
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c661e5b197..deb44dcaa5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -66,6 +66,7 @@ public:
 
     void Draw(bool is_indexed, u32 instance_count) override;
     void DrawIndirect() override;
+    void DrawTexture() override;
     void Clear(u32 layer_count) override;
     void DispatchCompute() override;
     void ResetCounter(VideoCore::QueryType type) override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 87152c8e99..1b01990a44 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -148,6 +148,13 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
     return slot_image_views[id];
 }
 
+template <class P>
+typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
+    // Resolve the descriptor index to a view id through the graphics image table first
+    auto& view_ids = channel_state->graphics_image_view_ids;
+    return slot_image_views[VisitImageView(channel_state->graphics_image_table, view_ids, index)];
+}
+
 template <class P>
 void TextureCache<P>::MarkModification(ImageId id) noexcept {
     MarkModification(slot_images[id]);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 4eea1f609e..485eaabaa8 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -129,6 +129,9 @@ public:
     /// Return a reference to the given image view id
     [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
 
+    /// Return a reference to the image view at the given index of the graphics descriptor table
+    [[nodiscard]] ImageView& GetImageView(u32 index) noexcept;
+
     /// Mark an image as modified from the GPU
     void MarkModification(ImageId id) noexcept;
 

From 9fc7ca1731862354ef70bfaf34d2c807a904a27b Mon Sep 17 00:00:00 2001
From: Feng Chen <vonchenplus@gmail.com>
Date: Wed, 11 Jan 2023 11:10:48 +0800
Subject: [PATCH 3/3] Address feedback

---
 src/video_core/engines/draw_manager.cpp       |  4 +-
 .../host_shaders/full_screen_triangle.vert    |  2 +-
 src/video_core/renderer_vulkan/blit_image.cpp | 61 ++++++++++++++++---
 src/video_core/renderer_vulkan/blit_image.h   |  4 +-
 .../renderer_vulkan/vk_rasterizer.cpp         |  5 +-
 5 files changed, 62 insertions(+), 14 deletions(-)

diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index 2685481f95..1d22d25f12 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -199,10 +199,10 @@ void DrawManager::DrawTexture() {
     draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f;
     draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f;
     draw_texture_state.src_x1 =
-        (static_cast<float>(regs.draw_texture.dx_du) / 4294967295.f) * dst_width +
+        (static_cast<float>(regs.draw_texture.dx_du) / 4294967296.f) * dst_width +
         draw_texture_state.src_x0;
     draw_texture_state.src_y1 =
-        (static_cast<float>(regs.draw_texture.dy_dv) / 4294967295.f) * dst_height +
+        (static_cast<float>(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height +
         draw_texture_state.src_y0;
     draw_texture_state.src_sampler = regs.draw_texture.src_sampler;
     draw_texture_state.src_texture = regs.draw_texture.src_texture;
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert
index 8ac936efdf..d16d989956 100644
--- a/src/video_core/host_shaders/full_screen_triangle.vert
+++ b/src/video_core/host_shaders/full_screen_triangle.vert
@@ -32,4 +32,4 @@ void main() {
     float y = float((VERTEX_ID & 2) << 1);
     gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0);
     texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
-}
\ No newline at end of file
+}
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index d728e5c6c9..dd00d3edfa 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -350,6 +350,51 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) {
         .height = is_rescaled ? resolution.ScaleUp(height) : height,
     };
 }
+
+/// Records an all-commands image barrier moving mip 0, layer 0 of a color image between layouts
+void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
+                           VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) {
+    constexpr VkPipelineStageFlags stage_mask{VK_PIPELINE_STAGE_ALL_COMMANDS_BIT};
+    constexpr VkFlags access_mask{VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                                  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT};
+    const VkImageMemoryBarrier layout_barrier{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = access_mask,
+        .dstAccessMask = access_mask,
+        .oldLayout = source_layout,
+        .newLayout = target_layout,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = image,
+        // Only the first mip level of the first array layer is covered by the barrier
+        .subresourceRange{.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                          .baseMipLevel = 0,
+                          .levelCount = 1,
+                          .baseArrayLayer = 0,
+                          .layerCount = 1},
+    };
+    cmdbuf.PipelineBarrier(stage_mask, stage_mask, 0, layout_barrier);
+}
+
+/// Begins the render pass of a framebuffer inline, passing no clear values
+void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) {
+    const VkRenderPassBeginInfo begin_info{
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+        .pNext = nullptr,
+        .renderPass = framebuffer->RenderPass(),
+        .framebuffer = framebuffer->Handle(),
+        // The render area starts at the origin and spans the extent the framebuffer reports
+        .renderArea{
+            .offset{},
+            .extent = framebuffer->RenderArea(),
+        },
+        // No clear values are handed to the render pass
+        .clearValueCount = 0,
+        .pClearValues = nullptr,
+    };
+    cmdbuf.BeginRenderPass(begin_info, VK_SUBPASS_CONTENTS_INLINE);
+}
 } // Anonymous namespace
 
 BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
@@ -408,18 +453,20 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
 }
 
 void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
-                                VkSampler src_sampler, const Region2D& dst_region,
-                                const Region2D& src_region, const Extent3D& src_size) {
+                                VkImage src_image, VkSampler src_sampler,
+                                const Region2D& dst_region, const Region2D& src_region,
+                                const Extent3D& src_size) {
     const BlitImagePipelineKey key{
         .renderpass = dst_framebuffer->RenderPass(),
         .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy,
     };
     const VkPipelineLayout layout = *one_texture_pipeline_layout;
     const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
-    scheduler.RequestRenderpass(dst_framebuffer);
-    scheduler.Record([this, dst_region, src_region, src_size, pipeline, layout, src_sampler,
-                      src_image_view](vk::CommandBuffer cmdbuf) {
-        // TODO: Barriers
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region,
+                      src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) {
+        TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL);
+        BeginRenderPass(cmdbuf, dst_framebuffer);
         const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
         UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -427,8 +474,8 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
                                   nullptr);
         BindBlitState(cmdbuf, layout, dst_region, src_region, src_size);
         cmdbuf.Draw(3, 1, 0, 0);
+        cmdbuf.EndRenderPass();
     });
-    scheduler.InvalidateState();
 }
 
 void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index e2db33f563..be8a9a2f67 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -39,8 +39,8 @@ public:
                    Tegra::Engines::Fermi2D::Operation operation);
 
     void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
-                   VkSampler src_sampler, const Region2D& dst_region, const Region2D& src_region,
-                   const Extent3D& src_size);
+                   VkImage src_image, VkSampler src_sampler, const Region2D& dst_region,
+                   const Region2D& src_region, const Extent3D& src_size);
 
     void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
                           VkImageView src_stencil_view, const Region2D& dst_region,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 153096fa40..e45512d4fd 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -289,8 +289,9 @@ void RasterizerVulkan::DrawTexture() {
                                     .y = static_cast<s32>(draw_texture_state.src_y0)},
                            Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
                                     .y = static_cast<s32>(draw_texture_state.src_y1)}};
-    blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), sampler->Handle(),
-                         dst_region, src_region, texture.size);
+    blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(),
+                         texture.ImageHandle(), sampler->Handle(), dst_region, src_region,
+                         texture.size);
 }
 
 void RasterizerVulkan::Clear(u32 layer_count) {