From 787a1b047c150f6be705b66ffa929083bd5671f5 Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Thu, 28 Aug 2014 05:51:57 -0300
Subject: [PATCH 1/5] Remove virtual inheritance from RendererOpenGL

Also make destructor virtual so that instances are properly destructed.
---
 src/video_core/renderer_base.h                   | 2 +-
 src/video_core/renderer_opengl/renderer_opengl.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 2650620b4b..f1dbc9d17e 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -19,7 +19,7 @@ public:
     RendererBase() : m_current_fps(0), m_current_frame(0) {
     }
 
-    ~RendererBase() {
+    virtual ~RendererBase() {
     }
 
     /// Swap buffers (render frame)
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 0d25b2a52f..98ae7aa66f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -13,11 +13,11 @@
 
 #include <array>
 
-class RendererOpenGL : virtual public RendererBase {
+class RendererOpenGL : public RendererBase {
 public:
 
     RendererOpenGL();
-    ~RendererOpenGL();
+    ~RendererOpenGL() override;
 
     /// Swap buffers (render frame)
     void SwapBuffers();

From 5f598a5e2c574ebc854fa5d9cc0d02fda46e56d8 Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Thu, 28 Aug 2014 06:34:47 -0300
Subject: [PATCH 2/5] OpenGL renderer: Shuffle initialization code around and
 rename functions.

---
 .../renderer_opengl/renderer_opengl.cpp       | 33 +++++++++----------
 .../renderer_opengl/renderer_opengl.h         | 10 ++----
 2 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bc1683cb56..cad2783819 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -81,7 +81,7 @@ void RendererOpenGL::SwapBuffers() {
     RenderXFB(framebuffer_size, framebuffer_size);
 
     // XFB->Window copy
-    RenderFramebuffer();
+    DrawScreens();
 
     // Swap buffers
     render_window->PollEvents();
@@ -151,8 +151,16 @@ void RendererOpenGL::RenderXFB(const Common::Rect& src_rect, const Common::Rect&
     // so this may need to be changed (pair for each screen).
 }
 
-/// Initialize the FBO
-void RendererOpenGL::InitFramebuffer() {
+/**
+ * Initializes the OpenGL state and creates persistent objects.
+ */
+void RendererOpenGL::InitOpenGLObjects() {
+    glGenVertexArrays(1, &vertex_array_id);
+    glBindVertexArray(vertex_array_id);
+
+    glClearColor(1.0f, 1.0f, 1.0f, 0.0f);
+    glDisable(GL_DEPTH_TEST);
+
     program_id = ShaderUtil::LoadShaders(GLShaders::g_vertex_shader, GLShaders::g_fragment_shader);
     sampler_id = glGetUniformLocation(program_id, "sampler");
     attrib_position = glGetAttribLocation(program_id, "position");
@@ -190,7 +198,10 @@ void RendererOpenGL::InitFramebuffer() {
     glBindTexture(GL_TEXTURE_2D, 0);
 }
 
-void RendererOpenGL::RenderFramebuffer() {
+/**
+ * Draws the emulated screens to the emulator window.
+ */
+void RendererOpenGL::DrawScreens() {
     glViewport(0, 0, resolution_width, resolution_height);
     glClear(GL_COLOR_BUFFER_BIT);
 
@@ -253,20 +264,8 @@ void RendererOpenGL::Init() {
         exit(-1);
     }
 
-    // Generate VAO
-    glGenVertexArrays(1, &vertex_array_id);
-    glBindVertexArray(vertex_array_id);
-
-    glClearColor(1.0f, 1.0f, 1.0f, 0.0f);
-    glDisable(GL_DEPTH_TEST);
-
-    glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
-
-    // Initialize everything else
-    // --------------------------
-    InitFramebuffer();
-
     NOTICE_LOG(RENDER, "GL_VERSION: %s\n", glGetString(GL_VERSION));
+    InitOpenGLObjects();
 }
 
 /// Shutdown the renderer
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 98ae7aa66f..3dcb331beb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -42,14 +42,8 @@ public:
     void ShutDown();
 
 private:
-
-    /// Initialize the FBO
-    void InitFramebuffer();
-
-    // Blit the FBO to the OpenGL default framebuffer
-    void RenderFramebuffer();
-
-    /// Updates the framerate
+    void InitOpenGLObjects();
+    void DrawScreens();
     void UpdateFramerate();
 
     /// Structure used for storing information for rendering each 3DS screen

From fec7f6b035c1328cefac8a97cd26f3a79d033fa4 Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Thu, 28 Aug 2014 15:17:09 -0300
Subject: [PATCH 3/5] Rename GPU::Regs::FramebufferFormat to PixelFormat

This name better represents what the enum does, and is less overloaded
in the context. (The whole register the enum is part of is also called
'format'.)
---
 src/core/hw/gpu.cpp | 10 +++++-----
 src/core/hw/gpu.h   | 14 +++++---------
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 9c71923139..33a0e0fe7c 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -89,7 +89,7 @@ inline void Write(u32 addr, const T data) {
                     } source_color = { 0, 0, 0, 0 };
 
                     switch (config.input_format) {
-                    case Regs::FramebufferFormat::RGBA8:
+                    case Regs::PixelFormat::RGBA8:
                     {
                         // TODO: Most likely got the component order messed up.
                         u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4;
@@ -106,7 +106,7 @@ inline void Write(u32 addr, const T data) {
                     }
 
                     switch (config.output_format) {
-                    /*case Regs::FramebufferFormat::RGBA8:
+                    /*case Regs::PixelFormat::RGBA8:
                     {
                         // TODO: Untested
                         u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.output_width * 4);
@@ -117,7 +117,7 @@ inline void Write(u32 addr, const T data) {
                         break;
                     }*/
 
-                    case Regs::FramebufferFormat::RGB8:
+                    case Regs::PixelFormat::RGB8:
                     {
                         // TODO: Most likely got the component order messed up.
                         u8* dstptr = dest_pointer + x * 3 + y * config.output_width * 3;
@@ -236,13 +236,13 @@ void Init() {
     framebuffer_top.width = 240;
     framebuffer_top.height = 400;
     framebuffer_top.stride = 3 * 240;
-    framebuffer_top.color_format = Regs::FramebufferFormat::RGB8;
+    framebuffer_top.color_format = Regs::PixelFormat::RGB8;
     framebuffer_top.active_fb = 0;
 
     framebuffer_sub.width = 240;
     framebuffer_sub.height = 320;
     framebuffer_sub.stride = 3 * 240;
-    framebuffer_sub.color_format = Regs::FramebufferFormat::RGB8;
+    framebuffer_sub.color_format = Regs::PixelFormat::RGB8;
     framebuffer_sub.active_fb = 0;
 
     NOTICE_LOG(GPU, "initialized OK");
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index c853429a0a..92097d182d 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -56,7 +56,7 @@ struct Regs {
                   "Structure size and register block length don't match")
 #endif
 
-    enum class FramebufferFormat : u32 {
+    enum class PixelFormat : u32 {
         RGBA8  = 0,
         RGB8   = 1,
         RGB565 = 2,
@@ -84,9 +84,7 @@ struct Regs {
 
     INSERT_PADDING_WORDS(0x10b);
 
-    struct {
-        using Format = Regs::FramebufferFormat;
-
+    struct FramebufferConfig {
         union {
             u32 size;
 
@@ -102,7 +100,7 @@ struct Regs {
         union {
             u32 format;
 
-            BitField< 0, 3, Format> color_format;
+            BitField< 0, 3, PixelFormat> color_format;
         };
 
         INSERT_PADDING_WORDS(0x1);
@@ -130,8 +128,6 @@ struct Regs {
     INSERT_PADDING_WORDS(0x169);
 
     struct {
-        using Format = Regs::FramebufferFormat;
-
         u32 input_address;
         u32 output_address;
 
@@ -161,8 +157,8 @@ struct Regs {
             u32 flags;
 
             BitField< 0, 1, u32> flip_data;        // flips input data horizontally (TODO) if true
-            BitField< 8, 3, Format> input_format;
-            BitField<12, 3, Format> output_format;
+            BitField< 8, 3, PixelFormat> input_format;
+            BitField<12, 3, PixelFormat> output_format;
             BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
         };
 

From 11642fd3a218185187bb356f2f446313694d4be4 Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Thu, 28 Aug 2014 15:21:54 -0300
Subject: [PATCH 4/5] Rework OpenGL renderer.

The OpenGL renderer has been revised, with the following changes:
 - Initialization and rendering have been refactored to reduce the number of
   redundant objects used.
 - Framebuffer rotation is now done directly, using texture mapping.
 - Vertex coordinates are now given in pixels, and the projection matrix
   isn't hardcoded anymore.
---
 src/video_core/renderer_opengl/gl_shaders.h   |  46 +--
 .../renderer_opengl/renderer_opengl.cpp       | 288 ++++++++----------
 .../renderer_opengl/renderer_opengl.h         |  72 ++---
 src/video_core/video_core.h                   |   4 +
 4 files changed, 185 insertions(+), 225 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shaders.h b/src/video_core/renderer_opengl/gl_shaders.h
index 380648f453..0f88ab8027 100644
--- a/src/video_core/renderer_opengl/gl_shaders.h
+++ b/src/video_core/renderer_opengl/gl_shaders.h
@@ -6,34 +6,40 @@
 
 namespace GLShaders {
 
-static const char g_vertex_shader[] = R"(
+const char g_vertex_shader[] = R"(
 #version 150 core
-in vec3 position;
-in vec2 texCoord;
 
-out vec2 UV;
+in vec2 vert_position;
+in vec2 vert_tex_coord;
+out vec2 frag_tex_coord;
 
-mat3 window_scale = mat3(
-                         vec3(1.0, 0.0, 0.0),
-                         vec3(0.0, 5.0/6.0, 0.0), // TODO(princesspeachum): replace hard-coded aspect with uniform
-                         vec3(0.0, 0.0, 1.0)
-                         );
+// This is a truncated 3x3 matrix for 2D transformations:
+// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
+// The third column performs translation.
+// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
+// implicitly be [0, 0, 1]
+uniform mat3x2 modelview_matrix;
 
 void main() {
-    gl_Position.xyz = window_scale * position;
-    gl_Position.w = 1.0;
+    // Multiply input position by the rotscale part of the matrix and then manually translate by
+    // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
+    // to `vec3(vert_position.xy, 1.0)`
+    gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
+    frag_tex_coord = vert_tex_coord;
+}
+)";
 
-    UV = texCoord;
-})";
-
-static const char g_fragment_shader[] = R"(
+const char g_fragment_shader[] = R"(
 #version 150 core
-in vec2 UV;
-out vec3 color;
-uniform sampler2D sampler;
+
+in vec2 frag_tex_coord;
+out vec4 color;
+
+uniform sampler2D color_texture;
 
 void main() {
-    color = texture(sampler, UV).rgb;
-})";
+    color = texture(color_texture, frag_tex_coord);
+}
+)";
 
 }
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index cad2783819..8483f79bee 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -3,64 +3,51 @@
 // Refer to the license.txt file included.
 
 #include "core/hw/gpu.h"
-
+#include "core/mem_map.h"
+#include "common/emu_window.h"
 #include "video_core/video_core.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 #include "video_core/renderer_opengl/gl_shader_util.h"
 #include "video_core/renderer_opengl/gl_shaders.h"
 
-#include "core/mem_map.h"
-
 #include <algorithm>
 
-static const GLfloat kViewportAspectRatio =
-    (static_cast<float>(VideoCore::kScreenTopHeight) + VideoCore::kScreenBottomHeight) / VideoCore::kScreenTopWidth;
+/**
+ * Vertex structure that the drawn screen rectangles are composed of.
+ */
+struct ScreenRectVertex {
+    ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) {
+        position[0] = x;
+        position[1] = y;
+        tex_coord[0] = u;
+        tex_coord[1] = v;
+    }
 
-// Fullscreen quad dimensions
-static const GLfloat kTopScreenWidthNormalized = 2;
-static const GLfloat kTopScreenHeightNormalized    = kTopScreenWidthNormalized    * (static_cast<float>(VideoCore::kScreenTopHeight)    / VideoCore::kScreenTopWidth);
-static const GLfloat kBottomScreenWidthNormalized  = kTopScreenWidthNormalized    * (static_cast<float>(VideoCore::kScreenBottomWidth)  / VideoCore::kScreenTopWidth);
-static const GLfloat kBottomScreenHeightNormalized = kBottomScreenWidthNormalized * (static_cast<float>(VideoCore::kScreenBottomHeight) / VideoCore::kScreenBottomWidth);
-
-static const GLfloat g_vbuffer_top[] = {
-    // x,   y                           z     u     v
-    -1.0f, 0.0f,                       0.0f, 0.0f, 1.0f,
-     1.0f, 0.0f,                       0.0f, 1.0f, 1.0f,
-     1.0f, kTopScreenHeightNormalized, 0.0f, 1.0f, 0.0f,
-     1.0f, kTopScreenHeightNormalized, 0.0f, 1.0f, 0.0f,
-    -1.0f, kTopScreenHeightNormalized, 0.0f, 0.0f, 0.0f,
-    -1.0f, 0.0f,                       0.0f, 0.0f, 1.0f
+    GLfloat position[2];
+    GLfloat tex_coord[2];
 };
 
-static const GLfloat g_vbuffer_bottom[] = {
-    // x                                   y                              z     u     v
-    -(kBottomScreenWidthNormalized / 2), -kBottomScreenHeightNormalized, 0.0f, 0.0f, 1.0f,
-     (kBottomScreenWidthNormalized / 2), -kBottomScreenHeightNormalized, 0.0f, 1.0f, 1.0f,
-     (kBottomScreenWidthNormalized / 2),  0.0f,                          0.0f, 1.0f, 0.0f,
-     (kBottomScreenWidthNormalized / 2),  0.0f,                          0.0f, 1.0f, 0.0f,
-    -(kBottomScreenWidthNormalized / 2),  0.0f,                          0.0f, 0.0f, 0.0f,
-    -(kBottomScreenWidthNormalized / 2), -kBottomScreenHeightNormalized, 0.0f, 0.0f, 1.0f
-};
+/**
+ * Defines a 1:1 pixel orthographic projection matrix with (0,0) on the top-left
+ * corner and (width, height) on the bottom-right.
+ *
+ * The projection part of the matrix is trivial, hence these operations are represented
+ * by a 3x2 matrix.
+ */
+static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const float height) {
+    std::array<GLfloat, 3*2> matrix;
+
+    matrix[0] = 2.f / width; matrix[2] = 0.f;           matrix[4] = -1.f;
+    matrix[1] = 0.f;         matrix[3] = -2.f / height; matrix[5] = 1.f;
+    // Last matrix row is implicitly assumed to be [0, 0, 1].
+
+    return matrix;
+}
 
 /// RendererOpenGL constructor
 RendererOpenGL::RendererOpenGL() {
-
     resolution_width  = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth);
     resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight;
-
-    // Initialize screen info
-    const auto& framebuffer_top = GPU::g_regs.framebuffer_config[0];
-    const auto& framebuffer_sub = GPU::g_regs.framebuffer_config[1];
-
-    screen_info.Top().width               = VideoCore::kScreenTopWidth;
-    screen_info.Top().height              = VideoCore::kScreenTopHeight;
-    screen_info.Top().stride              = framebuffer_top.stride;
-    screen_info.Top().flipped_xfb_data    = xfb_top_flipped;
-
-    screen_info.Bottom().width            = VideoCore::kScreenBottomWidth;
-    screen_info.Bottom().height           = VideoCore::kScreenBottomHeight;
-    screen_info.Bottom().stride           = framebuffer_sub.stride;
-    screen_info.Bottom().flipped_xfb_data = xfb_bottom_flipped;
 }
 
 /// RendererOpenGL destructor
@@ -71,16 +58,23 @@ RendererOpenGL::~RendererOpenGL() {
 void RendererOpenGL::SwapBuffers() {
     render_window->MakeCurrent();
 
-    // EFB->XFB copy
-    // TODO(bunnei): This is a hack and does not belong here. The copy should be triggered by some
-    // register write.
-    //
-    // TODO(princesspeachum): (related to above^) this should only be called when there's new data, not every frame.
-    // Currently this uploads data that shouldn't have changed.
-    Common::Rect framebuffer_size(0, 0, resolution_width, resolution_height);
-    RenderXFB(framebuffer_size, framebuffer_size);
+    for(int i : {0, 1}) {
+        const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
+
+        if (textures[i].width != framebuffer.width || textures[i].height != framebuffer.height) {
+            // Reallocate texture if the framebuffer size has changed.
+            // This is expected to not happen very often and hence should not be a
+            // performance problem.
+            glBindTexture(GL_TEXTURE_2D, textures[i].handle);
+            glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, framebuffer.width, framebuffer.height, 0,
+                GL_BGR, GL_UNSIGNED_BYTE, nullptr);
+            textures[i].width = framebuffer.width;
+            textures[i].height = framebuffer.height;
+        }
+
+        LoadFBToActiveGLTexture(GPU::g_regs.framebuffer_config[i], textures[i]);
+    }
 
-    // XFB->Window copy
     DrawScreens();
 
     // Swap buffers
@@ -89,115 +83,110 @@ void RendererOpenGL::SwapBuffers() {
 }
 
 /**
- * Helper function to flip framebuffer from left-to-right to top-to-bottom
- * @param raw_data Pointer to input raw framebuffer in V/RAM
- * @param screen_info ScreenInfo structure with screen size and output buffer pointer
- * @todo Early on hack... I'd like to find a more efficient way of doing this /bunnei
+ * Loads framebuffer from emulated memory into the active OpenGL texture.
  */
-void RendererOpenGL::FlipFramebuffer(const u8* raw_data, ScreenInfo& screen_info) {
-    for (int x = 0; x < screen_info.width; x++) {
-        int in_coord = x * screen_info.stride;
-        for (int y = screen_info.height-1; y >= 0; y--) {
-            // TODO: Properly support other framebuffer formats
-            int out_coord = (x + y * screen_info.width) * 3;
-            screen_info.flipped_xfb_data[out_coord] = raw_data[in_coord + 2];       // Red
-            screen_info.flipped_xfb_data[out_coord + 1] = raw_data[in_coord + 1];   // Green
-            screen_info.flipped_xfb_data[out_coord + 2] = raw_data[in_coord];       // Blue
-            in_coord += 3;
-        }
-    }
-}
+void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer,
+                                             const TextureInfo& texture) {
+    const VAddr framebuffer_vaddr = Memory::PhysicalToVirtualAddress(
+        framebuffer.active_fb == 1 ? framebuffer.address_left2 : framebuffer.address_left1);
 
-/**
- * Renders external framebuffer (XFB)
- * @param src_rect Source rectangle in XFB to copy
- * @param dst_rect Destination rectangle in output framebuffer to copy to
- */
-void RendererOpenGL::RenderXFB(const Common::Rect& src_rect, const Common::Rect& dst_rect) {
-    const auto& framebuffer_top = GPU::g_regs.framebuffer_config[0];
-    const auto& framebuffer_sub = GPU::g_regs.framebuffer_config[1];
-    const u32 active_fb_top = (framebuffer_top.active_fb == 1)
-                            ? Memory::PhysicalToVirtualAddress(framebuffer_top.address_left2)
-                            : Memory::PhysicalToVirtualAddress(framebuffer_top.address_left1);
-    const u32 active_fb_sub = (framebuffer_sub.active_fb == 1)
-                            ? Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left2)
-                            : Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left1);
+    DEBUG_LOG(GPU, "0x%08x bytes from 0x%08x(%dx%d), fmt %x",
+        framebuffer.stride * framebuffer.height,
+        framebuffer_vaddr, (int)framebuffer.width,
+        (int)framebuffer.height, (int)framebuffer.format);
 
-    DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x",
-              framebuffer_top.stride * framebuffer_top.height,
-              active_fb_top, (int)framebuffer_top.width,
-              (int)framebuffer_top.height, (int)framebuffer_top.format);
+    const u8* framebuffer_data = Memory::GetPointer(framebuffer_vaddr);
 
-    FlipFramebuffer(Memory::GetPointer(active_fb_top), screen_info.Top());
-    FlipFramebuffer(Memory::GetPointer(active_fb_sub), screen_info.Bottom());
+    // TODO: Handle other pixel formats
+    _dbg_assert_msg_(RENDER, framebuffer.color_format == GPU::Regs::PixelFormat::RGB8,
+                     "Unsupported 3DS pixel format.");
 
-    for (int i = 0; i < 2; i++) {
-        ScreenInfo* current_screen = &screen_info[i];
+    size_t pixel_stride = framebuffer.stride / 3;
+    // OpenGL only supports specifying a stride in units of pixels, not bytes, unfortunately
+    _dbg_assert_(RENDER, pixel_stride * 3 == framebuffer.stride);
+    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default
+    // only allows rows to have a memory alignment of 4.
+    _dbg_assert_(RENDER, pixel_stride % 4 == 0);
 
-        glBindTexture(GL_TEXTURE_2D, current_screen->texture_id);
+    glBindTexture(GL_TEXTURE_2D, texture.handle);
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
 
-        // TODO: This should consider the GPU registers for framebuffer width, height and stride.
-        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, current_screen->width, current_screen->height,
-                        GL_RGB, GL_UNSIGNED_BYTE, current_screen->flipped_xfb_data);
-    }
+    // Update existing texture
+    // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
+    //       differ from the LCD resolution.
+    // TODO: Applications could theoretically crash Citra here by specifying too large
+    //       framebuffer sizes. We should make sure that this cannot happen.
+    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
+        GL_BGR, GL_UNSIGNED_BYTE, framebuffer_data);
+
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 
     glBindTexture(GL_TEXTURE_2D, 0);
-
-    // TODO(princesspeachum):
-    // Only the subset src_rect of the GPU buffer
-    // should be copied into the texture of the relevant screen.
-    //
-    // The method's parameters also only include src_rect and dest_rec for one screen,
-    // so this may need to be changed (pair for each screen).
 }
 
 /**
  * Initializes the OpenGL state and creates persistent objects.
  */
 void RendererOpenGL::InitOpenGLObjects() {
-    glGenVertexArrays(1, &vertex_array_id);
-    glBindVertexArray(vertex_array_id);
-
     glClearColor(1.0f, 1.0f, 1.0f, 0.0f);
     glDisable(GL_DEPTH_TEST);
 
+    // Link shaders and get variable locations
     program_id = ShaderUtil::LoadShaders(GLShaders::g_vertex_shader, GLShaders::g_fragment_shader);
-    sampler_id = glGetUniformLocation(program_id, "sampler");
-    attrib_position = glGetAttribLocation(program_id, "position");
-    attrib_texcoord = glGetAttribLocation(program_id, "texCoord");
+    uniform_modelview_matrix = glGetUniformLocation(program_id, "modelview_matrix");
+    uniform_color_texture = glGetUniformLocation(program_id, "color_texture");
+    attrib_position = glGetAttribLocation(program_id, "vert_position");
+    attrib_tex_coord = glGetAttribLocation(program_id, "vert_tex_coord");
 
-    // Generate vertex buffers for both screens
-    glGenBuffers(1, &screen_info.Top().vertex_buffer_id);
-    glGenBuffers(1, &screen_info.Bottom().vertex_buffer_id);
+    // Generate VBO handle for drawing
+    glGenBuffers(1, &vertex_buffer_handle);
 
-    // Attach vertex data for top screen
-    glBindBuffer(GL_ARRAY_BUFFER, screen_info.Top().vertex_buffer_id);
-    glBufferData(GL_ARRAY_BUFFER, sizeof(g_vbuffer_top), g_vbuffer_top, GL_STATIC_DRAW);
+    // Generate VAO
+    glGenVertexArrays(1, &vertex_array_handle);
+    glBindVertexArray(vertex_array_handle);
 
-    // Attach vertex data for bottom screen
-    glBindBuffer(GL_ARRAY_BUFFER, screen_info.Bottom().vertex_buffer_id);
-    glBufferData(GL_ARRAY_BUFFER, sizeof(g_vbuffer_bottom), g_vbuffer_bottom, GL_STATIC_DRAW);
+    // Attach vertex data to VAO
+    glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_handle);
+    glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
+    glVertexAttribPointer(attrib_position,  2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, position));
+    glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, tex_coord));
+    glEnableVertexAttribArray(attrib_position);
+    glEnableVertexAttribArray(attrib_tex_coord);
 
-    // Create color buffers for both screens
-    glGenTextures(1, &screen_info.Top().texture_id);
-    glGenTextures(1, &screen_info.Bottom().texture_id);
+    // Allocate textures for each screen
+    for (auto& texture : textures) {
+        glGenTextures(1, &texture.handle);
 
-    for (int i = 0; i < 2; i++) {
-
-        ScreenInfo* current_screen = &screen_info[i];
-
-        // Allocate texture
-        glBindTexture(GL_TEXTURE_2D, current_screen->vertex_buffer_id);
-        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, current_screen->width, current_screen->height,
-                     0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
+        // Allocation of storage is deferred until the first frame, when we
+        // know the framebuffer size.
 
+        glBindTexture(GL_TEXTURE_2D, texture.handle);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
     }
-
     glBindTexture(GL_TEXTURE_2D, 0);
 }
 
+/**
+ * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation.
+ */
+void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h) {
+    std::array<ScreenRectVertex, 4> vertices = {
+        ScreenRectVertex(x,   y,   1.f, 0.f),
+        ScreenRectVertex(x+w, y,   1.f, 1.f),
+        ScreenRectVertex(x,   y+h, 0.f, 0.f),
+        ScreenRectVertex(x+w, y+h, 0.f, 1.f),
+    };
+
+    glBindTexture(GL_TEXTURE_2D, texture.handle);
+    glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_handle);
+    glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+}
+
 /**
  * Draws the emulated screens to the emulator window.
  */
@@ -207,37 +196,22 @@ void RendererOpenGL::DrawScreens() {
 
     glUseProgram(program_id);
 
+    // Set projection matrix
+    std::array<GLfloat, 3*2> ortho_matrix = MakeOrthographicMatrix((float)resolution_width, (float)resolution_height);
+    glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
+
     // Bind texture in Texture Unit 0
     glActiveTexture(GL_TEXTURE0);
+    glUniform1i(uniform_color_texture, 0);
 
-    glEnableVertexAttribArray(attrib_position);
-    glEnableVertexAttribArray(attrib_texcoord);
+    const float max_width = std::max((float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenBottomWidth);
+    const float top_x = 0.5f * (max_width - VideoCore::kScreenTopWidth);
+    const float bottom_x = 0.5f * (max_width - VideoCore::kScreenBottomWidth);
 
-    for (int i = 0; i < 2; i++) {
-
-        ScreenInfo* current_screen = &screen_info[i];
-
-        glBindTexture(GL_TEXTURE_2D, current_screen->texture_id);
-
-        // Set sampler on Texture Unit 0
-        glUniform1i(sampler_id, 0);
-
-        glBindBuffer(GL_ARRAY_BUFFER, current_screen->vertex_buffer_id);
-
-        // Vertex buffer layout
-        const GLsizei stride = 5 * sizeof(GLfloat);
-        const GLvoid* uv_offset = (const GLvoid*)(3 * sizeof(GLfloat));
-
-        // Configure vertex buffer
-        glVertexAttribPointer(attrib_position, 3, GL_FLOAT, GL_FALSE, stride, NULL);
-        glVertexAttribPointer(attrib_texcoord, 2, GL_FLOAT, GL_FALSE, stride, uv_offset);
-
-        // Draw screen
-        glDrawArrays(GL_TRIANGLES, 0, 6);
-    }
-
-    glDisableVertexAttribArray(attrib_position);
-    glDisableVertexAttribArray(attrib_texcoord);
+    DrawSingleScreenRotated(textures[0], top_x, 0,
+        (float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight);
+    DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight,
+        (float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight);
 
     m_current_frame++;
 }
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 3dcb331beb..82ef4b14be 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -7,12 +7,13 @@
 #include "generated/gl_3_2_core.h"
 
 #include "common/common.h"
-#include "common/emu_window.h"
-
+#include "core/hw/gpu.h"
 #include "video_core/renderer_base.h"
 
 #include <array>
 
+class EmuWindow;
+
 class RendererOpenGL : public RendererBase {
 public:
 
@@ -22,13 +23,6 @@ public:
     /// Swap buffers (render frame)
     void SwapBuffers();
 
-    /**
-     * Renders external framebuffer (XFB)
-     * @param src_rect Source rectangle in XFB to copy
-     * @param dst_rect Destination rectangle in output framebuffer to copy to
-     */
-    void RenderXFB(const Common::Rect& src_rect, const Common::Rect& dst_rect);
-
     /**
      * Set the emulator window to use for renderer
      * @param window EmuWindow handle to emulator window to use for rendering
@@ -42,32 +36,21 @@ public:
     void ShutDown();
 
 private:
-    void InitOpenGLObjects();
-    void DrawScreens();
-    void UpdateFramerate();
-
-    /// Structure used for storing information for rendering each 3DS screen
-    struct ScreenInfo {
-        // Properties
-        int width;
-        int height;
-        int stride; ///< Number of bytes between the coordinates (0,0) and (1,0)
-
-        // OpenGL object IDs
-        GLuint texture_id;
-        GLuint vertex_buffer_id;
-
-        // Temporary
-        u8* flipped_xfb_data;
+    /// Structure used for storing information about the textures for each 3DS screen
+    struct TextureInfo {
+        GLuint handle;
+        GLsizei width;
+        GLsizei height;
     };
 
-    /**
-    * Helper function to flip framebuffer from left-to-right to top-to-bottom
-    * @param raw_data Pointer to input raw framebuffer in V/RAM
-    * @param screen_info ScreenInfo structure with screen size and output buffer pointer
-    * @todo Early on hack... I'd like to find a more efficient way of doing this /bunnei
-    */
-    void FlipFramebuffer(const u8* raw_data, ScreenInfo& screen_info);
+    void InitOpenGLObjects();
+    void DrawScreens();
+    void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h);
+    void UpdateFramerate();
+
+    // Loads framebuffer from emulated memory into the active OpenGL texture.
+    static void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer,
+                                        const TextureInfo& texture);
 
     EmuWindow*  render_window;                    ///< Handle to render window
     u32         last_mode;                        ///< Last render mode
@@ -75,22 +58,15 @@ private:
     int resolution_width;                         ///< Current resolution width
     int resolution_height;                        ///< Current resolution height
 
-    // OpenGL global object IDs
-    GLuint vertex_array_id;
+    // OpenGL object IDs
+    GLuint vertex_array_handle;
+    GLuint vertex_buffer_handle;
     GLuint program_id;
-    GLuint sampler_id;
+    std::array<TextureInfo, 2> textures;
+    // Shader uniform location indices
+    GLuint uniform_modelview_matrix;
+    GLuint uniform_color_texture;
     // Shader attribute input indices
     GLuint attrib_position;
-    GLuint attrib_texcoord;
-
-    struct : std::array<ScreenInfo, 2> {
-        ScreenInfo& Top() { return (*this)[0]; }
-        ScreenInfo& Bottom() { return (*this)[1]; }
-    } screen_info;
-
-    // "Flipped" framebuffers translate scanlines from native 3DS left-to-right to top-to-bottom
-    // as OpenGL expects them in a texture. There probably is a more efficient way of doing this:
-    u8 xfb_top_flipped[VideoCore::kScreenTopWidth * VideoCore::kScreenTopHeight * 4];
-    u8 xfb_bottom_flipped[VideoCore::kScreenBottomWidth * VideoCore::kScreenBottomHeight * 4];
-
+    GLuint attrib_tex_coord;
 };
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 5e8129b5a4..609aac5131 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -17,6 +17,10 @@ namespace VideoCore {
 // 3DS Video Constants
 // -------------------
 
+// NOTE: The LCDs actually rotate the image 90 degrees when displaying. Because of that the
+// framebuffers in video memory are stored in column-major order and rendered sideways, causing
+// the widths and heights of the framebuffers read by the LCD to be switched compared to the
+// heights and widths of the screens listed here.
 static const int kScreenTopWidth        = 400;  ///< 3DS top screen width
 static const int kScreenTopHeight       = 240;  ///< 3DS top screen height
 static const int kScreenBottomWidth     = 320;  ///< 3DS bottom screen width

From ac54cd13dbb8e82ccc9134aab212271d2f32b0c8 Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Sat, 30 Aug 2014 18:17:47 -0300
Subject: [PATCH 5/5] OpenGL renderer: Request a forward compatible context in
 citra-qt

This should fix context creation on OS X. Also request a core, forward-compatible context on all platforms in Citra-GLFW (previously only on OS X), for consistency, and stop requesting 4x multisampling in citra-qt.
---
 src/citra/emu_window/emu_window_glfw.cpp | 3 ---
 src/citra_qt/bootmanager.cpp             | 6 +++---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/citra/emu_window/emu_window_glfw.cpp b/src/citra/emu_window/emu_window_glfw.cpp
index 661521eb74..6cdba2b872 100644
--- a/src/citra/emu_window/emu_window_glfw.cpp
+++ b/src/citra/emu_window/emu_window_glfw.cpp
@@ -42,12 +42,9 @@ EmuWindow_GLFW::EmuWindow_GLFW() {
     }
     glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
     glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
-	
-#if EMU_PLATFORM == PLATFORM_MACOSX
     // GLFW on OSX requires these window hints to be set to create a 3.2+ GL context.
     glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
     glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
-#endif
 	
     m_render_window = glfwCreateWindow(VideoCore::kScreenTopWidth, 
         (VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight), 
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 5dce9e5709..0430aa1edf 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -113,10 +113,10 @@ GRenderWindow::GRenderWindow(QWidget* parent) : QWidget(parent), emu_thread(this
 
     // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, WA_DontShowOnScreen, WA_DeleteOnClose
     QGLFormat fmt;
-    fmt.setProfile(QGLFormat::CoreProfile);
     fmt.setVersion(3,2);
-    fmt.setSampleBuffers(true);
-    fmt.setSamples(4);
+    fmt.setProfile(QGLFormat::CoreProfile);
+    // Requests a forward-compatible context, which is required to get a 3.2+ context on OS X
+    fmt.setOption(QGL::NoDeprecatedFunctions);
     
     child = new GGLWidgetInternal(fmt, this);
     QBoxLayout* layout = new QHBoxLayout(this);