From 38394f36d78f22ca75acf8275e86d25faecd0e8d Mon Sep 17 00:00:00 2001
From: Liam <byteslice@airmail.cc>
Date: Sun, 1 Oct 2023 16:21:23 -0400
Subject: [PATCH] gdbserver: use numeric character references for Unicode

---
 src/common/string_util.cpp    |  5 +++++
 src/common/string_util.h      |  1 +
 src/core/debugger/gdbstub.cpp | 17 +++++++++++++++--
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index feab1653df..4c7aba3f5c 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -135,6 +135,11 @@ std::u16string UTF8ToUTF16(std::string_view input) {
     return convert.from_bytes(input.data(), input.data() + input.size());
 }
 
+std::u32string UTF8ToUTF32(std::string_view input) { // Throws std::range_error on bad UTF-8.
+    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> convert; // UTF-8 <-> UTF-32
+    return convert.from_bytes(input.data(), input.data() + input.size());
+}
+
 #ifdef _WIN32
 static std::wstring CPToUTF16(u32 code_page, std::string_view input) {
     const auto size =
diff --git a/src/common/string_util.h b/src/common/string_util.h
index c351f1a0c8..9da1ca4e94 100644
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -38,6 +38,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
 
 [[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input);
 [[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input);
+[[nodiscard]] std::u32string UTF8ToUTF32(std::string_view input);
 
 #ifdef _WIN32
 [[nodiscard]] std::string UTF16ToUTF8(std::wstring_view input);
diff --git a/src/core/debugger/gdbstub.cpp b/src/core/debugger/gdbstub.cpp
index e55831f27d..82964f0a16 100644
--- a/src/core/debugger/gdbstub.cpp
+++ b/src/core/debugger/gdbstub.cpp
@@ -2,6 +2,8 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <atomic>
+#include <codecvt>
+#include <locale>
 #include <numeric>
 #include <optional>
 #include <thread>
@@ -12,6 +14,7 @@
 #include "common/logging/log.h"
 #include "common/scope_exit.h"
 #include "common/settings.h"
+#include "common/string_util.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/debugger/gdbstub.h"
@@ -68,10 +71,16 @@ static std::string EscapeGDB(std::string_view data) {
 }
 
 static std::string EscapeXML(std::string_view data) {
+    std::u32string converted = U"[Encoding error]";
+    try {
+        converted = Common::UTF8ToUTF32(data);
+    } catch (std::range_error&) {
+    }
+
     std::string escaped;
     escaped.reserve(data.size());
 
-    for (char c : data) {
+    for (char32_t c : converted) {
         switch (c) {
         case '&':
             escaped += "&amp;";
@@ -86,7 +95,11 @@ static std::string EscapeXML(std::string_view data) {
             escaped += "&gt;";
             break;
         default:
-            escaped += c;
+            if (c > 0x7f) {
+                escaped += fmt::format("&#{};", static_cast<u32>(c));
+            } else {
+                escaped += static_cast<char>(c);
+            }
             break;
         }
     }