From d248c1203ea15992e6ca3a087a02fac76490deba Mon Sep 17 00:00:00 2001
From: Wunkolo <Wunkolo@gmail.com>
Date: Wed, 9 Mar 2022 18:11:46 -0800
Subject: [PATCH] cpu_detect: Add additional x86 flags and telemetry

Adds detection of additional CPU flags to cpu_detect and reports them in the telemetry output.

The list is not exhaustive; it is guided by the features that [dynarmic utilizes](https://github.com/merryhime/dynarmic/blob/bcfe377aaa5138af740e90af5be7a7dff7b62a52/src/dynarmic/backend/x64/host_feature.h#L12-L33) as well as features that are currently utilized but not reported to telemetry (invariant_tsc). The collected data is intended to guide future optimizations.

AVX512 in particular is broken up into its individual subsets, and some other processor features such as [sha](https://en.wikipedia.org/wiki/Intel_SHA_extensions) and [gfni](https://en.wikipedia.org/wiki/AVX-512#GFNI) are added to provide some forward-looking data points.

The existing `CPU_Extension_x64_AVX512` telemetry field is kept for compatibility
(it is set when AVX512F, CD, VL, DQ and BW are all present, i.e. Skylake-X/SP level)
and is supplemented by individual `CPU_Extension_x64_AVX512{F,VL,CD,...}` fields.
---
 src/common/telemetry.cpp      | 60 +++++++++++++++++++++++++----------
 src/common/x64/cpu_detect.cpp | 30 +++++++++++++-----
 src/common/x64/cpu_detect.h   | 21 ++++++++++--
 src/yuzu/main.cpp             |  4 +--
 4 files changed, 86 insertions(+), 29 deletions(-)

diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 6241d08b3a..98c82cd17e 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -55,22 +55,50 @@ void AppendBuildInfo(FieldCollection& fc) {
 
 void AppendCPUInfo(FieldCollection& fc) {
 #ifdef ARCHITECTURE_x86_64
-    fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
-    fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA4", Common::GetCPUCaps().fma4);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE", Common::GetCPUCaps().sse);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE2", Common::GetCPUCaps().sse2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE3", Common::GetCPUCaps().sse3);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSSE3", Common::GetCPUCaps().ssse3);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE41", Common::GetCPUCaps().sse4_1);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE42", Common::GetCPUCaps().sse4_2);
+
+    const auto& caps = Common::GetCPUCaps();
+    const auto add_field = [&fc](std::string_view field_name, const auto& field_value) {
+        fc.AddField(FieldType::UserSystem, field_name, field_value);
+    };
+    add_field("CPU_Model", caps.cpu_string);
+    add_field("CPU_BrandString", caps.brand_string);
+
+    add_field("CPU_Extension_x64_SSE", caps.sse);
+    add_field("CPU_Extension_x64_SSE2", caps.sse2);
+    add_field("CPU_Extension_x64_SSE3", caps.sse3);
+    add_field("CPU_Extension_x64_SSSE3", caps.ssse3);
+    add_field("CPU_Extension_x64_SSE41", caps.sse4_1);
+    add_field("CPU_Extension_x64_SSE42", caps.sse4_2);
+
+    add_field("CPU_Extension_x64_AVX", caps.avx);
+    add_field("CPU_Extension_x64_AVX_VNNI", caps.avx_vnni);
+    add_field("CPU_Extension_x64_AVX2", caps.avx2);
+
+    // Skylake-X/SP level AVX512, for compatibility with the previous telemetry field
+    add_field("CPU_Extension_x64_AVX512",
+              caps.avx512f && caps.avx512cd && caps.avx512vl && caps.avx512dq && caps.avx512bw);
+
+    add_field("CPU_Extension_x64_AVX512F", caps.avx512f);
+    add_field("CPU_Extension_x64_AVX512CD", caps.avx512cd);
+    add_field("CPU_Extension_x64_AVX512VL", caps.avx512vl);
+    add_field("CPU_Extension_x64_AVX512DQ", caps.avx512dq);
+    add_field("CPU_Extension_x64_AVX512BW", caps.avx512bw);
+    add_field("CPU_Extension_x64_AVX512BITALG", caps.avx512bitalg);
+    add_field("CPU_Extension_x64_AVX512VBMI", caps.avx512vbmi);
+
+    add_field("CPU_Extension_x64_AES", caps.aes);
+    add_field("CPU_Extension_x64_BMI1", caps.bmi1);
+    add_field("CPU_Extension_x64_BMI2", caps.bmi2);
+    add_field("CPU_Extension_x64_F16C", caps.f16c);
+    add_field("CPU_Extension_x64_FMA", caps.fma);
+    add_field("CPU_Extension_x64_FMA4", caps.fma4);
+    add_field("CPU_Extension_x64_GFNI", caps.gfni);
+    add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc);
+    add_field("CPU_Extension_x64_LZCNT", caps.lzcnt);
+    add_field("CPU_Extension_x64_MOVBE", caps.movbe);
+    add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
+    add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
+    add_field("CPU_Extension_x64_SHA", caps.sha);
 #else
     fc.AddField(FieldType::UserSystem, "CPU_Model", "Other");
 #endif
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 99d87f586a..d81edb1404 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -93,10 +93,14 @@ static CPUCaps Detect() {
         caps.sse = Common::Bit<25>(cpu_id[3]);
         caps.sse2 = Common::Bit<26>(cpu_id[3]);
         caps.sse3 = Common::Bit<0>(cpu_id[2]);
+        caps.pclmulqdq = Common::Bit<1>(cpu_id[2]);
         caps.ssse3 = Common::Bit<9>(cpu_id[2]);
         caps.sse4_1 = Common::Bit<19>(cpu_id[2]);
         caps.sse4_2 = Common::Bit<20>(cpu_id[2]);
+        caps.movbe = Common::Bit<22>(cpu_id[2]);
+        caps.popcnt = Common::Bit<23>(cpu_id[2]);
         caps.aes = Common::Bit<25>(cpu_id[2]);
+        caps.f16c = Common::Bit<29>(cpu_id[2]);
 
         // AVX support requires 3 separate checks:
         //  - Is the AVX bit set in CPUID?
@@ -112,16 +116,26 @@ static CPUCaps Detect() {
 
         if (max_std_fn >= 7) {
             __cpuidex(cpu_id, 0x00000007, 0x00000000);
-            // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
-            caps.avx2 = caps.avx && Common::Bit<5>(cpu_id[1]);
+            // Can't enable AVX{2,512} unless the XSAVE/XGETBV checks above passed
+            if (caps.avx) {
+                caps.avx2 = Common::Bit<5>(cpu_id[1]);
+                caps.avx512f = Common::Bit<16>(cpu_id[1]);
+                caps.avx512dq = Common::Bit<17>(cpu_id[1]);
+                caps.avx512cd = Common::Bit<28>(cpu_id[1]);
+                caps.avx512bw = Common::Bit<30>(cpu_id[1]);
+                caps.avx512vl = Common::Bit<31>(cpu_id[1]);
+                caps.avx512vbmi = Common::Bit<1>(cpu_id[2]);
+                caps.avx512bitalg = Common::Bit<12>(cpu_id[2]);
+            }
+
             caps.bmi1 = Common::Bit<3>(cpu_id[1]);
             caps.bmi2 = Common::Bit<8>(cpu_id[1]);
-            // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
-            if (Common::Bit<16>(cpu_id[1]) && Common::Bit<28>(cpu_id[1]) &&
-                Common::Bit<31>(cpu_id[1]) && Common::Bit<17>(cpu_id[1]) &&
-                Common::Bit<30>(cpu_id[1])) {
-                caps.avx512 = caps.avx2;
-            }
+            caps.sha = Common::Bit<29>(cpu_id[1]);
+
+            caps.gfni = Common::Bit<8>(cpu_id[2]);
+
+            __cpuidex(cpu_id, 0x00000007, 0x00000001);
+            caps.avx_vnni = caps.avx && Common::Bit<4>(cpu_id[0]);
         }
     }
 
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 3e6d808f38..40c48b1323 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -35,16 +35,31 @@ struct CPUCaps {
     bool ssse3 : 1;
     bool sse4_1 : 1;
     bool sse4_2 : 1;
-    bool lzcnt : 1;
+
     bool avx : 1;
+    bool avx_vnni : 1;
     bool avx2 : 1;
-    bool avx512 : 1;
+    bool avx512f : 1;
+    bool avx512dq : 1;
+    bool avx512cd : 1;
+    bool avx512bw : 1;
+    bool avx512vl : 1;
+    bool avx512vbmi : 1;
+    bool avx512bitalg : 1;
+
+    bool aes : 1;
     bool bmi1 : 1;
     bool bmi2 : 1;
+    bool f16c : 1;
     bool fma : 1;
     bool fma4 : 1;
-    bool aes : 1;
+    bool gfni : 1;
     bool invariant_tsc : 1;
+    bool lzcnt : 1;
+    bool movbe : 1;
+    bool pclmulqdq : 1;
+    bool popcnt : 1;
+    bool sha : 1;
 };
 
 /**
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index b3a8da0eae..1d459bdb32 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -249,9 +249,9 @@ GMainWindow::GMainWindow()
 #ifdef ARCHITECTURE_x86_64
     const auto& caps = Common::GetCPUCaps();
     std::string cpu_string = caps.cpu_string;
-    if (caps.avx || caps.avx2 || caps.avx512) {
+    if (caps.avx || caps.avx2 || caps.avx512f) {
         cpu_string += " | AVX";
-        if (caps.avx512) {
+        if (caps.avx512f) {
             cpu_string += "512";
         } else if (caps.avx2) {
             cpu_string += '2';