diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 91352912d5..929ed67e4a 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -93,6 +93,7 @@ void AppendCPUInfo(FieldCollection& fc) {
     add_field("CPU_Extension_x64_GFNI", caps.gfni);
     add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc);
     add_field("CPU_Extension_x64_LZCNT", caps.lzcnt);
+    add_field("CPU_Extension_x64_MONITORX", caps.monitorx);
     add_field("CPU_Extension_x64_MOVBE", caps.movbe);
     add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
     add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c998b1197e..780120a5b2 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -168,6 +168,7 @@ static CPUCaps Detect() {
         __cpuid(cpu_id, 0x80000001);
         caps.lzcnt = Common::Bit<5>(cpu_id[2]);
         caps.fma4 = Common::Bit<16>(cpu_id[2]);
+        caps.monitorx = Common::Bit<29>(cpu_id[2]);
     }
 
     if (max_ex_fn >= 0x80000007) {
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 8253944d6b..756459417f 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -63,6 +63,7 @@ struct CPUCaps {
     bool gfni : 1;
     bool invariant_tsc : 1;
     bool lzcnt : 1;
+    bool monitorx : 1;
     bool movbe : 1;
     bool pclmulqdq : 1;
     bool popcnt : 1;
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp
index c53dd49453..41d385f598 100644
--- a/src/common/x64/cpu_wait.cpp
+++ b/src/common/x64/cpu_wait.cpp
@@ -13,36 +13,60 @@
 
 namespace Common::X64 {
 
+namespace {
+
+// Wait budget in cycles: added to FencedRDTSC() for TPAUSE, passed as the MWAITX timeout.
+// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources, e.g.:
+// At 1 GHz, 100K cycles is 100us
+// At 2 GHz, 100K cycles is 50us
+// At 4 GHz, 100K cycles is 25us
+constexpr auto PauseCycles = 100'000U;
+
+} // Anonymous namespace
+
 #ifdef _MSC_VER
 __forceinline static void TPAUSE() {
-    // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
-    // For reference:
-    // At 1 GHz, 100K cycles is 100us
-    // At 2 GHz, 100K cycles is 50us
-    // At 4 GHz, 100K cycles is 25us
-    static constexpr auto PauseCycles = 100'000;
-    _tpause(0, FencedRDTSC() + PauseCycles);
+    static constexpr auto RequestC02State = 0U; // Control operand handed to _tpause.
+    _tpause(RequestC02State, FencedRDTSC() + PauseCycles); // Counter now + PauseCycles.
+}
+
+__forceinline static void MWAITX() {
+    static constexpr auto EnableWaitTimeFlag = 1U << 1;
+    static constexpr auto RequestC1State = 0U;
+
+    // Dummy monitor target, aligned to a cache line; it is never modified after initialization.
+    alignas(64) u64 monitor_var{};
+    _mm_monitorx(&monitor_var, 0, 0);
+    _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); // (extensions, hints, clock)
 }
 #else
 static void TPAUSE() {
-    // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
-    // For reference:
-    // At 1 GHz, 100K cycles is 100us
-    // At 2 GHz, 100K cycles is 50us
-    // At 4 GHz, 100K cycles is 25us
-    static constexpr auto PauseCycles = 100'000;
+    static constexpr auto RequestC02State = 0U; // Control operand handed to tpause as %0.
     const auto tsc = FencedRDTSC() + PauseCycles;
     const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
     const auto edx = static_cast<u32>(tsc >> 32);
-    asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax));
+    asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); // EDX:EAX = tsc
+}
+
+static void MWAITX() {
+    static constexpr auto EnableWaitTimeFlag = 1U << 1; // ECX operand of mwaitx.
+    static constexpr auto RequestC1State = 0U;          // EAX operand of mwaitx.
+
+    // Dummy monitor target, aligned to a cache line; it is never modified after initialization.
+    alignas(64) u64 monitor_var{};
+    asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0));
+    asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag));
 }
 #endif
 
 void MicroSleep() {
     static const bool has_waitpkg = GetCPUCaps().waitpkg;
+    static const bool has_monitorx = GetCPUCaps().monitorx;
 
     if (has_waitpkg) {
         TPAUSE();
+    } else if (has_monitorx) {
+        MWAITX();
     } else {
         std::this_thread::yield();
     }