mirror of
https://github.com/archlinuxarm/PKGBUILDs.git
synced 2025-03-29 00:25:25 +00:00
159 lines
5.3 KiB
Diff
159 lines
5.3 KiB
Diff
From: Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
|
Date: Sun, 12 Jan 2014 09:31:20 +0000 (+0200)
|
|
Subject: Fix buggy/incomplete detection of AVX/AVX2 support
|
|
X-Git-Url: http://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=commitdiff_plain;h=8302d66d140d5c78c5e808fa1555ed7a8ee27921
|
|
|
|
Fix buggy/incomplete detection of AVX/AVX2 support
|
|
|
|
* configure.ac: Also check for 'xgetbv' instruction in AVX and AVX2
|
|
inline assembly checks.
|
|
* src/hwf-x86.c [__i386__] (get_xgetbv): New function.
|
|
[__x86_64__] (get_xgetbv): New function.
|
|
[HAS_X86_CPUID] (detect_x86_gnuc): Check for OSXSAVE and OS support for
|
|
XMM&YMM registers and enable AVX/AVX2 only if XMM&YMM registers are
|
|
supported by OS.
|
|
--
|
|
|
|
This patch is based on the original patch and bug report by Panagiotis Christopoulos:
|
|
|
|
Adding better detection of AVX/AVX2 support
|
|
|
|
After upgrading libgcrypt from 1.5.3 to 1.6.0 on a remote XEN system (linode) my
|
|
gpg2 stopped working properly, throwing SIGILL signals when doing sha512
|
|
operations etc. I managed to debug this with the help of Doublas Freed
|
|
(dwfreed at mtu.edu) and it seems that the current AVX detection just checks for
|
|
bit 28 of cpuid, but that check still passes on systems that have disabled the avx/avx2
|
|
instructions for some reason (e.g. performance/instability), resulting in SIGILLs
|
|
(e.g. when trying to call _gcry_sha512_transform_amd64_avx()).
|
|
From Intel resources[1][2], I found additional checks for better AVX
|
|
detection and applied them in the following patch. Please review/change
|
|
accordingly and commit some better AVX detection mechanism. The AVX part is
|
|
tested but could not test the AVX2 one, because I lack proper hardware. I can
|
|
provide additional information upon request. Use the patch only as a guideline,
|
|
as it's not thoroughly tested.
|
|
|
|
[1] http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
|
|
[2] http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf (sections 14.3
|
|
and 14.7.1)
|
|
|
|
Reported-by: Panagiotis Christopoulos (pchrist) <pchrist@gentoo.org>
|
|
Cc: Doublas Freed <dwfreed@mtu.edu>
|
|
Cc: Tim Harder <radhermit@gentoo.org>
|
|
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
|
(cherry picked from commit bbcb12187afb1756cb27296166b57fa19ee45d4d)
|
|
---
|
|
|
|
diff --git a/configure.ac b/configure.ac
|
|
index a47e13e..3387b9a 100644
|
|
--- a/configure.ac
|
|
+++ b/configure.ac
|
|
@@ -1035,7 +1035,7 @@ AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions],
|
|
[gcry_cv_gcc_inline_asm_avx=no
|
|
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
|
[[void a(void) {
|
|
- __asm__("vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):);
|
|
+ __asm__("xgetbv; vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):);
|
|
}]])],
|
|
[gcry_cv_gcc_inline_asm_avx=yes])])
|
|
if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then
|
|
@@ -1052,7 +1052,7 @@ AC_CACHE_CHECK([whether GCC inline assembler supports AVX2 instructions],
|
|
[gcry_cv_gcc_inline_asm_avx2=no
|
|
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
|
[[void a(void) {
|
|
- __asm__("vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc");
|
|
+ __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc");
|
|
}]])],
|
|
[gcry_cv_gcc_inline_asm_avx2=yes])])
|
|
if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then
|
|
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
|
|
index 4e82558..0591b4f 100644
|
|
--- a/src/hwf-x86.c
|
|
+++ b/src/hwf-x86.c
|
|
@@ -95,6 +95,21 @@ get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
|
|
if (edx)
|
|
*edx = regs[3];
|
|
}
|
|
+
|
|
+static unsigned int
|
|
+get_xgetbv(void)
|
|
+{
|
|
+ unsigned int t_eax;
|
|
+
|
|
+ asm volatile
|
|
+ ("xgetbv\n\t"
|
|
+ : "=a" (t_eax)
|
|
+ : "c" (0)
|
|
+ );
|
|
+
|
|
+ return t_eax;
|
|
+}
|
|
+
|
|
#endif /* i386 && GNUC */
|
|
|
|
|
|
@@ -129,6 +144,21 @@ get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
|
|
if (edx)
|
|
*edx = regs[3];
|
|
}
|
|
+
|
|
+static unsigned int
|
|
+get_xgetbv(void)
|
|
+{
|
|
+ unsigned int t_eax;
|
|
+
|
|
+ asm volatile
|
|
+ ("xgetbv\n\t"
|
|
+ : "=a" (t_eax)
|
|
+ : "c" (0)
|
|
+ );
|
|
+
|
|
+ return t_eax;
|
|
+}
|
|
+
|
|
#endif /* x86-64 && GNUC */
|
|
|
|
|
|
@@ -138,9 +168,12 @@ detect_x86_gnuc (void)
|
|
{
|
|
char vendor_id[12+1];
|
|
unsigned int features;
|
|
+ unsigned int os_supports_avx_avx2_registers = 0;
|
|
unsigned int max_cpuid_level;
|
|
unsigned int result = 0;
|
|
|
|
+ (void)os_supports_avx_avx2_registers;
|
|
+
|
|
if (!is_cpuid_available())
|
|
return 0;
|
|
|
|
@@ -215,10 +248,20 @@ detect_x86_gnuc (void)
|
|
if (features & 0x02000000)
|
|
result |= HWF_INTEL_AESNI;
|
|
#endif /*ENABLE_AESNI_SUPPORT*/
|
|
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
|
|
+ /* Test bit 27 for OSXSAVE (required for AVX/AVX2). */
|
|
+ if (features & 0x08000000)
|
|
+ {
|
|
+ /* Check that OS has enabled both XMM and YMM state support. */
|
|
+ if ((get_xgetbv() & 0x6) == 0x6)
|
|
+ os_supports_avx_avx2_registers = 1;
|
|
+ }
|
|
+#endif
|
|
#ifdef ENABLE_AVX_SUPPORT
|
|
/* Test bit 28 for AVX. */
|
|
if (features & 0x10000000)
|
|
- result |= HWF_INTEL_AVX;
|
|
+ if (os_supports_avx_avx2_registers)
|
|
+ result |= HWF_INTEL_AVX;
|
|
#endif /*ENABLE_AVX_SUPPORT*/
|
|
#ifdef ENABLE_DRNG_SUPPORT
|
|
/* Test bit 30 for RDRAND. */
|
|
@@ -242,6 +285,7 @@ detect_x86_gnuc (void)
|
|
#ifdef ENABLE_AVX2_SUPPORT
|
|
/* Test bit 5 for AVX2. */
|
|
if (features & 0x00000020)
|
|
+ if (os_supports_avx_avx2_registers)
|
|
result |= HWF_INTEL_AVX2;
|
|
#endif /*ENABLE_AVX_SUPPORT*/
|
|
}
|
|
|