2011-08-20  Ulrich Drepper  <drepper@gmail.com>

	* sysdeps/x86_64/dl-trampoline.h: If MORE_CODE is defined, restore
	the CFI state at the end.
	* sysdeps/x86_64/dl-trampoline.S: Define MORE_CODE before first
	inclusion of dl-trampoline.h.
	Based on a patch by Jiri Olsa <jolsa@redhat.com>.

2011-07-24  H.J. Lu  <hongjiu.lu@intel.com>

	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Simplify
	AVX check.

2011-07-23  Ulrich Drepper  <drepper@gmail.com>

	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix one more
	typo.
	(_dl_x86_64_save_sse): Likewise.

2011-07-22  Ulrich Drepper  <drepper@gmail.com>

	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix test for
	OSXSAVE.
	(_dl_x86_64_save_sse): Likewise.

2011-07-21  Andreas Schwab  <schwab@redhat.com>

	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix last
	change.
	(_dl_x86_64_save_sse): Use correct AVX check.

2011-07-20  Ulrich Drepper  <drepper@gmail.com>

	[BZ #13007]
	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): More complete
	check for AVX enablement so that we don't crash with old kernels and
	new hardware.
	* elf/tst-audit4.c: Add same checks here.
	* elf/tst-audit6.c: Likewise.
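
For reference, the user-space check the patch adds to both tests boils down
to the sketch below (the helper name avx_usable is illustrative; the patch
calls it avx_enabled).  CPUID leaf 1 only says the CPU implements AVX; bit 27
(OSXSAVE) must also be set before xgetbv may be executed, and XCR0 bits 1 and
2 must both be set, meaning the kernel saves SSE and AVX state on context
switch.  On an old kernel with new hardware the AVX CPUID bit is set but the
XCR0 bits are not, and executing an AVX instruction faults.

#include <cpuid.h>

static int
avx_usable (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* Leaf 1, ECX: bit 28 = AVX supported, bit 27 = OSXSAVE
     (xgetbv executable and XCR0 readable).  */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
      || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
    return 0;

  /* XCR0: bit 1 = SSE state, bit 2 = AVX state.  Both must be
     enabled by the kernel, or AVX instructions fault.  */
  asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
  return (eax & 6) == 6;
}
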
Index: glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
===================================================================
--- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit4.c
+++ glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
@@ -6,16 +6,30 @@
#include <cpuid.h>
#include <immintrin.h>
+
+static int
+avx_enabled (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0;
+
+ /* Check the OS has AVX and SSE saving enabled. */
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+ return (eax & 6) == 6;
+}
+
+
extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
__m256i, __m256i, __m256i, __m256i);
int
main (void)
{
- unsigned int eax, ebx, ecx, edx;
-
/* Run AVX test only if AVX is supported. */
- if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
- && (ecx & bit_AVX))
+ if (avx_enabled ())
{
__m256i ymm = _mm256_setzero_si256 ();
__m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm);
Index: glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
===================================================================
--- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit6.c
+++ glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
@@ -8,14 +8,28 @@
extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i,
__m128i, __m128i, __m128i, __m128i);
-int
-main (void)
+
+static int
+avx_enabled (void)
{
unsigned int eax, ebx, ecx, edx;
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0;
+
+ /* Check the OS has AVX and SSE saving enabled. */
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+ return (eax & 6) == 6;
+}
+
+
+int
+main (void)
+{
/* Run AVX test only if AVX is supported. */
- if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
- && (ecx & bit_AVX))
+ if (avx_enabled ())
{
__m128i xmm = _mm_setzero_si128 ();
__m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm);
Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
===================================================================
--- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.S
+++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
@@ -139,24 +139,31 @@ L(have_avx):
movl $1, %eax
cpuid
movq %r11,%rbx # Restore rbx
- movl $1, %eax
- testl $(1 << 28), %ecx
+ xorl %eax, %eax
+ // AVX and XSAVE supported?
+ andl $((1 << 28) | (1 << 27)), %ecx
+ cmpl $((1 << 28) | (1 << 27)), %ecx
jne 2f
- negl %eax
-2: movl %eax, L(have_avx)(%rip)
+ xorl %ecx, %ecx
+ // Get XFEATURE_ENABLED_MASK
+ xgetbv
+ andl $0x6, %eax
+2: subl $0x5, %eax
+ movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
1: js L(no_avx)
# define RESTORE_AVX
+# define MORE_CODE
# include "dl-trampoline.h"
.align 16
L(no_avx):
# endif
-# undef RESTORE_AVX
-# include "dl-trampoline.h"
+# undef RESTORE_AVX
+# include "dl-trampoline.h"
cfi_endproc
.size _dl_runtime_profile, .-_dl_runtime_profile
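
The hunk above replaces the bare CPUID bit-28 test with the full probe.  The
encoding is compact: after "andl $0x6, %eax" the register holds 0, 2, 4, or 6
(and the "jne 2f" path arrives at label 2 with %eax == 0), so a single
subtraction makes the cached value sign-testable.  An illustrative rendering
in C (the function and parameter names are mine, not the patch's):

/* xcr0_lo stands for EAX after xgetbv; it is 0 on the path where the
   CPUID feature test already failed.  */
static int
encode_have_avx (unsigned int xcr0_lo)
{
  /* (xcr0_lo & 6) is 0, 2, 4, or 6; subtracting 5 yields +1 exactly
     when both the SSE and AVX state bits are set, and a negative
     value otherwise, so "js L(no_avx)" is the whole decision.  */
  return (int) (xcr0_lo & 6) - 5;
}
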
@@ -176,11 +183,20 @@ _dl_x86_64_save_sse:
movl $1, %eax
cpuid
movq %r11,%rbx # Restore rbx
- movl $1, %eax
- testl $(1 << 28), %ecx
+ xorl %eax, %eax
+ // AVX and XSAVE supported?
+ andl $((1 << 28) | (1 << 27)), %ecx
+ cmpl $((1 << 28) | (1 << 27)), %ecx
jne 2f
- negl %eax
-2: movl %eax, L(have_avx)(%rip)
+ xorl %ecx, %ecx
+ // Get XFEATURE_ENABLED_MASK
+ xgetbv
+ andl $0x6, %eax
+ cmpl $0x6, %eax
+ // Nonzero if SSE and AVX state saving is enabled.
+ sete %al
+2: leal -1(%eax,%eax), %eax
+ movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
1: js L(no_avx5)
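
_dl_x86_64_save_sse caches the same probe with a different encoding:
"sete %al" leaves 0 or 1 in %eax, and "leal -1(%eax,%eax), %eax" computes
2*%eax - 1, i.e. -1 or +1; the "jne 2f" path reaches label 2 with %eax == 0
and therefore also encodes -1.  A sketch of the caching scheme in C, reusing
the avx_usable helper sketched earlier (names are illustrative):

extern int avx_usable (void);   /* the CPUID + xgetbv probe sketched above */

static int have_avx;            /* stands for L(have_avx): 0 = not yet probed */

static int
avx_cached (void)
{
  if (have_avx == 0)
    {
      int ok = avx_usable () != 0;   /* 0 or 1, like "sete %al" */
      have_avx = 2 * ok - 1;         /* "leal -1(%eax,%eax)": -1 or +1 */
    }
  return have_avx > 0;
}
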
Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
===================================================================
--- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.h
+++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
@@ -195,14 +195,14 @@
_dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
so we just need to allocate the sizeof(La_x86_64_retval) space on
the stack, since the alignment has already been taken care of. */
-# ifdef RESTORE_AVX
+#ifdef RESTORE_AVX
/* sizeof(La_x86_64_retval). Need extra space for 2 SSE
registers to detect if xmm0/xmm1 registers are changed
by audit module. */
subq $(LRV_SIZE + XMM_SIZE*2), %rsp
-# else
+#else
subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval)
-# endif
+#endif
movq %rsp, %rcx # La_x86_64_retval argument to %rcx.
/* Fill in the La_x86_64_retval structure. */
@@ -212,7 +212,7 @@
movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
-# ifdef RESTORE_AVX
+#ifdef RESTORE_AVX
/* This is to support AVX audit modules. */
vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
@@ -221,14 +221,14 @@
by audit module. */
vmovdqa %xmm0, (LRV_SIZE)(%rcx)
vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
-# endif
+#endif
fstpt LRV_ST0_OFFSET(%rcx)
fstpt LRV_ST1_OFFSET(%rcx)
movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx.
movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
- movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
+ movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
call _dl_call_pltexit
/* Restore return registers. */
@@ -238,7 +238,7 @@
movaps LRV_XMM0_OFFSET(%rsp), %xmm0
movaps LRV_XMM1_OFFSET(%rsp), %xmm1
-# ifdef RESTORE_AVX
+#ifdef RESTORE_AVX
/* Check if xmm0/xmm1 registers are changed by audit module. */
vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
vpmovmskb %xmm2, %esi
@@ -253,7 +253,7 @@
vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
1:
-# endif
+#endif
fldt LRV_ST1_OFFSET(%rsp)
fldt LRV_ST0_OFFSET(%rsp)
@@ -267,3 +267,10 @@
# (eats the reloc index and link_map)
cfi_adjust_cfa_offset(-48)
retq
+
+#ifdef MORE_CODE
+ cfi_adjust_cfa_offset(48)
+ cfi_rel_offset(%rbx, 0)
+ cfi_def_cfa_register(%rbx)
+# undef MORE_CODE
+#endif
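
This final hunk is the CFI fix described in the 2011-08-20 entry:
dl-trampoline.h is included twice within the single cfi_startproc/cfi_endproc
region of _dl_runtime_profile, and the epilogue it emits ends with
cfi_adjust_cfa_offset(-48) and retq.  Without the restore block, the second
inclusion (the no-AVX copy) would be assembled with a stale CFA offset and a
dropped %rbx rule, yielding wrong unwind information.  Defining MORE_CODE
only for the first inclusion makes the header re-establish the CFA offset,
the saved-%rbx location, and %rbx as the CFA register before the second copy
begins; the #undef keeps the block out of the second inclusion, which really
is the end of the function.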