|
|
b40826 |
2011-07-24 H.J. Lu <hongjiu.lu@intel.com>
|
|
|
b40826 |
|
|
|
b40826 |
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Simplify
|
|
|
b40826 |
AVX check.
|
|
|
b40826 |
|
|
|
b40826 |
2011-08-20 Ulrich Drepper <drepper@gmail.com>
|
|
|
b40826 |
|
|
|
b40826 |
* sysdeps/x86_64/dl-trampoline.h: If MORE_CODE is defined, restore
|
|
|
b40826 |
the CFI state at the end.
|
|
|
b40826 |
* sysdeps/x86_64/dl-trampoline.S: Define MORE_CODE before first
|
|
|
b40826 |
inclusion of dl-trampoline.h.
|
|
|
b40826 |
Based on a patch by Jiri Olsa <jolsa@redhat.com>.
|
|
|
b40826 |
|
|
|
b40826 |
2011-07-23 Ulrich Drepper <drepper@gmail.com>
|
|
|
b40826 |
|
|
|
b40826 |
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix one more
|
|
|
b40826 |
typo.
|
|
|
b40826 |
(_dl_x86_64_save_sse): Likewise.
|
|
|
b40826 |
|
|
|
b40826 |
2011-07-22 Ulrich Drepper <drepper@gmail.com>
|
|
|
b40826 |
|
|
|
b40826 |
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix test for
|
|
|
b40826 |
OSXSAVE.
|
|
|
b40826 |
(_dl_x86_64_save_sse): Likewise.
|
|
|
b40826 |
|
|
|
b40826 |
2011-07-21 Andreas Schwab <schwab@redhat.com>
|
|
|
b40826 |
|
|
|
b40826 |
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix last
|
|
|
b40826 |
change.
|
|
|
b40826 |
(_dl_x86_64_save_sse): Use correct AVX check.
|
|
|
b40826 |
|
|
|
b40826 |
2011-07-20 Ulrich Drepper <drepper@gmail.com>
|
|
|
b40826 |
|
|
|
b40826 |
[BZ #13007]
|
|
|
b40826 |
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): More complete
|
|
|
b40826 |
check for AVX enablement so that we don't crash with old kernels and
|
|
|
b40826 |
new hardware.
|
|
|
b40826 |
* elf/tst-audit4.c: Add same checks here.
|
|
|
b40826 |
* elf/tst-audit6.c: Likewise.
|
|
|
b40826 |
|
|
|
b40826 |
Index: glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
|
|
|
b40826 |
===================================================================
|
|
|
b40826 |
--- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit4.c
|
|
|
b40826 |
+++ glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
|
|
|
b40826 |
@@ -6,16 +6,30 @@
|
|
|
b40826 |
#include <cpuid.h>
|
|
|
b40826 |
#include <immintrin.h>
|
|
|
b40826 |
|
|
|
b40826 |
+
|
|
|
b40826 |
+static int
|
|
|
b40826 |
+avx_enabled (void)
|
|
|
b40826 |
+{
|
|
|
b40826 |
+ unsigned int eax, ebx, ecx, edx;
|
|
|
b40826 |
+
|
|
|
b40826 |
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
|
|
|
b40826 |
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
|
|
|
b40826 |
+ return 0;
|
|
|
b40826 |
+
|
|
|
b40826 |
+ /* Check the OS has AVX and SSE saving enabled. */
|
|
|
b40826 |
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
|
|
|
b40826 |
+
|
|
|
b40826 |
+ return (eax & 6) == 6;
|
|
|
b40826 |
+}
|
|
|
b40826 |
+
|
|
|
b40826 |
+
|
|
|
b40826 |
extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
|
|
|
b40826 |
__m256i, __m256i, __m256i, __m256i);
|
|
|
b40826 |
int
|
|
|
b40826 |
main (void)
|
|
|
b40826 |
{
|
|
|
b40826 |
- unsigned int eax, ebx, ecx, edx;
|
|
|
b40826 |
-
|
|
|
b40826 |
/* Run AVX test only if AVX is supported. */
|
|
|
b40826 |
- if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
|
|
|
b40826 |
- && (ecx & bit_AVX))
|
|
|
b40826 |
+ if (avx_enabled ())
|
|
|
b40826 |
{
|
|
|
b40826 |
__m256i ymm = _mm256_setzero_si256 ();
|
|
|
b40826 |
__m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm);
|
|
|
b40826 |
Index: glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
|
|
|
b40826 |
===================================================================
|
|
|
b40826 |
--- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit6.c
|
|
|
b40826 |
+++ glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
|
|
|
b40826 |
@@ -8,14 +8,28 @@
|
|
|
b40826 |
extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i,
|
|
|
b40826 |
__m128i, __m128i, __m128i, __m128i);
|
|
|
b40826 |
|
|
|
b40826 |
-int
|
|
|
b40826 |
-main (void)
|
|
|
b40826 |
+
|
|
|
b40826 |
+static int
|
|
|
b40826 |
+avx_enabled (void)
|
|
|
b40826 |
{
|
|
|
b40826 |
unsigned int eax, ebx, ecx, edx;
|
|
|
b40826 |
|
|
|
b40826 |
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
|
|
|
b40826 |
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
|
|
|
b40826 |
+ return 0;
|
|
|
b40826 |
+
|
|
|
b40826 |
+ /* Check the OS has AVX and SSE saving enabled. */
|
|
|
b40826 |
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
|
|
|
b40826 |
+
|
|
|
b40826 |
+ return (eax & 6) == 6;
|
|
|
b40826 |
+}
|
|
|
b40826 |
+
|
|
|
b40826 |
+
|
|
|
b40826 |
+int
|
|
|
b40826 |
+main (void)
|
|
|
b40826 |
+{
|
|
|
b40826 |
/* Run AVX test only if AVX is supported. */
|
|
|
b40826 |
- if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
|
|
|
b40826 |
- && (ecx & bit_AVX))
|
|
|
b40826 |
+ if (avx_enabled ())
|
|
|
b40826 |
{
|
|
|
b40826 |
__m128i xmm = _mm_setzero_si128 ();
|
|
|
b40826 |
__m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm);
|
|
|
b40826 |
Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
|
|
|
b40826 |
===================================================================
|
|
|
b40826 |
--- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.S
|
|
|
b40826 |
+++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
|
|
|
b40826 |
@@ -139,24 +139,31 @@ L(have_avx):
|
|
|
b40826 |
movl $1, %eax
|
|
|
b40826 |
cpuid
|
|
|
b40826 |
movq %r11,%rbx # Restore rbx
|
|
|
b40826 |
- movl $1, %eax
|
|
|
b40826 |
- testl $(1 << 28), %ecx
|
|
|
b40826 |
+ xorl %eax, %eax
|
|
|
b40826 |
+ // AVX and XSAVE supported?
|
|
|
b40826 |
+ andl $((1 << 28) | (1 << 27)), %ecx
|
|
|
b40826 |
+ cmpl $((1 << 28) | (1 << 27)), %ecx
|
|
|
b40826 |
jne 2f
|
|
|
b40826 |
- negl %eax
|
|
|
b40826 |
-2: movl %eax, L(have_avx)(%rip)
|
|
|
b40826 |
+ xorl %ecx, %ecx
|
|
|
b40826 |
+ // Get XFEATURE_ENABLED_MASK
|
|
|
b40826 |
+ xgetbv
|
|
|
b40826 |
+ andl $0x6, %eax
|
|
|
b40826 |
+2: subl $0x5, %eax
|
|
|
b40826 |
+ movl %eax, L(have_avx)(%rip)
|
|
|
b40826 |
cmpl $0, %eax
|
|
|
b40826 |
|
|
|
b40826 |
1: js L(no_avx)
|
|
|
b40826 |
|
|
|
b40826 |
# define RESTORE_AVX
|
|
|
b40826 |
+# define MORE_CODE
|
|
|
b40826 |
# include "dl-trampoline.h"
|
|
|
b40826 |
|
|
|
b40826 |
.align 16
|
|
|
b40826 |
L(no_avx):
|
|
|
b40826 |
# endif
|
|
|
b40826 |
|
|
|
b40826 |
-# undef RESTORE_AVX
|
|
|
b40826 |
-# include "dl-trampoline.h"
|
|
|
b40826 |
+# undef RESTORE_AVX
|
|
|
b40826 |
+# include "dl-trampoline.h"
|
|
|
b40826 |
|
|
|
b40826 |
cfi_endproc
|
|
|
b40826 |
.size _dl_runtime_profile, .-_dl_runtime_profile
|
|
|
b40826 |
@@ -176,11 +183,20 @@ _dl_x86_64_save_sse:
|
|
|
b40826 |
movl $1, %eax
|
|
|
b40826 |
cpuid
|
|
|
b40826 |
movq %r11,%rbx # Restore rbx
|
|
|
b40826 |
- movl $1, %eax
|
|
|
b40826 |
- testl $(1 << 28), %ecx
|
|
|
b40826 |
+ xorl %eax, %eax
|
|
|
b40826 |
+ // AVX and XSAVE supported?
|
|
|
b40826 |
+ andl $((1 << 28) | (1 << 27)), %ecx
|
|
|
b40826 |
+ cmpl $((1 << 28) | (1 << 27)), %ecx
|
|
|
b40826 |
jne 2f
|
|
|
b40826 |
- negl %eax
|
|
|
b40826 |
-2: movl %eax, L(have_avx)(%rip)
|
|
|
b40826 |
+ xorl %ecx, %ecx
|
|
|
b40826 |
+ // Get XFEATURE_ENABLED_MASK
|
|
|
b40826 |
+ xgetbv
|
|
|
b40826 |
+ andl $0x6, %eax
|
|
|
b40826 |
+ cmpl $0x6, %eax
|
|
|
b40826 |
+ // Nonzero if SSE and AVX state saving is enabled.
|
|
|
b40826 |
+ sete %al
|
|
|
b40826 |
+2: leal -1(%eax,%eax), %eax
|
|
|
b40826 |
+ movl %eax, L(have_avx)(%rip)
|
|
|
b40826 |
cmpl $0, %eax
|
|
|
b40826 |
|
|
|
b40826 |
1: js L(no_avx5)
|
|
|
b40826 |
Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
|
|
|
b40826 |
===================================================================
|
|
|
b40826 |
--- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.h
|
|
|
b40826 |
+++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
|
|
|
b40826 |
@@ -195,14 +195,14 @@
|
|
|
b40826 |
_dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
|
|
|
b40826 |
so we just need to allocate the sizeof(La_x86_64_retval) space on
|
|
|
b40826 |
the stack, since the alignment has already been taken care of. */
|
|
|
b40826 |
-# ifdef RESTORE_AVX
|
|
|
b40826 |
+#ifdef RESTORE_AVX
|
|
|
b40826 |
/* sizeof(La_x86_64_retval). Need extra space for 2 SSE
|
|
|
b40826 |
registers to detect if xmm0/xmm1 registers are changed
|
|
|
b40826 |
by audit module. */
|
|
|
b40826 |
subq $(LRV_SIZE + XMM_SIZE*2), %rsp
|
|
|
b40826 |
-# else
|
|
|
b40826 |
+#else
|
|
|
b40826 |
subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval)
|
|
|
b40826 |
-# endif
|
|
|
b40826 |
+#endif
|
|
|
b40826 |
movq %rsp, %rcx # La_x86_64_retval argument to %rcx.
|
|
|
b40826 |
|
|
|
b40826 |
/* Fill in the La_x86_64_retval structure. */
|
|
|
b40826 |
@@ -212,7 +212,7 @@
|
|
|
b40826 |
movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
|
|
|
b40826 |
movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
|
|
|
b40826 |
|
|
|
b40826 |
-# ifdef RESTORE_AVX
|
|
|
b40826 |
+#ifdef RESTORE_AVX
|
|
|
b40826 |
/* This is to support AVX audit modules. */
|
|
|
b40826 |
vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
|
|
|
b40826 |
vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
|
|
|
b40826 |
@@ -221,14 +221,14 @@
|
|
|
b40826 |
by audit module. */
|
|
|
b40826 |
vmovdqa %xmm0, (LRV_SIZE)(%rcx)
|
|
|
b40826 |
vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
|
|
|
b40826 |
-# endif
|
|
|
b40826 |
+#endif
|
|
|
b40826 |
|
|
|
b40826 |
fstpt LRV_ST0_OFFSET(%rcx)
|
|
|
b40826 |
fstpt LRV_ST1_OFFSET(%rcx)
|
|
|
b40826 |
|
|
|
b40826 |
movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx.
|
|
|
b40826 |
movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
|
|
|
b40826 |
- movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
|
|
|
b40826 |
+ movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
|
|
|
b40826 |
call _dl_call_pltexit
|
|
|
b40826 |
|
|
|
b40826 |
/* Restore return registers. */
|
|
|
b40826 |
@@ -238,7 +238,7 @@
|
|
|
b40826 |
movaps LRV_XMM0_OFFSET(%rsp), %xmm0
|
|
|
b40826 |
movaps LRV_XMM1_OFFSET(%rsp), %xmm1
|
|
|
b40826 |
|
|
|
b40826 |
-# ifdef RESTORE_AVX
|
|
|
b40826 |
+#ifdef RESTORE_AVX
|
|
|
b40826 |
/* Check if xmm0/xmm1 registers are changed by audit module. */
|
|
|
b40826 |
vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
|
|
|
b40826 |
vpmovmskb %xmm2, %esi
|
|
|
b40826 |
@@ -253,7 +253,7 @@
|
|
|
b40826 |
vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
|
|
|
b40826 |
|
|
|
b40826 |
1:
|
|
|
b40826 |
-# endif
|
|
|
b40826 |
+#endif
|
|
|
b40826 |
|
|
|
b40826 |
fldt LRV_ST1_OFFSET(%rsp)
|
|
|
b40826 |
fldt LRV_ST0_OFFSET(%rsp)
|
|
|
b40826 |
@@ -267,3 +267,10 @@
|
|
|
b40826 |
# (eats the reloc index and link_map)
|
|
|
b40826 |
cfi_adjust_cfa_offset(-48)
|
|
|
b40826 |
retq
|
|
|
b40826 |
+
|
|
|
b40826 |
+#ifdef MORE_CODE
|
|
|
b40826 |
+ cfi_adjust_cfa_offset(48)
|
|
|
b40826 |
+ cfi_rel_offset(%rbx, 0)
|
|
|
b40826 |
+ cfi_def_cfa_register(%rbx)
|
|
|
b40826 |
+# undef MORE_CODE
|
|
|
b40826 |
+#endif
|