2011-07-24 H.J. Lu * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Simplify AVX check. 2011-08-20 Ulrich Drepper * sysdeps/x86_64/dl-trampoline.h: If MORE_CODE is defined, restore the CFI state in the end. * sysdeps/x86_64/dl-trampoline.S: Define MORE_CODE before first inclusion of dl-trampoline.h. Based on a patch by Jiri Olsa . 2011-07-23 Ulrich Drepper * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix one more typo. (_dl_x86_64_save_sse): Likewise. 2011-07-22 Ulrich Drepper * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix test for OSXSAVE. (_dl_x86_64_save_sse): Likewise. 2011-07-21 Andreas Schwab * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix last change. (_dl_x86_64_save_sse): Use correct AVX check. 2011-07-20 Ulrich Drepper [BZ #13007] * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): More complete check for AVX enablement so that we don't crash with old kernels and new hardware. * elf/tst-audit4.c: Add same checks here. * elf/tst-audit6.c: Likewise. Index: glibc-2.12-2-gc4ccff1/elf/tst-audit4.c =================================================================== --- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit4.c +++ glibc-2.12-2-gc4ccff1/elf/tst-audit4.c @@ -6,16 +6,30 @@ #include #include + +static int +avx_enabled (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 + || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) + return 0; + + /* Check the OS has AVX and SSE saving enabled. */ + asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); + + return (eax & 6) == 6; +} + + extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i, __m256i, __m256i, __m256i, __m256i); int main (void) { - unsigned int eax, ebx, ecx, edx; - /* Run AVX test only if AVX is supported. */ - if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) - && (ecx & bit_AVX)) + if (avx_enabled ()) { __m256i ymm = _mm256_setzero_si256 (); __m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm); Index: glibc-2.12-2-gc4ccff1/elf/tst-audit6.c =================================================================== --- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit6.c +++ glibc-2.12-2-gc4ccff1/elf/tst-audit6.c @@ -8,14 +8,28 @@ extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i, __m128i, __m128i, __m128i, __m128i); -int -main (void) + +static int +avx_enabled (void) { unsigned int eax, ebx, ecx, edx; + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 + || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) + return 0; + + /* Check the OS has AVX and SSE saving enabled. */ + asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); + + return (eax & 6) == 6; +} + + +int +main (void) +{ /* Run AVX test only if AVX is supported. */ - if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) - && (ecx & bit_AVX)) + if (avx_enabled ()) { __m128i xmm = _mm_setzero_si128 (); __m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm); Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S =================================================================== --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.S +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S @@ -139,24 +139,31 @@ L(have_avx): movl $1, %eax cpuid movq %r11,%rbx # Restore rbx - movl $1, %eax - testl $(1 << 28), %ecx + xorl %eax, %eax + // AVX and XSAVE supported? + andl $((1 << 28) | (1 << 27)), %ecx + cmpl $((1 << 28) | (1 << 27)), %ecx jne 2f - negl %eax -2: movl %eax, L(have_avx)(%rip) + xorl %ecx, %ecx + // Get XFEATURE_ENABLED_MASK + xgetbv + andl $0x6, %eax +2: subl $0x5, %eax + movl %eax, L(have_avx)(%rip) cmpl $0, %eax 1: js L(no_avx) # define RESTORE_AVX +# define MORE_CODE # include "dl-trampoline.h" .align 16 L(no_avx): # endif -# undef RESTORE_AVX -# include "dl-trampoline.h" +# undef RESTORE_AVX +# include "dl-trampoline.h" cfi_endproc .size _dl_runtime_profile, .-_dl_runtime_profile @@ -176,11 +183,20 @@ _dl_x86_64_save_sse: movl $1, %eax cpuid movq %r11,%rbx # Restore rbx - movl $1, %eax - testl $(1 << 28), %ecx + xorl %eax, %eax + // AVX and XSAVE supported? + andl $((1 << 28) | (1 << 27)), %ecx + cmpl $((1 << 28) | (1 << 27)), %ecx jne 2f - negl %eax -2: movl %eax, L(have_avx)(%rip) + xorl %ecx, %ecx + // Get XFEATURE_ENABLED_MASK + xgetbv + andl $0x6, %eax + cmpl $0x6, %eax + // Nonzero if SSE and AVX state saving is enabled. + sete %al +2: leal -1(%eax,%eax), %eax + movl %eax, L(have_avx)(%rip) cmpl $0, %eax 1: js L(no_avx5) Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h =================================================================== --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.h +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h @@ -195,14 +195,14 @@ _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now, so we just need to allocate the sizeof(La_x86_64_retval) space on the stack, since the alignment has already been taken care of. */ -# ifdef RESTORE_AVX +#ifdef RESTORE_AVX /* sizeof(La_x86_64_retval). Need extra space for 2 SSE registers to detect if xmm0/xmm1 registers are changed by audit module. */ subq $(LRV_SIZE + XMM_SIZE*2), %rsp -# else +#else subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval) -# endif +#endif movq %rsp, %rcx # La_x86_64_retval argument to %rcx. /* Fill in the La_x86_64_retval structure. */ @@ -212,7 +212,7 @@ movaps %xmm0, LRV_XMM0_OFFSET(%rcx) movaps %xmm1, LRV_XMM1_OFFSET(%rcx) -# ifdef RESTORE_AVX +#ifdef RESTORE_AVX /* This is to support AVX audit modules. */ vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx) vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx) @@ -221,14 +221,14 @@ by audit module. */ vmovdqa %xmm0, (LRV_SIZE)(%rcx) vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx) -# endif +#endif fstpt LRV_ST0_OFFSET(%rcx) fstpt LRV_ST1_OFFSET(%rcx) movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx. movq 40(%rbx), %rsi # Copy args pushed by PLT in register. - movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index + movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index call _dl_call_pltexit /* Restore return registers. */ @@ -238,7 +238,7 @@ movaps LRV_XMM0_OFFSET(%rsp), %xmm0 movaps LRV_XMM1_OFFSET(%rsp), %xmm1 -# ifdef RESTORE_AVX +#ifdef RESTORE_AVX /* Check if xmm0/xmm1 registers are changed by audit module. */ vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2 vpmovmskb %xmm2, %esi @@ -253,7 +253,7 @@ vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1 1: -# endif +#endif fldt LRV_ST1_OFFSET(%rsp) fldt LRV_ST0_OFFSET(%rsp) @@ -267,3 +267,10 @@ # (eats the reloc index and link_map) cfi_adjust_cfa_offset(-48) retq + +#ifdef MORE_CODE + cfi_adjust_cfa_offset(48) + cfi_rel_offset(%rbx, 0) + cfi_def_cfa_register(%rbx) +# undef MORE_CODE +#endif