# # AVX-512 support for glibc: # # Notes: Renamed configure.ac changes to configure.in. # # commit aa4de9cea5c07d43caeaca9722c2d417e9a2919c # Author: H.J. Lu # Date: Fri Mar 14 08:51:25 2014 -0700 # # Check AVX-512 assembler support first # # It checks AVX-512 assembler support first and sets libc_cv_cc_avx512 to # $libc_cv_asm_avx512, instead of yes. GCC won't support AVX-512 if # assembler doesn't support it. # # * sysdeps/x86_64/configure.ac: Check AVX-512 assembler support # first. Disable AVX-512 GCC support if assembler doesn't support # it. # * sysdeps/x86_64/configure: Regenerated. # # commit 2d63a517e4084ec80403cd9f278690fa8b676cc4 # Author: Igor Zamyatin # Date: Thu Mar 13 11:10:22 2014 -0700 # # Save and restore AVX-512 zmm registers to x86-64 ld.so # # AVX-512 ISA adds 512-bit zmm registers. This patch updates # _dl_runtime_profile to pass zmm registers to run-time audit. It also # changes _dl_x86_64_save_sse and _dl_x86_64_restore_sse to upport zmm # registers, which are called when only when RTLD_PREPARE_FOREIGN_CALL # is used. Its performance impact is minimum. # # * config.h.in (HAVE_AVX512_SUPPORT): New #undef. # (HAVE_AVX512_ASM_SUPPORT): Likewise. # * sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New. # (La_x86_64_vector): Add zmm. # * sysdeps/x86_64/Makefile (tests): Add tst-audit10. # (modules-names): Add tst-auditmod10a and tst-auditmod10b. # ($(objpfx)tst-audit10): New target. # ($(objpfx)tst-audit10.out): Likewise. # (tst-audit10-ENV): New. # (AVX512-CFLAGS): Likewise. # (CFLAGS-tst-audit10.c): Likewise. # (CFLAGS-tst-auditmod10a.c): Likewise. # (CFLAGS-tst-auditmod10b.c): Likewise. # * sysdeps/x86_64/configure.ac: Set config-cflags-avx512, # HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT. # * sysdeps/x86_64/configure: Regenerated. # * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add # AVX-512 zmm register support. # (_dl_x86_64_save_sse): Likewise. # (_dl_x86_64_restore_sse): Likewise. # * sysdeps/x86_64/dl-trampoline.h: Updated to support different # size vector registers. # * sysdeps/x86_64/link-defines.sym (YMM_SIZE): New. # (ZMM_SIZE): Likewise. # * sysdeps/x86_64/tst-audit10.c: New file. # * sysdeps/x86_64/tst-auditmod10a.c: Likewise. # * sysdeps/x86_64/tst-auditmod10b.c: Likewise. # # In addition adds: # https://sourceware.org/ml/libc-alpha/2014-09/msg00228.html # To extend zmm register checking. # diff -urN glibc-2.17-c758a686/config.h.in glibc-2.17-c758a686/config.h.in --- glibc-2.17-c758a686/config.h.in 2014-09-10 23:11:14.605787816 -0400 +++ glibc-2.17-c758a686/config.h.in 2014-09-10 23:16:36.331167056 -0400 @@ -101,6 +101,12 @@ /* Define if gcc supports VEX encoding. */ #undef HAVE_SSE2AVX_SUPPORT +/* Define if compiler supports AVX512. */ +#undef HAVE_AVX512_SUPPORT + +/* Define if assembler supports AVX512. */ +#undef HAVE_AVX512_ASM_SUPPORT + /* Define if gcc supports FMA4. */ #undef HAVE_FMA4_SUPPORT diff -urN glibc-2.17-c758a686/sysdeps/x86/bits/link.h glibc-2.17-c758a686/sysdeps/x86/bits/link.h --- glibc-2.17-c758a686/sysdeps/x86/bits/link.h 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86/bits/link.h 2014-09-10 23:16:36.331167056 -0400 @@ -66,6 +66,8 @@ typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16))); typedef float La_x86_64_ymm __attribute__ ((__vector_size__ (32), __aligned__ (16))); +typedef double La_x86_64_zmm + __attribute__ ((__vector_size__ (64), __aligned__ (16))); # else typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__))); # endif @@ -74,6 +76,7 @@ { # if __GNUC_PREREQ (4,0) La_x86_64_ymm ymm[2]; + La_x86_64_zmm zmm[1]; # endif La_x86_64_xmm xmm[4]; } La_x86_64_vector __attribute__ ((__aligned__ (16))); diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure glibc-2.17-c758a686/sysdeps/x86_64/configure --- glibc-2.17-c758a686/sysdeps/x86_64/configure 2014-09-10 23:11:15.000787061 -0400 +++ glibc-2.17-c758a686/sysdeps/x86_64/configure 2014-09-10 23:16:36.338167042 -0400 @@ -91,6 +91,59 @@ fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5 +$as_echo_n "checking for AVX512 support in assembler... " >&6; } +if ${libc_cv_asm_avx512+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.s <<\EOF + vmovdqu64 %zmm0, (%rsp) +EOF +if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + libc_cv_asm_avx512=yes +else + libc_cv_asm_avx512=no +fi +rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512" >&5 +$as_echo "$libc_cv_asm_avx512" >&6; } +if test $libc_cv_asm_avx512 == yes; then + $as_echo "#define HAVE_AVX512_ASM_SUPPORT 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support" >&5 +$as_echo_n "checking for AVX512 support... " >&6; } +if ${libc_cv_cc_avx512+:} false; then : + $as_echo_n "(cached) " >&6 +else + if { ac_try='${CC-cc} -mavx512f -xc /dev/null -S -o /dev/null' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + libc_cv_cc_avx512=$libc_cv_asm_avx512 +else + libc_cv_cc_avx512=no +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx512" >&5 +$as_echo "$libc_cv_cc_avx512" >&6; } +if test $libc_cv_cc_avx512 = yes; then + $as_echo "#define HAVE_AVX512_SUPPORT 1" >>confdefs.h + +fi +config_vars="$config_vars +config-cflags-avx512 = $libc_cv_cc_avx512" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5 $as_echo_n "checking for AVX encoding of SSE instructions... " >&6; } if ${libc_cv_cc_sse2avx+:} false; then : diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure.in glibc-2.17-c758a686/sysdeps/x86_64/configure.in --- glibc-2.17-c758a686/sysdeps/x86_64/configure.in 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86_64/configure.in 2014-09-10 23:16:36.338167042 -0400 @@ -21,6 +21,30 @@ AC_DEFINE(HAVE_AVX_SUPPORT) fi +dnl Check if asm supports AVX512. +AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl +cat > conftest.s <<\EOF + vmovdqu64 %zmm0, (%rsp) +EOF +if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then + libc_cv_asm_avx512=yes +else + libc_cv_asm_avx512=no +fi +rm -f conftest*]) +if test $libc_cv_asm_avx512 == yes; then + AC_DEFINE(HAVE_AVX512_ASM_SUPPORT) +fi + +dnl Check if -mavx512f works. +AC_CACHE_CHECK(for AVX512 support, libc_cv_cc_avx512, [dnl +LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=$libc_cv_asm_avx512], [libc_cv_cc_avx512=no]) +]) +if test $libc_cv_cc_avx512 = yes; then + AC_DEFINE(HAVE_AVX512_SUPPORT) +fi +LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512]) + dnl Check if -msse2avx works. AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl LIBC_TRY_CC_OPTION([-msse2avx], diff -urN glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h --- glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h 2014-09-10 23:16:36.334167050 -0400 @@ -19,14 +19,14 @@ #ifdef RESTORE_AVX /* This is to support AVX audit modules. */ - vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp) - vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) - vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) - vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) - vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) - vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) - vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) - vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) + VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp) + VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) + VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) + VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) + VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) + VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) + VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) + VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) /* Save xmm0-xmm7 registers to detect if any of them are changed by audit module. */ @@ -72,7 +72,7 @@ je 2f vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0 +2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0) vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8 @@ -81,7 +81,7 @@ je 2f vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1) vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8 @@ -90,7 +90,7 @@ je 2f vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2) vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8 @@ -99,7 +99,7 @@ je 2f vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3) vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8 @@ -108,7 +108,7 @@ je 2f vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4) vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8 @@ -117,7 +117,7 @@ je 2f vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5) vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8 @@ -126,7 +126,7 @@ je 2f vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6) vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8 @@ -135,7 +135,7 @@ je 2f vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7) vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) 1: @@ -213,8 +213,8 @@ #ifdef RESTORE_AVX /* This is to support AVX audit modules. */ - vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx) - vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx) + VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx) + VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx) /* Save xmm0/xmm1 registers to detect if they are changed by audit module. */ @@ -243,13 +243,13 @@ vpmovmskb %xmm2, %esi cmpl $0xffff, %esi jne 1f - vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0 + VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0) 1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2 vpmovmskb %xmm2, %esi cmpl $0xffff, %esi jne 1f - vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1 + VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1) 1: #endif diff -urN glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S --- glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S 2014-09-10 23:16:36.334167050 -0400 @@ -96,7 +96,7 @@ /* Actively align the La_x86_64_regs structure. */ andq $0xfffffffffffffff0, %rsp -# ifdef HAVE_AVX_SUPPORT +# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT /* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers to detect if any xmm0-xmm7 registers are changed by audit module. */ @@ -130,7 +130,7 @@ movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) -# ifdef HAVE_AVX_SUPPORT +# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT .data L(have_avx): .zero 4 @@ -138,7 +138,7 @@ .previous cmpl $0, L(have_avx)(%rip) - jne 1f + jne L(defined) movq %rbx, %r11 # Save rbx movl $1, %eax cpuid @@ -147,18 +147,54 @@ // AVX and XSAVE supported? andl $((1 << 28) | (1 << 27)), %ecx cmpl $((1 << 28) | (1 << 27)), %ecx - jne 2f + jne 10f +# ifdef HAVE_AVX512_ASM_SUPPORT + // AVX512 supported in processor? + movq %rbx, %r11 # Save rbx + xorl %ecx, %ecx + mov $0x7, %eax + cpuid + andl $(1 << 16), %ebx +# endif xorl %ecx, %ecx // Get XFEATURE_ENABLED_MASK xgetbv - andl $0x6, %eax -2: subl $0x5, %eax +# ifdef HAVE_AVX512_ASM_SUPPORT + test %ebx, %ebx + movq %r11, %rbx # Restore rbx + je 20f + // Verify that XCR0[7:5] = '111b' and + // XCR0[2:1] = '11b' which means + // that zmm state is enabled + andl $0xe6, %eax + cmpl $0xe6, %eax + jne 20f + movl %eax, L(have_avx)(%rip) +L(avx512): +# define RESTORE_AVX +# define VMOV vmovdqu64 +# define VEC(i) zmm##i +# define MORE_CODE +# include "dl-trampoline.h" +# undef VMOV +# undef VEC +# undef RESTORE_AVX +# endif +20: andl $0x6, %eax +10: subl $0x5, %eax movl %eax, L(have_avx)(%rip) cmpl $0, %eax -1: js L(no_avx) +L(defined): + js L(no_avx) +# ifdef HAVE_AVX512_ASM_SUPPORT + cmpl $0xe6, L(have_avx)(%rip) + je L(avx512) +# endif # define RESTORE_AVX +# define VMOV vmovdqu +# define VEC(i) ymm##i # define MORE_CODE # include "dl-trampoline.h" @@ -180,9 +216,9 @@ .align 16 cfi_startproc _dl_x86_64_save_sse: -# ifdef HAVE_AVX_SUPPORT +# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT cmpl $0, L(have_avx)(%rip) - jne 1f + jne L(defined_5) movq %rbx, %r11 # Save rbx movl $1, %eax cpuid @@ -191,21 +227,43 @@ // AVX and XSAVE supported? andl $((1 << 28) | (1 << 27)), %ecx cmpl $((1 << 28) | (1 << 27)), %ecx - jne 2f + jne 1f +# ifdef HAVE_AVX512_ASM_SUPPORT + // AVX512 supported in a processor? + movq %rbx, %r11 # Save rbx + xorl %ecx,%ecx + mov $0x7,%eax + cpuid + andl $(1 << 16), %ebx +# endif xorl %ecx, %ecx // Get XFEATURE_ENABLED_MASK xgetbv - andl $0x6, %eax - cmpl $0x6, %eax - // Nonzero if SSE and AVX state saving is enabled. - sete %al -2: leal -1(%eax,%eax), %eax +# ifdef HAVE_AVX512_ASM_SUPPORT + test %ebx, %ebx + movq %r11, %rbx # Restore rbx + je 2f + // Verify that XCR0[7:5] = '111b' and + // XCR0[2:1] = '11b' which means + // that zmm state is enabled + andl $0xe6, %eax + movl %eax, L(have_avx)(%rip) + cmpl $0xe6, %eax + je L(avx512_5) +# endif + +2: andl $0x6, %eax +1: subl $0x5, %eax movl %eax, L(have_avx)(%rip) cmpl $0, %eax -1: js L(no_avx5) +L(defined_5): + js L(no_avx5) +# ifdef HAVE_AVX512_ASM_SUPPORT + cmpl $0xe6, L(have_avx)(%rip) + je L(avx512_5) +# endif -# define YMM_SIZE 32 vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE @@ -215,6 +273,18 @@ vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE ret +# ifdef HAVE_AVX512_ASM_SUPPORT +L(avx512_5): + vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE + vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE + vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE + vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE + vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE + vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE + vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE + vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE + ret +# endif L(no_avx5): # endif movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE @@ -235,9 +305,13 @@ .align 16 cfi_startproc _dl_x86_64_restore_sse: -# ifdef HAVE_AVX_SUPPORT +# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT cmpl $0, L(have_avx)(%rip) js L(no_avx6) +# ifdef HAVE_AVX512_ASM_SUPPORT + cmpl $0xe6, L(have_avx)(%rip) + je L(avx512_6) +# endif vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0 vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1 @@ -248,6 +322,18 @@ vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6 vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7 ret +# ifdef HAVE_AVX512_ASM_SUPPORT +L(avx512_6): + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7 + ret +# endif L(no_avx6): # endif movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0 diff -urN glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym --- glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym 2014-09-10 23:16:36.335167048 -0400 @@ -4,6 +4,8 @@ -- VECTOR_SIZE sizeof (La_x86_64_vector) XMM_SIZE sizeof (La_x86_64_xmm) +YMM_SIZE sizeof (La_x86_64_ymm) +ZMM_SIZE sizeof (La_x86_64_zmm) LR_SIZE sizeof (struct La_x86_64_regs) LR_RDX_OFFSET offsetof (struct La_x86_64_regs, lr_rdx) diff -urN glibc-2.17-c758a686/sysdeps/x86_64/Makefile glibc-2.17-c758a686/sysdeps/x86_64/Makefile --- glibc-2.17-c758a686/sysdeps/x86_64/Makefile 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86_64/Makefile 2014-09-10 23:22:04.269518711 -0400 @@ -37,6 +37,20 @@ $(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o $(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o + +tests += tst-audit10 +modules-names += tst-auditmod10a tst-auditmod10b + +$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so +$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so +tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so + +ifeq (yes,$(config-cflags-avx512)) +AVX512-CFLAGS = -mavx512f +CFLAGS-tst-audit10.c += $(AVX512-CFLAGS) +CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS) +CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS) +endif endif ifeq ($(subdir),csu) diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c --- glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c 1969-12-31 19:00:00.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c 2014-09-10 23:16:36.335167048 -0400 @@ -0,0 +1,70 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Test case for x86-64 preserved registers in dynamic linker. */ + +#ifdef __AVX512F__ +#include +#include +#include +#include + +static int +avx512_enabled (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 + || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) + return 0; + + __cpuid_count (7, 0, eax, ebx, ecx, edx); + if (!(ebx & bit_AVX512F)) + return 0; + + asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); + + /* Verify that ZMM, YMM and XMM states are enabled. */ + return (eax & 0xe6) == 0xe6; +} + + +extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i, + __m512i, __m512i, __m512i, __m512i); +int +main (void) +{ + /* Run AVX512 test only if AVX512 is supported. */ + if (avx512_enabled ()) + { + __m512i zmm = _mm512_setzero_si512 (); + __m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm); + + zmm = _mm512_set1_epi64 (0x12349876); + + if (memcmp (&zmm, &ret, sizeof (ret))) + abort (); + } + return 0; +} +#else +int +main (void) +{ + return 0; +} +#endif diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c --- glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c 1969-12-31 19:00:00.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c 2014-09-10 23:16:36.335167048 -0400 @@ -0,0 +1,65 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Test case for x86-64 preserved registers in dynamic linker. */ + +#ifdef __AVX512F__ +#include +#include +#include + +__m512i +audit_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3, + __m512i x4, __m512i x5, __m512i x6, __m512i x7) +{ + __m512i zmm; + + zmm = _mm512_set1_epi64 (1); + if (memcmp (&zmm, &x0, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi64 (2); + if (memcmp (&zmm, &x1, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi64 (3); + if (memcmp (&zmm, &x2, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi64 (4); + if (memcmp (&zmm, &x3, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi64 (5); + if (memcmp (&zmm, &x4, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi64 (6); + if (memcmp (&zmm, &x5, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi64 (7); + if (memcmp (&zmm, &x6, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi64 (8); + if (memcmp (&zmm, &x7, sizeof (zmm))) + abort (); + + return _mm512_setzero_si512 (); +} +#endif diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c --- glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c 1969-12-31 19:00:00.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c 2014-09-10 23:16:36.336167046 -0400 @@ -0,0 +1,219 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Verify that changing AVX512 registers in audit library won't affect + function parameter passing/return. */ + +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned int +la_version (unsigned int v) +{ + setlinebuf (stdout); + + printf ("version: %u\n", v); + + char buf[20]; + sprintf (buf, "%u", v); + + return v; +} + +void +la_activity (uintptr_t *cookie, unsigned int flag) +{ + if (flag == LA_ACT_CONSISTENT) + printf ("activity: consistent\n"); + else if (flag == LA_ACT_ADD) + printf ("activity: add\n"); + else if (flag == LA_ACT_DELETE) + printf ("activity: delete\n"); + else + printf ("activity: unknown activity %u\n", flag); +} + +char * +la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) +{ + char buf[100]; + const char *flagstr; + if (flag == LA_SER_ORIG) + flagstr = "LA_SET_ORIG"; + else if (flag == LA_SER_LIBPATH) + flagstr = "LA_SER_LIBPATH"; + else if (flag == LA_SER_RUNPATH) + flagstr = "LA_SER_RUNPATH"; + else if (flag == LA_SER_CONFIG) + flagstr = "LA_SER_CONFIG"; + else if (flag == LA_SER_DEFAULT) + flagstr = "LA_SER_DEFAULT"; + else if (flag == LA_SER_SECURE) + flagstr = "LA_SER_SECURE"; + else + { + sprintf (buf, "unknown flag %d", flag); + flagstr = buf; + } + printf ("objsearch: %s, %s\n", name, flagstr); + + return (char *) name; +} + +unsigned int +la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie) +{ + printf ("objopen: %ld, %s\n", lmid, l->l_name); + + return 3; +} + +void +la_preinit (uintptr_t *cookie) +{ + printf ("preinit\n"); +} + +unsigned int +la_objclose (uintptr_t *cookie) +{ + printf ("objclose\n"); + return 0; +} + +uintptr_t +la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, unsigned int *flags, const char *symname) +{ + printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", + symname, (long int) sym->st_value, ndx, *flags); + + return sym->st_value; +} + +#include + +#ifdef __AVX512F__ +#include +#include + +static int +check_avx512 (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 + || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) + return 0; + + __cpuid_count (7, 0, eax, ebx, ecx, edx); + if (!(ebx & bit_AVX512F)) + return 0; + + asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); + + /* Verify that ZMM, YMM and XMM states are enabled. */ + return (eax & 0xe6) == 0xe6; +} + +#else +#include +#endif + +ElfW(Addr) +pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, La_regs *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", + symname, (long int) sym->st_value, ndx, *flags); + +#ifdef __AVX512F__ + if (check_avx512 () && strcmp (symname, "audit_test") == 0) + { + __m512i zero = _mm512_setzero_si512 (); + if (memcmp (®s->lr_vector[0], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[1], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[2], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[3], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[4], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[5], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[6], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[7], &zero, sizeof (zero))) + abort (); + + for (int i = 0; i < 8; i++) + regs->lr_vector[i].zmm[0] + = (La_x86_64_zmm) _mm512_set1_epi64 (i + 1); + + __m512i zmm = _mm512_set1_epi64 (-1); + asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" ); + asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" ); + asm volatile ("vmovdqa64 %0, %%zmm2" : : "x" (zmm) : "xmm2" ); + asm volatile ("vmovdqa64 %0, %%zmm3" : : "x" (zmm) : "xmm3" ); + asm volatile ("vmovdqa64 %0, %%zmm4" : : "x" (zmm) : "xmm4" ); + asm volatile ("vmovdqa64 %0, %%zmm5" : : "x" (zmm) : "xmm5" ); + asm volatile ("vmovdqa64 %0, %%zmm6" : : "x" (zmm) : "xmm6" ); + asm volatile ("vmovdqa64 %0, %%zmm7" : : "x" (zmm) : "xmm7" ); + + *framesizep = 1024; + } +#endif + + return sym->st_value; +} + +unsigned int +pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, const La_regs *inregs, La_retval *outregs, + const char *symname) +{ + printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n", + symname, (long int) sym->st_value, ndx, + (ptrdiff_t) outregs->int_retval); + +#ifdef __AVX512F__ + if (check_avx512 () && strcmp (symname, "audit_test") == 0) + { + __m512i zero = _mm512_setzero_si512 (); + if (memcmp (&outregs->lrv_vector0, &zero, sizeof (zero))) + abort (); + + for (int i = 0; i < 8; i++) + { + __m512i zmm = _mm512_set1_epi64 (i + 1); + if (memcmp (&inregs->lr_vector[i], &zmm, sizeof (zmm)) != 0) + abort (); + } + + outregs->lrv_vector0.zmm[0] + = (La_x86_64_zmm) _mm512_set1_epi64 (0x12349876); + + __m512i zmm = _mm512_set1_epi64 (-1); + asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" ); + asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" ); + } +#endif + + return 0; +} diff -urN glibc-2.17-c758a686/sysdeps/x86/Makefile glibc-2.17-c758a686/sysdeps/x86/Makefile --- glibc-2.17-c758a686/sysdeps/x86/Makefile 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86/Makefile 2014-09-11 16:06:03.121319867 -0400 @@ -2,8 +2,8 @@ CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ -mno-sse -mno-mmx) -tests: $(objpfx)tst-xmmymm.out -$(objpfx)tst-xmmymm.out: ../sysdeps/x86/tst-xmmymm.sh $(objpfx)ld.so +tests: $(objpfx)tst-xmmymmzmm.out +$(objpfx)tst-xmmymmzmm.out: ../sysdeps/x86/tst-xmmymmzmm.sh $(objpfx)ld.so @echo "Checking ld.so for SSE register use. This will take a few seconds..." $(SHELL) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@ endif diff -urN glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh --- glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh 1969-12-31 19:00:00.000000000 -0500 @@ -1,103 +0,0 @@ -#! /bin/bash -# Make sure no code in ld.so uses xmm/ymm registers on x86-64. -# Copyright (C) 2009-2012 Free Software Foundation, Inc. -# This file is part of the GNU C Library. - -# The GNU C Library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. - -# The GNU C Library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with the GNU C Library; if not, see -# . - -set -e - -objpfx="$1" -NM="$2" -OBJDUMP="$3" -READELF="$4" - -tmp=$(mktemp ${objpfx}tst-xmmymm.XXXXXX) -trap 'rm -f "$tmp"' 1 2 3 15 - -# List of object files we have to test -rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os | - awk '/^ "$tmp" -declare -a objects -objects=($(cat "$tmp")) - -objs="dl-runtime.os" -tocheck="dl-runtime.os" - -while test -n "$objs"; do - this="$objs" - objs="" - - for f in $this; do - undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}') - if test -n "$undef"; then - for s in $undef; do - for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do - if test "$obj" = "$s"; then - continue 2 - fi - done - for o in $rtldobjs; do - ro=$(echo "$objpfx"../*/"$o") - if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then - if ! (echo "$tocheck $objs" | fgrep -qs "$o"); then - echo "$o needed for $s" - objs="$objs $o" - fi - break; - fi - done - done - fi - done - tocheck="$tocheck$objs" -done - -echo -echo -echo "object files needed: $tocheck" - -cp /dev/null "$tmp" -for f in $tocheck; do - $OBJDUMP -d "$objpfx"../*/"$f" | - awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' | - while read fct; do - if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then - continue; - fi - echo "function $fct in $f modifies xmm/ymm" >> "$tmp" - result=1 - done -done - -if test -s "$tmp"; then - echo - echo - cat "$tmp" - result=1 -else - result=0 -fi - -rm "$tmp" -exit $result diff -urN glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh --- glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh 1969-12-31 19:00:00.000000000 -0500 +++ glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh 2014-09-11 16:05:10.073426623 -0400 @@ -0,0 +1,103 @@ +#! /bin/bash +# Make sure no code in ld.so uses xmm/ymm/zmm registers on x86-64. +# Copyright (C) 2009-2012 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# . + +set -e + +objpfx="$1" +NM="$2" +OBJDUMP="$3" +READELF="$4" + +tmp=$(mktemp ${objpfx}tst-xmmymmzmm.XXXXXX) +trap 'rm -f "$tmp"' 1 2 3 15 + +# List of object files we have to test +rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os | + awk '/^ "$tmp" +declare -a objects +objects=($(cat "$tmp")) + +objs="dl-runtime.os" +tocheck="dl-runtime.os" + +while test -n "$objs"; do + this="$objs" + objs="" + + for f in $this; do + undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}') + if test -n "$undef"; then + for s in $undef; do + for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do + if test "$obj" = "$s"; then + continue 2 + fi + done + for o in $rtldobjs; do + ro=$(echo "$objpfx"../*/"$o") + if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then + if ! (echo "$tocheck $objs" | fgrep -qs "$o"); then + echo "$o needed for $s" + objs="$objs $o" + fi + break; + fi + done + done + fi + done + tocheck="$tocheck$objs" +done + +echo +echo +echo "object files needed: $tocheck" + +cp /dev/null "$tmp" +for f in $tocheck; do + $OBJDUMP -d "$objpfx"../*/"$f" | + awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xyz]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' | + while read fct; do + if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then + continue; + fi + echo "function $fct in $f modifies xmm/ymm/zmm" >> "$tmp" + result=1 + done +done + +if test -s "$tmp"; then + echo + echo + cat "$tmp" + result=1 +else + result=0 +fi + +rm "$tmp" +exit $result