5de29b
#
5de29b
# AVX-512 support for glibc:
5de29b
#
5de29b
# Notes: Renamed configure.ac changes to configure.in.
5de29b
#
5de29b
# commit aa4de9cea5c07d43caeaca9722c2d417e9a2919c
5de29b
# Author: H.J. Lu <hjl.tools@gmail.com>
5de29b
# Date:   Fri Mar 14 08:51:25 2014 -0700
5de29b
# 
5de29b
#     Check AVX-512 assembler support first
5de29b
# 
5de29b
#     It checks AVX-512 assembler support first and sets libc_cv_cc_avx512 to
5de29b
#     $libc_cv_asm_avx512, instead of yes.  GCC won't support AVX-512 if
5de29b
#     assembler doesn't support it.
5de29b
# 
5de29b
#         * sysdeps/x86_64/configure.ac: Check AVX-512 assembler support
5de29b
#         first.  Disable AVX-512 GCC support if assembler doesn't support
5de29b
#         it.
5de29b
#         * sysdeps/x86_64/configure: Regenerated.
5de29b
# 
5de29b
# commit 2d63a517e4084ec80403cd9f278690fa8b676cc4
5de29b
# Author: Igor Zamyatin <igor.zamyatin@intel.com>
5de29b
# Date:   Thu Mar 13 11:10:22 2014 -0700
5de29b
# 
5de29b
#     Save and restore AVX-512 zmm registers to x86-64 ld.so
5de29b
#     
5de29b
#     AVX-512 ISA adds 512-bit zmm registers.  This patch updates
5de29b
#     _dl_runtime_profile to pass zmm registers to run-time audit. It also
5de29b
#     changes _dl_x86_64_save_sse and _dl_x86_64_restore_sse to upport zmm
5de29b
#     registers, which are called when only when RTLD_PREPARE_FOREIGN_CALL
5de29b
#     is used.  Its performance impact is minimum.
5de29b
#     
5de29b
#         * config.h.in (HAVE_AVX512_SUPPORT): New #undef.
5de29b
#         (HAVE_AVX512_ASM_SUPPORT): Likewise.
5de29b
#         * sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New.
5de29b
#         (La_x86_64_vector): Add zmm.
5de29b
#         * sysdeps/x86_64/Makefile (tests): Add tst-audit10.
5de29b
#         (modules-names): Add tst-auditmod10a and tst-auditmod10b.
5de29b
#         ($(objpfx)tst-audit10): New target.
5de29b
#         ($(objpfx)tst-audit10.out): Likewise.
5de29b
#         (tst-audit10-ENV): New.
5de29b
#         (AVX512-CFLAGS): Likewise.
5de29b
#         (CFLAGS-tst-audit10.c): Likewise.
5de29b
#         (CFLAGS-tst-auditmod10a.c): Likewise.
5de29b
#         (CFLAGS-tst-auditmod10b.c): Likewise.
5de29b
#         * sysdeps/x86_64/configure.ac: Set config-cflags-avx512,
5de29b
#         HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT.
5de29b
#         * sysdeps/x86_64/configure: Regenerated.
5de29b
#         * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add
5de29b
#         AVX-512 zmm register support.
5de29b
#         (_dl_x86_64_save_sse): Likewise.
5de29b
#         (_dl_x86_64_restore_sse): Likewise.
5de29b
#         * sysdeps/x86_64/dl-trampoline.h: Updated to support different
5de29b
#         size vector registers.
5de29b
#         * sysdeps/x86_64/link-defines.sym (YMM_SIZE): New.
5de29b
#         (ZMM_SIZE): Likewise. 
5de29b
#         * sysdeps/x86_64/tst-audit10.c: New file.
5de29b
#         * sysdeps/x86_64/tst-auditmod10a.c: Likewise.
5de29b
#         * sysdeps/x86_64/tst-auditmod10b.c: Likewise.
5de29b
# 
5de29b
# In addition adds:
5de29b
# https://sourceware.org/ml/libc-alpha/2014-09/msg00228.html
5de29b
# To extend zmm register checking.
5de29b
#
12745e
diff -urN glibc-2.17-c758a686/config.h.in glibc-2.17-c758a686/config.h.in
5de29b
--- glibc-2.17-c758a686/config.h.in	2014-09-10 23:11:14.605787816 -0400
12745e
+++ glibc-2.17-c758a686/config.h.in	2014-09-10 23:16:36.331167056 -0400
5de29b
@@ -101,6 +101,12 @@
5de29b
 /* Define if gcc supports VEX encoding.  */
5de29b
 #undef	HAVE_SSE2AVX_SUPPORT
5de29b
 
5de29b
+/* Define if compiler supports AVX512.  */
5de29b
+#undef  HAVE_AVX512_SUPPORT
5de29b
+
5de29b
+/* Define if assembler supports AVX512.  */
5de29b
+#undef  HAVE_AVX512_ASM_SUPPORT
5de29b
+
5de29b
 /* Define if gcc supports FMA4.  */
5de29b
 #undef	HAVE_FMA4_SUPPORT
5de29b
 
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86/bits/link.h glibc-2.17-c758a686/sysdeps/x86/bits/link.h
5de29b
--- glibc-2.17-c758a686/sysdeps/x86/bits/link.h	2012-12-24 22:02:13.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86/bits/link.h	2014-09-10 23:16:36.331167056 -0400
5de29b
@@ -66,6 +66,8 @@
5de29b
 typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16)));
5de29b
 typedef float La_x86_64_ymm
5de29b
     __attribute__ ((__vector_size__ (32), __aligned__ (16)));
5de29b
+typedef double La_x86_64_zmm
5de29b
+    __attribute__ ((__vector_size__ (64), __aligned__ (16)));
5de29b
 # else
5de29b
 typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__)));
5de29b
 # endif
5de29b
@@ -74,6 +76,7 @@
5de29b
 {
5de29b
 # if __GNUC_PREREQ (4,0)
5de29b
   La_x86_64_ymm ymm[2];
5de29b
+  La_x86_64_zmm zmm[1];
5de29b
 # endif
5de29b
   La_x86_64_xmm xmm[4];
5de29b
 } La_x86_64_vector __attribute__ ((__aligned__ (16)));
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure glibc-2.17-c758a686/sysdeps/x86_64/configure
5de29b
--- glibc-2.17-c758a686/sysdeps/x86_64/configure	2014-09-10 23:11:15.000787061 -0400
12745e
+++ glibc-2.17-c758a686/sysdeps/x86_64/configure	2014-09-10 23:16:36.338167042 -0400
5de29b
@@ -91,6 +91,59 @@
5de29b
 
5de29b
 fi
5de29b
 
5de29b
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5
5de29b
+$as_echo_n "checking for AVX512 support in assembler... " >&6; }
5de29b
+if ${libc_cv_asm_avx512+:} false; then :
5de29b
+  $as_echo_n "(cached) " >&6
5de29b
+else
5de29b
+  cat > conftest.s <<\EOF
5de29b
+        vmovdqu64 %zmm0, (%rsp)
5de29b
+EOF
5de29b
+if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
5de29b
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
5de29b
+  (eval $ac_try) 2>&5
5de29b
+  ac_status=$?
5de29b
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
5de29b
+  test $ac_status = 0; }; }; then
5de29b
+  libc_cv_asm_avx512=yes
5de29b
+else
5de29b
+  libc_cv_asm_avx512=no
5de29b
+fi
5de29b
+rm -f conftest*
5de29b
+fi
5de29b
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512" >&5
5de29b
+$as_echo "$libc_cv_asm_avx512" >&6; }
5de29b
+if test $libc_cv_asm_avx512 == yes; then
5de29b
+  $as_echo "#define HAVE_AVX512_ASM_SUPPORT 1" >>confdefs.h
5de29b
+
5de29b
+fi
5de29b
+
5de29b
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support" >&5
5de29b
+$as_echo_n "checking for AVX512 support... " >&6; }
5de29b
+if ${libc_cv_cc_avx512+:} false; then :
5de29b
+  $as_echo_n "(cached) " >&6
5de29b
+else
5de29b
+  if { ac_try='${CC-cc} -mavx512f -xc /dev/null -S -o /dev/null'
5de29b
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
5de29b
+  (eval $ac_try) 2>&5
5de29b
+  ac_status=$?
5de29b
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
5de29b
+  test $ac_status = 0; }; }; then :
5de29b
+  libc_cv_cc_avx512=$libc_cv_asm_avx512
5de29b
+else
5de29b
+  libc_cv_cc_avx512=no
5de29b
+fi
5de29b
+
5de29b
+fi
5de29b
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx512" >&5
5de29b
+$as_echo "$libc_cv_cc_avx512" >&6; }
5de29b
+if test $libc_cv_cc_avx512 = yes; then
5de29b
+  $as_echo "#define HAVE_AVX512_SUPPORT 1" >>confdefs.h
5de29b
+
5de29b
+fi
5de29b
+config_vars="$config_vars
5de29b
+config-cflags-avx512 = $libc_cv_cc_avx512"
5de29b
+
5de29b
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
5de29b
 $as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
5de29b
 if ${libc_cv_cc_sse2avx+:} false; then :
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure.in glibc-2.17-c758a686/sysdeps/x86_64/configure.in
5de29b
--- glibc-2.17-c758a686/sysdeps/x86_64/configure.in	2012-12-24 22:02:13.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86_64/configure.in	2014-09-10 23:16:36.338167042 -0400
5de29b
@@ -21,6 +21,30 @@
5de29b
   AC_DEFINE(HAVE_AVX_SUPPORT)
5de29b
 fi
5de29b
 
5de29b
+dnl Check if asm supports AVX512.
5de29b
+AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl
5de29b
+cat > conftest.s <<\EOF
5de29b
+        vmovdqu64 %zmm0, (%rsp)
5de29b
+EOF
5de29b
+if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
5de29b
+  libc_cv_asm_avx512=yes
5de29b
+else
5de29b
+  libc_cv_asm_avx512=no
5de29b
+fi
5de29b
+rm -f conftest*])
5de29b
+if test $libc_cv_asm_avx512 == yes; then
5de29b
+  AC_DEFINE(HAVE_AVX512_ASM_SUPPORT)
5de29b
+fi
5de29b
+
5de29b
+dnl Check if -mavx512f works.
5de29b
+AC_CACHE_CHECK(for AVX512 support, libc_cv_cc_avx512, [dnl
5de29b
+LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=$libc_cv_asm_avx512], [libc_cv_cc_avx512=no])
5de29b
+])
5de29b
+if test $libc_cv_cc_avx512 = yes; then
5de29b
+  AC_DEFINE(HAVE_AVX512_SUPPORT)
5de29b
+fi
5de29b
+LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512])
5de29b
+
5de29b
 dnl Check if -msse2avx works.
5de29b
 AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
5de29b
 LIBC_TRY_CC_OPTION([-msse2avx],
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h
5de29b
--- glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h	2012-12-24 22:02:13.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h	2014-09-10 23:16:36.334167050 -0400
5de29b
@@ -19,14 +19,14 @@
5de29b
 
5de29b
 #ifdef RESTORE_AVX
5de29b
 	/* This is to support AVX audit modules.  */
5de29b
-	vmovdqu %ymm0,		      (LR_VECTOR_OFFSET)(%rsp)
5de29b
-	vmovdqu %ymm1, (LR_VECTOR_OFFSET +   VECTOR_SIZE)(%rsp)
5de29b
-	vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
5de29b
-	vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
5de29b
-	vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
5de29b
-	vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
5de29b
-	vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
5de29b
-	vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
5de29b
+	VMOV %VEC(0),		      (LR_VECTOR_OFFSET)(%rsp)
5de29b
+	VMOV %VEC(1), (LR_VECTOR_OFFSET +   VECTOR_SIZE)(%rsp)
5de29b
+	VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
5de29b
+	VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
5de29b
+	VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
5de29b
+	VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
5de29b
+	VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
5de29b
+	VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
5de29b
 
5de29b
 	/* Save xmm0-xmm7 registers to detect if any of them are
5de29b
 	   changed by audit module.  */
5de29b
@@ -72,7 +72,7 @@
5de29b
 	je 2f
5de29b
 	vmovdqa	%xmm0, (LR_VECTOR_OFFSET)(%rsp)
5de29b
 	jmp 1f
5de29b
-2:	vmovdqu	(LR_VECTOR_OFFSET)(%rsp), %ymm0
5de29b
+2:	VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
5de29b
 	vmovdqa	%xmm0, (LR_XMM_OFFSET)(%rsp)
5de29b
 
5de29b
 1:	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
5de29b
@@ -81,7 +81,7 @@
5de29b
 	je 2f
5de29b
 	vmovdqa	%xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
5de29b
 	jmp 1f
5de29b
-2:	vmovdqu	(LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
5de29b
+2:	VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
5de29b
 	vmovdqa	%xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
5de29b
 
5de29b
 1:	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
5de29b
@@ -90,7 +90,7 @@
5de29b
 	je 2f
5de29b
 	vmovdqa	%xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
5de29b
 	jmp 1f
5de29b
-2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
5de29b
+2:	VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
5de29b
 	vmovdqa	%xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
5de29b
 
5de29b
 1:	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
5de29b
@@ -99,7 +99,7 @@
5de29b
 	je 2f
5de29b
 	vmovdqa	%xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
5de29b
 	jmp 1f
5de29b
-2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
5de29b
+2:	VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
5de29b
 	vmovdqa	%xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
5de29b
 
5de29b
 1:	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
5de29b
@@ -108,7 +108,7 @@
5de29b
 	je 2f
5de29b
 	vmovdqa	%xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
5de29b
 	jmp 1f
5de29b
-2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
5de29b
+2:	VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
5de29b
 	vmovdqa	%xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
5de29b
 
5de29b
 1:	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
5de29b
@@ -117,7 +117,7 @@
5de29b
 	je 2f
5de29b
 	vmovdqa	%xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
5de29b
 	jmp 1f
5de29b
-2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
5de29b
+2:	VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
5de29b
 	vmovdqa	%xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
5de29b
 
5de29b
 1:	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
5de29b
@@ -126,7 +126,7 @@
5de29b
 	je 2f
5de29b
 	vmovdqa	%xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
5de29b
 	jmp 1f
5de29b
-2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
5de29b
+2:	VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
5de29b
 	vmovdqa	%xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
5de29b
 
5de29b
 1:	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
5de29b
@@ -135,7 +135,7 @@
5de29b
 	je 2f
5de29b
 	vmovdqa	%xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
5de29b
 	jmp 1f
5de29b
-2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
5de29b
+2:	VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
5de29b
 	vmovdqa	%xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
5de29b
 
5de29b
 1:
5de29b
@@ -213,8 +213,8 @@
5de29b
 
5de29b
 #ifdef RESTORE_AVX
5de29b
 	/* This is to support AVX audit modules.  */
5de29b
-	vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
5de29b
-	vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
5de29b
+	VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
5de29b
+	VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx)
5de29b
 
5de29b
 	/* Save xmm0/xmm1 registers to detect if they are changed
5de29b
 	   by audit module.  */
5de29b
@@ -243,13 +243,13 @@
5de29b
 	vpmovmskb %xmm2, %esi
5de29b
 	cmpl $0xffff, %esi
5de29b
 	jne 1f
5de29b
-	vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
5de29b
+	VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0)
5de29b
 
5de29b
 1:	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
5de29b
 	vpmovmskb %xmm2, %esi
5de29b
 	cmpl $0xffff, %esi
5de29b
 	jne 1f
5de29b
-	vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
5de29b
+	VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1)
5de29b
 
5de29b
 1:
5de29b
 #endif
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S
5de29b
--- glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S	2012-12-24 22:02:13.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S	2014-09-10 23:16:36.334167050 -0400
5de29b
@@ -96,7 +96,7 @@
5de29b
 
5de29b
 	/* Actively align the La_x86_64_regs structure.  */
5de29b
 	andq $0xfffffffffffffff0, %rsp
5de29b
-# ifdef HAVE_AVX_SUPPORT
5de29b
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
5de29b
 	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
5de29b
 	   to detect if any xmm0-xmm7 registers are changed by audit
5de29b
 	   module.  */
5de29b
@@ -130,7 +130,7 @@
5de29b
 	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
5de29b
 	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
5de29b
 
5de29b
-# ifdef HAVE_AVX_SUPPORT
5de29b
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
5de29b
 	.data
5de29b
 L(have_avx):
5de29b
 	.zero 4
5de29b
@@ -138,7 +138,7 @@
5de29b
 	.previous
5de29b
 
5de29b
 	cmpl	$0, L(have_avx)(%rip)
5de29b
-	jne	1f
5de29b
+	jne	L(defined)
5de29b
 	movq	%rbx, %r11		# Save rbx
5de29b
 	movl	$1, %eax
5de29b
 	cpuid
5de29b
@@ -147,18 +147,54 @@
5de29b
 	// AVX and XSAVE supported?
5de29b
 	andl	$((1 << 28) | (1 << 27)), %ecx
5de29b
 	cmpl	$((1 << 28) | (1 << 27)), %ecx
5de29b
-	jne	2f
5de29b
+	jne	10f
5de29b
+#  ifdef HAVE_AVX512_ASM_SUPPORT
5de29b
+	// AVX512 supported in processor?
5de29b
+	movq	%rbx, %r11		# Save rbx
5de29b
+	xorl	%ecx, %ecx
5de29b
+	mov	$0x7, %eax
5de29b
+	cpuid
5de29b
+	andl	$(1 << 16), %ebx
5de29b
+#  endif
5de29b
 	xorl	%ecx, %ecx
5de29b
 	// Get XFEATURE_ENABLED_MASK
5de29b
 	xgetbv
5de29b
-	andl	$0x6, %eax
5de29b
-2:	subl	$0x5, %eax
5de29b
+#  ifdef HAVE_AVX512_ASM_SUPPORT
5de29b
+	test	%ebx, %ebx
5de29b
+	movq	%r11, %rbx		# Restore rbx
5de29b
+	je	20f
5de29b
+	// Verify that XCR0[7:5] = '111b' and
5de29b
+	// XCR0[2:1] = '11b' which means
5de29b
+	// that zmm state is enabled
5de29b
+	andl	$0xe6, %eax
5de29b
+	cmpl	$0xe6, %eax
5de29b
+	jne	20f
5de29b
+	movl	%eax, L(have_avx)(%rip)
5de29b
+L(avx512):
5de29b
+#   define RESTORE_AVX
5de29b
+#   define VMOV    vmovdqu64
5de29b
+#   define VEC(i)  zmm##i
5de29b
+#   define MORE_CODE
5de29b
+#   include "dl-trampoline.h"
5de29b
+#   undef VMOV
5de29b
+#   undef VEC
5de29b
+#   undef RESTORE_AVX
5de29b
+#  endif
5de29b
+20:	andl	$0x6, %eax
5de29b
+10:	subl	$0x5, %eax
5de29b
 	movl	%eax, L(have_avx)(%rip)
5de29b
 	cmpl	$0, %eax
5de29b
 
5de29b
-1:	js	L(no_avx)
5de29b
+L(defined):
5de29b
+	js	L(no_avx)
5de29b
+#  ifdef HAVE_AVX512_ASM_SUPPORT
5de29b
+	cmpl	$0xe6, L(have_avx)(%rip)
5de29b
+	je	L(avx512)
5de29b
+#  endif
5de29b
 
5de29b
 #  define RESTORE_AVX
5de29b
+#  define VMOV    vmovdqu
5de29b
+#  define VEC(i)  ymm##i
5de29b
 #  define MORE_CODE
5de29b
 #  include "dl-trampoline.h"
5de29b
 
5de29b
@@ -180,9 +216,9 @@
5de29b
 	.align 16
5de29b
 	cfi_startproc
5de29b
 _dl_x86_64_save_sse:
5de29b
-# ifdef HAVE_AVX_SUPPORT
5de29b
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
5de29b
 	cmpl	$0, L(have_avx)(%rip)
5de29b
-	jne	1f
5de29b
+	jne	L(defined_5)
5de29b
 	movq	%rbx, %r11		# Save rbx
5de29b
 	movl	$1, %eax
5de29b
 	cpuid
5de29b
@@ -191,21 +227,43 @@
5de29b
 	// AVX and XSAVE supported?
5de29b
 	andl	$((1 << 28) | (1 << 27)), %ecx
5de29b
 	cmpl	$((1 << 28) | (1 << 27)), %ecx
5de29b
-	jne	2f
5de29b
+	jne	1f
5de29b
+#  ifdef HAVE_AVX512_ASM_SUPPORT
5de29b
+	// AVX512 supported in a processor?
5de29b
+	movq	%rbx, %r11              # Save rbx
5de29b
+	xorl	%ecx,%ecx
5de29b
+	mov	$0x7,%eax
5de29b
+	cpuid
5de29b
+	andl	$(1 << 16), %ebx
5de29b
+#  endif
5de29b
 	xorl	%ecx, %ecx
5de29b
 	// Get XFEATURE_ENABLED_MASK
5de29b
 	xgetbv
5de29b
-	andl	$0x6, %eax
5de29b
-	cmpl	$0x6, %eax
5de29b
-	// Nonzero if SSE and AVX state saving is enabled.
5de29b
-	sete	%al
5de29b
-2:	leal	-1(%eax,%eax), %eax
5de29b
+#  ifdef HAVE_AVX512_ASM_SUPPORT
5de29b
+	test	%ebx, %ebx
5de29b
+	movq	%r11, %rbx		# Restore rbx
5de29b
+	je	2f
5de29b
+	// Verify that XCR0[7:5] = '111b' and
5de29b
+	// XCR0[2:1] = '11b' which means
5de29b
+	// that zmm state is enabled
5de29b
+	andl	$0xe6, %eax
5de29b
+	movl	%eax, L(have_avx)(%rip)
5de29b
+	cmpl	$0xe6, %eax
5de29b
+	je	L(avx512_5)
5de29b
+#  endif
5de29b
+
5de29b
+2:	andl	$0x6, %eax
5de29b
+1:	subl	$0x5, %eax
5de29b
 	movl	%eax, L(have_avx)(%rip)
5de29b
 	cmpl	$0, %eax
5de29b
 
5de29b
-1:	js	L(no_avx5)
5de29b
+L(defined_5):
5de29b
+	js	L(no_avx5)
5de29b
+#  ifdef HAVE_AVX512_ASM_SUPPORT
5de29b
+	cmpl	$0xe6, L(have_avx)(%rip)
5de29b
+	je	L(avx512_5)
5de29b
+#  endif
5de29b
 
5de29b
-#  define YMM_SIZE 32
5de29b
 	vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
5de29b
 	vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
5de29b
 	vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
5de29b
@@ -215,6 +273,18 @@
5de29b
 	vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
5de29b
 	vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
5de29b
 	ret
5de29b
+#  ifdef HAVE_AVX512_ASM_SUPPORT
5de29b
+L(avx512_5):
5de29b
+	vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
5de29b
+	vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
5de29b
+	vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
5de29b
+	vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
5de29b
+	vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
5de29b
+	vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
5de29b
+	vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
5de29b
+	vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
5de29b
+	ret
5de29b
+#  endif
5de29b
 L(no_avx5):
5de29b
 # endif
5de29b
 	movdqa	%xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
5de29b
@@ -235,9 +305,13 @@
5de29b
 	.align 16
5de29b
 	cfi_startproc
5de29b
 _dl_x86_64_restore_sse:
5de29b
-# ifdef HAVE_AVX_SUPPORT
5de29b
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
5de29b
 	cmpl	$0, L(have_avx)(%rip)
5de29b
 	js	L(no_avx6)
5de29b
+#  ifdef HAVE_AVX512_ASM_SUPPORT
5de29b
+	cmpl	$0xe6, L(have_avx)(%rip)
5de29b
+	je	L(avx512_6)
5de29b
+#  endif
5de29b
 
5de29b
 	vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
5de29b
 	vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
5de29b
@@ -248,6 +322,18 @@
5de29b
 	vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
5de29b
 	vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
5de29b
 	ret
5de29b
+#  ifdef HAVE_AVX512_ASM_SUPPORT
5de29b
+L(avx512_6):
5de29b
+	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
5de29b
+	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
5de29b
+	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
5de29b
+	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
5de29b
+	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
5de29b
+	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
5de29b
+	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
5de29b
+	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
5de29b
+	ret
5de29b
+#  endif
5de29b
 L(no_avx6):
5de29b
 # endif
5de29b
 	movdqa	%fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym
5de29b
--- glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym	2012-12-24 22:02:13.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym	2014-09-10 23:16:36.335167048 -0400
5de29b
@@ -4,6 +4,8 @@
5de29b
 --
5de29b
 VECTOR_SIZE		sizeof (La_x86_64_vector)
5de29b
 XMM_SIZE		sizeof (La_x86_64_xmm)
5de29b
+YMM_SIZE		sizeof (La_x86_64_ymm)
5de29b
+ZMM_SIZE		sizeof (La_x86_64_zmm)
5de29b
 
5de29b
 LR_SIZE			sizeof (struct La_x86_64_regs)
5de29b
 LR_RDX_OFFSET		offsetof (struct La_x86_64_regs, lr_rdx)
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/Makefile glibc-2.17-c758a686/sysdeps/x86_64/Makefile
5de29b
--- glibc-2.17-c758a686/sysdeps/x86_64/Makefile	2012-12-24 22:02:13.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86_64/Makefile	2014-09-10 23:22:04.269518711 -0400
5de29b
@@ -37,6 +37,20 @@
5de29b
 
5de29b
 $(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
5de29b
 $(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
5de29b
+
5de29b
+tests += tst-audit10
5de29b
+modules-names += tst-auditmod10a tst-auditmod10b
5de29b
+
5de29b
+$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so
5de29b
+$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so
5de29b
+tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so
5de29b
+
5de29b
+ifeq (yes,$(config-cflags-avx512))
5de29b
+AVX512-CFLAGS = -mavx512f
5de29b
+CFLAGS-tst-audit10.c += $(AVX512-CFLAGS)
5de29b
+CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS)
5de29b
+CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS)
5de29b
+endif
5de29b
 endif
5de29b
 
5de29b
 ifeq ($(subdir),csu)
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c
5de29b
--- glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c	1969-12-31 19:00:00.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c	2014-09-10 23:16:36.335167048 -0400
5de29b
@@ -0,0 +1,70 @@
5de29b
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
5de29b
+   This file is part of the GNU C Library.
5de29b
+
5de29b
+   The GNU C Library is free software; you can redistribute it and/or
5de29b
+   modify it under the terms of the GNU Lesser General Public
5de29b
+   License as published by the Free Software Foundation; either
5de29b
+   version 2.1 of the License, or (at your option) any later version.
5de29b
+
5de29b
+   The GNU C Library is distributed in the hope that it will be useful,
5de29b
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
5de29b
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
5de29b
+   Lesser General Public License for more details.
5de29b
+
5de29b
+   You should have received a copy of the GNU Lesser General Public
5de29b
+   License along with the GNU C Library; if not, see
5de29b
+   <http://www.gnu.org/licenses/>.  */
5de29b
+
5de29b
+/* Test case for x86-64 preserved registers in dynamic linker.  */
5de29b
+
5de29b
+#ifdef __AVX512F__
5de29b
+#include <stdlib.h>
5de29b
+#include <string.h>
5de29b
+#include <cpuid.h>
5de29b
+#include <immintrin.h>
5de29b
+
5de29b
+static int
5de29b
+avx512_enabled (void)
5de29b
+{
5de29b
+  unsigned int eax, ebx, ecx, edx;
5de29b
+
5de29b
+  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
5de29b
+      || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
5de29b
+    return 0;
5de29b
+
5de29b
+  __cpuid_count (7, 0, eax, ebx, ecx, edx);
5de29b
+  if (!(ebx & bit_AVX512F))
5de29b
+    return 0;
5de29b
+
5de29b
+  asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
5de29b
+
5de29b
+  /* Verify that ZMM, YMM and XMM states are enabled.  */
5de29b
+  return (eax & 0xe6) == 0xe6;
5de29b
+}
5de29b
+
5de29b
+
5de29b
+extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i,
5de29b
+			   __m512i, __m512i, __m512i, __m512i);
5de29b
+int
5de29b
+main (void)
5de29b
+{
5de29b
+  /* Run AVX512 test only if AVX512 is supported.  */
5de29b
+  if (avx512_enabled ())
5de29b
+    {
5de29b
+      __m512i zmm = _mm512_setzero_si512 ();
5de29b
+      __m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm);
5de29b
+
5de29b
+      zmm = _mm512_set1_epi64 (0x12349876);
5de29b
+
5de29b
+      if (memcmp (&zmm, &ret, sizeof (ret)))
5de29b
+	abort ();
5de29b
+    }
5de29b
+  return 0;
5de29b
+}
5de29b
+#else
5de29b
+int
5de29b
+main (void)
5de29b
+{
5de29b
+  return 0;
5de29b
+}
5de29b
+#endif
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c
5de29b
--- glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c	1969-12-31 19:00:00.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c	2014-09-10 23:16:36.335167048 -0400
5de29b
@@ -0,0 +1,65 @@
5de29b
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
5de29b
+   This file is part of the GNU C Library.
5de29b
+
5de29b
+   The GNU C Library is free software; you can redistribute it and/or
5de29b
+   modify it under the terms of the GNU Lesser General Public
5de29b
+   License as published by the Free Software Foundation; either
5de29b
+   version 2.1 of the License, or (at your option) any later version.
5de29b
+
5de29b
+   The GNU C Library is distributed in the hope that it will be useful,
5de29b
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
5de29b
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
5de29b
+   Lesser General Public License for more details.
5de29b
+
5de29b
+   You should have received a copy of the GNU Lesser General Public
5de29b
+   License along with the GNU C Library; if not, see
5de29b
+   <http://www.gnu.org/licenses/>.  */
5de29b
+
5de29b
+/* Test case for x86-64 preserved registers in dynamic linker.  */
5de29b
+
5de29b
+#ifdef __AVX512F__
5de29b
+#include <stdlib.h>
5de29b
+#include <string.h>
5de29b
+#include <immintrin.h>
5de29b
+
5de29b
+__m512i
5de29b
+audit_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3,
5de29b
+	    __m512i x4, __m512i x5, __m512i x6, __m512i x7)
5de29b
+{
5de29b
+  __m512i zmm;
5de29b
+
5de29b
+  zmm = _mm512_set1_epi64 (1);
5de29b
+  if (memcmp (&zmm, &x0, sizeof (zmm)))
5de29b
+    abort ();
5de29b
+
5de29b
+  zmm = _mm512_set1_epi64 (2);
5de29b
+  if (memcmp (&zmm, &x1, sizeof (zmm)))
5de29b
+    abort ();
5de29b
+
5de29b
+  zmm = _mm512_set1_epi64 (3);
5de29b
+  if (memcmp (&zmm, &x2, sizeof (zmm)))
5de29b
+    abort ();
5de29b
+
5de29b
+  zmm = _mm512_set1_epi64 (4);
5de29b
+  if (memcmp (&zmm, &x3, sizeof (zmm)))
5de29b
+    abort ();
5de29b
+
5de29b
+  zmm = _mm512_set1_epi64 (5);
5de29b
+  if (memcmp (&zmm, &x4, sizeof (zmm)))
5de29b
+    abort ();
5de29b
+
5de29b
+  zmm = _mm512_set1_epi64 (6);
5de29b
+  if (memcmp (&zmm, &x5, sizeof (zmm)))
5de29b
+    abort ();
5de29b
+
5de29b
+  zmm = _mm512_set1_epi64 (7);
5de29b
+  if (memcmp (&zmm, &x6, sizeof (zmm)))
5de29b
+    abort ();
5de29b
+
5de29b
+  zmm = _mm512_set1_epi64 (8);
5de29b
+  if (memcmp (&zmm, &x7, sizeof (zmm)))
5de29b
+    abort ();
5de29b
+
5de29b
+  return _mm512_setzero_si512 ();
5de29b
+}
5de29b
+#endif
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c
5de29b
--- glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c	1969-12-31 19:00:00.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c	2014-09-10 23:16:36.336167046 -0400
5de29b
@@ -0,0 +1,219 @@
5de29b
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
5de29b
+   This file is part of the GNU C Library.
5de29b
+
5de29b
+   The GNU C Library is free software; you can redistribute it and/or
5de29b
+   modify it under the terms of the GNU Lesser General Public
5de29b
+   License as published by the Free Software Foundation; either
5de29b
+   version 2.1 of the License, or (at your option) any later version.
5de29b
+
5de29b
+   The GNU C Library is distributed in the hope that it will be useful,
5de29b
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
5de29b
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
5de29b
+   Lesser General Public License for more details.
5de29b
+
5de29b
+   You should have received a copy of the GNU Lesser General Public
5de29b
+   License along with the GNU C Library; if not, see
5de29b
+   <http://www.gnu.org/licenses/>.  */
5de29b
+
5de29b
+/* Verify that changing AVX512 registers in audit library won't affect
5de29b
+   function parameter passing/return.  */
5de29b
+
5de29b
+#include <dlfcn.h>
5de29b
+#include <stdint.h>
5de29b
+#include <stdio.h>
5de29b
+#include <stdlib.h>
5de29b
+#include <string.h>
5de29b
+#include <unistd.h>
5de29b
+#include <bits/wordsize.h>
5de29b
+#include <gnu/lib-names.h>
5de29b
+
5de29b
+unsigned int
5de29b
+la_version (unsigned int v)
5de29b
+{
5de29b
+  setlinebuf (stdout);
5de29b
+
5de29b
+  printf ("version: %u\n", v);
5de29b
+
5de29b
+  char buf[20];
5de29b
+  sprintf (buf, "%u", v);
5de29b
+
5de29b
+  return v;
5de29b
+}
5de29b
+
5de29b
+void
5de29b
+la_activity (uintptr_t *cookie, unsigned int flag)
5de29b
+{
5de29b
+  if (flag == LA_ACT_CONSISTENT)
5de29b
+    printf ("activity: consistent\n");
5de29b
+  else if (flag == LA_ACT_ADD)
5de29b
+    printf ("activity: add\n");
5de29b
+  else if (flag == LA_ACT_DELETE)
5de29b
+    printf ("activity: delete\n");
5de29b
+  else
5de29b
+    printf ("activity: unknown activity %u\n", flag);
5de29b
+}
5de29b
+
5de29b
+char *
5de29b
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
5de29b
+{
5de29b
+  char buf[100];
5de29b
+  const char *flagstr;
5de29b
+  if (flag == LA_SER_ORIG)
5de29b
+    flagstr = "LA_SET_ORIG";
5de29b
+  else if (flag == LA_SER_LIBPATH)
5de29b
+    flagstr = "LA_SER_LIBPATH";
5de29b
+  else if (flag == LA_SER_RUNPATH)
5de29b
+    flagstr = "LA_SER_RUNPATH";
5de29b
+  else if (flag == LA_SER_CONFIG)
5de29b
+    flagstr = "LA_SER_CONFIG";
5de29b
+  else if (flag == LA_SER_DEFAULT)
5de29b
+    flagstr = "LA_SER_DEFAULT";
5de29b
+  else if (flag == LA_SER_SECURE)
5de29b
+    flagstr = "LA_SER_SECURE";
5de29b
+  else
5de29b
+    {
5de29b
+       sprintf (buf, "unknown flag %d", flag);
5de29b
+       flagstr = buf;
5de29b
+    }
5de29b
+  printf ("objsearch: %s, %s\n", name, flagstr);
5de29b
+
5de29b
+  return (char *) name;
5de29b
+}
5de29b
+
5de29b
+unsigned int
5de29b
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
5de29b
+{
5de29b
+  printf ("objopen: %ld, %s\n", lmid, l->l_name);
5de29b
+
5de29b
+  return 3;
5de29b
+}
5de29b
+
5de29b
+void
5de29b
+la_preinit (uintptr_t *cookie)
5de29b
+{
5de29b
+  printf ("preinit\n");
5de29b
+}
5de29b
+
5de29b
+unsigned int
5de29b
+la_objclose  (uintptr_t *cookie)
5de29b
+{
5de29b
+  printf ("objclose\n");
5de29b
+  return 0;
5de29b
+}
5de29b
+
5de29b
+uintptr_t
5de29b
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
5de29b
+	      uintptr_t *defcook, unsigned int *flags, const char *symname)
5de29b
+{
5de29b
+  printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
5de29b
+	  symname, (long int) sym->st_value, ndx, *flags);
5de29b
+
5de29b
+  return sym->st_value;
5de29b
+}
5de29b
+
5de29b
+#include <tst-audit.h>
5de29b
+
5de29b
+#ifdef __AVX512F__
5de29b
+#include <immintrin.h>
5de29b
+#include <cpuid.h>
5de29b
+
5de29b
+static int
5de29b
+check_avx512 (void)
5de29b
+{
5de29b
+  unsigned int eax, ebx, ecx, edx;
5de29b
+
5de29b
+  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
5de29b
+      || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
5de29b
+    return 0;
5de29b
+
5de29b
+  __cpuid_count (7, 0, eax, ebx, ecx, edx);
5de29b
+  if (!(ebx & bit_AVX512F))
5de29b
+    return 0;
5de29b
+
5de29b
+  asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
5de29b
+
5de29b
+  /* Verify that ZMM, YMM and XMM states are enabled.  */
5de29b
+  return (eax & 0xe6) == 0xe6;
5de29b
+}
5de29b
+
5de29b
+#else
5de29b
+#include <emmintrin.h>
5de29b
+#endif
5de29b
+
5de29b
+ElfW(Addr)
5de29b
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
5de29b
+	  uintptr_t *defcook, La_regs *regs, unsigned int *flags,
5de29b
+	  const char *symname, long int *framesizep)
5de29b
+{
5de29b
+  printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
5de29b
+	  symname, (long int) sym->st_value, ndx, *flags);
5de29b
+
5de29b
+#ifdef __AVX512F__
5de29b
+  if (check_avx512 () && strcmp (symname, "audit_test") == 0)
5de29b
+    {
5de29b
+      __m512i zero = _mm512_setzero_si512 ();
5de29b
+      if (memcmp (&regs->lr_vector[0], &zero, sizeof (zero))
5de29b
+	  || memcmp (&regs->lr_vector[1], &zero, sizeof (zero))
5de29b
+	  || memcmp (&regs->lr_vector[2], &zero, sizeof (zero))
5de29b
+	  || memcmp (&regs->lr_vector[3], &zero, sizeof (zero))
5de29b
+	  || memcmp (&regs->lr_vector[4], &zero, sizeof (zero))
5de29b
+	  || memcmp (&regs->lr_vector[5], &zero, sizeof (zero))
5de29b
+	  || memcmp (&regs->lr_vector[6], &zero, sizeof (zero))
5de29b
+	  || memcmp (&regs->lr_vector[7], &zero, sizeof (zero)))
5de29b
+	abort ();
5de29b
+
5de29b
+      for (int i = 0; i < 8; i++)
5de29b
+	regs->lr_vector[i].zmm[0]
5de29b
+	  = (La_x86_64_zmm) _mm512_set1_epi64 (i + 1);
5de29b
+
5de29b
+      __m512i zmm = _mm512_set1_epi64 (-1);
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" );
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" );
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm2" : : "x" (zmm) : "xmm2" );
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm3" : : "x" (zmm) : "xmm3" );
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm4" : : "x" (zmm) : "xmm4" );
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm5" : : "x" (zmm) : "xmm5" );
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm6" : : "x" (zmm) : "xmm6" );
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm7" : : "x" (zmm) : "xmm7" );
5de29b
+
5de29b
+      *framesizep = 1024;
5de29b
+    }
5de29b
+#endif
5de29b
+
5de29b
+  return sym->st_value;
5de29b
+}
5de29b
+
5de29b
+unsigned int
5de29b
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
5de29b
+	 uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
5de29b
+	 const char *symname)
5de29b
+{
5de29b
+  printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
5de29b
+	  symname, (long int) sym->st_value, ndx,
5de29b
+	  (ptrdiff_t) outregs->int_retval);
5de29b
+
5de29b
+#ifdef __AVX512F__
5de29b
+  if (check_avx512 () && strcmp (symname, "audit_test") == 0)
5de29b
+    {
5de29b
+      __m512i zero = _mm512_setzero_si512 ();
5de29b
+      if (memcmp (&outregs->lrv_vector0, &zero, sizeof (zero)))
5de29b
+	abort ();
5de29b
+
5de29b
+      for (int i = 0; i < 8; i++)
5de29b
+	{
5de29b
+	  __m512i zmm = _mm512_set1_epi64 (i + 1);
5de29b
+	  if (memcmp (&inregs->lr_vector[i], &zmm, sizeof (zmm)) != 0)
5de29b
+	    abort ();
5de29b
+	}
5de29b
+
5de29b
+      outregs->lrv_vector0.zmm[0]
5de29b
+	= (La_x86_64_zmm) _mm512_set1_epi64 (0x12349876);
5de29b
+
5de29b
+      __m512i zmm = _mm512_set1_epi64 (-1);
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" );
5de29b
+      asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" );
5de29b
+    }
5de29b
+#endif
5de29b
+
5de29b
+  return 0;
5de29b
+}
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86/Makefile glibc-2.17-c758a686/sysdeps/x86/Makefile
5de29b
--- glibc-2.17-c758a686/sysdeps/x86/Makefile	2012-12-24 22:02:13.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86/Makefile	2014-09-11 16:06:03.121319867 -0400
5de29b
@@ -2,8 +2,8 @@
5de29b
 CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
5de29b
 		   -mno-sse -mno-mmx)
5de29b
 
5de29b
-tests: $(objpfx)tst-xmmymm.out
5de29b
-$(objpfx)tst-xmmymm.out: ../sysdeps/x86/tst-xmmymm.sh $(objpfx)ld.so
5de29b
+tests: $(objpfx)tst-xmmymmzmm.out
5de29b
+$(objpfx)tst-xmmymmzmm.out: ../sysdeps/x86/tst-xmmymmzmm.sh $(objpfx)ld.so
5de29b
 	@echo "Checking ld.so for SSE register use.  This will take a few seconds..."
5de29b
 	$(SHELL) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@
5de29b
 endif
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh
5de29b
--- glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh	2012-12-24 22:02:13.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh	1969-12-31 19:00:00.000000000 -0500
5de29b
@@ -1,103 +0,0 @@
5de29b
-#! /bin/bash
5de29b
-# Make sure no code in ld.so uses xmm/ymm registers on x86-64.
5de29b
-# Copyright (C) 2009-2012 Free Software Foundation, Inc.
5de29b
-# This file is part of the GNU C Library.
5de29b
-
5de29b
-# The GNU C Library is free software; you can redistribute it and/or
5de29b
-# modify it under the terms of the GNU Lesser General Public
5de29b
-# License as published by the Free Software Foundation; either
5de29b
-# version 2.1 of the License, or (at your option) any later version.
5de29b
-
5de29b
-# The GNU C Library is distributed in the hope that it will be useful,
5de29b
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
5de29b
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
5de29b
-# Lesser General Public License for more details.
5de29b
-
5de29b
-# You should have received a copy of the GNU Lesser General Public
5de29b
-# License along with the GNU C Library; if not, see
5de29b
-# <http://www.gnu.org/licenses/>.
5de29b
-
5de29b
-set -e
5de29b
-
5de29b
-objpfx="$1"
5de29b
-NM="$2"
5de29b
-OBJDUMP="$3"
5de29b
-READELF="$4"
5de29b
-
5de29b
-tmp=$(mktemp ${objpfx}tst-xmmymm.XXXXXX)
5de29b
-trap 'rm -f "$tmp"' 1 2 3 15
5de29b
-
5de29b
-# List of object files we have to test
5de29b
-rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os |
5de29b
-    awk '/^ </ { if ($5 == "(DW_TAG_compile_unit)") c=1; else c=0 } $2 == "DW_AT_name" { if (c == 1) print $NF }' |
5de29b
-    sed 's,\(.*/\|\)\([_[:alnum:]-]*[.]\).$,\2os,')
5de29b
-rtldobjs="$rtldobjs $(ar t ${objpfx}rtld-libc.a)"
5de29b
-
5de29b
-# OBJECT symbols can be ignored.
5de29b
-$READELF -sW ${objpfx}dl-allobjs.os ${objpfx}rtld-libc.a |
5de29b
-egrep " OBJECT  *GLOBAL " |
5de29b
-awk '{if ($7 != "ABS") print $8 }' |
5de29b
-sort -u > "$tmp"
5de29b
-declare -a objects
5de29b
-objects=($(cat "$tmp"))
5de29b
-
5de29b
-objs="dl-runtime.os"
5de29b
-tocheck="dl-runtime.os"
5de29b
-
5de29b
-while test -n "$objs"; do
5de29b
-  this="$objs"
5de29b
-  objs=""
5de29b
-
5de29b
-  for f in $this; do
5de29b
-    undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}')
5de29b
-    if test -n "$undef"; then
5de29b
-      for s in $undef; do
5de29b
-	for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do
5de29b
-	  if test "$obj" = "$s"; then
5de29b
-	    continue 2
5de29b
-	  fi
5de29b
-	done
5de29b
-        for o in $rtldobjs; do
5de29b
-	  ro=$(echo "$objpfx"../*/"$o")
5de29b
-	  if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then
5de29b
-	    if ! (echo "$tocheck $objs" | fgrep -qs "$o"); then
5de29b
-	      echo "$o needed for $s"
5de29b
-	      objs="$objs $o"
5de29b
-	    fi
5de29b
-	    break;
5de29b
-	  fi
5de29b
-	done
5de29b
-      done
5de29b
-    fi
5de29b
-  done
5de29b
-  tocheck="$tocheck$objs"
5de29b
-done
5de29b
-
5de29b
-echo
5de29b
-echo
5de29b
-echo "object files needed: $tocheck"
5de29b
-
5de29b
-cp /dev/null "$tmp"
5de29b
-for f in $tocheck; do
5de29b
-  $OBJDUMP -d "$objpfx"../*/"$f" |
5de29b
-  awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
5de29b
-  while read fct; do
5de29b
-    if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
5de29b
-      continue;
5de29b
-    fi
5de29b
-    echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
5de29b
-    result=1
5de29b
-  done
5de29b
-done
5de29b
-
5de29b
-if test -s "$tmp"; then
5de29b
-  echo
5de29b
-  echo
5de29b
-  cat "$tmp"
5de29b
-  result=1
5de29b
-else
5de29b
-  result=0
5de29b
-fi
5de29b
-
5de29b
-rm "$tmp"
5de29b
-exit $result
12745e
diff -urN glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh
5de29b
--- glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh	1969-12-31 19:00:00.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh	2014-09-11 16:05:10.073426623 -0400
5de29b
@@ -0,0 +1,103 @@
5de29b
+#! /bin/bash
5de29b
+# Make sure no code in ld.so uses xmm/ymm/zmm registers on x86-64.
5de29b
+# Copyright (C) 2009-2012 Free Software Foundation, Inc.
5de29b
+# This file is part of the GNU C Library.
5de29b
+
5de29b
+# The GNU C Library is free software; you can redistribute it and/or
5de29b
+# modify it under the terms of the GNU Lesser General Public
5de29b
+# License as published by the Free Software Foundation; either
5de29b
+# version 2.1 of the License, or (at your option) any later version.
5de29b
+
5de29b
+# The GNU C Library is distributed in the hope that it will be useful,
5de29b
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
5de29b
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
5de29b
+# Lesser General Public License for more details.
5de29b
+
5de29b
+# You should have received a copy of the GNU Lesser General Public
5de29b
+# License along with the GNU C Library; if not, see
5de29b
+# <http://www.gnu.org/licenses/>.
5de29b
+
5de29b
+set -e
5de29b
+
5de29b
+objpfx="$1"
5de29b
+NM="$2"
5de29b
+OBJDUMP="$3"
5de29b
+READELF="$4"
5de29b
+
5de29b
+tmp=$(mktemp ${objpfx}tst-xmmymmzmm.XXXXXX)
5de29b
+trap 'rm -f "$tmp"' 1 2 3 15
5de29b
+
5de29b
+# List of object files we have to test
5de29b
+rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os |
5de29b
+    awk '/^ </ { if ($5 == "(DW_TAG_compile_unit)") c=1; else c=0 } $2 == "DW_AT_name" { if (c == 1) print $NF }' |
5de29b
+    sed 's,\(.*/\|\)\([_[:alnum:]-]*[.]\).$,\2os,')
5de29b
+rtldobjs="$rtldobjs $(ar t ${objpfx}rtld-libc.a)"
5de29b
+
5de29b
+# OBJECT symbols can be ignored.
5de29b
+$READELF -sW ${objpfx}dl-allobjs.os ${objpfx}rtld-libc.a |
5de29b
+egrep " OBJECT  *GLOBAL " |
5de29b
+awk '{if ($7 != "ABS") print $8 }' |
5de29b
+sort -u > "$tmp"
5de29b
+declare -a objects
5de29b
+objects=($(cat "$tmp"))
5de29b
+
5de29b
+objs="dl-runtime.os"
5de29b
+tocheck="dl-runtime.os"
5de29b
+
5de29b
+while test -n "$objs"; do
5de29b
+  this="$objs"
5de29b
+  objs=""
5de29b
+
5de29b
+  for f in $this; do
5de29b
+    undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}')
5de29b
+    if test -n "$undef"; then
5de29b
+      for s in $undef; do
5de29b
+	for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do
5de29b
+	  if test "$obj" = "$s"; then
5de29b
+	    continue 2
5de29b
+	  fi
5de29b
+	done
5de29b
+        for o in $rtldobjs; do
5de29b
+	  ro=$(echo "$objpfx"../*/"$o")
5de29b
+	  if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then
5de29b
+	    if ! (echo "$tocheck $objs" | fgrep -qs "$o"); then
5de29b
+	      echo "$o needed for $s"
5de29b
+	      objs="$objs $o"
5de29b
+	    fi
5de29b
+	    break;
5de29b
+	  fi
5de29b
+	done
5de29b
+      done
5de29b
+    fi
5de29b
+  done
5de29b
+  tocheck="$tocheck$objs"
5de29b
+done
5de29b
+
5de29b
+echo
5de29b
+echo
5de29b
+echo "object files needed: $tocheck"
5de29b
+
5de29b
+cp /dev/null "$tmp"
5de29b
+for f in $tocheck; do
5de29b
+  $OBJDUMP -d "$objpfx"../*/"$f" |
5de29b
+  awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xyz]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
5de29b
+  while read fct; do
5de29b
+    if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
5de29b
+      continue;
5de29b
+    fi
5de29b
+    echo "function $fct in $f modifies xmm/ymm/zmm" >> "$tmp"
5de29b
+    result=1
5de29b
+  done
5de29b
+done
5de29b
+
5de29b
+if test -s "$tmp"; then
5de29b
+  echo
5de29b
+  echo
5de29b
+  cat "$tmp"
5de29b
+  result=1
5de29b
+else
5de29b
+  result=0
5de29b
+fi
5de29b
+
5de29b
+rm "$tmp"
5de29b
+exit $result