#
# AVX-512 support for glibc:
#
# Notes: Renamed configure.ac changes to configure.in.
#
# commit aa4de9cea5c07d43caeaca9722c2d417e9a2919c
# Author: H.J. Lu <hjl.tools@gmail.com>
# Date: Fri Mar 14 08:51:25 2014 -0700
#
# Check AVX-512 assembler support first
#
# It checks AVX-512 assembler support first and sets libc_cv_cc_avx512 to
# $libc_cv_asm_avx512, instead of yes. GCC won't support AVX-512 if
# assembler doesn't support it.
#
# * sysdeps/x86_64/configure.ac: Check AVX-512 assembler support
# first. Disable AVX-512 GCC support if assembler doesn't support
# it.
# * sysdeps/x86_64/configure: Regenerated.
#
# commit 2d63a517e4084ec80403cd9f278690fa8b676cc4
# Author: Igor Zamyatin <igor.zamyatin@intel.com>
# Date: Thu Mar 13 11:10:22 2014 -0700
#
# Save and restore AVX-512 zmm registers to x86-64 ld.so
#
# AVX-512 ISA adds 512-bit zmm registers. This patch updates
# _dl_runtime_profile to pass zmm registers to run-time audit. It also
# changes _dl_x86_64_save_sse and _dl_x86_64_restore_sse to upport zmm
# registers, which are called when only when RTLD_PREPARE_FOREIGN_CALL
# is used. Its performance impact is minimum.
#
# * config.h.in (HAVE_AVX512_SUPPORT): New #undef.
# (HAVE_AVX512_ASM_SUPPORT): Likewise.
# * sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New.
# (La_x86_64_vector): Add zmm.
# * sysdeps/x86_64/Makefile (tests): Add tst-audit10.
# (modules-names): Add tst-auditmod10a and tst-auditmod10b.
# ($(objpfx)tst-audit10): New target.
# ($(objpfx)tst-audit10.out): Likewise.
# (tst-audit10-ENV): New.
# (AVX512-CFLAGS): Likewise.
# (CFLAGS-tst-audit10.c): Likewise.
# (CFLAGS-tst-auditmod10a.c): Likewise.
# (CFLAGS-tst-auditmod10b.c): Likewise.
# * sysdeps/x86_64/configure.ac: Set config-cflags-avx512,
# HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT.
# * sysdeps/x86_64/configure: Regenerated.
# * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add
# AVX-512 zmm register support.
# (_dl_x86_64_save_sse): Likewise.
# (_dl_x86_64_restore_sse): Likewise.
# * sysdeps/x86_64/dl-trampoline.h: Updated to support different
# size vector registers.
# * sysdeps/x86_64/link-defines.sym (YMM_SIZE): New.
# (ZMM_SIZE): Likewise.
# * sysdeps/x86_64/tst-audit10.c: New file.
# * sysdeps/x86_64/tst-auditmod10a.c: Likewise.
# * sysdeps/x86_64/tst-auditmod10b.c: Likewise.
#
# In addition adds:
# https://sourceware.org/ml/libc-alpha/2014-09/msg00228.html
# To extend zmm register checking.
#
diff -urN glibc-2.17-c758a686/config.h.in glibc-2.17-c758a686.mod-mpx/config.h.in
--- glibc-2.17-c758a686/config.h.in 2014-09-10 23:11:14.605787816 -0400
+++ glibc-2.17-c758a686.mod-mpx/config.h.in 2014-09-10 23:16:36.331167056 -0400
@@ -101,6 +101,12 @@
/* Define if gcc supports VEX encoding. */
#undef HAVE_SSE2AVX_SUPPORT
+/* Define if compiler supports AVX512. */
+#undef HAVE_AVX512_SUPPORT
+
+/* Define if assembler supports AVX512. */
+#undef HAVE_AVX512_ASM_SUPPORT
+
/* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT
diff -urN glibc-2.17-c758a686/sysdeps/x86/bits/link.h glibc-2.17-c758a686.mod-mpx/sysdeps/x86/bits/link.h
--- glibc-2.17-c758a686/sysdeps/x86/bits/link.h 2012-12-24 22:02:13.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86/bits/link.h 2014-09-10 23:16:36.331167056 -0400
@@ -66,6 +66,8 @@
typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16)));
typedef float La_x86_64_ymm
__attribute__ ((__vector_size__ (32), __aligned__ (16)));
+typedef double La_x86_64_zmm
+ __attribute__ ((__vector_size__ (64), __aligned__ (16)));
# else
typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__)));
# endif
@@ -74,6 +76,7 @@
{
# if __GNUC_PREREQ (4,0)
La_x86_64_ymm ymm[2];
+ La_x86_64_zmm zmm[1];
# endif
La_x86_64_xmm xmm[4];
} La_x86_64_vector __attribute__ ((__aligned__ (16)));
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/configure
--- glibc-2.17-c758a686/sysdeps/x86_64/configure 2014-09-10 23:11:15.000787061 -0400
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/configure 2014-09-10 23:16:36.338167042 -0400
@@ -91,6 +91,59 @@
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5
+$as_echo_n "checking for AVX512 support in assembler... " >&6; }
+if ${libc_cv_asm_avx512+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.s <<\EOF
+ vmovdqu64 %zmm0, (%rsp)
+EOF
+if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_asm_avx512=yes
+else
+ libc_cv_asm_avx512=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512" >&5
+$as_echo "$libc_cv_asm_avx512" >&6; }
+if test $libc_cv_asm_avx512 == yes; then
+ $as_echo "#define HAVE_AVX512_ASM_SUPPORT 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support" >&5
+$as_echo_n "checking for AVX512 support... " >&6; }
+if ${libc_cv_cc_avx512+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -mavx512f -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ libc_cv_cc_avx512=$libc_cv_asm_avx512
+else
+ libc_cv_cc_avx512=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx512" >&5
+$as_echo "$libc_cv_cc_avx512" >&6; }
+if test $libc_cv_cc_avx512 = yes; then
+ $as_echo "#define HAVE_AVX512_SUPPORT 1" >>confdefs.h
+
+fi
+config_vars="$config_vars
+config-cflags-avx512 = $libc_cv_cc_avx512"
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
if ${libc_cv_cc_sse2avx+:} false; then :
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure.in glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/configure.in
--- glibc-2.17-c758a686/sysdeps/x86_64/configure.in 2012-12-24 22:02:13.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/configure.in 2014-09-10 23:16:36.338167042 -0400
@@ -21,6 +21,30 @@
AC_DEFINE(HAVE_AVX_SUPPORT)
fi
+dnl Check if asm supports AVX512.
+AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl
+cat > conftest.s <<\EOF
+ vmovdqu64 %zmm0, (%rsp)
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_asm_avx512=yes
+else
+ libc_cv_asm_avx512=no
+fi
+rm -f conftest*])
+if test $libc_cv_asm_avx512 == yes; then
+ AC_DEFINE(HAVE_AVX512_ASM_SUPPORT)
+fi
+
+dnl Check if -mavx512f works.
+AC_CACHE_CHECK(for AVX512 support, libc_cv_cc_avx512, [dnl
+LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=$libc_cv_asm_avx512], [libc_cv_cc_avx512=no])
+])
+if test $libc_cv_cc_avx512 = yes; then
+ AC_DEFINE(HAVE_AVX512_SUPPORT)
+fi
+LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512])
+
dnl Check if -msse2avx works.
AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
LIBC_TRY_CC_OPTION([-msse2avx],
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure.in.orig glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/configure.in.orig
--- glibc-2.17-c758a686/sysdeps/x86_64/configure.in.orig 1969-12-31 19:00:00.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/configure.in.orig 2014-09-10 23:16:28.418182701 -0400
@@ -0,0 +1,52 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/x86_64.
+
+AC_CHECK_HEADER([cpuid.h], ,
+ [AC_MSG_ERROR([gcc must provide the <cpuid.h> header])],
+ [/* No default includes. */])
+
+dnl Check if -msse4 works.
+AC_CACHE_CHECK(for SSE4 support, libc_cv_cc_sse4, [dnl
+LIBC_TRY_CC_OPTION([-msse4], [libc_cv_cc_sse4=yes], [libc_cv_cc_sse4=no])
+])
+if test $libc_cv_cc_sse4 = yes; then
+ AC_DEFINE(HAVE_SSE4_SUPPORT)
+fi
+
+dnl Check if -mavx works.
+AC_CACHE_CHECK(for AVX support, libc_cv_cc_avx, [dnl
+LIBC_TRY_CC_OPTION([-mavx], [libc_cv_cc_avx=yes], [libc_cv_cc_avx=no])
+])
+if test $libc_cv_cc_avx = yes; then
+ AC_DEFINE(HAVE_AVX_SUPPORT)
+fi
+
+dnl Check if -msse2avx works.
+AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
+LIBC_TRY_CC_OPTION([-msse2avx],
+ [libc_cv_cc_sse2avx=yes],
+ [libc_cv_cc_sse2avx=no])
+])
+if test $libc_cv_cc_sse2avx = yes; then
+ AC_DEFINE(HAVE_SSE2AVX_SUPPORT)
+fi
+
+dnl Check if -mfma4 works.
+AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
+LIBC_TRY_CC_OPTION([-mfma4], [libc_cv_cc_fma4=yes], [libc_cv_cc_fma4=no])
+])
+if test $libc_cv_cc_fma4 = yes; then
+ AC_DEFINE(HAVE_FMA4_SUPPORT)
+fi
+
+dnl Check if -mno-vzeroupper works.
+AC_CACHE_CHECK(for -mno-vzeroupper support, libc_cv_cc_novzeroupper, [dnl
+LIBC_TRY_CC_OPTION([-mno-vzeroupper],
+ [libc_cv_cc_novzeroupper=yes],
+ [libc_cv_cc_novzeroupper=no])
+])
+
+dnl It is always possible to access static and hidden symbols in an
+dnl position independent way.
+AC_DEFINE(PI_STATIC_AND_HIDDEN)
+# work around problem with autoconf and empty lines at the end of files
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure.orig glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/configure.orig
--- glibc-2.17-c758a686/sysdeps/x86_64/configure.orig 1969-12-31 19:00:00.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/configure.orig 2014-09-10 23:16:28.419182699 -0400
@@ -0,0 +1,164 @@
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+ac_fn_c_check_header_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ eval "$3=yes"
+else
+ eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_compile
+# This file is generated from configure.in by Autoconf. DO NOT EDIT!
+ # Local configure fragment for sysdeps/x86_64.
+
+
+ac_fn_c_check_header_compile "$LINENO" "cpuid.h" "ac_cv_header_cpuid_h" "/* No default includes. */
+"
+if test "x$ac_cv_header_cpuid_h" = xyes; then :
+
+else
+ as_fn_error $? "gcc must provide the <cpuid.h> header" "$LINENO" 5
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SSE4 support" >&5
+$as_echo_n "checking for SSE4 support... " >&6; }
+if ${libc_cv_cc_sse4+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -msse4 -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ libc_cv_cc_sse4=yes
+else
+ libc_cv_cc_sse4=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse4" >&5
+$as_echo "$libc_cv_cc_sse4" >&6; }
+if test $libc_cv_cc_sse4 = yes; then
+ $as_echo "#define HAVE_SSE4_SUPPORT 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX support" >&5
+$as_echo_n "checking for AVX support... " >&6; }
+if ${libc_cv_cc_avx+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -mavx -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ libc_cv_cc_avx=yes
+else
+ libc_cv_cc_avx=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx" >&5
+$as_echo "$libc_cv_cc_avx" >&6; }
+if test $libc_cv_cc_avx = yes; then
+ $as_echo "#define HAVE_AVX_SUPPORT 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
+$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
+if ${libc_cv_cc_sse2avx+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ libc_cv_cc_sse2avx=yes
+else
+ libc_cv_cc_sse2avx=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5
+$as_echo "$libc_cv_cc_sse2avx" >&6; }
+if test $libc_cv_cc_sse2avx = yes; then
+ $as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
+$as_echo_n "checking for FMA4 support... " >&6; }
+if ${libc_cv_cc_fma4+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ libc_cv_cc_fma4=yes
+else
+ libc_cv_cc_fma4=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_fma4" >&5
+$as_echo "$libc_cv_cc_fma4" >&6; }
+if test $libc_cv_cc_fma4 = yes; then
+ $as_echo "#define HAVE_FMA4_SUPPORT 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -mno-vzeroupper support" >&5
+$as_echo_n "checking for -mno-vzeroupper support... " >&6; }
+if ${libc_cv_cc_novzeroupper+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ libc_cv_cc_novzeroupper=yes
+else
+ libc_cv_cc_novzeroupper=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_novzeroupper" >&5
+$as_echo "$libc_cv_cc_novzeroupper" >&6; }
+
+$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
+
+# work around problem with autoconf and empty lines at the end of files
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/dl-trampoline.h
--- glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h 2012-12-24 22:02:13.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/dl-trampoline.h 2014-09-10 23:16:36.334167050 -0400
@@ -19,14 +19,14 @@
#ifdef RESTORE_AVX
/* This is to support AVX audit modules. */
- vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
- vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
- vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
- vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
- vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
- vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
- vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
- vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+ VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
+ VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
+ VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
+ VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
+ VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
+ VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
+ VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
+ VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
/* Save xmm0-xmm7 registers to detect if any of them are
changed by audit module. */
@@ -72,7 +72,7 @@
je 2f
vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
+2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
@@ -81,7 +81,7 @@
je 2f
vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
@@ -90,7 +90,7 @@
je 2f
vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
@@ -99,7 +99,7 @@
je 2f
vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
@@ -108,7 +108,7 @@
je 2f
vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
@@ -117,7 +117,7 @@
je 2f
vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
@@ -126,7 +126,7 @@
je 2f
vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
@@ -135,7 +135,7 @@
je 2f
vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
1:
@@ -213,8 +213,8 @@
#ifdef RESTORE_AVX
/* This is to support AVX audit modules. */
- vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
- vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
+ VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
+ VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx)
/* Save xmm0/xmm1 registers to detect if they are changed
by audit module. */
@@ -243,13 +243,13 @@
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
jne 1f
- vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
+ VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0)
1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
jne 1f
- vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
+ VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1)
1:
#endif
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/dl-trampoline.S
--- glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S 2012-12-24 22:02:13.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/dl-trampoline.S 2014-09-10 23:16:36.334167050 -0400
@@ -96,7 +96,7 @@
/* Actively align the La_x86_64_regs structure. */
andq $0xfffffffffffffff0, %rsp
-# ifdef HAVE_AVX_SUPPORT
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
/* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers
to detect if any xmm0-xmm7 registers are changed by audit
module. */
@@ -130,7 +130,7 @@
movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
-# ifdef HAVE_AVX_SUPPORT
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
.data
L(have_avx):
.zero 4
@@ -138,7 +138,7 @@
.previous
cmpl $0, L(have_avx)(%rip)
- jne 1f
+ jne L(defined)
movq %rbx, %r11 # Save rbx
movl $1, %eax
cpuid
@@ -147,18 +147,54 @@
// AVX and XSAVE supported?
andl $((1 << 28) | (1 << 27)), %ecx
cmpl $((1 << 28) | (1 << 27)), %ecx
- jne 2f
+ jne 10f
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ // AVX512 supported in processor?
+ movq %rbx, %r11 # Save rbx
+ xorl %ecx, %ecx
+ mov $0x7, %eax
+ cpuid
+ andl $(1 << 16), %ebx
+# endif
xorl %ecx, %ecx
// Get XFEATURE_ENABLED_MASK
xgetbv
- andl $0x6, %eax
-2: subl $0x5, %eax
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ test %ebx, %ebx
+ movq %r11, %rbx # Restore rbx
+ je 20f
+ // Verify that XCR0[7:5] = '111b' and
+ // XCR0[2:1] = '11b' which means
+ // that zmm state is enabled
+ andl $0xe6, %eax
+ cmpl $0xe6, %eax
+ jne 20f
+ movl %eax, L(have_avx)(%rip)
+L(avx512):
+# define RESTORE_AVX
+# define VMOV vmovdqu64
+# define VEC(i) zmm##i
+# define MORE_CODE
+# include "dl-trampoline.h"
+# undef VMOV
+# undef VEC
+# undef RESTORE_AVX
+# endif
+20: andl $0x6, %eax
+10: subl $0x5, %eax
movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
-1: js L(no_avx)
+L(defined):
+ js L(no_avx)
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ cmpl $0xe6, L(have_avx)(%rip)
+ je L(avx512)
+# endif
# define RESTORE_AVX
+# define VMOV vmovdqu
+# define VEC(i) ymm##i
# define MORE_CODE
# include "dl-trampoline.h"
@@ -180,9 +216,9 @@
.align 16
cfi_startproc
_dl_x86_64_save_sse:
-# ifdef HAVE_AVX_SUPPORT
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
cmpl $0, L(have_avx)(%rip)
- jne 1f
+ jne L(defined_5)
movq %rbx, %r11 # Save rbx
movl $1, %eax
cpuid
@@ -191,21 +227,43 @@
// AVX and XSAVE supported?
andl $((1 << 28) | (1 << 27)), %ecx
cmpl $((1 << 28) | (1 << 27)), %ecx
- jne 2f
+ jne 1f
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ // AVX512 supported in a processor?
+ movq %rbx, %r11 # Save rbx
+ xorl %ecx,%ecx
+ mov $0x7,%eax
+ cpuid
+ andl $(1 << 16), %ebx
+# endif
xorl %ecx, %ecx
// Get XFEATURE_ENABLED_MASK
xgetbv
- andl $0x6, %eax
- cmpl $0x6, %eax
- // Nonzero if SSE and AVX state saving is enabled.
- sete %al
-2: leal -1(%eax,%eax), %eax
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ test %ebx, %ebx
+ movq %r11, %rbx # Restore rbx
+ je 2f
+ // Verify that XCR0[7:5] = '111b' and
+ // XCR0[2:1] = '11b' which means
+ // that zmm state is enabled
+ andl $0xe6, %eax
+ movl %eax, L(have_avx)(%rip)
+ cmpl $0xe6, %eax
+ je L(avx512_5)
+# endif
+
+2: andl $0x6, %eax
+1: subl $0x5, %eax
movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
-1: js L(no_avx5)
+L(defined_5):
+ js L(no_avx5)
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ cmpl $0xe6, L(have_avx)(%rip)
+ je L(avx512_5)
+# endif
-# define YMM_SIZE 32
vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
@@ -215,6 +273,18 @@
vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
ret
+# ifdef HAVE_AVX512_ASM_SUPPORT
+L(avx512_5):
+ vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
+ vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
+ vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
+ vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
+ vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
+ vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
+ vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
+ vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
+ ret
+# endif
L(no_avx5):
# endif
movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
@@ -235,9 +305,13 @@
.align 16
cfi_startproc
_dl_x86_64_restore_sse:
-# ifdef HAVE_AVX_SUPPORT
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
cmpl $0, L(have_avx)(%rip)
js L(no_avx6)
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ cmpl $0xe6, L(have_avx)(%rip)
+ je L(avx512_6)
+# endif
vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
@@ -248,6 +322,18 @@
vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
ret
+# ifdef HAVE_AVX512_ASM_SUPPORT
+L(avx512_6):
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
+ ret
+# endif
L(no_avx6):
# endif
movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/link-defines.sym
--- glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym 2012-12-24 22:02:13.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/link-defines.sym 2014-09-10 23:16:36.335167048 -0400
@@ -4,6 +4,8 @@
--
VECTOR_SIZE sizeof (La_x86_64_vector)
XMM_SIZE sizeof (La_x86_64_xmm)
+YMM_SIZE sizeof (La_x86_64_ymm)
+ZMM_SIZE sizeof (La_x86_64_zmm)
LR_SIZE sizeof (struct La_x86_64_regs)
LR_RDX_OFFSET offsetof (struct La_x86_64_regs, lr_rdx)
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/Makefile glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/Makefile
--- glibc-2.17-c758a686/sysdeps/x86_64/Makefile 2012-12-24 22:02:13.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/Makefile 2014-09-10 23:22:04.269518711 -0400
@@ -37,6 +37,20 @@
$(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
$(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
+
+tests += tst-audit10
+modules-names += tst-auditmod10a tst-auditmod10b
+
+$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so
+$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so
+tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so
+
+ifeq (yes,$(config-cflags-avx512))
+AVX512-CFLAGS = -mavx512f
+CFLAGS-tst-audit10.c += $(AVX512-CFLAGS)
+CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS)
+CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS)
+endif
endif
ifeq ($(subdir),csu)
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/Makefile.orig glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/Makefile.orig
--- glibc-2.17-c758a686/sysdeps/x86_64/Makefile.orig 1969-12-31 19:00:00.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/Makefile.orig 2014-09-10 23:16:36.332167054 -0400
@@ -0,0 +1,44 @@
+# The i387 `long double' is a distinct type we support.
+long-double-fcts = yes
+
+ifeq ($(subdir),csu)
+sysdep_routines += hp-timing
+elide-routines.os += hp-timing
+gen-as-const-headers += link-defines.sym
+endif
+
+ifeq ($(subdir),gmon)
+sysdep_routines += _mcount
+endif
+
+ifeq ($(subdir),malloc)
+tests += tst-mallocalign1
+endif
+
+ifeq ($(subdir),string)
+sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii
+gen-as-const-headers += locale-defines.sym
+endif
+
+ifeq ($(subdir),elf)
+sysdep-dl-routines += tlsdesc dl-tlsdesc
+sysdep_routines += tlsdesc dl-tlsdesc
+sysdep-rtld-routines += tlsdesc dl-tlsdesc
+
+tests += tst-quad1 tst-quad2
+modules-names += tst-quadmod1 tst-quadmod2
+
+$(objpfx)tst-quad1: $(objpfx)tst-quadmod1.so
+$(objpfx)tst-quad2: $(objpfx)tst-quadmod2.so
+
+quad-pie-test += tst-quad1pie tst-quad2pie
+tests += $(quad-pie-test)
+tests-pie += $(quad-pie-test)
+
+$(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
+$(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
+endif
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += tlsdesc.sym
+endif
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/tst-audit10.c
--- glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c 1969-12-31 19:00:00.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/tst-audit10.c 2014-09-10 23:16:36.335167048 -0400
@@ -0,0 +1,70 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Test case for x86-64 preserved registers in dynamic linker. */
+
+#ifdef __AVX512F__
+#include <stdlib.h>
+#include <string.h>
+#include <cpuid.h>
+#include <immintrin.h>
+
+static int
+avx512_enabled (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+ if (!(ebx & bit_AVX512F))
+ return 0;
+
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+ /* Verify that ZMM, YMM and XMM states are enabled. */
+ return (eax & 0xe6) == 0xe6;
+}
+
+
+extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i,
+ __m512i, __m512i, __m512i, __m512i);
+int
+main (void)
+{
+ /* Run AVX512 test only if AVX512 is supported. */
+ if (avx512_enabled ())
+ {
+ __m512i zmm = _mm512_setzero_si512 ();
+ __m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm);
+
+ zmm = _mm512_set1_epi64 (0x12349876);
+
+ if (memcmp (&zmm, &ret, sizeof (ret)))
+ abort ();
+ }
+ return 0;
+}
+#else
+int
+main (void)
+{
+ return 0;
+}
+#endif
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/tst-auditmod10a.c
--- glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c 1969-12-31 19:00:00.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/tst-auditmod10a.c 2014-09-10 23:16:36.335167048 -0400
@@ -0,0 +1,65 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Test case for x86-64 preserved registers in dynamic linker. */
+
+#ifdef __AVX512F__
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m512i
+audit_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3,
+ __m512i x4, __m512i x5, __m512i x6, __m512i x7)
+{
+ __m512i zmm;
+
+ zmm = _mm512_set1_epi64 (1);
+ if (memcmp (&zmm, &x0, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (2);
+ if (memcmp (&zmm, &x1, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (3);
+ if (memcmp (&zmm, &x2, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (4);
+ if (memcmp (&zmm, &x3, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (5);
+ if (memcmp (&zmm, &x4, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (6);
+ if (memcmp (&zmm, &x5, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (7);
+ if (memcmp (&zmm, &x6, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (8);
+ if (memcmp (&zmm, &x7, sizeof (zmm)))
+ abort ();
+
+ return _mm512_setzero_si512 ();
+}
+#endif
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/tst-auditmod10b.c
--- glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c 1969-12-31 19:00:00.000000000 -0500
+++ glibc-2.17-c758a686.mod-mpx/sysdeps/x86_64/tst-auditmod10b.c 2014-09-10 23:16:36.336167046 -0400
@@ -0,0 +1,219 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Verify that changing AVX512 registers in audit library won't affect
+ function parameter passing/return. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+ setlinebuf (stdout);
+
+ printf ("version: %u\n", v);
+
+ char buf[20];
+ sprintf (buf, "%u", v);
+
+ return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ if (flag == LA_ACT_CONSISTENT)
+ printf ("activity: consistent\n");
+ else if (flag == LA_ACT_ADD)
+ printf ("activity: add\n");
+ else if (flag == LA_ACT_DELETE)
+ printf ("activity: delete\n");
+ else
+ printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ char buf[100];
+ const char *flagstr;
+ if (flag == LA_SER_ORIG)
+ flagstr = "LA_SET_ORIG";
+ else if (flag == LA_SER_LIBPATH)
+ flagstr = "LA_SER_LIBPATH";
+ else if (flag == LA_SER_RUNPATH)
+ flagstr = "LA_SER_RUNPATH";
+ else if (flag == LA_SER_CONFIG)
+ flagstr = "LA_SER_CONFIG";
+ else if (flag == LA_SER_DEFAULT)
+ flagstr = "LA_SER_DEFAULT";
+ else if (flag == LA_SER_SECURE)
+ flagstr = "LA_SER_SECURE";
+ else
+ {
+ sprintf (buf, "unknown flag %d", flag);
+ flagstr = buf;
+ }
+ printf ("objsearch: %s, %s\n", name, flagstr);
+
+ return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+ return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+ printf ("preinit\n");
+}
+
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf ("objclose\n");
+ return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ return sym->st_value;
+}
+
+#include <tst-audit.h>
+
+#ifdef __AVX512F__
+#include <immintrin.h>
+#include <cpuid.h>
+
+static int
+check_avx512 (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+ if (!(ebx & bit_AVX512F))
+ return 0;
+
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+ /* Verify that ZMM, YMM and XMM states are enabled. */
+ return (eax & 0xe6) == 0xe6;
+}
+
+#else
+#include <emmintrin.h>
+#endif
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+#ifdef __AVX512F__
+ if (check_avx512 () && strcmp (symname, "audit_test") == 0)
+ {
+ __m512i zero = _mm512_setzero_si512 ();
+ if (memcmp (®s->lr_vector[0], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[1], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[2], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[3], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[4], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[5], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[6], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[7], &zero, sizeof (zero)))
+ abort ();
+
+ for (int i = 0; i < 8; i++)
+ regs->lr_vector[i].zmm[0]
+ = (La_x86_64_zmm) _mm512_set1_epi64 (i + 1);
+
+ __m512i zmm = _mm512_set1_epi64 (-1);
+ asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" );
+ asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" );
+ asm volatile ("vmovdqa64 %0, %%zmm2" : : "x" (zmm) : "xmm2" );
+ asm volatile ("vmovdqa64 %0, %%zmm3" : : "x" (zmm) : "xmm3" );
+ asm volatile ("vmovdqa64 %0, %%zmm4" : : "x" (zmm) : "xmm4" );
+ asm volatile ("vmovdqa64 %0, %%zmm5" : : "x" (zmm) : "xmm5" );
+ asm volatile ("vmovdqa64 %0, %%zmm6" : : "x" (zmm) : "xmm6" );
+ asm volatile ("vmovdqa64 %0, %%zmm7" : : "x" (zmm) : "xmm7" );
+
+ *framesizep = 1024;
+ }
+#endif
+
+ return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+ const char *symname)
+{
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+ symname, (long int) sym->st_value, ndx,
+ (ptrdiff_t) outregs->int_retval);
+
+#ifdef __AVX512F__
+ if (check_avx512 () && strcmp (symname, "audit_test") == 0)
+ {
+ __m512i zero = _mm512_setzero_si512 ();
+ if (memcmp (&outregs->lrv_vector0, &zero, sizeof (zero)))
+ abort ();
+
+ for (int i = 0; i < 8; i++)
+ {
+ __m512i zmm = _mm512_set1_epi64 (i + 1);
+ if (memcmp (&inregs->lr_vector[i], &zmm, sizeof (zmm)) != 0)
+ abort ();
+ }
+
+ outregs->lrv_vector0.zmm[0]
+ = (La_x86_64_zmm) _mm512_set1_epi64 (0x12349876);
+
+ __m512i zmm = _mm512_set1_epi64 (-1);
+ asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" );
+ asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" );
+ }
+#endif
+
+ return 0;
+}
diff -urN glibc-2.17-c758a686/sysdeps/x86/Makefile glibc-2.17-c758a686.mod/sysdeps/x86/Makefile
--- glibc-2.17-c758a686/sysdeps/x86/Makefile 2012-12-24 22:02:13.000000000 -0500
+++ glibc-2.17-c758a686.mod/sysdeps/x86/Makefile 2014-09-11 16:06:03.121319867 -0400
@@ -2,8 +2,8 @@
CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
-mno-sse -mno-mmx)
-tests: $(objpfx)tst-xmmymm.out
-$(objpfx)tst-xmmymm.out: ../sysdeps/x86/tst-xmmymm.sh $(objpfx)ld.so
+tests: $(objpfx)tst-xmmymmzmm.out
+$(objpfx)tst-xmmymmzmm.out: ../sysdeps/x86/tst-xmmymmzmm.sh $(objpfx)ld.so
@echo "Checking ld.so for SSE register use. This will take a few seconds..."
$(SHELL) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@
endif
diff -urN glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh glibc-2.17-c758a686.mod/sysdeps/x86/tst-xmmymm.sh
--- glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh 2012-12-24 22:02:13.000000000 -0500
+++ glibc-2.17-c758a686.mod/sysdeps/x86/tst-xmmymm.sh 1969-12-31 19:00:00.000000000 -0500
@@ -1,103 +0,0 @@
-#! /bin/bash
-# Make sure no code in ld.so uses xmm/ymm registers on x86-64.
-# Copyright (C) 2009-2012 Free Software Foundation, Inc.
-# This file is part of the GNU C Library.
-
-# The GNU C Library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-
-# The GNU C Library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-
-# You should have received a copy of the GNU Lesser General Public
-# License along with the GNU C Library; if not, see
-# <http://www.gnu.org/licenses/>.
-
-set -e
-
-objpfx="$1"
-NM="$2"
-OBJDUMP="$3"
-READELF="$4"
-
-tmp=$(mktemp ${objpfx}tst-xmmymm.XXXXXX)
-trap 'rm -f "$tmp"' 1 2 3 15
-
-# List of object files we have to test
-rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os |
- awk '/^ </ { if ($5 == "(DW_TAG_compile_unit)") c=1; else c=0 } $2 == "DW_AT_name" { if (c == 1) print $NF }' |
- sed 's,\(.*/\|\)\([_[:alnum:]-]*[.]\).$,\2os,')
-rtldobjs="$rtldobjs $(ar t ${objpfx}rtld-libc.a)"
-
-# OBJECT symbols can be ignored.
-$READELF -sW ${objpfx}dl-allobjs.os ${objpfx}rtld-libc.a |
-egrep " OBJECT *GLOBAL " |
-awk '{if ($7 != "ABS") print $8 }' |
-sort -u > "$tmp"
-declare -a objects
-objects=($(cat "$tmp"))
-
-objs="dl-runtime.os"
-tocheck="dl-runtime.os"
-
-while test -n "$objs"; do
- this="$objs"
- objs=""
-
- for f in $this; do
- undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}')
- if test -n "$undef"; then
- for s in $undef; do
- for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do
- if test "$obj" = "$s"; then
- continue 2
- fi
- done
- for o in $rtldobjs; do
- ro=$(echo "$objpfx"../*/"$o")
- if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then
- if ! (echo "$tocheck $objs" | fgrep -qs "$o"); then
- echo "$o needed for $s"
- objs="$objs $o"
- fi
- break;
- fi
- done
- done
- fi
- done
- tocheck="$tocheck$objs"
-done
-
-echo
-echo
-echo "object files needed: $tocheck"
-
-cp /dev/null "$tmp"
-for f in $tocheck; do
- $OBJDUMP -d "$objpfx"../*/"$f" |
- awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
- while read fct; do
- if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
- continue;
- fi
- echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
- result=1
- done
-done
-
-if test -s "$tmp"; then
- echo
- echo
- cat "$tmp"
- result=1
-else
- result=0
-fi
-
-rm "$tmp"
-exit $result
diff -urN glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh glibc-2.17-c758a686.mod/sysdeps/x86/tst-xmmymmzmm.sh
--- glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh 1969-12-31 19:00:00.000000000 -0500
+++ glibc-2.17-c758a686.mod/sysdeps/x86/tst-xmmymmzmm.sh 2014-09-11 16:05:10.073426623 -0400
@@ -0,0 +1,103 @@
+#! /bin/bash
+# Make sure no code in ld.so uses xmm/ymm/zmm registers on x86-64.
+# Copyright (C) 2009-2012 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+set -e
+
+objpfx="$1"
+NM="$2"
+OBJDUMP="$3"
+READELF="$4"
+
+tmp=$(mktemp ${objpfx}tst-xmmymmzmm.XXXXXX)
+trap 'rm -f "$tmp"' 1 2 3 15
+
+# List of object files we have to test
+rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os |
+ awk '/^ </ { if ($5 == "(DW_TAG_compile_unit)") c=1; else c=0 } $2 == "DW_AT_name" { if (c == 1) print $NF }' |
+ sed 's,\(.*/\|\)\([_[:alnum:]-]*[.]\).$,\2os,')
+rtldobjs="$rtldobjs $(ar t ${objpfx}rtld-libc.a)"
+
+# OBJECT symbols can be ignored.
+$READELF -sW ${objpfx}dl-allobjs.os ${objpfx}rtld-libc.a |
+egrep " OBJECT *GLOBAL " |
+awk '{if ($7 != "ABS") print $8 }' |
+sort -u > "$tmp"
+declare -a objects
+objects=($(cat "$tmp"))
+
+objs="dl-runtime.os"
+tocheck="dl-runtime.os"
+
+while test -n "$objs"; do
+ this="$objs"
+ objs=""
+
+ for f in $this; do
+ undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}')
+ if test -n "$undef"; then
+ for s in $undef; do
+ for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do
+ if test "$obj" = "$s"; then
+ continue 2
+ fi
+ done
+ for o in $rtldobjs; do
+ ro=$(echo "$objpfx"../*/"$o")
+ if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then
+ if ! (echo "$tocheck $objs" | fgrep -qs "$o"); then
+ echo "$o needed for $s"
+ objs="$objs $o"
+ fi
+ break;
+ fi
+ done
+ done
+ fi
+ done
+ tocheck="$tocheck$objs"
+done
+
+echo
+echo
+echo "object files needed: $tocheck"
+
+cp /dev/null "$tmp"
+for f in $tocheck; do
+ $OBJDUMP -d "$objpfx"../*/"$f" |
+ awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xyz]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
+ while read fct; do
+ if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
+ continue;
+ fi
+ echo "function $fct in $f modifies xmm/ymm/zmm" >> "$tmp"
+ result=1
+ done
+done
+
+if test -s "$tmp"; then
+ echo
+ echo
+ cat "$tmp"
+ result=1
+else
+ result=0
+fi
+
+rm "$tmp"
+exit $result