bca718
We add back Prefer_SSE_for_memop since we still need it for all of the
bca718
existing era implementations for RHEL 7.3. To remove it would require
bca718
a more wholesale backport of optimized routines.
bca718
bca718
commit e2e4f56056adddc3c1efe676b40a4b4f2453103b
bca718
Author: H.J. Lu <hjl.tools@gmail.com>
bca718
Date:   Thu Aug 13 03:37:47 2015 -0700
bca718
bca718
    Add _dl_x86_cpu_features to rtld_global
bca718
    
bca718
    This patch adds _dl_x86_cpu_features to rtld_global in x86 ld.so
bca718
    and initializes it early before __libc_start_main is called so that
bca718
    cpu_features is always available when it is used and we can avoid
bca718
    calling __init_cpu_features in IFUNC selectors.
bca718
bca718
Index: glibc-2.17-c758a686/sysdeps/i386/dl-machine.h
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/i386/dl-machine.h
bca718
+++ glibc-2.17-c758a686/sysdeps/i386/dl-machine.h
bca718
@@ -25,6 +25,7 @@
bca718
 #include <sysdep.h>
bca718
 #include <tls.h>
bca718
 #include <dl-tlsdesc.h>
bca718
+#include <cpu-features.c>
bca718
 
bca718
 /* Return nonzero iff ELF header is compatible with the running host.  */
bca718
 static inline int __attribute__ ((unused))
bca718
@@ -266,6 +267,8 @@ dl_platform_init (void)
bca718
   if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
bca718
     /* Avoid an empty string which would disturb us.  */
bca718
     GLRO(dl_platform) = NULL;
bca718
+
bca718
+  init_cpu_features (&GLRO(dl_x86_cpu_features));
bca718
 }
bca718
 
bca718
 static inline Elf32_Addr
bca718
Index: glibc-2.17-c758a686/sysdeps/i386/dl-procinfo.c
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/i386/dl-procinfo.c
bca718
+++ glibc-2.17-c758a686/sysdeps/i386/dl-procinfo.c
bca718
@@ -43,6 +43,22 @@
bca718
 # define PROCINFO_CLASS
bca718
 #endif
bca718
 
bca718
+#if !IS_IN (ldconfig)
bca718
+# if !defined PROCINFO_DECL && defined SHARED
bca718
+  ._dl_x86_cpu_features
bca718
+# else
bca718
+PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features
bca718
+# endif
bca718
+# ifndef PROCINFO_DECL
bca718
+= { }
bca718
+# endif
bca718
+# if !defined SHARED || defined PROCINFO_DECL
bca718
+;
bca718
+# else
bca718
+,
bca718
+# endif
bca718
+#endif
bca718
+
bca718
 #if !defined PROCINFO_DECL && defined SHARED
bca718
   ._dl_x86_cap_flags
bca718
 #else
bca718
Index: glibc-2.17-c758a686/sysdeps/i386/i686/cacheinfo.c
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/i386/i686/cacheinfo.c
bca718
+++ glibc-2.17-c758a686/sysdeps/i386/i686/cacheinfo.c
bca718
@@ -8,6 +8,5 @@
bca718
 #define __x86_64_raw_shared_cache_size_half __x86_raw_shared_cache_size_half
bca718
 
bca718
 #define DISABLE_PREFETCHW
bca718
-#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
bca718
 
bca718
 #include <sysdeps/x86_64/cacheinfo.c>
bca718
Index: glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Makefile
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/i386/i686/multiarch/Makefile
bca718
+++ glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Makefile
bca718
@@ -1,5 +1,4 @@
bca718
 ifeq ($(subdir),csu)
bca718
-aux += init-arch
bca718
 tests += test-multiarch
bca718
 gen-as-const-headers += ifunc-defines.sym
bca718
 endif
bca718
Index: glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Versions
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/i386/i686/multiarch/Versions
bca718
+++ /dev/null
bca718
@@ -1,5 +0,0 @@
bca718
-libc {
bca718
-  GLIBC_PRIVATE {
bca718
-    __get_cpu_features;
bca718
-  }
bca718
-}
bca718
Index: glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/ifunc-defines.sym
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/i386/i686/multiarch/ifunc-defines.sym
bca718
+++ glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/ifunc-defines.sym
bca718
@@ -4,7 +4,6 @@
bca718
 --
bca718
 
bca718
 CPU_FEATURES_SIZE	sizeof (struct cpu_features)
bca718
-KIND_OFFSET		offsetof (struct cpu_features, kind)
bca718
 CPUID_OFFSET		offsetof (struct cpu_features, cpuid)
bca718
 CPUID_SIZE		sizeof (struct cpuid_registers)
bca718
 CPUID_EAX_OFFSET	offsetof (struct cpuid_registers, eax)
bca718
Index: glibc-2.17-c758a686/sysdeps/i386/ldsodefs.h
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/i386/ldsodefs.h
bca718
+++ glibc-2.17-c758a686/sysdeps/i386/ldsodefs.h
bca718
@@ -20,6 +20,7 @@
bca718
 #define	_I386_LDSODEFS_H	1
bca718
 
bca718
 #include <elf.h>
bca718
+#include <cpu-features.h>
bca718
 
bca718
 struct La_i86_regs;
bca718
 struct La_i86_retval;
bca718
Index: glibc-2.17-c758a686/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c
bca718
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c
bca718
@@ -1,5 +1,5 @@
bca718
 #if IS_IN (ldconfig)
bca718
 # include <sysdeps/i386/dl-procinfo.c>
bca718
 #else
bca718
-# include <sysdeps/generic/dl-procinfo.c>
bca718
+# include <sysdeps/x86_64/dl-procinfo.c>
bca718
 #endif
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/Makefile
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86/Makefile
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/Makefile
bca718
@@ -7,3 +7,14 @@ $(objpfx)tst-xmmymmzmm.out: ../sysdeps/x
bca718
 	@echo "Checking ld.so for SSE register use.  This will take a few seconds..."
bca718
 	$(SHELL) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@
bca718
 endif
bca718
+
bca718
+ifeq ($(subdir),csu)
bca718
+gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym
bca718
+endif
bca718
+
bca718
+ifeq ($(subdir),elf)
bca718
+sysdep-dl-routines += dl-get-cpu-features
bca718
+
bca718
+tests += tst-get-cpu-features
bca718
+tests-static += tst-get-cpu-features-static
bca718
+endif
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/Versions
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/Versions
bca718
@@ -0,0 +1,5 @@
bca718
+ld {
bca718
+  GLIBC_PRIVATE {
bca718
+    __get_cpu_features;
bca718
+  }
bca718
+}
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/cpu-features-offsets.sym
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/cpu-features-offsets.sym
bca718
@@ -0,0 +1,7 @@
bca718
+#define SHARED 1
bca718
+
bca718
+#include <ldsodefs.h>
bca718
+
bca718
+#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem)
bca718
+
bca718
+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features)
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/cpu-features.c
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/cpu-features.c
bca718
@@ -0,0 +1,213 @@
bca718
+/* Initialize CPU feature data.
bca718
+   This file is part of the GNU C Library.
bca718
+   Copyright (C) 2008-2015 Free Software Foundation, Inc.
bca718
+
bca718
+   The GNU C Library is free software; you can redistribute it and/or
bca718
+   modify it under the terms of the GNU Lesser General Public
bca718
+   License as published by the Free Software Foundation; either
bca718
+   version 2.1 of the License, or (at your option) any later version.
bca718
+
bca718
+   The GNU C Library is distributed in the hope that it will be useful,
bca718
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
bca718
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
bca718
+   Lesser General Public License for more details.
bca718
+
bca718
+   You should have received a copy of the GNU Lesser General Public
bca718
+   License along with the GNU C Library; if not, see
bca718
+   <http://www.gnu.org/licenses/>.  */
bca718
+
bca718
+#include <cpuid.h>
bca718
+#include <cpu-features.h>
bca718
+
bca718
+static inline void
bca718
+get_common_indeces (struct cpu_features *cpu_features,
bca718
+		    unsigned int *family, unsigned int *model)
bca718
+{
bca718
+  unsigned int eax;
bca718
+  __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
bca718
+	   cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
bca718
+	   cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
bca718
+  cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
bca718
+  *family = (eax >> 8) & 0x0f;
bca718
+  *model = (eax >> 4) & 0x0f;
bca718
+}
bca718
+
bca718
+static inline void
bca718
+init_cpu_features (struct cpu_features *cpu_features)
bca718
+{
bca718
+  unsigned int ebx, ecx, edx;
bca718
+  unsigned int family = 0;
bca718
+  unsigned int model = 0;
bca718
+  enum cpu_features_kind kind;
bca718
+
bca718
+  __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx);
bca718
+
bca718
+  /* This spells out "GenuineIntel".  */
bca718
+  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
bca718
+    {
bca718
+      kind = arch_kind_intel;
bca718
+
bca718
+      get_common_indeces (cpu_features, &family, &model);
bca718
+
bca718
+      /* Intel processors prefer SSE instruction for memory/string
bca718
+        routines if they are available.  */
bca718
+      cpu_features->feature[index_Prefer_SSE_for_memop]
bca718
+        |= bit_Prefer_SSE_for_memop;
bca718
+
bca718
+      unsigned int eax = cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax;
bca718
+      unsigned int extended_family = (eax >> 20) & 0xff;
bca718
+      unsigned int extended_model = (eax >> 12) & 0xf0;
bca718
+      if (family == 0x0f)
bca718
+	{
bca718
+	  family += extended_family;
bca718
+	  model += extended_model;
bca718
+	}
bca718
+      else if (family == 0x06)
bca718
+	{
bca718
+	  ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
bca718
+	  model += extended_model;
bca718
+	  switch (model)
bca718
+	    {
bca718
+	    case 0x1c:
bca718
+	    case 0x26:
bca718
+	      /* BSF is slow on Atom.  */
bca718
+	      cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF;
bca718
+	      break;
bca718
+
bca718
+	    case 0x37:
bca718
+	    case 0x4a:
bca718
+	    case 0x4d:
bca718
+	    case 0x5a:
bca718
+	    case 0x5d:
bca718
+	      /* Unaligned load versions are faster than SSSE3
bca718
+		 on Silvermont.  */
bca718
+#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
bca718
+# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
bca718
+#endif
bca718
+#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
bca718
+# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
bca718
+#endif
bca718
+	      cpu_features->feature[index_Fast_Unaligned_Load]
bca718
+		|= (bit_Fast_Unaligned_Load
bca718
+		    | bit_Prefer_PMINUB_for_stringop
bca718
+		    | bit_Slow_SSE4_2);
bca718
+	      break;
bca718
+
bca718
+	    default:
bca718
+	      /* Unknown family 0x06 processors.  Assuming this is one
bca718
+		 of Core i3/i5/i7 processors if AVX is available.  */
bca718
+	      if ((ecx & bit_AVX) == 0)
bca718
+		break;
bca718
+
bca718
+	    case 0x1a:
bca718
+	    case 0x1e:
bca718
+	    case 0x1f:
bca718
+	    case 0x25:
bca718
+	    case 0x2c:
bca718
+	    case 0x2e:
bca718
+	    case 0x2f:
bca718
+	      /* Rep string instructions, copy backward, unaligned loads
bca718
+		 and pminub are fast on Intel Core i3, i5 and i7.  */
bca718
+#if index_Fast_Rep_String != index_Fast_Copy_Backward
bca718
+# error index_Fast_Rep_String != index_Fast_Copy_Backward
bca718
+#endif
bca718
+#if index_Fast_Rep_String != index_Fast_Unaligned_Load
bca718
+# error index_Fast_Rep_String != index_Fast_Unaligned_Load
bca718
+#endif
bca718
+#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
bca718
+# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
bca718
+#endif
bca718
+	      cpu_features->feature[index_Fast_Rep_String]
bca718
+		|= (bit_Fast_Rep_String
bca718
+		    | bit_Fast_Copy_Backward
bca718
+		    | bit_Fast_Unaligned_Load
bca718
+		    | bit_Prefer_PMINUB_for_stringop);
bca718
+	      break;
bca718
+	    }
bca718
+	}
bca718
+    }
bca718
+  /* This spells out "AuthenticAMD".  */
bca718
+  else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
bca718
+    {
bca718
+      kind = arch_kind_amd;
bca718
+
bca718
+      get_common_indeces (cpu_features, &family, &model);
bca718
+
bca718
+      ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
bca718
+
bca718
+      /* AMD processors prefer SSE instructions for memory/string routines
bca718
+        if they are available, otherwise they prefer integer instructions.  */
bca718
+      if ((ecx & 0x200))
bca718
+	cpu_features->feature[index_Prefer_SSE_for_memop]
bca718
+	  |= bit_Prefer_SSE_for_memop;
bca718
+
bca718
+      unsigned int eax;
bca718
+      __cpuid (0x80000000, eax, ebx, ecx, edx);
bca718
+      if (eax >= 0x80000001)
bca718
+	__cpuid (0x80000001,
bca718
+		 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax,
bca718
+		 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
bca718
+		 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
bca718
+		 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
bca718
+    }
bca718
+  else
bca718
+    kind = arch_kind_other;
bca718
+
bca718
+  if (cpu_features->max_cpuid >= 7)
bca718
+    __cpuid_count (7, 0,
bca718
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
bca718
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
bca718
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
bca718
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
bca718
+
bca718
+  /* Can we call xgetbv?  */
bca718
+  if (HAS_CPU_FEATURE (OSXSAVE))
bca718
+    {
bca718
+      unsigned int xcrlow;
bca718
+      unsigned int xcrhigh;
bca718
+      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
bca718
+      /* Is YMM and XMM state usable?  */
bca718
+      if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
bca718
+	  (bit_YMM_state | bit_XMM_state))
bca718
+	{
bca718
+	  /* Determine if AVX is usable.  */
bca718
+	  if (HAS_CPU_FEATURE (AVX))
bca718
+	    cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable;
bca718
+#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
bca718
+# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
bca718
+#endif
bca718
+	  /* Determine if AVX2 is usable.  Unaligned load with 256-bit
bca718
+	     AVX registers are faster on processors with AVX2.  */
bca718
+	  if (HAS_CPU_FEATURE (AVX2))
bca718
+	    cpu_features->feature[index_AVX2_Usable]
bca718
+	      |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
bca718
+	  /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
bca718
+	     ZMM16-ZMM31 state are enabled.  */
bca718
+	  if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
bca718
+			 | bit_ZMM16_31_state)) ==
bca718
+	      (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
bca718
+	    {
bca718
+	      /* Determine if AVX512F is usable.  */
bca718
+	      if (HAS_CPU_FEATURE (AVX512F))
bca718
+		{
bca718
+		  cpu_features->feature[index_AVX512F_Usable]
bca718
+		    |= bit_AVX512F_Usable;
bca718
+		  /* Determine if AVX512DQ is usable.  */
bca718
+		  if (HAS_CPU_FEATURE (AVX512DQ))
bca718
+		    cpu_features->feature[index_AVX512DQ_Usable]
bca718
+		      |= bit_AVX512DQ_Usable;
bca718
+		}
bca718
+	    }
bca718
+	  /* Determine if FMA is usable.  */
bca718
+	  if (HAS_CPU_FEATURE (FMA))
bca718
+	    cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable;
bca718
+	  /* Determine if FMA4 is usable.  */
bca718
+	  if (HAS_CPU_FEATURE (FMA4))
bca718
+	    cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable;
bca718
+	}
bca718
+    }
bca718
+
bca718
+  cpu_features->family = family;
bca718
+  cpu_features->model = model;
bca718
+  cpu_features->kind = kind;
bca718
+}
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/cpu-features.h
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/cpu-features.h
bca718
@@ -0,0 +1,273 @@
bca718
+/* This file is part of the GNU C Library.
bca718
+   Copyright (C) 2008-2015 Free Software Foundation, Inc.
bca718
+
bca718
+   The GNU C Library is free software; you can redistribute it and/or
bca718
+   modify it under the terms of the GNU Lesser General Public
bca718
+   License as published by the Free Software Foundation; either
bca718
+   version 2.1 of the License, or (at your option) any later version.
bca718
+
bca718
+   The GNU C Library is distributed in the hope that it will be useful,
bca718
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
bca718
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
bca718
+   Lesser General Public License for more details.
bca718
+
bca718
+   You should have received a copy of the GNU Lesser General Public
bca718
+   License along with the GNU C Library; if not, see
bca718
+   <http://www.gnu.org/licenses/>.  */
bca718
+
bca718
+#ifndef cpu_features_h
bca718
+#define cpu_features_h
bca718
+
bca718
+#define bit_Fast_Rep_String		(1 << 0)
bca718
+#define bit_Fast_Copy_Backward		(1 << 1)
bca718
+#define bit_Slow_BSF			(1 << 2)
bca718
+#define bit_Prefer_SSE_for_memop	(1 << 3)
bca718
+#define bit_Fast_Unaligned_Load		(1 << 4)
bca718
+#define bit_Prefer_PMINUB_for_stringop	(1 << 5)
bca718
+#define bit_AVX_Usable			(1 << 6)
bca718
+#define bit_FMA_Usable			(1 << 7)
bca718
+#define bit_FMA4_Usable			(1 << 8)
bca718
+#define bit_Slow_SSE4_2			(1 << 9)
bca718
+#define bit_AVX2_Usable			(1 << 10)
bca718
+#define bit_AVX_Fast_Unaligned_Load	(1 << 11)
bca718
+#define bit_AVX512F_Usable		(1 << 12)
bca718
+#define bit_AVX512DQ_Usable		(1 << 13)
bca718
+
bca718
+/* CPUID Feature flags.  */
bca718
+
bca718
+/* COMMON_CPUID_INDEX_1.  */
bca718
+#define bit_SSE2	(1 << 26)
bca718
+#define bit_SSSE3	(1 << 9)
bca718
+#define bit_SSE4_1	(1 << 19)
bca718
+#define bit_SSE4_2	(1 << 20)
bca718
+#define bit_OSXSAVE	(1 << 27)
bca718
+#define bit_AVX		(1 << 28)
bca718
+#define bit_POPCOUNT	(1 << 23)
bca718
+#define bit_FMA		(1 << 12)
bca718
+#define bit_FMA4	(1 << 16)
bca718
+
bca718
+/* COMMON_CPUID_INDEX_7.  */
bca718
+#define bit_RTM		(1 << 11)
bca718
+#define bit_AVX2	(1 << 5)
bca718
+#define bit_AVX512F	(1 << 16)
bca718
+#define bit_AVX512DQ	(1 << 17)
bca718
+
bca718
+/* XCR0 Feature flags.  */
bca718
+#define bit_XMM_state  (1 << 1)
bca718
+#define bit_YMM_state  (2 << 1)
bca718
+#define bit_Opmask_state	(1 << 5)
bca718
+#define bit_ZMM0_15_state	(1 << 6)
bca718
+#define bit_ZMM16_31_state	(1 << 7)
bca718
+
bca718
+/* The integer bit array index for the first set of internal feature bits.  */
bca718
+#define FEATURE_INDEX_1 0
bca718
+
bca718
+/* The current maximum size of the feature integer bit array.  */
bca718
+#define FEATURE_INDEX_MAX 1
bca718
+
bca718
+#ifdef	__ASSEMBLER__
bca718
+
bca718
+# include <ifunc-defines.h>
bca718
+# include <rtld-global-offsets.h>
bca718
+
bca718
+# define index_SSE2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
bca718
+# define index_SSSE3	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
bca718
+# define index_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
bca718
+# define index_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
bca718
+# define index_AVX	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
bca718
+# define index_AVX2	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
bca718
+
bca718
+# define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_Slow_BSF			FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_FMA_Usable		FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_Slow_SSE4_2		FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_AVX2_Usable		FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_AVX_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_AVX512F_Usable		FEATURE_INDEX_1*FEATURE_SIZE
bca718
+# define index_AVX512DQ_Usable		FEATURE_INDEX_1*FEATURE_SIZE
bca718
+
bca718
+# if defined (_LIBC) && !IS_IN (nonlib)
bca718
+#  ifdef __x86_64__
bca718
+#   ifdef SHARED
bca718
+#    if IS_IN (rtld)
bca718
+#     define LOAD_RTLD_GLOBAL_RO_RDX
bca718
+#     define HAS_FEATURE(offset, name) \
bca718
+  testl $(bit_##name), _rtld_local_ro+offset+(index_##name)(%rip)
bca718
+#    else
bca718
+#      define LOAD_RTLD_GLOBAL_RO_RDX \
bca718
+  mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP
bca718
+#     define HAS_FEATURE(offset, name) \
bca718
+  testl $(bit_##name), \
bca718
+	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%rdx)
bca718
+#    endif
bca718
+#   else /* SHARED */
bca718
+#    define LOAD_RTLD_GLOBAL_RO_RDX
bca718
+#    define HAS_FEATURE(offset, name) \
bca718
+  testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)(%rip)
bca718
+#   endif /* !SHARED */
bca718
+#  else  /* __x86_64__ */
bca718
+#   ifdef SHARED
bca718
+#    define LOAD_FUNC_GOT_EAX(func) \
bca718
+  leal func@GOTOFF(%edx), %eax
bca718
+#    if IS_IN (rtld)
bca718
+#    define LOAD_GOT_AND_RTLD_GLOBAL_RO \
bca718
+  LOAD_PIC_REG(dx)
bca718
+#     define HAS_FEATURE(offset, name) \
bca718
+  testl $(bit_##name), offset+(index_##name)+_rtld_local_ro@GOTOFF(%edx)
bca718
+#    else
bca718
+#     define LOAD_GOT_AND_RTLD_GLOBAL_RO \
bca718
+  LOAD_PIC_REG(dx); \
bca718
+  mov _rtld_global_ro@GOT(%edx), %ecx
bca718
+#     define HAS_FEATURE(offset, name) \
bca718
+  testl $(bit_##name), \
bca718
+	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%ecx)
bca718
+#    endif
bca718
+#   else  /* SHARED */
bca718
+#    define LOAD_FUNC_GOT_EAX(func) \
bca718
+  leal func, %eax
bca718
+#    define LOAD_GOT_AND_RTLD_GLOBAL_RO
bca718
+#    define HAS_FEATURE(offset, name) \
bca718
+  testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)
bca718
+#   endif /* !SHARED */
bca718
+#  endif /* !__x86_64__ */
bca718
+# else /* _LIBC && !nonlib */
bca718
+#  error "Sorry, <cpu-features.h> is unimplemented for assembler"
bca718
+# endif /* !_LIBC || nonlib */
bca718
+
bca718
+/* HAS_* evaluates to true if we may use the feature at runtime.  */
bca718
+# define HAS_CPU_FEATURE(name)	HAS_FEATURE (CPUID_OFFSET, name)
bca718
+# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, name)
bca718
+
bca718
+#else	/* __ASSEMBLER__ */
bca718
+
bca718
+# include <sys/param.h>
bca718
+# include <sys/types.h>
bca718
+# include <sysdep.h>
bca718
+# include <stdbool.h>
bca718
+
bca718
+/* Ugly hack to make it possible to select a strstr and strcasestr
bca718
+   implementation that avoids using the stack for 16-byte aligned
bca718
+   SSE temporaries.  Doing so makes it possible to call the functions
bca718
+   with a stack that's not 16-byte aligned as can happen, for example,
bca718
+   as a result of compiling the functions' callers with the GCC
bca718
+   -mpreferred-stack-boundary=2 or =3 option, or with the ICC
bca718
+   -falign-stack=assume-4-byte option.  See rhbz 1150282 for details.
bca718
+
bca718
+   The ifunc selector uses the unaligned version by default if this
bca718
+   file exists and is accessible.  */
bca718
+# define ENABLE_STRSTR_UNALIGNED_PATHNAME \
bca718
+    "/etc/sysconfig/64bit_strstr_via_64bit_strstr_sse2_unaligned"
bca718
+
bca718
+static bool __attribute__ ((unused))
bca718
+use_unaligned_strstr (void)
bca718
+{
bca718
+  struct stat unaligned_strstr_etc_sysconfig_file;
bca718
+
bca718
+  /* TLS may not have been set up yet, so avoid using stat since it tries to
bca718
+     set errno.  */
bca718
+  return INTERNAL_SYSCALL (stat, , 2,
bca718
+			   ENABLE_STRSTR_UNALIGNED_PATHNAME,
bca718
+			   &unaligned_strstr_etc_sysconfig_file) == 0;
bca718
+}
bca718
+
bca718
+enum
bca718
+  {
bca718
+    COMMON_CPUID_INDEX_1 = 0,
bca718
+    COMMON_CPUID_INDEX_7,
bca718
+    COMMON_CPUID_INDEX_80000001,	/* for AMD */
bca718
+    /* Keep the following line at the end.  */
bca718
+    COMMON_CPUID_INDEX_MAX
bca718
+  };
bca718
+
bca718
+struct cpu_features
bca718
+{
bca718
+  enum cpu_features_kind
bca718
+    {
bca718
+      arch_kind_unknown = 0,
bca718
+      arch_kind_intel,
bca718
+      arch_kind_amd,
bca718
+      arch_kind_other
bca718
+    } kind;
bca718
+  int max_cpuid;
bca718
+  struct cpuid_registers
bca718
+  {
bca718
+    unsigned int eax;
bca718
+    unsigned int ebx;
bca718
+    unsigned int ecx;
bca718
+    unsigned int edx;
bca718
+  } cpuid[COMMON_CPUID_INDEX_MAX];
bca718
+  unsigned int family;
bca718
+  unsigned int model;
bca718
+  unsigned int feature[FEATURE_INDEX_MAX];
bca718
+};
bca718
+
bca718
+/* Used from outside of glibc to get access to the CPU features
bca718
+   structure.  */
bca718
+extern const struct cpu_features *__get_cpu_features (void)
bca718
+     __attribute__ ((const));
bca718
+
bca718
+# if defined (_LIBC) && !IS_IN (nonlib)
bca718
+/* Unused for x86.  */
bca718
+#  define INIT_ARCH()
bca718
+#  define __get_cpu_features()	(&GLRO(dl_x86_cpu_features))
bca718
+# endif
bca718
+
bca718
+
bca718
+/* HAS_* evaluates to true if we may use the feature at runtime.  */
bca718
+# define HAS_CPU_FEATURE(name) \
bca718
+  ((__get_cpu_features ()->cpuid[index_##name].reg_##name & (bit_##name)) != 0)
bca718
+# define HAS_ARCH_FEATURE(name) \
bca718
+  ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
bca718
+
bca718
+# define index_SSE2		COMMON_CPUID_INDEX_1
bca718
+# define index_SSSE3		COMMON_CPUID_INDEX_1
bca718
+# define index_SSE4_1		COMMON_CPUID_INDEX_1
bca718
+# define index_SSE4_2		COMMON_CPUID_INDEX_1
bca718
+# define index_AVX		COMMON_CPUID_INDEX_1
bca718
+# define index_AVX2		COMMON_CPUID_INDEX_7
bca718
+# define index_AVX512F		COMMON_CPUID_INDEX_7
bca718
+# define index_AVX512DQ		COMMON_CPUID_INDEX_7
bca718
+# define index_RTM		COMMON_CPUID_INDEX_7
bca718
+# define index_FMA		COMMON_CPUID_INDEX_1
bca718
+# define index_FMA4		COMMON_CPUID_INDEX_80000001
bca718
+# define index_POPCOUNT		COMMON_CPUID_INDEX_1
bca718
+# define index_OSXSAVE		COMMON_CPUID_INDEX_1
bca718
+
bca718
+# define reg_SSE2		edx
bca718
+# define reg_SSSE3		ecx
bca718
+# define reg_SSE4_1		ecx
bca718
+# define reg_SSE4_2		ecx
bca718
+# define reg_AVX		ecx
bca718
+# define reg_AVX2		ebx
bca718
+# define reg_AVX512F		ebx
bca718
+# define reg_AVX512DQ		ebx
bca718
+# define reg_RTM		ebx
bca718
+# define reg_FMA		ecx
bca718
+# define reg_FMA4		ecx
bca718
+# define reg_POPCOUNT		ecx
bca718
+# define reg_OSXSAVE		ecx
bca718
+
bca718
+# define index_Fast_Rep_String		FEATURE_INDEX_1
bca718
+# define index_Fast_Copy_Backward	FEATURE_INDEX_1
bca718
+# define index_Slow_BSF			FEATURE_INDEX_1
bca718
+# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
bca718
+# define index_Fast_Unaligned_Load	FEATURE_INDEX_1
bca718
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
bca718
+# define index_AVX_Usable		FEATURE_INDEX_1
bca718
+# define index_FMA_Usable		FEATURE_INDEX_1
bca718
+# define index_FMA4_Usable		FEATURE_INDEX_1
bca718
+# define index_Slow_SSE4_2		FEATURE_INDEX_1
bca718
+# define index_AVX2_Usable		FEATURE_INDEX_1
bca718
+# define index_AVX_Fast_Unaligned_Load	FEATURE_INDEX_1
bca718
+# define index_AVX512F_Usable		FEATURE_INDEX_1
bca718
+# define index_AVX512DQ_Usable		FEATURE_INDEX_1
bca718
+
bca718
+#endif	/* !__ASSEMBLER__ */
bca718
+
bca718
+#endif  /* cpu_features_h */
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/dl-get-cpu-features.c
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/dl-get-cpu-features.c
bca718
@@ -0,0 +1,27 @@
bca718
+/* This file is part of the GNU C Library.
bca718
+   Copyright (C) 2015 Free Software Foundation, Inc.
bca718
+
bca718
+   The GNU C Library is free software; you can redistribute it and/or
bca718
+   modify it under the terms of the GNU Lesser General Public
bca718
+   License as published by the Free Software Foundation; either
bca718
+   version 2.1 of the License, or (at your option) any later version.
bca718
+
bca718
+   The GNU C Library is distributed in the hope that it will be useful,
bca718
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
bca718
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
bca718
+   Lesser General Public License for more details.
bca718
+
bca718
+   You should have received a copy of the GNU Lesser General Public
bca718
+   License along with the GNU C Library; if not, see
bca718
+   <http://www.gnu.org/licenses/>.  */
bca718
+
bca718
+
bca718
+#include <ldsodefs.h>
bca718
+
bca718
+#undef __get_cpu_features
bca718
+
bca718
+const struct cpu_features *
bca718
+__get_cpu_features (void)
bca718
+{
bca718
+  return &GLRO(dl_x86_cpu_features);
bca718
+}
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/libc-start.c
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/libc-start.c
bca718
@@ -0,0 +1,41 @@
bca718
+/* Copyright (C) 2015 Free Software Foundation, Inc.
bca718
+   This file is part of the GNU C Library.
bca718
+
bca718
+   The GNU C Library is free software; you can redistribute it and/or
bca718
+   modify it under the terms of the GNU Lesser General Public
bca718
+   License as published by the Free Software Foundation; either
bca718
+   version 2.1 of the License, or (at your option) any later version.
bca718
+
bca718
+   The GNU C Library is distributed in the hope that it will be useful,
bca718
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
bca718
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
bca718
+   Lesser General Public License for more details.
bca718
+
bca718
+   You should have received a copy of the GNU Lesser General Public
bca718
+   License along with the GNU C Library; if not, see
bca718
+   <http://www.gnu.org/licenses/>.  */
bca718
+
bca718
+#ifdef SHARED
bca718
+# include <csu/libc-start.c>
bca718
+# else
bca718
+/* The main work is done in the generic function.  */
bca718
+# define LIBC_START_DISABLE_INLINE
bca718
+# define LIBC_START_MAIN generic_start_main
bca718
+# include <csu/libc-start.c>
bca718
+# include <cpu-features.h>
bca718
+# include <cpu-features.c>
bca718
+
bca718
+extern struct cpu_features _dl_x86_cpu_features;
bca718
+
bca718
+int
bca718
+__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
bca718
+		   int argc, char **argv,
bca718
+		   __typeof (main) init,
bca718
+		   void (*fini) (void),
bca718
+		   void (*rtld_fini) (void), void *stack_end)
bca718
+{
bca718
+  init_cpu_features (&_dl_x86_cpu_features);
bca718
+  return generic_start_main (main, argc, argv, init, fini, rtld_fini,
bca718
+			     stack_end);
bca718
+}
bca718
+#endif
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/rtld-global-offsets.sym
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/rtld-global-offsets.sym
bca718
@@ -0,0 +1,7 @@
bca718
+#define SHARED 1
bca718
+
bca718
+#include <ldsodefs.h>
bca718
+
bca718
+#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem)
bca718
+
bca718
+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features)
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/tst-get-cpu-features-static.c
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/tst-get-cpu-features-static.c
bca718
@@ -0,0 +1 @@
bca718
+#include "tst-get-cpu-features.c"
bca718
Index: glibc-2.17-c758a686/sysdeps/x86/tst-get-cpu-features.c
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86/tst-get-cpu-features.c
bca718
@@ -0,0 +1,31 @@
bca718
+/* Test case for x86 __get_cpu_features interface
bca718
+   Copyright (C) 2015 Free Software Foundation, Inc.
bca718
+   This file is part of the GNU C Library.
bca718
+
bca718
+   The GNU C Library is free software; you can redistribute it and/or
bca718
+   modify it under the terms of the GNU Lesser General Public
bca718
+   License as published by the Free Software Foundation; either
bca718
+   version 2.1 of the License, or (at your option) any later version.
bca718
+
bca718
+   The GNU C Library is distributed in the hope that it will be useful,
bca718
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
bca718
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
bca718
+   Lesser General Public License for more details.
bca718
+
bca718
+   You should have received a copy of the GNU Lesser General Public
bca718
+   License along with the GNU C Library; if not, see
bca718
+   <http://www.gnu.org/licenses/>.  */
bca718
+
bca718
+#include <stdlib.h>
bca718
+#include <cpu-features.h>
bca718
+
bca718
+static int
bca718
+do_test (void)
bca718
+{
bca718
+  if (__get_cpu_features ()->kind == arch_kind_unknown)
bca718
+    abort ();
bca718
+  return 0;
bca718
+}
bca718
+
bca718
+#define TEST_FUNCTION do_test ()
bca718
+#include "../../test-skeleton.c"
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/cacheinfo.c
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/cacheinfo.c
bca718
+++ glibc-2.17-c758a686/sysdeps/x86_64/cacheinfo.c
bca718
@@ -21,40 +21,11 @@
bca718
 #include <stdlib.h>
bca718
 #include <unistd.h>
bca718
 #include <cpuid.h>
bca718
+#include "multiarch/init-arch.h"
bca718
 
bca718
-#ifndef __cpuid_count
bca718
-/* FIXME: Provide __cpuid_count if it isn't defined.  Copied from gcc
bca718
-   4.4.0.  Remove this if gcc 4.4 is the minimum requirement.  */
bca718
-# if defined(__i386__) && defined(__PIC__)
bca718
-/* %ebx may be the PIC register.  */
bca718
-#  define __cpuid_count(level, count, a, b, c, d)		\
bca718
-  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"			\
bca718
-	   "cpuid\n\t"					\
bca718
-	   "xchg{l}\t{%%}ebx, %1\n\t"			\
bca718
-	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
bca718
-	   : "0" (level), "2" (count))
bca718
-# else
bca718
-#  define __cpuid_count(level, count, a, b, c, d)		\
bca718
-  __asm__ ("cpuid\n\t"					\
bca718
-	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
bca718
-	   : "0" (level), "2" (count))
bca718
-# endif
bca718
-#endif
bca718
-
bca718
-#ifdef USE_MULTIARCH
bca718
-# include "multiarch/init-arch.h"
bca718
-
bca718
-# define is_intel __cpu_features.kind == arch_kind_intel
bca718
-# define is_amd __cpu_features.kind == arch_kind_amd
bca718
-# define max_cpuid __cpu_features.max_cpuid
bca718
-#else
bca718
-  /* This spells out "GenuineIntel".  */
bca718
-# define is_intel \
bca718
-  ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
bca718
-  /* This spells out "AuthenticAMD".  */
bca718
-# define is_amd \
bca718
-  ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
bca718
-#endif
bca718
+#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
bca718
+#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
bca718
+#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
bca718
 
bca718
 static const struct intel_02_cache_info
bca718
 {
bca718
@@ -237,21 +208,8 @@ intel_check_word (int name, unsigned int
bca718
 	      /* Intel reused this value.  For family 15, model 6 it
bca718
 		 specifies the 3rd level cache.  Otherwise the 2nd
bca718
 		 level cache.  */
bca718
-	      unsigned int family;
bca718
-	      unsigned int model;
bca718
-#ifdef USE_MULTIARCH
bca718
-	      family = __cpu_features.family;
bca718
-	      model = __cpu_features.model;
bca718
-#else
bca718
-	      unsigned int eax;
bca718
-	      unsigned int ebx;
bca718
-	      unsigned int ecx;
bca718
-	      unsigned int edx;
bca718
-	      __cpuid (1, eax, ebx, ecx, edx);
bca718
-
bca718
-	      family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
bca718
-	      model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
bca718
-#endif
bca718
+	      unsigned int family = GLRO(dl_x86_cpu_features).family;
bca718
+	      unsigned int model = GLRO(dl_x86_cpu_features).model;
bca718
 
bca718
 	      if (family == 15 && model == 6)
bca718
 		{
bca718
@@ -478,18 +436,6 @@ long int
bca718
 attribute_hidden
bca718
 __cache_sysconf (int name)
bca718
 {
bca718
-#ifdef USE_MULTIARCH
bca718
-  if (__cpu_features.kind == arch_kind_unknown)
bca718
-    __init_cpu_features ();
bca718
-#else
bca718
-  /* Find out what brand of processor.  */
bca718
-  unsigned int max_cpuid;
bca718
-  unsigned int ebx;
bca718
-  unsigned int ecx;
bca718
-  unsigned int edx;
bca718
-  __cpuid (0, max_cpuid, ebx, ecx, edx);
bca718
-#endif
bca718
-
bca718
   if (is_intel)
bca718
     return handle_intel (name, max_cpuid);
bca718
 
bca718
@@ -525,18 +471,6 @@ long int __x86_64_raw_shared_cache_size
bca718
 int __x86_64_prefetchw attribute_hidden;
bca718
 #endif
bca718
 
bca718
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
bca718
-/* Instructions preferred for memory and string routines.
bca718
-
bca718
-  0: Regular instructions
bca718
-  1: MMX instructions
bca718
-  2: SSE2 instructions
bca718
-  3: SSSE3 instructions
bca718
-
bca718
-  */
bca718
-int __x86_64_preferred_memory_instruction attribute_hidden;
bca718
-#endif
bca718
-
bca718
 
bca718
 static void
bca718
 __attribute__((constructor))
bca718
@@ -553,14 +487,6 @@ init_cacheinfo (void)
bca718
   unsigned int level;
bca718
   unsigned int threads = 0;
bca718
 
bca718
-#ifdef USE_MULTIARCH
bca718
-  if (__cpu_features.kind == arch_kind_unknown)
bca718
-    __init_cpu_features ();
bca718
-#else
bca718
-  int max_cpuid;
bca718
-  __cpuid (0, max_cpuid, ebx, ecx, edx);
bca718
-#endif
bca718
-
bca718
   if (is_intel)
bca718
     {
bca718
       data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
bca718
@@ -576,34 +502,13 @@ init_cacheinfo (void)
bca718
 	  shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
bca718
 	}
bca718
 
bca718
-      unsigned int ebx_1;
bca718
-
bca718
-#ifdef USE_MULTIARCH
bca718
-      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
bca718
-      ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
bca718
-      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
bca718
-      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
bca718
-#else
bca718
-      __cpuid (1, eax, ebx_1, ecx, edx);
bca718
-#endif
bca718
-
bca718
-      unsigned int family = (eax >> 8) & 0x0f;
bca718
-      unsigned int model = (eax >> 4) & 0x0f;
bca718
-      unsigned int extended_model = (eax >> 12) & 0xf0;
bca718
-
bca718
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
bca718
-      /* Intel prefers SSSE3 instructions for memory/string routines
bca718
-	 if they are available.  */
bca718
-      if ((ecx & 0x200))
bca718
-	__x86_64_preferred_memory_instruction = 3;
bca718
-      else
bca718
-	__x86_64_preferred_memory_instruction = 2;
bca718
-#endif
bca718
-
bca718
       /* Figure out the number of logical threads that share the
bca718
 	 highest cache level.  */
bca718
       if (max_cpuid >= 4)
bca718
 	{
bca718
+	  unsigned int family = GLRO(dl_x86_cpu_features).family;
bca718
+	  unsigned int model = GLRO(dl_x86_cpu_features).model;
bca718
+
bca718
 	  int i = 0;
bca718
 
bca718
 	  /* Query until desired cache level is enumerated.  */
bca718
@@ -655,7 +560,6 @@ init_cacheinfo (void)
bca718
 	  threads += 1;
bca718
 	  if (threads > 2 && level == 2 && family == 6)
bca718
 	    {
bca718
-	      model += extended_model;
bca718
 	      switch (model)
bca718
 		{
bca718
 		case 0x57:
bca718
@@ -678,7 +582,9 @@ init_cacheinfo (void)
bca718
 	intel_bug_no_cache_info:
bca718
 	  /* Assume that all logical threads share the highest cache level.  */
bca718
 
bca718
-	  threads = (ebx_1 >> 16) & 0xff;
bca718
+	  threads
bca718
+	    = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
bca718
+		>> 16) & 0xff);
bca718
 	}
bca718
 
bca718
       /* Cap usage of highest cache level to the number of supported
bca718
@@ -693,25 +599,6 @@ init_cacheinfo (void)
bca718
       long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
bca718
       shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
bca718
 
bca718
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
bca718
-# ifdef USE_MULTIARCH
bca718
-      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
bca718
-      ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
bca718
-      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
bca718
-      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
bca718
-# else
bca718
-      __cpuid (1, eax, ebx, ecx, edx);
bca718
-# endif
bca718
-
bca718
-      /* AMD prefers SSSE3 instructions for memory/string routines
bca718
-	 if they are avaiable, otherwise it prefers integer
bca718
-	 instructions.  */
bca718
-      if ((ecx & 0x200))
bca718
-	__x86_64_preferred_memory_instruction = 3;
bca718
-      else
bca718
-	__x86_64_preferred_memory_instruction = 0;
bca718
-#endif
bca718
-
bca718
       /* Get maximum extended function. */
bca718
       __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
bca718
 
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/dl-machine.h
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/dl-machine.h
bca718
+++ glibc-2.17-c758a686/sysdeps/x86_64/dl-machine.h
bca718
@@ -26,6 +26,7 @@
bca718
 #include <sysdep.h>
bca718
 #include <tls.h>
bca718
 #include <dl-tlsdesc.h>
bca718
+#include <cpu-features.c>
bca718
 
bca718
 /* Return nonzero iff ELF header is compatible with the running host.  */
bca718
 static inline int __attribute__ ((unused))
bca718
@@ -200,6 +201,8 @@ dl_platform_init (void)
bca718
   if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
bca718
     /* Avoid an empty string which would disturb us.  */
bca718
     GLRO(dl_platform) = NULL;
bca718
+
bca718
+  init_cpu_features (&GLRO(dl_x86_cpu_features));
bca718
 }
bca718
 
bca718
 static inline ElfW(Addr)
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/dl-procinfo.c
bca718
===================================================================
bca718
--- /dev/null
bca718
+++ glibc-2.17-c758a686/sysdeps/x86_64/dl-procinfo.c
bca718
@@ -0,0 +1,57 @@
bca718
+/* Data for x86-64 version of processor capability information.
bca718
+   Copyright (C) 2015 Free Software Foundation, Inc.
bca718
+   This file is part of the GNU C Library.
bca718
+
bca718
+   The GNU C Library is free software; you can redistribute it and/or
bca718
+   modify it under the terms of the GNU Lesser General Public
bca718
+   License as published by the Free Software Foundation; either
bca718
+   version 2.1 of the License, or (at your option) any later version.
bca718
+
bca718
+   The GNU C Library is distributed in the hope that it will be useful,
bca718
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
bca718
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
bca718
+   Lesser General Public License for more details.
bca718
+
bca718
+   You should have received a copy of the GNU Lesser General Public
bca718
+   License along with the GNU C Library; if not, see
bca718
+   <http://www.gnu.org/licenses/>.  */
bca718
+
bca718
+/* If anything should be added here check whether the size of each string
bca718
+   is still ok with the given array size.
bca718
+
bca718
+   All the #ifdefs in the definitions are quite irritating but
bca718
+   necessary if we want to avoid duplicating the information.  There
bca718
+   are three different modes:
bca718
+
bca718
+   - PROCINFO_DECL is defined.  This means we are only interested in
bca718
+     declarations.
bca718
+
bca718
+   - PROCINFO_DECL is not defined:
bca718
+
bca718
+     + if SHARED is defined the file is included in an array
bca718
+       initializer.  The .element = { ... } syntax is needed.
bca718
+
bca718
+     + if SHARED is not defined a normal array initialization is
bca718
+       needed.
bca718
+  */
bca718
+
bca718
+#ifndef PROCINFO_CLASS
bca718
+# define PROCINFO_CLASS
bca718
+#endif
bca718
+
bca718
+#if !defined PROCINFO_DECL && defined SHARED
bca718
+  ._dl_x86_cpu_features
bca718
+#else
bca718
+PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features
bca718
+#endif
bca718
+#ifndef PROCINFO_DECL
bca718
+= { }
bca718
+#endif
bca718
+#if !defined SHARED || defined PROCINFO_DECL
bca718
+;
bca718
+#else
bca718
+,
bca718
+#endif
bca718
+
bca718
+#undef PROCINFO_DECL
bca718
+#undef PROCINFO_CLASS
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/ldsodefs.h
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/ldsodefs.h
bca718
+++ glibc-2.17-c758a686/sysdeps/x86_64/ldsodefs.h
bca718
@@ -20,6 +20,7 @@
bca718
 #define	_X86_64_LDSODEFS_H	1
bca718
 
bca718
 #include <elf.h>
bca718
+#include <cpu-features.h>
bca718
 
bca718
 struct La_x86_64_regs;
bca718
 struct La_x86_64_retval;
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/Makefile
bca718
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile
bca718
@@ -1,5 +1,4 @@
bca718
 ifeq ($(subdir),csu)
bca718
-aux += init-arch
bca718
 tests += test-multiarch
bca718
 gen-as-const-headers += ifunc-defines.sym
bca718
 endif
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Versions
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/Versions
bca718
+++ /dev/null
bca718
@@ -1,5 +0,0 @@
bca718
-libc {
bca718
-  GLIBC_PRIVATE {
bca718
-    __get_cpu_features;
bca718
-  }
bca718
-}
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/cacheinfo.c
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/cacheinfo.c
bca718
+++ /dev/null
bca718
@@ -1,2 +0,0 @@
bca718
-#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
bca718
-#include "../cacheinfo.c"
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-defines.sym
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/ifunc-defines.sym
bca718
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-defines.sym
bca718
@@ -4,7 +4,6 @@
bca718
 --
bca718
 
bca718
 CPU_FEATURES_SIZE	sizeof (struct cpu_features)
bca718
-KIND_OFFSET		offsetof (struct cpu_features, kind)
bca718
 CPUID_OFFSET		offsetof (struct cpu_features, cpuid)
bca718
 CPUID_SIZE		sizeof (struct cpuid_registers)
bca718
 CPUID_EAX_OFFSET	offsetof (struct cpuid_registers, eax)
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/init-arch.c
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/init-arch.c
bca718
+++ /dev/null
bca718
@@ -1,183 +0,0 @@
bca718
-/* Initialize CPU feature data.
bca718
-   This file is part of the GNU C Library.
bca718
-   Copyright (C) 2008-2012 Free Software Foundation, Inc.
bca718
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
bca718
-
bca718
-   The GNU C Library is free software; you can redistribute it and/or
bca718
-   modify it under the terms of the GNU Lesser General Public
bca718
-   License as published by the Free Software Foundation; either
bca718
-   version 2.1 of the License, or (at your option) any later version.
bca718
-
bca718
-   The GNU C Library is distributed in the hope that it will be useful,
bca718
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
bca718
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
bca718
-   Lesser General Public License for more details.
bca718
-
bca718
-   You should have received a copy of the GNU Lesser General Public
bca718
-   License along with the GNU C Library; if not, see
bca718
-   <http://www.gnu.org/licenses/>.  */
bca718
-
bca718
-#include <atomic.h>
bca718
-#include <cpuid.h>
bca718
-#include "init-arch.h"
bca718
-
bca718
-
bca718
-struct cpu_features __cpu_features attribute_hidden;
bca718
-
bca718
-
bca718
-static void
bca718
-get_common_indeces (unsigned int *family, unsigned int *model)
bca718
-{
bca718
-  __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax,
bca718
-	   __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
bca718
-	   __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
bca718
-	   __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
bca718
-
bca718
-  unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
bca718
-  *family = (eax >> 8) & 0x0f;
bca718
-  *model = (eax >> 4) & 0x0f;
bca718
-}
bca718
-
bca718
-
bca718
-void
bca718
-__init_cpu_features (void)
bca718
-{
bca718
-  unsigned int ebx;
bca718
-  unsigned int ecx;
bca718
-  unsigned int edx;
bca718
-  unsigned int family = 0;
bca718
-  unsigned int model = 0;
bca718
-  enum cpu_features_kind kind;
bca718
-
bca718
-  __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx);
bca718
-
bca718
-  /* This spells out "GenuineIntel".  */
bca718
-  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
bca718
-    {
bca718
-      kind = arch_kind_intel;
bca718
-
bca718
-      get_common_indeces (&family, &model);
bca718
-
bca718
-      /* Intel processors prefer SSE instruction for memory/string
bca718
-	 routines if they are available.  */
bca718
-      __cpu_features.feature[index_Prefer_SSE_for_memop]
bca718
-	|= bit_Prefer_SSE_for_memop;
bca718
-
bca718
-      unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
bca718
-      unsigned int extended_family = (eax >> 20) & 0xff;
bca718
-      unsigned int extended_model = (eax >> 12) & 0xf0;
bca718
-      if (family == 0x0f)
bca718
-	{
bca718
-	  family += extended_family;
bca718
-	  model += extended_model;
bca718
-	}
bca718
-      else if (family == 0x06)
bca718
-	{
bca718
-	  ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
bca718
-	  model += extended_model;
bca718
-	  switch (model)
bca718
-	    {
bca718
-	    case 0x1c:
bca718
-	    case 0x26:
bca718
-	      /* BSF is slow on Atom.  */
bca718
-	      __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
bca718
-	      break;
bca718
-
bca718
-	    default:
bca718
-	      /* Unknown family 0x06 processors.  Assuming this is one
bca718
-		 of Core i3/i5/i7 processors if AVX is available.  */
bca718
-	      if ((ecx & bit_AVX) == 0)
bca718
-		break;
bca718
-
bca718
-	    case 0x1a:
bca718
-	    case 0x1e:
bca718
-	    case 0x1f:
bca718
-	    case 0x25:
bca718
-	    case 0x2c:
bca718
-	    case 0x2e:
bca718
-	    case 0x2f:
bca718
-	      /* Rep string instructions, copy backward, unaligned loads
bca718
-		 and pminub are fast on Intel Core i3, i5 and i7.  */
bca718
-#if index_Fast_Rep_String != index_Fast_Copy_Backward
bca718
-# error index_Fast_Rep_String != index_Fast_Copy_Backward
bca718
-#endif
bca718
-#if index_Fast_Rep_String != index_Fast_Unaligned_Load
bca718
-# error index_Fast_Rep_String != index_Fast_Unaligned_Load
bca718
-#endif
bca718
-#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
bca718
-# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
bca718
-#endif
bca718
-	      __cpu_features.feature[index_Fast_Rep_String]
bca718
-		|= (bit_Fast_Rep_String
bca718
-		    | bit_Fast_Copy_Backward
bca718
-		    | bit_Fast_Unaligned_Load
bca718
-		    | bit_Prefer_PMINUB_for_stringop);
bca718
-	      break;
bca718
-	    }
bca718
-	}
bca718
-    }
bca718
-  /* This spells out "AuthenticAMD".  */
bca718
-  else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
bca718
-    {
bca718
-      kind = arch_kind_amd;
bca718
-
bca718
-      get_common_indeces (&family, &model);
bca718
-
bca718
-      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
bca718
-
bca718
-      /* AMD processors prefer SSE instructions for memory/string routines
bca718
-	 if they are available, otherwise they prefer integer instructions.  */
bca718
-      if ((ecx & 0x200))
bca718
-	__cpu_features.feature[index_Prefer_SSE_for_memop]
bca718
-	  |= bit_Prefer_SSE_for_memop;
bca718
-
bca718
-      unsigned int eax;
bca718
-      __cpuid (0x80000000, eax, ebx, ecx, edx);
bca718
-      if (eax >= 0x80000001)
bca718
-	__cpuid (0x80000001,
bca718
-		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
bca718
-		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
bca718
-		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
bca718
-		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
bca718
-    }
bca718
-  else
bca718
-    kind = arch_kind_other;
bca718
-
bca718
-  /* Can we call xgetbv?  */
bca718
-  if (CPUID_OSXSAVE)
bca718
-    {
bca718
-      unsigned int xcrlow;
bca718
-      unsigned int xcrhigh;
bca718
-      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
bca718
-      /* Is YMM and XMM state usable?  */
bca718
-      if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
bca718
-	  (bit_YMM_state | bit_XMM_state))
bca718
-	{
bca718
-	  /* Determine if AVX is usable.  */
bca718
-	  if (CPUID_AVX)
bca718
-	    __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
bca718
-	  /* Determine if FMA is usable.  */
bca718
-	  if (CPUID_FMA)
bca718
-	    __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
bca718
-	  /* Determine if FMA4 is usable.  */
bca718
-	  if (CPUID_FMA4)
bca718
-	    __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
bca718
-	}
bca718
-    }
bca718
-
bca718
-  __cpu_features.family = family;
bca718
-  __cpu_features.model = model;
bca718
-  atomic_write_barrier ();
bca718
-  __cpu_features.kind = kind;
bca718
-}
bca718
-
bca718
-#undef __get_cpu_features
bca718
-
bca718
-const struct cpu_features *
bca718
-__get_cpu_features (void)
bca718
-{
bca718
-  if (__cpu_features.kind == arch_kind_unknown)
bca718
-    __init_cpu_features ();
bca718
-
bca718
-  return &__cpu_features;
bca718
-}
bca718
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/init-arch.h
bca718
===================================================================
bca718
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/init-arch.h
bca718
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/init-arch.h
bca718
@@ -15,183 +15,8 @@
bca718
    License along with the GNU C Library; if not, see
bca718
    <http://www.gnu.org/licenses/>.  */
bca718
 
bca718
-#define bit_Fast_Rep_String		(1 << 0)
bca718
-#define bit_Fast_Copy_Backward		(1 << 1)
bca718
-#define bit_Slow_BSF			(1 << 2)
bca718
-#define bit_Prefer_SSE_for_memop	(1 << 3)
bca718
-#define bit_Fast_Unaligned_Load		(1 << 4)
bca718
-#define bit_Prefer_PMINUB_for_stringop	(1 << 5)
bca718
-#define bit_AVX_Usable			(1 << 6)
bca718
-#define bit_FMA_Usable			(1 << 7)
bca718
-#define bit_FMA4_Usable			(1 << 8)
bca718
-
bca718
-/* CPUID Feature flags.  */
bca718
-#define bit_SSE2	(1 << 26)
bca718
-#define bit_SSSE3	(1 << 9)
bca718
-#define bit_SSE4_1	(1 << 19)
bca718
-#define bit_SSE4_2	(1 << 20)
bca718
-#define bit_OSXSAVE	(1 << 27)
bca718
-#define bit_AVX		(1 << 28)
bca718
-#define bit_POPCOUNT	(1 << 23)
bca718
-#define bit_FMA		(1 << 12)
bca718
-#define bit_FMA4	(1 << 16)
bca718
-
bca718
-/* XCR0 Feature flags.  */
bca718
-#define bit_XMM_state  (1 << 1)
bca718
-#define bit_YMM_state  (2 << 1)
bca718
-
bca718
-#ifdef	__ASSEMBLER__
bca718
-
bca718
-# include <ifunc-defines.h>
bca718
-
bca718
-# define index_SSE2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
bca718
-# define index_SSSE3	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
bca718
-# define index_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
bca718
-# define index_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
bca718
-# define index_AVX	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
bca718
-
bca718
-# define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
bca718
-# define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
bca718
-# define index_Slow_BSF			FEATURE_INDEX_1*FEATURE_SIZE
bca718
-# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
bca718
-# define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
bca718
-# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
bca718
-# define index_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
bca718
-# define index_FMA_Usable		FEATURE_INDEX_1*FEATURE_SIZE
bca718
-# define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
bca718
-
bca718
-#else	/* __ASSEMBLER__ */
bca718
-
bca718
-# include <sys/param.h>
bca718
-# include <sys/types.h>
bca718
-# include <sysdep.h>
bca718
-# include <stdbool.h>
bca718
-
bca718
-/* Ugly hack to make it possible to select a strstr and strcasestr
bca718
-   implementation that avoids using the stack for 16-byte aligned
bca718
-   SSE temporaries.  Doing so makes it possible to call the functions
bca718
-   with a stack that's not 16-byte aligned as can happen, for example,
bca718
-   as a result of compiling the functions' callers with the GCC
bca718
-   -mpreferred-stack-boubdary=2 or =3 option, or with the ICC
bca718
-   -falign-stack=assume-4-byte option.  See rhbz 1150282 for details.
bca718
-
bca718
-   The ifunc selector uses the unaligned version by default if this
bca718
-   file exists and is accessible.  */
bca718
-# define ENABLE_STRSTR_UNALIGNED_PATHNAME \
bca718
-    "/etc/sysconfig/64bit_strstr_via_64bit_strstr_sse2_unaligned"
bca718
-
bca718
-static bool __attribute__ ((unused))
bca718
-use_unaligned_strstr (void)
bca718
-{
bca718
-  struct stat unaligned_strstr_etc_sysconfig_file;
bca718
-
bca718
-  /* TLS may not have been set up yet, so avoid using stat since it tries to
bca718
-     set errno.  */
bca718
-  return INTERNAL_SYSCALL (stat, , 2,
bca718
-                           ENABLE_STRSTR_UNALIGNED_PATHNAME,
bca718
-                           &unaligned_strstr_etc_sysconfig_file) == 0;
bca718
-}
bca718
-
bca718
-enum
bca718
-  {
bca718
-    COMMON_CPUID_INDEX_1 = 0,
bca718
-    COMMON_CPUID_INDEX_80000001,	/* for AMD */
bca718
-    /* Keep the following line at the end.  */
bca718
-    COMMON_CPUID_INDEX_MAX
bca718
-  };
bca718
-
bca718
-enum
bca718
-  {
bca718
-    FEATURE_INDEX_1 = 0,
bca718
-    /* Keep the following line at the end.  */
bca718
-    FEATURE_INDEX_MAX
bca718
-  };
bca718
-
bca718
-extern struct cpu_features
bca718
-{
bca718
-  enum cpu_features_kind
bca718
-    {
bca718
-      arch_kind_unknown = 0,
bca718
-      arch_kind_intel,
bca718
-      arch_kind_amd,
bca718
-      arch_kind_other
bca718
-    } kind;
bca718
-  int max_cpuid;
bca718
-  struct cpuid_registers
bca718
-  {
bca718
-    unsigned int eax;
bca718
-    unsigned int ebx;
bca718
-    unsigned int ecx;
bca718
-    unsigned int edx;
bca718
-  } cpuid[COMMON_CPUID_INDEX_MAX];
bca718
-  unsigned int family;
bca718
-  unsigned int model;
bca718
-  unsigned int feature[FEATURE_INDEX_MAX];
bca718
-} __cpu_features attribute_hidden;
bca718
-
bca718
-
bca718
-extern void __init_cpu_features (void) attribute_hidden;
bca718
-# define INIT_ARCH() \
bca718
-  do							\
bca718
-    if (__cpu_features.kind == arch_kind_unknown)	\
bca718
-      __init_cpu_features ();				\
bca718
-  while (0)
bca718
-
bca718
-/* Used from outside libc.so to get access to the CPU features structure.  */
bca718
-extern const struct cpu_features *__get_cpu_features (void)
bca718
-     __attribute__ ((const));
bca718
-
bca718
-# if IS_IN (libc)
bca718
-#  define __get_cpu_features()	(&__cpu_features)
bca718
-# endif
bca718
-
bca718
-# define HAS_CPU_FEATURE(idx, reg, bit) \
bca718
-  ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0)
bca718
-
bca718
-/* Following are the feature tests used throughout libc.  */
bca718
-
bca718
-/* CPUID_* evaluates to true if the feature flag is enabled.
bca718
-   We always use &__cpu_features because the HAS_CPUID_* macros
bca718
-   are called only within __init_cpu_features, where we can't
bca718
-   call __get_cpu_features without infinite recursion.  */
bca718
-# define HAS_CPUID_FLAG(idx, reg, bit) \
bca718
-  (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0)
bca718
-
bca718
-# define CPUID_OSXSAVE \
bca718
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
bca718
-# define CPUID_AVX \
bca718
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
bca718
-# define CPUID_FMA \
bca718
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
bca718
-# define CPUID_FMA4 \
bca718
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
bca718
-
bca718
-/* HAS_* evaluates to true if we may use the feature at runtime.  */
bca718
-# define HAS_SSE2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
bca718
-# define HAS_POPCOUNT	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)
bca718
-# define HAS_SSSE3	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
bca718
-# define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
bca718
-# define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
bca718
-
bca718
-# define index_Fast_Rep_String		FEATURE_INDEX_1
bca718
-# define index_Fast_Copy_Backward	FEATURE_INDEX_1
bca718
-# define index_Slow_BSF			FEATURE_INDEX_1
bca718
-# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
bca718
-# define index_Fast_Unaligned_Load	FEATURE_INDEX_1
bca718
-# define index_AVX_Usable		FEATURE_INDEX_1
bca718
-# define index_FMA_Usable		FEATURE_INDEX_1
bca718
-# define index_FMA4_Usable		FEATURE_INDEX_1
bca718
-
bca718
-# define HAS_ARCH_FEATURE(name) \
bca718
-  ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
bca718
-
bca718
-# define HAS_FAST_REP_STRING		HAS_ARCH_FEATURE (Fast_Rep_String)
bca718
-# define HAS_FAST_COPY_BACKWARD		HAS_ARCH_FEATURE (Fast_Copy_Backward)
bca718
-# define HAS_SLOW_BSF			HAS_ARCH_FEATURE (Slow_BSF)
bca718
-# define HAS_PREFER_SSE_FOR_MEMOP	HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
bca718
-# define HAS_FAST_UNALIGNED_LOAD	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
bca718
-# define HAS_AVX			HAS_ARCH_FEATURE (AVX_Usable)
bca718
-# define HAS_FMA			HAS_ARCH_FEATURE (FMA_Usable)
bca718
-# define HAS_FMA4			HAS_ARCH_FEATURE (FMA4_Usable)
bca718
-
bca718
-#endif	/* __ASSEMBLER__ */
bca718
+#ifdef  __ASSEMBLER__
bca718
+# include <cpu-features.h>
bca718
+#else
bca718
+# include <ldsodefs.h>
bca718
+#endif