diff --git a/SOURCES/glibc-rh1504969.patch b/SOURCES/glibc-rh1504969.patch new file mode 100644 index 0000000..d6de9fe --- /dev/null +++ b/SOURCES/glibc-rh1504969.patch @@ -0,0 +1,781 @@ +Backport from Hongjiu Lu of these upstream +commits: + +commit b52b0d793dcb226ecb0ecca1e672ca265973233c +Author: H.J. Lu +Date: Fri Oct 20 11:00:08 2017 -0700 + + x86-64: Use fxsave/xsave/xsavec in _dl_runtime_resolve [BZ #21265] + + In _dl_runtime_resolve, use fxsave/xsave/xsavec to preserve all vector, + mask and bound registers. It simplifies _dl_runtime_resolve and supports + different calling conventions. ld.so code size is reduced by more than + 1 KB. However, use fxsave/xsave/xsavec takes a little bit more cycles + than saving and restoring vector and bound registers individually. + + Latency for _dl_runtime_resolve to lookup the function, foo, from one + shared library plus libc.so: + + Before After Change + + Westmere (SSE)/fxsave 345 866 151% + IvyBridge (AVX)/xsave 420 643 53% + Haswell (AVX)/xsave 713 1252 75% + Skylake (AVX+MPX)/xsavec 559 719 28% + Skylake (AVX512+MPX)/xsavec 145 272 87% + Ryzen (AVX)/xsavec 280 553 97% + + This is the worst case where portion of time spent for saving and + restoring registers is bigger than majority of cases. With smaller + _dl_runtime_resolve code size, overall performance impact is negligible. + + On IvyBridge, differences in build and test time of binutils with lazy + binding GCC and binutils are noises. On Westmere, differences in + bootstrap and "makc check" time of GCC 7 with lazy binding GCC and + binutils are also noises. + +commit 0ac8ee53e8efbfd6e1c37094b4653f5c2dad65b5 +Author: H.J. Lu +Date: Fri Aug 26 08:57:42 2016 -0700 + + X86-64: Correct CFA in _dl_runtime_resolve + + When stack is re-aligned in _dl_runtime_resolve, there is no need to + adjust CFA when allocating register save area on stack. + + * sysdeps/x86_64/dl-trampoline.h (_dl_runtime_resolve): Don't + adjust CFA when allocating register save area on re-aligned + stack. + +Storing the full xsave state size in xsave_state_full_size was not needed +because RHEL7 does not have the full tunables support that would use this, +therefore support for xsave_state_full_size has been removed from the +changes in b52b0d793dcb226ecb0ecca1e672ca265973233c + +diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym +index a9d53d195f9eb609..1415005fc22be806 100644 +--- a/sysdeps/x86/cpu-features-offsets.sym ++++ b/sysdeps/x86/cpu-features-offsets.sym +@@ -5,3 +5,5 @@ + #define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) ++ ++XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size) +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index 17e9835f5716ca12..c9bb4fa6f524ba4e 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -18,6 +18,7 @@ + + #include + #include ++#include + + static inline void + get_common_indeces (struct cpu_features *cpu_features, +@@ -148,20 +149,6 @@ init_cpu_features (struct cpu_features *cpu_features) + break; + } + } +- +- /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow. +- If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt. 
*/ +- cpu_features->feature[index_Use_dl_runtime_resolve_slow] +- |= bit_Use_dl_runtime_resolve_slow; +- if (cpu_features->max_cpuid >= 0xd) +- { +- unsigned int eax; +- +- __cpuid_count (0xd, 1, eax, ebx, ecx, edx); +- if ((eax & (1 << 2)) != 0) +- cpu_features->feature[index_Use_dl_runtime_resolve_opt] +- |= bit_Use_dl_runtime_resolve_opt; +- } + } + /* This spells out "AuthenticAMD". */ + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) +@@ -243,6 +230,71 @@ init_cpu_features (struct cpu_features *cpu_features) + /* Determine if FMA4 is usable. */ + if (HAS_CPU_FEATURE (FMA4)) + cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable; ++ ++ /* For _dl_runtime_resolve, set xsave_state_size to xsave area ++ size + integer register save size and align it to 64 bytes. */ ++ if (cpu_features->max_cpuid >= 0xd) ++ { ++ unsigned int eax, ebx, ecx, edx; ++ ++ __cpuid_count (0xd, 0, eax, ebx, ecx, edx); ++ if (ebx != 0) ++ { ++ cpu_features->xsave_state_size ++ = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); ++ ++ __cpuid_count (0xd, 1, eax, ebx, ecx, edx); ++ ++ /* Check if XSAVEC is available. */ ++ if ((eax & (1 << 1)) != 0) ++ { ++ unsigned int xstate_comp_offsets[32]; ++ unsigned int xstate_comp_sizes[32]; ++ unsigned int i; ++ ++ xstate_comp_offsets[0] = 0; ++ xstate_comp_offsets[1] = 160; ++ xstate_comp_offsets[2] = 576; ++ xstate_comp_sizes[0] = 160; ++ xstate_comp_sizes[1] = 256; ++ ++ for (i = 2; i < 32; i++) ++ { ++ if ((STATE_SAVE_MASK & (1 << i)) != 0) ++ { ++ __cpuid_count (0xd, i, eax, ebx, ecx, edx); ++ xstate_comp_sizes[i] = eax; ++ } ++ else ++ { ++ ecx = 0; ++ xstate_comp_sizes[i] = 0; ++ } ++ ++ if (i > 2) ++ { ++ xstate_comp_offsets[i] ++ = (xstate_comp_offsets[i - 1] ++ + xstate_comp_sizes[i -1]); ++ if ((ecx & (1 << 1)) != 0) ++ xstate_comp_offsets[i] ++ = ALIGN_UP (xstate_comp_offsets[i], 64); ++ } ++ } ++ ++ /* Use XSAVEC. */ ++ unsigned int size ++ = xstate_comp_offsets[31] + xstate_comp_sizes[31]; ++ if (size) ++ { ++ cpu_features->xsave_state_size ++ = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); ++ cpu_features->feature[index_XSAVEC_Usable] ++ |= bit_XSAVEC_Usable; ++ } ++ } ++ } ++ } + } + } + +diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h +index c69abb349af8f09c..4e2e6fabb39ab600 100644 +--- a/sysdeps/x86/cpu-features.h ++++ b/sysdeps/x86/cpu-features.h +@@ -34,8 +34,7 @@ + #define bit_AVX512DQ_Usable (1 << 13) + #define bit_Prefer_MAP_32BIT_EXEC (1 << 16) + #define bit_Prefer_No_VZEROUPPER (1 << 17) +-#define bit_Use_dl_runtime_resolve_opt (1 << 20) +-#define bit_Use_dl_runtime_resolve_slow (1 << 21) ++#define bit_XSAVEC_Usable (1 << 18) + + + /* CPUID Feature flags. */ +@@ -70,10 +69,20 @@ + /* The current maximum size of the feature integer bit array. */ + #define FEATURE_INDEX_MAX 1 + ++/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need ++ space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be ++ aligned to 16 bytes for fxsave and 64 bytes for xsave. */ ++#define STATE_SAVE_OFFSET (8 * 7 + 8) ++ ++/* Save SSE, AVX, AVX512, mask and bound registers. 
*/ ++#define STATE_SAVE_MASK \ ++ ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)) ++ + #ifdef __ASSEMBLER__ + + # include + # include ++# include + + # define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET + # define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +@@ -98,8 +107,6 @@ + # define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE + # define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE + # define index_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE +-# define index_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE +-# define index_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE + + + # if defined (_LIBC) && !IS_IN (nonlib) +@@ -214,6 +221,12 @@ struct cpu_features + } cpuid[COMMON_CPUID_INDEX_MAX]; + unsigned int family; + unsigned int model; ++ /* The type must be unsigned long int so that we use ++ ++ sub xsave_state_size_offset(%rip) %RSP_LP ++ ++ in _dl_runtime_resolve. */ ++ unsigned long int xsave_state_size; + unsigned int feature[FEATURE_INDEX_MAX]; + }; + +@@ -279,8 +292,7 @@ extern const struct cpu_features *__get_cpu_features (void) + # define index_AVX512DQ_Usable FEATURE_INDEX_1 + # define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1 + # define index_Prefer_No_VZEROUPPER FEATURE_INDEX_1 +-# define index_Use_dl_runtime_resolve_opt FEATURE_INDEX_1 +-# define index_Use_dl_runtime_resolve_slow FEATURE_INDEX_1 ++# define index_XSAVEC_Usable FEATURE_INDEX_1 + + #endif /* !__ASSEMBLER__ */ + +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index 2a4cda1aff57db98..da89f2a6174a0d94 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -66,12 +66,9 @@ static inline int __attribute__ ((unused, always_inline)) + elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + { + Elf64_Addr *got; +- extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden; +- extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden; +- extern void _dl_runtime_resolve_avx_slow (ElfW(Word)) attribute_hidden; +- extern void _dl_runtime_resolve_avx_opt (ElfW(Word)) attribute_hidden; +- extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden; +- extern void _dl_runtime_resolve_avx512_opt (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden; +@@ -120,29 +117,14 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + /* This function will get called to fix up the GOT entry + indicated by the offset on the stack, and then jump to + the resolved address. 
*/ +- if (HAS_ARCH_FEATURE (AVX512F_Usable)) +- { +- if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt)) +- *(ElfW(Addr) *) (got + 2) +- = (ElfW(Addr)) &_dl_runtime_resolve_avx512_opt; +- else +- *(ElfW(Addr) *) (got + 2) +- = (ElfW(Addr)) &_dl_runtime_resolve_avx512; +- } +- else if (HAS_ARCH_FEATURE (AVX_Usable)) +- { +- if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt)) +- *(ElfW(Addr) *) (got + 2) +- = (ElfW(Addr)) &_dl_runtime_resolve_avx_opt; +- else if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_slow)) +- *(ElfW(Addr) *) (got + 2) +- = (ElfW(Addr)) &_dl_runtime_resolve_avx_slow; +- else +- *(ElfW(Addr) *) (got + 2) +- = (ElfW(Addr)) &_dl_runtime_resolve_avx; +- } ++ if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) ++ *(ElfW(Addr) *) (got + 2) ++ = (HAS_ARCH_FEATURE (XSAVEC_Usable) ++ ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec ++ : (ElfW(Addr)) &_dl_runtime_resolve_xsave); + else +- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse; ++ *(ElfW(Addr) *) (got + 2) ++ = (ElfW(Addr)) &_dl_runtime_resolve_fxsave; + } + } + +diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S +index bd2d72edfea406e5..215a314f06ca874c 100644 +--- a/sysdeps/x86_64/dl-trampoline.S ++++ b/sysdeps/x86_64/dl-trampoline.S +@@ -34,37 +34,24 @@ + # define DL_STACK_ALIGNMENT 8 + #endif + +-#ifndef DL_RUNIME_UNALIGNED_VEC_SIZE +-/* The maximum size of unaligned vector load and store. */ +-# define DL_RUNIME_UNALIGNED_VEC_SIZE 16 +-#endif +- +-/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */ +-#define DL_RUNIME_RESOLVE_REALIGN_STACK \ +- (VEC_SIZE > DL_STACK_ALIGNMENT \ +- && VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE) +- +-/* Align vector register save area to 16 bytes. */ +-#define REGISTER_SAVE_VEC_OFF 0 ++/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align ++ stack to 16 bytes before calling _dl_fixup. */ ++#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ ++ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ ++ || 16 > DL_STACK_ALIGNMENT) + + /* Area on stack to save and restore registers used for parameter + passing when calling _dl_fixup. */ + #ifdef __ILP32__ +-# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) + # define PRESERVE_BND_REGS_PREFIX + #else +-/* Align bound register save area to 16 bytes. 
*/ +-# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) +-# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16) +-# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16) +-# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16) +-# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16) + # ifdef HAVE_MPX_SUPPORT + # define PRESERVE_BND_REGS_PREFIX bnd + # else + # define PRESERVE_BND_REGS_PREFIX .byte 0xf2 + # endif + #endif ++#define REGISTER_SAVE_RAX 0 + #define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8) + #define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8) + #define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8) +@@ -72,71 +59,60 @@ + #define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8) + #define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8) + ++#define RESTORE_AVX ++ + #define VEC_SIZE 64 + #define VMOVA vmovdqa64 +-#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +-# define VMOV vmovdqa64 +-#else +-# define VMOV vmovdqu64 +-#endif + #define VEC(i) zmm##i +-#define _dl_runtime_resolve _dl_runtime_resolve_avx512 + #define _dl_runtime_profile _dl_runtime_profile_avx512 +-#define RESTORE_AVX + #include "dl-trampoline.h" +-#undef _dl_runtime_resolve + #undef _dl_runtime_profile + #undef VEC +-#undef VMOV + #undef VMOVA + #undef VEC_SIZE + + #define VEC_SIZE 32 + #define VMOVA vmovdqa +-#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +-# define VMOV vmovdqa +-#else +-# define VMOV vmovdqu +-#endif + #define VEC(i) ymm##i +-#define _dl_runtime_resolve _dl_runtime_resolve_avx +-#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx_opt + #define _dl_runtime_profile _dl_runtime_profile_avx + #include "dl-trampoline.h" +-#undef _dl_runtime_resolve +-#undef _dl_runtime_resolve_opt + #undef _dl_runtime_profile + #undef VEC +-#undef VMOV + #undef VMOVA + #undef VEC_SIZE + + /* movaps/movups is 1-byte shorter. */ + #define VEC_SIZE 16 + #define VMOVA movaps +-#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +-# define VMOV movaps +-#else +-# define VMOV movups +- #endif + #define VEC(i) xmm##i +-#define _dl_runtime_resolve _dl_runtime_resolve_sse + #define _dl_runtime_profile _dl_runtime_profile_sse + #undef RESTORE_AVX + #include "dl-trampoline.h" +-#undef _dl_runtime_resolve + #undef _dl_runtime_profile +-#undef VMOV ++#undef VEC + #undef VMOVA ++#undef VEC_SIZE + +-/* Used by _dl_runtime_resolve_avx_opt/_dl_runtime_resolve_avx512_opt +- to preserve the full vector registers with zero upper bits. 
*/ +-#define VMOVA vmovdqa +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +-# define VMOV vmovdqa +-#else +-# define VMOV vmovdqu +-#endif +-#define _dl_runtime_resolve _dl_runtime_resolve_sse_vex +-#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx512_opt ++#define USE_FXSAVE ++#define STATE_SAVE_ALIGNMENT 16 ++#define _dl_runtime_resolve _dl_runtime_resolve_fxsave + #include "dl-trampoline.h" ++#undef _dl_runtime_resolve ++#undef USE_FXSAVE ++#undef STATE_SAVE_ALIGNMENT ++ ++#define USE_XSAVE ++#define STATE_SAVE_ALIGNMENT 64 ++#define _dl_runtime_resolve _dl_runtime_resolve_xsave ++#include "dl-trampoline.h" ++#undef _dl_runtime_resolve ++#undef USE_XSAVE ++#undef STATE_SAVE_ALIGNMENT ++ ++#define USE_XSAVEC ++#define STATE_SAVE_ALIGNMENT 64 ++#define _dl_runtime_resolve _dl_runtime_resolve_xsavec ++#include "dl-trampoline.h" ++#undef _dl_runtime_resolve ++#undef USE_XSAVEC ++#undef STATE_SAVE_ALIGNMENT +diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h +index 849cab4cd30e122a..525de575e3c4e52c 100644 +--- a/sysdeps/x86_64/dl-trampoline.h ++++ b/sysdeps/x86_64/dl-trampoline.h +@@ -16,140 +16,47 @@ + License along with the GNU C Library; if not, see + . */ + +-#undef REGISTER_SAVE_AREA_RAW +-#ifdef __ILP32__ +-/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to +- VEC7. */ +-# define REGISTER_SAVE_AREA_RAW (8 * 7 + VEC_SIZE * 8) +-#else +-/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as +- BND0, BND1, BND2, BND3 and VEC0 to VEC7. */ +-# define REGISTER_SAVE_AREA_RAW (8 * 7 + 16 * 4 + VEC_SIZE * 8) +-#endif ++ .text ++#ifdef _dl_runtime_resolve + +-#undef REGISTER_SAVE_AREA +-#undef LOCAL_STORAGE_AREA +-#undef BASE +-#if DL_RUNIME_RESOLVE_REALIGN_STACK +-# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8) +-/* Local stack area before jumping to function address: RBX. */ +-# define LOCAL_STORAGE_AREA 8 +-# define BASE rbx +-# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0 +-# error REGISTER_SAVE_AREA must be multples of VEC_SIZE +-# endif +-#else +-# define REGISTER_SAVE_AREA REGISTER_SAVE_AREA_RAW +-/* Local stack area before jumping to function address: All saved +- registers. */ +-# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA +-# define BASE rsp +-# if (REGISTER_SAVE_AREA % 16) != 8 +-# error REGISTER_SAVE_AREA must be odd multples of 8 ++# undef REGISTER_SAVE_AREA ++# undef LOCAL_STORAGE_AREA ++# undef BASE ++ ++# if (STATE_SAVE_ALIGNMENT % 16) != 0 ++# error STATE_SAVE_ALIGNMENT must be multples of 16 + # endif +-#endif + +- .text +-#ifdef _dl_runtime_resolve_opt +-/* Use the smallest vector registers to preserve the full YMM/ZMM +- registers to avoid SSE transition penalty. */ +- +-# if VEC_SIZE == 32 +-/* Check if the upper 128 bits in %ymm0 - %ymm7 registers are non-zero +- and preserve %xmm0 - %xmm7 registers with the zero upper bits. Since +- there is no SSE transition penalty on AVX512 processors which don't +- support XGETBV with ECX == 1, _dl_runtime_resolve_avx512_slow isn't +- provided. 
*/ +- .globl _dl_runtime_resolve_avx_slow +- .hidden _dl_runtime_resolve_avx_slow +- .type _dl_runtime_resolve_avx_slow, @function +- .align 16 +-_dl_runtime_resolve_avx_slow: +- cfi_startproc +- cfi_adjust_cfa_offset(16) # Incorporate PLT +- vorpd %ymm0, %ymm1, %ymm8 +- vorpd %ymm2, %ymm3, %ymm9 +- vorpd %ymm4, %ymm5, %ymm10 +- vorpd %ymm6, %ymm7, %ymm11 +- vorpd %ymm8, %ymm9, %ymm9 +- vorpd %ymm10, %ymm11, %ymm10 +- vpcmpeqd %xmm8, %xmm8, %xmm8 +- vorpd %ymm9, %ymm10, %ymm10 +- vptest %ymm10, %ymm8 +- # Preserve %ymm0 - %ymm7 registers if the upper 128 bits of any +- # %ymm0 - %ymm7 registers aren't zero. +- PRESERVE_BND_REGS_PREFIX +- jnc _dl_runtime_resolve_avx +- # Use vzeroupper to avoid SSE transition penalty. +- vzeroupper +- # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits +- # when the upper 128 bits of %ymm0 - %ymm7 registers are zero. +- PRESERVE_BND_REGS_PREFIX +- jmp _dl_runtime_resolve_sse_vex +- cfi_adjust_cfa_offset(-16) # Restore PLT adjustment +- cfi_endproc +- .size _dl_runtime_resolve_avx_slow, .-_dl_runtime_resolve_avx_slow ++# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0 ++# error STATE_SAVE_OFFSET must be multples of STATE_SAVE_ALIGNMENT + # endif + +-/* Use XGETBV with ECX == 1 to check which bits in vector registers are +- non-zero and only preserve the non-zero lower bits with zero upper +- bits. */ +- .globl _dl_runtime_resolve_opt +- .hidden _dl_runtime_resolve_opt +- .type _dl_runtime_resolve_opt, @function +- .align 16 +-_dl_runtime_resolve_opt: +- cfi_startproc +- cfi_adjust_cfa_offset(16) # Incorporate PLT +- pushq %rax +- cfi_adjust_cfa_offset(8) +- cfi_rel_offset(%rax, 0) +- pushq %rcx +- cfi_adjust_cfa_offset(8) +- cfi_rel_offset(%rcx, 0) +- pushq %rdx +- cfi_adjust_cfa_offset(8) +- cfi_rel_offset(%rdx, 0) +- movl $1, %ecx +- xgetbv +- movl %eax, %r11d +- popq %rdx +- cfi_adjust_cfa_offset(-8) +- cfi_restore (%rdx) +- popq %rcx +- cfi_adjust_cfa_offset(-8) +- cfi_restore (%rcx) +- popq %rax +- cfi_adjust_cfa_offset(-8) +- cfi_restore (%rax) +-# if VEC_SIZE == 32 +- # For YMM registers, check if YMM state is in use. +- andl $bit_YMM_state, %r11d +- # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits if +- # YMM state isn't in use. +- PRESERVE_BND_REGS_PREFIX +- jz _dl_runtime_resolve_sse_vex +-# elif VEC_SIZE == 16 +- # For ZMM registers, check if YMM state and ZMM state are in +- # use. +- andl $(bit_YMM_state | bit_ZMM0_15_state), %r11d +- cmpl $bit_YMM_state, %r11d +- # Preserve %zmm0 - %zmm7 registers if ZMM state is in use. +- PRESERVE_BND_REGS_PREFIX +- jg _dl_runtime_resolve_avx512 +- # Preserve %ymm0 - %ymm7 registers with the zero upper 256 bits if +- # ZMM state isn't in use. +- PRESERVE_BND_REGS_PREFIX +- je _dl_runtime_resolve_avx +- # Preserve %xmm0 - %xmm7 registers with the zero upper 384 bits if +- # neither YMM state nor ZMM state are in use. ++# if DL_RUNTIME_RESOLVE_REALIGN_STACK ++/* Local stack area before jumping to function address: RBX. */ ++# define LOCAL_STORAGE_AREA 8 ++# define BASE rbx ++# ifdef USE_FXSAVE ++/* Use fxsave to save XMM registers. */ ++# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET) ++# if (REGISTER_SAVE_AREA % 16) != 0 ++# error REGISTER_SAVE_AREA must be multples of 16 ++# endif ++# endif + # else +-# error Unsupported VEC_SIZE! ++# ifndef USE_FXSAVE ++# error USE_FXSAVE must be defined ++# endif ++/* Use fxsave to save XMM registers. 
*/ ++# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8) ++/* Local stack area before jumping to function address: All saved ++ registers. */ ++# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA ++# define BASE rsp ++# if (REGISTER_SAVE_AREA % 16) != 8 ++# error REGISTER_SAVE_AREA must be odd multples of 8 ++# endif + # endif +- cfi_adjust_cfa_offset(-16) # Restore PLT adjustment +- cfi_endproc +- .size _dl_runtime_resolve_opt, .-_dl_runtime_resolve_opt +-#endif ++ + .globl _dl_runtime_resolve + .hidden _dl_runtime_resolve + .type _dl_runtime_resolve, @function +@@ -157,19 +64,30 @@ _dl_runtime_resolve_opt: + cfi_startproc + _dl_runtime_resolve: + cfi_adjust_cfa_offset(16) # Incorporate PLT +-#if DL_RUNIME_RESOLVE_REALIGN_STACK +-# if LOCAL_STORAGE_AREA != 8 +-# error LOCAL_STORAGE_AREA must be 8 +-# endif ++# if DL_RUNTIME_RESOLVE_REALIGN_STACK ++# if LOCAL_STORAGE_AREA != 8 ++# error LOCAL_STORAGE_AREA must be 8 ++# endif + pushq %rbx # push subtracts stack by 8. + cfi_adjust_cfa_offset(8) + cfi_rel_offset(%rbx, 0) + mov %RSP_LP, %RBX_LP + cfi_def_cfa_register(%rbx) +- and $-VEC_SIZE, %RSP_LP +-#endif ++ and $-STATE_SAVE_ALIGNMENT, %RSP_LP ++# endif ++# ifdef REGISTER_SAVE_AREA + sub $REGISTER_SAVE_AREA, %RSP_LP ++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) ++# endif ++# else ++ # Allocate stack space of the required size to save the state. ++# if IS_IN (rtld) ++ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP ++# else ++ sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP ++# endif ++# endif + # Preserve registers otherwise clobbered. + movq %rax, REGISTER_SAVE_RAX(%rsp) + movq %rcx, REGISTER_SAVE_RCX(%rsp) +@@ -178,59 +96,48 @@ _dl_runtime_resolve: + movq %rdi, REGISTER_SAVE_RDI(%rsp) + movq %r8, REGISTER_SAVE_R8(%rsp) + movq %r9, REGISTER_SAVE_R9(%rsp) +- VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp) +- VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp) +- VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp) +- VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp) +- VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp) +- VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp) +- VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp) +- VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp) +-#ifndef __ILP32__ +- # We also have to preserve bound registers. These are nops if +- # Intel MPX isn't available or disabled. +-# ifdef HAVE_MPX_SUPPORT +- bndmov %bnd0, REGISTER_SAVE_BND0(%rsp) +- bndmov %bnd1, REGISTER_SAVE_BND1(%rsp) +- bndmov %bnd2, REGISTER_SAVE_BND2(%rsp) +- bndmov %bnd3, REGISTER_SAVE_BND3(%rsp) ++# ifdef USE_FXSAVE ++ fxsave STATE_SAVE_OFFSET(%rsp) + # else +-# if REGISTER_SAVE_BND0 == 0 +- .byte 0x66,0x0f,0x1b,0x04,0x24 ++ movl $STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ # Clear the XSAVE Header. 
++# ifdef USE_XSAVE ++ movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp) ++# endif ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp) ++# ifdef USE_XSAVE ++ xsave STATE_SAVE_OFFSET(%rsp) + # else +- .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0 ++ # Since glibc 2.23 requires only binutils 2.22 or later, xsavec ++ # may not be supported. Use .byte directive instead. ++# if STATE_SAVE_OFFSET != 0x40 ++# error STATE_SAVE_OFFSET != 0x40 ++# endif ++ # xsavec STATE_SAVE_OFFSET(%rsp) ++ .byte 0x0f, 0xc7, 0x64, 0x24, 0x40 + # endif +- .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1 +- .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2 +- .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3 + # endif +-#endif + # Copy args pushed by PLT in register. + # %rdi: link_map, %rsi: reloc_index + mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP + mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP + call _dl_fixup # Call resolver. + mov %RAX_LP, %R11_LP # Save return value +-#ifndef __ILP32__ +- # Restore bound registers. These are nops if Intel MPX isn't +- # avaiable or disabled. +-# ifdef HAVE_MPX_SUPPORT +- bndmov REGISTER_SAVE_BND3(%rsp), %bnd3 +- bndmov REGISTER_SAVE_BND2(%rsp), %bnd2 +- bndmov REGISTER_SAVE_BND1(%rsp), %bnd1 +- bndmov REGISTER_SAVE_BND0(%rsp), %bnd0 ++ # Get register content back. ++# ifdef USE_FXSAVE ++ fxrstor STATE_SAVE_OFFSET(%rsp) + # else +- .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3 +- .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2 +- .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1 +-# if REGISTER_SAVE_BND0 == 0 +- .byte 0x66,0x0f,0x1a,0x04,0x24 +-# else +- .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0 +-# endif ++ movl $STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ xrstor STATE_SAVE_OFFSET(%rsp) + # endif +-#endif +- # Get register content back. + movq REGISTER_SAVE_R9(%rsp), %r9 + movq REGISTER_SAVE_R8(%rsp), %r8 + movq REGISTER_SAVE_RDI(%rsp), %rdi +@@ -238,20 +145,12 @@ _dl_runtime_resolve: + movq REGISTER_SAVE_RDX(%rsp), %rdx + movq REGISTER_SAVE_RCX(%rsp), %rcx + movq REGISTER_SAVE_RAX(%rsp), %rax +- VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0) +- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1) +- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2) +- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3) +- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4) +- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5) +- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6) +- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7) +-#if DL_RUNIME_RESOLVE_REALIGN_STACK ++# if DL_RUNTIME_RESOLVE_REALIGN_STACK + mov %RBX_LP, %RSP_LP + cfi_def_cfa_register(%rsp) + movq (%rsp), %rbx + cfi_restore(%rbx) +-#endif ++# endif + # Adjust stack(PLT did 2 pushes) + add $(LOCAL_STORAGE_AREA + 16), %RSP_LP + cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16)) +@@ -260,11 +159,9 @@ _dl_runtime_resolve: + jmp *%r11 # Jump to function address. + cfi_endproc + .size _dl_runtime_resolve, .-_dl_runtime_resolve ++#endif + + +-/* To preserve %xmm0 - %xmm7 registers, dl-trampoline.h is included +- twice, for _dl_runtime_resolve_sse and _dl_runtime_resolve_sse_vex. +- But we don't need another _dl_runtime_profile for XMM registers. 
*/ + #if !defined PROF && defined _dl_runtime_profile + # if (LR_VECTOR_OFFSET % VEC_SIZE) != 0 + # error LR_VECTOR_OFFSET must be multples of VEC_SIZE diff --git a/SOURCES/glibc-rh1515114-1.patch b/SOURCES/glibc-rh1515114-1.patch new file mode 100644 index 0000000..5ea0b91 --- /dev/null +++ b/SOURCES/glibc-rh1515114-1.patch @@ -0,0 +1,70 @@ +commit 911569d02dec023d949d96aa7b0e828c91c06f55 +Author: Carlos Eduardo Seo +Date: Mon Dec 28 16:36:46 2015 -0200 + + powerpc: Fix dl-procinfo HWCAP + + HWCAP-related code should had been updated when the 32 bits of HWCAP were + used. This patch updates the code in dl-procinfo.h to loop through all + the 32 bits in HWCAP and updates _dl_powerpc_cap_flags accordingly. + +diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c +index 6e7850e..0b55906 100644 +--- a/sysdeps/powerpc/dl-procinfo.c ++++ b/sysdeps/powerpc/dl-procinfo.c +@@ -45,11 +45,12 @@ + #if !defined PROCINFO_DECL && defined SHARED + ._dl_powerpc_cap_flags + #else +-PROCINFO_CLASS const char _dl_powerpc_cap_flags[60][10] ++PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][10] + #endif + #ifndef PROCINFO_DECL + = { +- "ppcle", "true_le", "archpmu", "vsx", ++ "ppcle", "true_le", "", "", ++ "", "", "archpmu", "vsx", + "arch_2_06", "power6x", "dfp", "pa6t", + "arch_2_05", "ic_snoop", "smt", "booke", + "cellbe", "power5+", "power5", "power4", +diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h +index bce3a49..2187c5e 100644 +--- a/sysdeps/powerpc/dl-procinfo.h ++++ b/sysdeps/powerpc/dl-procinfo.h +@@ -22,9 +22,6 @@ + #include + #include /* This defines the PPC_FEATURE[2]_* macros. */ + +-/* There are 28 bits used, but they are bits 4..31. */ +-#define _DL_HWCAP_FIRST 4 +- + /* The total number of available bits (including those prior to + _DL_HWCAP_FIRST). Some of these bits might not be used. */ + #define _DL_HWCAP_COUNT 64 +@@ -68,7 +65,7 @@ static inline const char * + __attribute__ ((unused)) + _dl_hwcap_string (int idx) + { +- return GLRO(dl_powerpc_cap_flags)[idx - _DL_HWCAP_FIRST]; ++ return GLRO(dl_powerpc_cap_flags)[idx]; + } + + static inline const char * +@@ -82,7 +79,7 @@ static inline int + __attribute__ ((unused)) + _dl_string_hwcap (const char *str) + { +- for (int i = _DL_HWCAP_FIRST; i < _DL_HWCAP_COUNT; ++i) ++ for (int i = 0; i < _DL_HWCAP_COUNT; ++i) + if (strcmp (str, _dl_hwcap_string (i)) == 0) + return i; + return -1; +@@ -180,7 +177,7 @@ _dl_procinfo (unsigned int type, unsigned long int word) + case AT_HWCAP: + _dl_printf ("AT_HWCAP: "); + +- for (int i = _DL_HWCAP_FIRST; i <= _DL_HWCAP_LAST; ++i) ++ for (int i = 0; i <= _DL_HWCAP_LAST; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (i)); + break; diff --git a/SOURCES/glibc-rh1515114-2.patch b/SOURCES/glibc-rh1515114-2.patch new file mode 100644 index 0000000..255d05a --- /dev/null +++ b/SOURCES/glibc-rh1515114-2.patch @@ -0,0 +1,49 @@ +commit 7dcdfbcf6749cdc4c63e2613cbb3e2392d2fc2fb +Author: Tulio Magno Quites Machado Filho +Date: Fri Jun 23 09:10:32 2017 -0300 + + powerpc: Update AT_HWCAP[2] bits + + Linux commit ID a4700a26107241cc7b9ac8528b2c6714ff99983d reserved 2 more + bits for the instructions darn (Deliver a Random Number) and scv (System + Call Vectored). + + Linux commit ID 6997e57d693b07289694239e52a10d2f02c3a46f reserved + another bit for internal usage. + + * sysdeps/powerpc/bits/hwcap.h: Add PPC_FEATURE2_DARN and + PPC_FEATURE2_SCV. + * sysdeps/powerpc/dl-procinfo.c (_dl_powerpc_cap_flags): Add scv + and darn. 
+
+diff --git a/sysdeps/powerpc/bits/hwcap.h b/sysdeps/powerpc/bits/hwcap.h
+index c9daeed..dfc71c2 100644
+--- a/sysdeps/powerpc/bits/hwcap.h
++++ b/sysdeps/powerpc/bits/hwcap.h
+@@ -50,6 +50,7 @@
+ #define PPC_FEATURE_ARCH_2_06	    0x00000100 /* ISA 2.06 */
+ #define PPC_FEATURE_HAS_VSX	    0x00000080 /* P7 Vector Extension.  */
+ #define PPC_FEATURE_PSERIES_PERFMON_COMPAT  0x00000040
++/* Reserved by the kernel.	    0x00000004	Do not use.  */
+ #define PPC_FEATURE_TRUE_LE	    0x00000002
+ #define PPC_FEATURE_PPC_LE	    0x00000001
+
+@@ -69,3 +70,5 @@
+ #define PPC_FEATURE2_ARCH_3_00	   0x00800000 /* ISA 3.0 */
+ #define PPC_FEATURE2_HAS_IEEE128   0x00400000 /* VSX IEEE Binary Float
+						  128-bit */
++#define PPC_FEATURE2_DARN	   0x00200000 /* darn instruction.  */
++#define PPC_FEATURE2_SCV	   0x00100000 /* scv syscall.  */
+diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
+index cd7329b..4dac16d 100644
+--- a/sysdeps/powerpc/dl-procinfo.c
++++ b/sysdeps/powerpc/dl-procinfo.c
+@@ -62,7 +62,7 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][10]
+     "", "", "", "",
+     "", "", "", "",
+     "", "", "", "",
+-    "", "", "ieee128", "arch_3_00",
++    "scv", "darn", "ieee128", "arch_3_00",
+     "htm-nosc", "vcrypto", "tar", "isel",
+     "ebb", "dscr", "htm", "arch_2_07",
+   }
diff --git a/SOURCES/glibc-rh1515114-3.patch b/SOURCES/glibc-rh1515114-3.patch
new file mode 100644
index 0000000..d1408ea
--- /dev/null
+++ b/SOURCES/glibc-rh1515114-3.patch
@@ -0,0 +1,49 @@
+commit df0c40ee3a893238ac11f4c0d876a0c3b49d198d
+Author: Tulio Magno Quites Machado Filho
+Date:   Fri Nov 17 21:15:15 2017 -0200
+
+    powerpc: Update AT_HWCAP2 bits
+
+    Linux commit ID cba6ac4869e45cc93ac5497024d1d49576e82666 reserved a new
+    bit for a scenario where transactional memory is available, but the
+    suspended state is disabled.
+
+    	* sysdeps/powerpc/bits/hwcap.h (PPC_FEATURE2_HTM_NO_SUSPEND): New
+    	macro.
+    	* sysdeps/powerpc/dl-procinfo.c (_dl_powerpc_cap_flags): Add
+    	htm-no-suspend.
+
+    Signed-off-by: Tulio Magno Quites Machado Filho
+
+diff --git a/sysdeps/powerpc/bits/hwcap.h b/sysdeps/powerpc/bits/hwcap.h
+index dfc71c2..0668ca0 100644
+--- a/sysdeps/powerpc/bits/hwcap.h
++++ b/sysdeps/powerpc/bits/hwcap.h
+@@ -72,3 +72,5 @@
+ 						  128-bit */
+ #define PPC_FEATURE2_DARN	   0x00200000 /* darn instruction.  */
+ #define PPC_FEATURE2_SCV	   0x00100000 /* scv syscall.  */
++#define PPC_FEATURE2_HTM_NO_SUSPEND  0x00080000 /* TM without suspended
++						    state.
*/ +diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c +index 4dac16d..55a6e78 100644 +--- a/sysdeps/powerpc/dl-procinfo.c ++++ b/sysdeps/powerpc/dl-procinfo.c +@@ -45,7 +45,7 @@ + #if !defined PROCINFO_DECL && defined SHARED + ._dl_powerpc_cap_flags + #else +-PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][10] ++PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15] + #endif + #ifndef PROCINFO_DECL + = { +@@ -61,7 +61,7 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][10] + "", "", "", "", + "", "", "", "", + "", "", "", "", +- "", "", "", "", ++ "", "", "", "htm-no-suspend", + "scv", "darn", "ieee128", "arch_3_00", + "htm-nosc", "vcrypto", "tar", "isel", + "ebb", "dscr", "htm", "arch_2_07", diff --git a/SOURCES/glibc-rh1516402-1.patch b/SOURCES/glibc-rh1516402-1.patch new file mode 100644 index 0000000..053ed03 --- /dev/null +++ b/SOURCES/glibc-rh1516402-1.patch @@ -0,0 +1,89 @@ +commit 87868c2418fb74357757e3b739ce5b76b17a8929 +Author: Adhemerval Zanella +Date: Wed Jun 25 11:54:31 2014 -0500 + + PowerPC: Align power7 memcpy using VSX to quadword + + This patch changes power7 memcpy to use VSX instructions only when + memory is aligned to quardword. It is to avoid unaligned kernel traps + on non-cacheable memory (for instance, memory-mapped I/O). + +diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S +index 52c2a6b..e540fea 100644 +--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S ++++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S +@@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0) + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + +- andi. 11,3,7 /* Check alignment of DST. */ +- clrlwi 10,4,29 /* Check alignment of SRC. */ ++ andi. 11,3,15 /* Check alignment of DST. */ ++ clrlwi 10,4,28 /* Check alignment of SRC. */ + cmplw cr6,10,11 /* SRC and DST alignments match? */ + mr 12,4 + mr 31,5 +diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S +index bbfd381..58d9b12 100644 +--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S ++++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S +@@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0) + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + +-#ifdef __LITTLE_ENDIAN__ +-/* In little-endian mode, power7 takes an alignment trap on any lxvd2x +- or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy +- loop is only used for quadword aligned copies. */ ++/* Align copies using VSX instructions to quadword. It is to avoid alignment ++ traps when memcpy is used on non-cacheable memory (for instance, memory ++ mapped I/O). */ + andi. 10,3,15 + clrldi 11,4,60 +-#else +- andi. 10,3,7 /* Check alignment of DST. */ +- clrldi 11,4,61 /* Check alignment of SRC. */ +-#endif + cmpld cr6,10,11 /* SRC and DST alignments match? */ + + mr dst,3 +@@ -53,13 +48,9 @@ EALIGN (memcpy, 5, 0) + beq L(aligned_copy) + + mtocrf 0x01,0 +-#ifdef __LITTLE_ENDIAN__ + clrldi 0,0,60 +-#else +- clrldi 0,0,61 +-#endif + +-/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */ ++/* Get the DST and SRC aligned to 16 bytes. */ + 1: + bf 31,2f + lbz 6,0(src) +@@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0) + stw 6,0(dst) + addi dst,dst,4 + 8: +-#ifdef __LITTLE_ENDIAN__ + bf 28,16f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 + 16: +-#endif + subf cnt,0,cnt + + /* Main aligned copy loop. Copies 128 bytes at a time. 
 */
+@@ -298,9 +287,6 @@ L(copy_LE_8):
+ 	.align	4
+ L(copy_GE_32_unaligned):
+ 	clrldi	0,0,60	      /* Number of bytes until the 1st dst quadword.  */
+-#ifndef __LITTLE_ENDIAN__
+-	andi.	10,3,15	      /* Check alignment of DST (against quadwords).  */
+-#endif
+ 	srdi	9,cnt,4	      /* Number of full quadwords remaining.  */
+
+ 	beq	L(copy_GE_32_unaligned_cont)
diff --git a/SOURCES/glibc-rh1516402-2.patch b/SOURCES/glibc-rh1516402-2.patch
new file mode 100644
index 0000000..2455f56
--- /dev/null
+++ b/SOURCES/glibc-rh1516402-2.patch
@@ -0,0 +1,123 @@
+The memmove related fix is dropped in this patch because rhel-7.5
+does not have optimized memmove for POWER7.
+
+commit 63da5cd4a097d089033d980c42254c3356fa723f
+Author: Rajalakshmi Srinivasaraghavan
+Date:   Wed Oct 25 13:13:53 2017 -0200
+
+    powerpc: Replace lxvd2x/stxvd2x with lvx/stvx in P7's memcpy/memmove
+
+    POWER9 DD2.1 and earlier has an issue where some cache inhibited
+    vector load traps to the kernel, causing a performance degradation.  To
+    handle this in memcpy and memmove, lvx/stvx is used for aligned
+    addresses instead of lxvd2x/stxvd2x.
+
+    Reference: https://patchwork.ozlabs.org/patch/814059/
+
+    	* sysdeps/powerpc/powerpc64/power7/memcpy.S: Replace
+    	lxvd2x/stxvd2x with lvx/stvx.
+    	* sysdeps/powerpc/powerpc64/power7/memmove.S: Likewise.
+
+    Reviewed-by: Tulio Magno Quites Machado Filho
+    Reviewed-by: Adhemerval Zanella
+
+diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
+index 1ccbc2e..a7cdf8b 100644
+--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
++++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
+@@ -91,63 +91,63 @@ L(aligned_copy):
+ 	srdi	12,cnt,7
+ 	cmpdi	12,0
+ 	beq	L(aligned_tail)
+-	lxvd2x	6,0,src
+-	lxvd2x	7,src,6
++	lvx	6,0,src
++	lvx	7,src,6
+ 	mtctr	12
+ 	b	L(aligned_128loop)
+
+ 	.align	4
+ L(aligned_128head):
+ 	/* for the 2nd + iteration of this loop.
*/ +- lxvd2x 6,0,src +- lxvd2x 7,src,6 ++ lvx 6,0,src ++ lvx 7,src,6 + L(aligned_128loop): +- lxvd2x 8,src,7 +- lxvd2x 9,src,8 +- stxvd2x 6,0,dst ++ lvx 8,src,7 ++ lvx 9,src,8 ++ stvx 6,0,dst + addi src,src,64 +- stxvd2x 7,dst,6 +- stxvd2x 8,dst,7 +- stxvd2x 9,dst,8 +- lxvd2x 6,0,src +- lxvd2x 7,src,6 ++ stvx 7,dst,6 ++ stvx 8,dst,7 ++ stvx 9,dst,8 ++ lvx 6,0,src ++ lvx 7,src,6 + addi dst,dst,64 +- lxvd2x 8,src,7 +- lxvd2x 9,src,8 ++ lvx 8,src,7 ++ lvx 9,src,8 + addi src,src,64 +- stxvd2x 6,0,dst +- stxvd2x 7,dst,6 +- stxvd2x 8,dst,7 +- stxvd2x 9,dst,8 ++ stvx 6,0,dst ++ stvx 7,dst,6 ++ stvx 8,dst,7 ++ stvx 9,dst,8 + addi dst,dst,64 + bdnz L(aligned_128head) + + L(aligned_tail): + mtocrf 0x01,cnt + bf 25,32f +- lxvd2x 6,0,src +- lxvd2x 7,src,6 +- lxvd2x 8,src,7 +- lxvd2x 9,src,8 ++ lvx 6,0,src ++ lvx 7,src,6 ++ lvx 8,src,7 ++ lvx 9,src,8 + addi src,src,64 +- stxvd2x 6,0,dst +- stxvd2x 7,dst,6 +- stxvd2x 8,dst,7 +- stxvd2x 9,dst,8 ++ stvx 6,0,dst ++ stvx 7,dst,6 ++ stvx 8,dst,7 ++ stvx 9,dst,8 + addi dst,dst,64 + 32: + bf 26,16f +- lxvd2x 6,0,src +- lxvd2x 7,src,6 ++ lvx 6,0,src ++ lvx 7,src,6 + addi src,src,32 +- stxvd2x 6,0,dst +- stxvd2x 7,dst,6 ++ stvx 6,0,dst ++ stvx 7,dst,6 + addi dst,dst,32 + 16: + bf 27,8f +- lxvd2x 6,0,src ++ lvx 6,0,src + addi src,src,16 +- stxvd2x 6,0,dst ++ stvx 6,0,dst + addi dst,dst,16 + 8: + bf 28,4f diff --git a/SPECS/glibc.spec b/SPECS/glibc.spec index 1d01f86..0ff2079 100644 --- a/SPECS/glibc.spec +++ b/SPECS/glibc.spec @@ -1,6 +1,6 @@ %define glibcsrcdir glibc-2.17-c758a686 %define glibcversion 2.17 -%define glibcrelease 196%{?dist} +%define glibcrelease 196%{?dist}.2 ############################################################################## # We support the following options: # --with/--without, @@ -1099,6 +1099,17 @@ Patch1858: glibc-rh1457177-2.patch Patch1859: glibc-rh1457177-3.patch Patch1860: glibc-rh1457177-4.patch +Patch1861: glibc-rh1504969.patch + +# RHBZ #1515114: Pegas1.0 - Update HWCAP bits for POWER9 DD2.1 +Patch1862: glibc-rh1515114-1.patch +Patch1863: glibc-rh1515114-2.patch +Patch1864: glibc-rh1515114-3.patch + +# RHBZ #1516402: Pegas1.0 - Workaround performance regressions on VSX loads on POWER9 DD2.1 +Patch1865: glibc-rh1516402-1.patch +Patch1866: glibc-rh1516402-2.patch + ############################################################################## # # Patches submitted, but not yet approved upstream. @@ -2132,6 +2143,12 @@ cp %{_sourcedir}/syscall-names.list sysdeps/unix/sysv/linux/ %patch1858 -p1 %patch1859 -p1 %patch1860 -p1 +%patch1861 -p1 +%patch1862 -p1 +%patch1863 -p1 +%patch1864 -p1 +%patch1865 -p1 +%patch1866 -p1 ############################################################################## # %%prep - Additional prep required... @@ -3282,6 +3299,13 @@ rm -f *.filelist* %endif %changelog +* Wed Nov 22 2017 Carlos O'Donell - 2.17-196.2 +- Update HWCAP bits for IBM POWER9 DD2.1 (#1515114) +- Improve memcpy performance for POWER9 DD2.1 (#1516402) + +* Tue Nov 14 2017 Carlos O'Donell - 2.17-196.1 +- x86-64: Use XSAVE/XSAVEC in the ld.so trampoline (#1513070) + * Fri Jun 16 2017 Florian Weimer - 2.17-196 - Avoid large allocas in the dynamic linker (#1452721)
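
For reference only (not part of the backported patches or the spec), a minimal stand-alone C sketch of how the save-area size used by the new _dl_runtime_resolve_xsave/_dl_runtime_resolve_xsavec trampolines is derived from CPUID leaf 0xD, assuming GCC's <cpuid.h> on x86-64.  STATE_SAVE_OFFSET and ALIGN_UP are redefined here purely for illustration and mirror the values added in sysdeps/x86/cpu-features.h; the additional compacted-size computation that the patch performs for XSAVEC from the per-component sub-leaves is omitted.

/* Illustrative only: mirrors the cpu-features.c logic in the patch above.
   Build with gcc on x86-64.  */
#include <cpuid.h>
#include <stdio.h>

/* Matches STATE_SAVE_OFFSET from the patch: 8 bytes each for RCX, RDX,
   RSI, RDI, R8, R9 and RAX, padded out to 64 bytes.  */
#define STATE_SAVE_OFFSET (8 * 7 + 8)
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  if (__get_cpuid_max (0, NULL) < 0xd)
    return 1;		/* No XSAVE enumeration available.  */

  /* CPUID leaf 0xD, sub-leaf 0: EBX is the XSAVE area size for the
     feature set currently enabled in XCR0.  The patch stores this,
     rounded up, in cpu_features->xsave_state_size.  */
  __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
  unsigned long int size = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

  /* CPUID leaf 0xD, sub-leaf 1: EAX bit 1 advertises XSAVEC, the
     compacted save form the patch prefers when it is present.  */
  __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
  int have_xsavec = (eax & (1 << 1)) != 0;

  printf ("_dl_runtime_resolve save area: %lu bytes, xsavec: %s\n",
	  size, have_xsavec ? "yes" : "no");
  return 0;
}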