diff --git a/SOURCES/glibc-rh1845098-1.patch b/SOURCES/glibc-rh1845098-1.patch new file mode 100644 index 0000000..463697d --- /dev/null +++ b/SOURCES/glibc-rh1845098-1.patch @@ -0,0 +1,36 @@ +commit ae725e3f9cb4e1eb825ebe1d55241c98c2ea32f1 +Author: Tulio Magno Quites Machado Filho +Date: Mon Jun 15 11:15:57 2020 -0300 + + powerpc: Add new hwcap values + + Linux commit ID ee988c11acf6f9464b7b44e9a091bf6afb3b3a49 reserved 2 new + bits in AT_HWCAP2: + - PPC_FEATURE2_ARCH_3_1 indicates the availability of the POWER ISA + 3.1; + - PPC_FEATURE2_MMA indicates the availability of the Matrix-Multiply + Assist facility. + +diff --git a/sysdeps/powerpc/bits/hwcap.h b/sysdeps/powerpc/bits/hwcap.h +index b35f5eddc1d309bb..f2853a11df16f63c 100644 +--- a/sysdeps/powerpc/bits/hwcap.h ++++ b/sysdeps/powerpc/bits/hwcap.h +@@ -74,3 +74,5 @@ + #define PPC_FEATURE2_SCV 0x00100000 /* scv syscall. */ + #define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 /* TM without suspended + state. */ ++#define PPC_FEATURE2_ARCH_3_1 0x00040000 /* ISA 3.1. */ ++#define PPC_FEATURE2_MMA 0x00020000 /* Matrix-Multiply Assist. */ +diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c +index 35cac2e249916507..4555d4548554e788 100644 +--- a/sysdeps/powerpc/dl-procinfo.c ++++ b/sysdeps/powerpc/dl-procinfo.c +@@ -77,7 +77,7 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15] + "", "", "", "", + "", "", "", "", + "", "", "", "", +- "", "", "", "htm-no-suspend", ++ "", "mma", "arch_3_1", "htm-no-suspend", + "scv", "darn", "ieee128", "arch_3_00", + "htm-nosc", "vcrypto", "tar", "isel", + "ebb", "dscr", "htm", "arch_2_07", diff --git a/SOURCES/glibc-rh1845098-2.patch b/SOURCES/glibc-rh1845098-2.patch new file mode 100644 index 0000000..d947c3b --- /dev/null +++ b/SOURCES/glibc-rh1845098-2.patch @@ -0,0 +1,52 @@ +This patch is based on the following commit, with the new Implies +files for POWER10, and the preconfigure changes removed. 
+ +commit d2ba3677da7a785556fcd708404d8e049b1c063b +Author: Tulio Magno Quites Machado Filho +Date: Wed Jun 24 18:04:41 2020 -0300 + + powerpc: Add support for POWER10 + + 1. Add the directories to hold POWER10 files. + + 2. Add support to select POWER10 libraries based on AT_PLATFORM. + + 3. Let submachine=power10 be set automatically. + + +diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h +index 3803379ab2303658..3558d6a83ca2a988 100644 +--- a/sysdeps/powerpc/dl-procinfo.h ++++ b/sysdeps/powerpc/dl-procinfo.h +@@ -37,7 +37,7 @@ + #define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \ + + PPC_FEATURE_HAS_DFP) + +-#define _DL_PLATFORMS_COUNT 15 ++#define _DL_PLATFORMS_COUNT 16 + + #define _DL_FIRST_PLATFORM 32 + /* Mask to filter out platforms. */ +@@ -60,6 +60,7 @@ + #define PPC_PLATFORM_PPC476 12 + #define PPC_PLATFORM_POWER8 13 + #define PPC_PLATFORM_POWER9 14 ++#define PPC_PLATFORM_POWER10 15 + + static inline const char * + __attribute__ ((unused)) +@@ -91,6 +92,14 @@ _dl_string_platform (const char *str) + str += 5; + switch (*str) + { ++ case '1': ++ if (str[1] == '0') ++ { ++ ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10; ++ } ++ else ++ return -1; ++ break; + case '4': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER4; + break; diff --git a/SOURCES/glibc-rh1845098-3.patch b/SOURCES/glibc-rh1845098-3.patch new file mode 100644 index 0000000..feefa6f --- /dev/null +++ b/SOURCES/glibc-rh1845098-3.patch @@ -0,0 +1,26 @@ +commit f6add169c89bbdd139a2eb845686127ead5799cd +Author: Tulio Magno Quites Machado Filho +Date: Tue Jul 21 18:01:39 2020 -0300 + + powerpc: Fix POWER10 selection + + Add a line that was missing from a previous commit. + Without increasing str, the null-byte is not validated, and + _dl_string_platform returns -1. 
+ + Fixes: d2ba3677da7a ("powerpc: Add support for POWER10") + + Reviewed-by: Carlos O'Donell + +diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h +index 3558d6a83ca2a988..3593e9661b06dcff 100644 +--- a/sysdeps/powerpc/dl-procinfo.h ++++ b/sysdeps/powerpc/dl-procinfo.h +@@ -96,6 +96,7 @@ _dl_string_platform (const char *str) + if (str[1] == '0') + { + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10; ++ str++; + } + else + return -1; diff --git a/SOURCES/glibc-rh1871387-1.patch b/SOURCES/glibc-rh1871387-1.patch new file mode 100644 index 0000000..f37c484 --- /dev/null +++ b/SOURCES/glibc-rh1871387-1.patch @@ -0,0 +1,254 @@ +commit 7793ad7a2c00434398aa8bb3f5932e2fdf43536a +Author: Rajalakshmi Srinivasaraghavan +Date: Thu Aug 16 12:12:02 2018 +0530 + + powerpc: Rearrange little endian specific files + + This patch moves little endian specific POWER9 optimization files to + sysdeps/powerpc/powerpc64/le and creates POWER9 ifunc functions + only for little endian. + +diff --git a/sysdeps/powerpc/powerpc64/power9/strcmp.S b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S +similarity index 93% +rename from sysdeps/powerpc/powerpc64/power9/strcmp.S +rename to sysdeps/powerpc/powerpc64/le/power9/strcmp.S +index 98243a9d51e1577f..bf057f598ef2aa55 100644 +--- a/sysdeps/powerpc/powerpc64/power9/strcmp.S ++++ b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S +@@ -15,7 +15,6 @@ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ +-#ifdef __LITTLE_ENDIAN__ + #include + + #ifndef STRCMP +@@ -30,16 +29,16 @@ + as in POWER8 patch and uses vectorised loops after that. */ + + /* TODO: Change this to actual instructions when minimum binutils is upgraded +- to 2.27. Macros are defined below for these newer instructions in order ++ to 2.27. Macros are defined below for these newer instructions in order + to maintain compatibility. 
*/ +-# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) ++#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) + +-# define VEXTUBRX(t,a,b) .long (0x1000070d \ ++#define VEXTUBRX(t,a,b) .long (0x1000070d \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +-# define VCMPNEZB(t,a,b) .long (0x10000507 \ ++#define VCMPNEZB(t,a,b) .long (0x10000507 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +@@ -48,7 +47,7 @@ + reg1: Vector to hold next 16 bytes. + reg2: Address to read from. + reg3: Permute control vector. */ +-# define GET16BYTES(reg1, reg2, reg3) \ ++#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vperm v8, v2, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ +@@ -263,6 +262,3 @@ L(pagecross_nullfound): + b L(pagecross_retdiff) + END (STRCMP) + libc_hidden_builtin_def (strcmp) +-#else +-#include +-#endif +diff --git a/sysdeps/powerpc/powerpc64/power9/strncmp.S b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S +similarity index 95% +rename from sysdeps/powerpc/powerpc64/power9/strncmp.S +rename to sysdeps/powerpc/powerpc64/le/power9/strncmp.S +index 40be98ff45c9f485..93a79343c6be1099 100644 +--- a/sysdeps/powerpc/powerpc64/power9/strncmp.S ++++ b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S +@@ -15,7 +15,6 @@ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ +-#ifdef __LITTLE_ENDIAN__ + #include + + /* Implements the function +@@ -31,16 +30,16 @@ + #endif + + /* TODO: Change this to actual instructions when minimum binutils is upgraded +- to 2.27. Macros are defined below for these newer instructions in order ++ to 2.27. Macros are defined below for these newer instructions in order + to maintain compatibility. 
*/ +-# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) ++#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) + +-# define VEXTUBRX(t,a,b) .long (0x1000070d \ ++#define VEXTUBRX(t,a,b) .long (0x1000070d \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +-# define VCMPNEZB(t,a,b) .long (0x10000507 \ ++#define VCMPNEZB(t,a,b) .long (0x10000507 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +@@ -49,7 +48,7 @@ + reg1: Vector to hold next 16 bytes. + reg2: Address to read from. + reg3: Permute control vector. */ +-# define GET16BYTES(reg1, reg2, reg3) \ ++#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vperm v8, v2, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ +@@ -374,6 +373,3 @@ L(byte_ne_3): + b L(byte_ne_1) + END(STRNCMP) + libc_hidden_builtin_def(strncmp) +-#else +-#include +-#endif +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 4df6b45c4c1c495a..963ea84dbfa98c74 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -12,7 +12,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ + strnlen-power8 strnlen-power7 strnlen-ppc64 \ + strcasecmp-power7 strcasecmp_l-power7 \ + strncase-power7 strncase_l-power7 \ +- strncmp-power9 strncmp-power8 strncmp-power7 \ ++ strncmp-power8 strncmp-power7 \ + strncmp-power4 strncmp-ppc64 \ + strchr-power8 strchr-power7 strchr-ppc64 \ + strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \ +@@ -22,7 +22,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ + strncat-power8 strncat-power7 strncat-ppc64 \ + strncpy-power7 strncpy-ppc64 \ + stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \ +- strcmp-power9 strcmp-power8 strcmp-power7 strcmp-ppc64 \ ++ strcmp-power8 strcmp-power7 strcmp-ppc64 \ + strcat-power8 strcat-power7 strcat-ppc64 \ + memmove-power7 
memmove-ppc64 wordcopy-ppc64 bcopy-ppc64 \ + strncpy-power8 strstr-power7 strstr-ppc64 \ +@@ -31,6 +31,9 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ + strcasecmp-ppc64 strcasecmp-power8 strncase-ppc64 \ + strncase-power8 + ++ifneq (,$(filter %le,$(config-machine))) ++sysdep_routines += strcmp-power9 strncmp-power9 ++endif + CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops + CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops + endif +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index 38a21e478e2527f5..1d374f2ae48165bd 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -112,8 +112,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. */ + IFUNC_IMPL (i, name, strncmp, ++#ifdef __LITTLE_ENDIAN__ + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00, + __strncmp_power9) ++#endif + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncmp_power8) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_HAS_VSX, +@@ -337,9 +339,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c. 
*/ + IFUNC_IMPL (i, name, strcmp, ++#ifdef __LITTLE_ENDIAN__ + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap2 & PPC_FEATURE2_ARCH_3_00, + __strcmp_power9) ++#endif + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcmp_power8) +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S +index 8b569d38be783316..545e6cee91e61311 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S +@@ -16,11 +16,11 @@ + License along with the GNU C Library; if not, see + . */ + +-#if IS_IN (libc) ++#if defined __LITTLE_ENDIAN__ && IS_IN (libc) + #define STRCMP __strcmp_power9 + + #undef libc_hidden_builtin_def + #define libc_hidden_builtin_def(name) + +-#include ++#include + #endif +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c +index b669053166771cae..2422c8d72cfdec83 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c +@@ -27,13 +27,17 @@ + extern __typeof (strcmp) __strcmp_ppc attribute_hidden; + extern __typeof (strcmp) __strcmp_power7 attribute_hidden; + extern __typeof (strcmp) __strcmp_power8 attribute_hidden; ++# ifdef __LITTLE_ENDIAN__ + extern __typeof (strcmp) __strcmp_power9 attribute_hidden; ++# endif + + # undef strcmp + + libc_ifunc_redirected (__redirect_strcmp, strcmp, ++# ifdef __LITTLE_ENDIAN__ + (hwcap2 & PPC_FEATURE2_ARCH_3_00) + ? __strcmp_power9 : ++# endif + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? 
__strcmp_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S +index 3356f7252771a043..c6f0128379c497b4 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S ++++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S +@@ -15,11 +15,11 @@ + License along with the GNU C Library; if not, see + . */ + +-#if IS_IN (libc) ++#if defined __LITTLE_ENDIAN__ && IS_IN (libc) + #define STRNCMP __strncmp_power9 + + #undef libc_hidden_builtin_def + #define libc_hidden_builtin_def(name) + +-#include ++#include + #endif +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +index c4a40d1ec7245a3b..9c887ee18186f070 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +@@ -29,14 +29,18 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden; + extern __typeof (strncmp) __strncmp_power4 attribute_hidden; + extern __typeof (strncmp) __strncmp_power7 attribute_hidden; + extern __typeof (strncmp) __strncmp_power8 attribute_hidden; ++# ifdef __LITTLE_ENDIAN__ + extern __typeof (strncmp) __strncmp_power9 attribute_hidden; ++# endif + # undef strncmp + + /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ + libc_ifunc_redirected (__redirect_strncmp, strncmp, ++# ifdef __LITTLE_ENDIAN__ + (hwcap2 & PPC_FEATURE2_ARCH_3_00) + ? __strncmp_power9 : ++# endif + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ?
__strncmp_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) diff --git a/SOURCES/glibc-rh1871387-2.patch b/SOURCES/glibc-rh1871387-2.patch new file mode 100644 index 0000000..7f0cc39 --- /dev/null +++ b/SOURCES/glibc-rh1871387-2.patch @@ -0,0 +1,245 @@ +commit 39037048502d52ab6422c18f2d178d6228d2c7b9 +Author: Anton Blanchard via Libc-alpha +Date: Thu May 14 09:00:26 2020 +1000 + + powerpc: Optimized strcpy for POWER9 + + This version uses VSX store vector with length instructions and is + significantly faster on small strings and relatively unaligned large + strings, compared to the POWER8 version. A few examples: + + __strcpy_power9 __strcpy_power8 + Length 16, alignments in bytes 0/ 0: 2.52454 4.62695 + Length 412, alignments in bytes 4/ 0: 11.6 22.9185 + +diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcpy.S b/sysdeps/powerpc/powerpc64/le/power9/strcpy.S +new file mode 100644 +index 0000000000000000..5749228054667b2d +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/le/power9/strcpy.S +@@ -0,0 +1,144 @@ ++/* Optimized strcpy implementation for PowerPC64/POWER9. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#ifndef STRCPY ++# define STRCPY strcpy ++#endif ++ ++/* Implements the function ++ ++ char * [r3] strcpy (char *dest [r3], const char *src [r4]) ++ ++ The implementation can load bytes past a null terminator, but only ++ up to the next 16B boundary, so it never crosses a page. */ ++ ++.machine power9 ++ENTRY_TOCLESS (STRCPY, 4) ++ CALL_MCOUNT 2 ++ ++ /* NULL string optimisation */ ++ lbz r0,0(r4) ++ stb r0,0(r3) ++ cmpwi r0,0 ++ beqlr ++ ++ addi r4,r4,1 ++ addi r11,r3,1 ++ ++ vspltisb v18,0 /* Zeroes in v18 */ ++ ++ neg r5,r4 ++ rldicl r9,r5,0,60 /* How many bytes to get source 16B aligned? */ ++ ++ /* Get source 16B aligned */ ++ lvx v0,0,r4 ++ lvsr v1,0,r4 ++ vperm v0,v18,v0,v1 ++ ++ vcmpequb v6,v0,v18 /* 0xff if byte is NULL, 0x00 otherwise */ ++ vctzlsbb r8,v6 /* Number of trailing zeroes */ ++ addi r8,r8,1 /* Add null terminator */ ++ ++ /* r8 = bytes including null ++ r9 = bytes to get source 16B aligned ++ if r8 > r9 ++ no null, copy r9 bytes ++ else ++ there is a null, copy r8 bytes and return. */ ++ cmpd r8,r9 ++ bgt L(no_null) ++ ++ sldi r10,r8,56 /* stxvl wants size in top 8 bits */ ++ stxvl 32+v0,r11,r10 /* Partial store */ ++ ++ blr ++ ++L(no_null): ++ sldi r10,r9,56 /* stxvl wants size in top 8 bits */ ++ stxvl 32+v0,r11,r10 /* Partial store */ ++ ++ add r4,r4,r9 ++ add r11,r11,r9 ++ ++L(loop): ++ lxv 32+v0,0(r4) ++ vcmpequb. v6,v0,v18 /* Any zero bytes? */ ++ bne cr6,L(tail1) ++ ++ lxv 32+v1,16(r4) ++ vcmpequb. v6,v1,v18 /* Any zero bytes? */ ++ bne cr6,L(tail2) ++ ++ lxv 32+v2,32(r4) ++ vcmpequb. v6,v2,v18 /* Any zero bytes? */ ++ bne cr6,L(tail3) ++ ++ lxv 32+v3,48(r4) ++ vcmpequb. v6,v3,v18 /* Any zero bytes? 
*/ ++ bne cr6,L(tail4) ++ ++ stxv 32+v0,0(r11) ++ stxv 32+v1,16(r11) ++ stxv 32+v2,32(r11) ++ stxv 32+v3,48(r11) ++ ++ addi r4,r4,64 ++ addi r11,r11,64 ++ ++ b L(loop) ++ ++L(tail1): ++ vctzlsbb r8,v6 ++ addi r8,r8,1 ++ sldi r9,r8,56 /* stxvl wants size in top 8 bits */ ++ stxvl 32+v0,r11,r9 ++ blr ++ ++L(tail2): ++ stxv 32+v0,0(r11) ++ vctzlsbb r8,v6 /* Number of trailing zeroes */ ++ addi r8,r8,1 /* Add null terminator */ ++ sldi r10,r8,56 /* stxvl wants size in top 8 bits */ ++ addi r11,r11,16 ++ stxvl 32+v1,r11,r10 /* Partial store */ ++ blr ++ ++L(tail3): ++ stxv 32+v0,0(r11) ++ stxv 32+v1,16(r11) ++ vctzlsbb r8,v6 /* Number of trailing zeroes */ ++ addi r8,r8,1 /* Add null terminator */ ++ sldi r10,r8,56 /* stxvl wants size in top 8 bits */ ++ addi r11,r11,32 ++ stxvl 32+v2,r11,r10 /* Partial store */ ++ blr ++ ++L(tail4): ++ stxv 32+v0,0(r11) ++ stxv 32+v1,16(r11) ++ stxv 32+v2,32(r11) ++ vctzlsbb r8,v6 /* Number of trailing zeroes */ ++ addi r8,r8,1 /* Add null terminator */ ++ sldi r10,r8,56 /* stxvl wants size in top 8 bits */ ++ addi r11,r11,48 ++ stxvl 32+v3,r11,r10 /* Partial store */ ++ blr ++END (STRCPY) ++libc_hidden_builtin_def (strcpy) +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 963ea84dbfa98c74..17057bcbd694a710 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -32,7 +32,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ + strncase-power8 + + ifneq (,$(filter %le,$(config-machine))) +-sysdep_routines += strcmp-power9 strncmp-power9 ++sysdep_routines += strcmp-power9 strncmp-power9 strcpy-power9 + endif + CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops + CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index 1d374f2ae48165bd..2857fa8f36599afd 
100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -85,6 +85,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcpy.c. */ + IFUNC_IMPL (i, name, strcpy, ++#ifdef __LITTLE_ENDIAN__ ++ IFUNC_IMPL_ADD (array, i, strcpy, hwcap2 & PPC_FEATURE2_ARCH_3_00, ++ __strcpy_power9) ++#endif + IFUNC_IMPL_ADD (array, i, strcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcpy_power8) + IFUNC_IMPL_ADD (array, i, strcpy, hwcap & PPC_FEATURE_HAS_VSX, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcpy-power9.S b/sysdeps/powerpc/powerpc64/multiarch/strcpy-power9.S +new file mode 100644 +index 0000000000000000..d22aa0a8d690cad7 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcpy-power9.S +@@ -0,0 +1,26 @@ ++/* Optimized strcpy implementation for POWER9/PPC64. ++ Copyright (C) 2016-2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#if defined __LITTLE_ENDIAN__ && IS_IN (libc) ++#define STRCPY __strcpy_power9 ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#include ++#endif +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcpy.c b/sysdeps/powerpc/powerpc64/multiarch/strcpy.c +index b18a92a62a526d9c..88826392be4bdf48 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strcpy.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcpy.c +@@ -25,9 +25,16 @@ + extern __typeof (strcpy) __strcpy_ppc attribute_hidden; + extern __typeof (strcpy) __strcpy_power7 attribute_hidden; + extern __typeof (strcpy) __strcpy_power8 attribute_hidden; ++# ifdef __LITTLE_ENDIAN__ ++extern __typeof (strcpy) __strcpy_power9 attribute_hidden; ++# endif + #undef strcpy + + libc_ifunc_redirected (__redirect_strcpy, strcpy, ++# ifdef __LITTLE_ENDIAN__ ++ (hwcap2 & PPC_FEATURE2_ARCH_3_00) ++ ? __strcpy_power9 : ++# endif + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) diff --git a/SOURCES/glibc-rh1871387-3.patch b/SOURCES/glibc-rh1871387-3.patch new file mode 100644 index 0000000..5ccacc9 --- /dev/null +++ b/SOURCES/glibc-rh1871387-3.patch @@ -0,0 +1,270 @@ +commit aa70d0563256b8ea053203177f756bca33b5cf37 +Author: Anton Blanchard via Libc-alpha +Date: Thu May 14 09:08:35 2020 +1000 + + powerpc: Optimized stpcpy for POWER9 + + Add stpcpy support to the POWER9 strcpy. This is up to 40% faster on + small strings and up to 90% faster on long relatively unaligned strings, + compared to the POWER8 version. 
A few examples: + + __stpcpy_power9 __stpcpy_power8 + Length 20, alignments in bytes 4/ 4: 2.58246 4.8788 + Length 1024, alignments in bytes 1/ 6: 24.8186 47.8528 + +diff --git a/sysdeps/powerpc/powerpc64/le/power9/stpcpy.S b/sysdeps/powerpc/powerpc64/le/power9/stpcpy.S +new file mode 100644 +index 0000000000000000..44425cb1e80ea198 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/le/power9/stpcpy.S +@@ -0,0 +1,24 @@ ++/* Optimized stpcpy implementation for PowerPC64/POWER9. ++ Copyright (C) 2015-2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define USE_AS_STPCPY ++#include ++ ++weak_alias (__stpcpy, stpcpy) ++libc_hidden_def (__stpcpy) ++libc_hidden_builtin_def (stpcpy) +diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcpy.S b/sysdeps/powerpc/powerpc64/le/power9/strcpy.S +index 5749228054667b2d..ce8f50329177fd06 100644 +--- a/sysdeps/powerpc/powerpc64/le/power9/strcpy.S ++++ b/sysdeps/powerpc/powerpc64/le/power9/strcpy.S +@@ -18,19 +18,35 @@ + + #include + +-#ifndef STRCPY +-# define STRCPY strcpy +-#endif ++#ifdef USE_AS_STPCPY ++# ifndef STPCPY ++# define FUNC_NAME __stpcpy ++# else ++# define FUNC_NAME STPCPY ++# endif ++#else ++# ifndef STRCPY ++# define FUNC_NAME strcpy ++# else ++# define FUNC_NAME STRCPY ++# endif ++#endif /* !USE_AS_STPCPY */ + + /* Implements the function + + char * [r3] strcpy (char *dest [r3], const char *src [r4]) + ++ or ++ ++ char * [r3] stpcpy (char *dest [r3], const char *src [r4]) ++ ++ if USE_AS_STPCPY is defined. ++ + The implementation can load bytes past a null terminator, but only + up to the next 16B boundary, so it never crosses a page. */ + + .machine power9 +-ENTRY_TOCLESS (STRCPY, 4) ++ENTRY_TOCLESS (FUNC_NAME, 4) + CALL_MCOUNT 2 + + /* NULL string optimisation */ +@@ -53,8 +69,8 @@ ENTRY_TOCLESS (STRCPY, 4) + vperm v0,v18,v0,v1 + + vcmpequb v6,v0,v18 /* 0xff if byte is NULL, 0x00 otherwise */ +- vctzlsbb r8,v6 /* Number of trailing zeroes */ +- addi r8,r8,1 /* Add null terminator */ ++ vctzlsbb r7,v6 /* Number of trailing zeroes */ ++ addi r8,r7,1 /* Add null terminator */ + + /* r8 = bytes including null + r9 = bytes to get source 16B aligned +@@ -68,6 +84,11 @@ ENTRY_TOCLESS (STRCPY, 4) + sldi r10,r8,56 /* stxvl wants size in top 8 bits */ + stxvl 32+v0,r11,r10 /* Partial store */ + ++#ifdef USE_AS_STPCPY ++ /* stpcpy returns the dest address plus the size not counting the ++ final '\0'. 
*/ ++ add r3,r11,r7 ++#endif + blr + + L(no_null): +@@ -106,28 +127,43 @@ L(loop): + + L(tail1): + vctzlsbb r8,v6 +- addi r8,r8,1 +- sldi r9,r8,56 /* stxvl wants size in top 8 bits */ ++ addi r9,r8,1 ++ sldi r9,r9,56 /* stxvl wants size in top 8 bits */ + stxvl 32+v0,r11,r9 ++#ifdef USE_AS_STPCPY ++ /* stpcpy returns the dest address plus the size not counting the ++ final '\0'. */ ++ add r3,r11,r8 ++#endif + blr + + L(tail2): + stxv 32+v0,0(r11) + vctzlsbb r8,v6 /* Number of trailing zeroes */ +- addi r8,r8,1 /* Add null terminator */ +- sldi r10,r8,56 /* stxvl wants size in top 8 bits */ ++ addi r9,r8,1 /* Add null terminator */ ++ sldi r10,r9,56 /* stxvl wants size in top 8 bits */ + addi r11,r11,16 + stxvl 32+v1,r11,r10 /* Partial store */ ++#ifdef USE_AS_STPCPY ++ /* stpcpy returns the dest address plus the size not counting the ++ final '\0'. */ ++ add r3,r11,r8 ++#endif + blr + + L(tail3): + stxv 32+v0,0(r11) + stxv 32+v1,16(r11) + vctzlsbb r8,v6 /* Number of trailing zeroes */ +- addi r8,r8,1 /* Add null terminator */ +- sldi r10,r8,56 /* stxvl wants size in top 8 bits */ ++ addi r9,r8,1 /* Add null terminator */ ++ sldi r10,r9,56 /* stxvl wants size in top 8 bits */ + addi r11,r11,32 + stxvl 32+v2,r11,r10 /* Partial store */ ++#ifdef USE_AS_STPCPY ++ /* stpcpy returns the dest address plus the size not counting the ++ final '\0'. */ ++ add r3,r11,r8 ++#endif + blr + + L(tail4): +@@ -135,10 +171,17 @@ L(tail4): + stxv 32+v1,16(r11) + stxv 32+v2,32(r11) + vctzlsbb r8,v6 /* Number of trailing zeroes */ +- addi r8,r8,1 /* Add null terminator */ +- sldi r10,r8,56 /* stxvl wants size in top 8 bits */ ++ addi r9,r8,1 /* Add null terminator */ ++ sldi r10,r9,56 /* stxvl wants size in top 8 bits */ + addi r11,r11,48 + stxvl 32+v3,r11,r10 /* Partial store */ ++#ifdef USE_AS_STPCPY ++ /* stpcpy returns the dest address plus the size not counting the ++ final '\0'. 
*/ ++ add r3,r11,r8 ++#endif + blr +-END (STRCPY) ++END (FUNC_NAME) ++#ifndef USE_AS_STPCPY + libc_hidden_builtin_def (strcpy) ++#endif +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 17057bcbd694a710..cada6b19bf3c8fab 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -32,7 +32,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ + strncase-power8 + + ifneq (,$(filter %le,$(config-machine))) +-sysdep_routines += strcmp-power9 strncmp-power9 strcpy-power9 ++sysdep_routines += strcmp-power9 strncmp-power9 strcpy-power9 stpcpy-power9 + endif + CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops + CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index 2857fa8f36599afd..b0abc6b61dc15f19 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -98,6 +98,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/stpcpy.c. */ + IFUNC_IMPL (i, name, stpcpy, ++#ifdef __LITTLE_ENDIAN__ ++ IFUNC_IMPL_ADD (array, i, stpcpy, hwcap2 & PPC_FEATURE2_ARCH_3_00, ++ __stpcpy_power9) ++#endif + IFUNC_IMPL_ADD (array, i, stpcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __stpcpy_power8) + IFUNC_IMPL_ADD (array, i, stpcpy, hwcap & PPC_FEATURE_HAS_VSX, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power9.S b/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power9.S +new file mode 100644 +index 0000000000000000..a728d49fd2575e00 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power9.S +@@ -0,0 +1,24 @@ ++/* Optimized stpcpy implementation for POWER9/PPC64. ++ Copyright (C) 2015-2020 Free Software Foundation, Inc.
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define STPCPY __stpcpy_power9 ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c b/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c +index 34c889644133d757..8ce58572e0f27c7f 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c +@@ -26,13 +26,20 @@ + extern __typeof (__stpcpy) __stpcpy_ppc attribute_hidden; + extern __typeof (__stpcpy) __stpcpy_power7 attribute_hidden; + extern __typeof (__stpcpy) __stpcpy_power8 attribute_hidden; ++# ifdef __LITTLE_ENDIAN__ ++extern __typeof (__stpcpy) __stpcpy_power9 attribute_hidden; ++# endif + + libc_ifunc_hidden (__stpcpy, __stpcpy, +- (hwcap2 & PPC_FEATURE2_ARCH_2_07) +- ? __stpcpy_power8 +- : (hwcap & PPC_FEATURE_HAS_VSX) +- ? __stpcpy_power7 +- : __stpcpy_ppc); ++# ifdef __LITTLE_ENDIAN__ ++ (hwcap2 & PPC_FEATURE2_ARCH_3_00) ++ ? __stpcpy_power9 : ++# endif ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __stpcpy_power8 ++ : (hwcap & PPC_FEATURE_HAS_VSX) ++ ? 
__stpcpy_power7 ++ : __stpcpy_ppc); + + weak_alias (__stpcpy, stpcpy) + libc_hidden_def (__stpcpy) diff --git a/SOURCES/glibc-rh1871387-4.patch b/SOURCES/glibc-rh1871387-4.patch new file mode 100644 index 0000000..1161362 --- /dev/null +++ b/SOURCES/glibc-rh1871387-4.patch @@ -0,0 +1,213 @@ +commit 765de945efc5d5602999b2999fe8abdf04881370 +Author: Anton Blanchard +Date: Thu May 14 21:49:16 2020 +1000 + + powerpc: Optimized rawmemchr for POWER9 + + This version uses vector instructions and is up to 60% faster on medium + matches and up to 90% faster on long matches, compared to the POWER7 + version. A few examples: + + __rawmemchr_power9 __rawmemchr_power7 + Length 32, alignment 0: 2.27566 3.77765 + Length 64, alignment 2: 2.46231 3.51064 + Length 1024, alignment 0: 17.3059 32.6678 + +diff --git a/sysdeps/powerpc/powerpc64/le/power9/rawmemchr.S b/sysdeps/powerpc/powerpc64/le/power9/rawmemchr.S +new file mode 100644 +index 0000000000000000..9d0276c9315af5c8 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/le/power9/rawmemchr.S +@@ -0,0 +1,107 @@ ++/* Optimized rawmemchr implementation for PowerPC64/POWER9. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include <sysdep.h> ++ ++#ifndef RAWMEMCHR ++# define RAWMEMCHR __rawmemchr ++#endif ++ ++/* Implements the function ++ ++ void * [r3] rawmemchr (const void *s [r3], int c [r4]) ++ ++ The implementation can load bytes past a matching byte, but only ++ up to the next 16B boundary, so it never crosses a page. */ ++ ++.machine power9 ++ENTRY_TOCLESS (RAWMEMCHR, 4) ++ CALL_MCOUNT 2 ++ ++ xori r5,r4,0xff ++ ++ mtvsrd v18+32,r4 /* matching char in v18 */ ++ mtvsrd v19+32,r5 /* non matching char in v19 */ ++ ++ vspltb v18,v18,7 /* replicate */ ++ vspltb v19,v19,7 /* replicate */ ++ ++ neg r5,r3 ++ rldicl r9,r5,0,60 /* How many bytes to get source 16B aligned? */ ++ ++ /* Align data and fill bytes not loaded with non matching char */ ++ lvx v0,0,r3 ++ lvsr v1,0,r3 ++ vperm v0,v19,v0,v1 ++ ++ vcmpequb. v6,v0,v18 /* 0xff if byte matches, 0x00 otherwise */ ++ beq cr6,L(aligned) ++ ++ vctzlsbb r0,v6 ++ add r3,r3,r0 ++ blr ++ ++L(aligned): ++ add r3,r3,r9 ++ ++L(loop): ++ lxv v0+32,0(r3) ++ vcmpequb. v6,v0,v18 /* 0xff if byte matches, 0x00 otherwise */ ++ bne cr6,L(tail1) ++ ++ lxv v0+32,16(r3) ++ vcmpequb. v6,v0,v18 /* 0xff if byte matches, 0x00 otherwise */ ++ bne cr6,L(tail2) ++ ++ lxv v0+32,32(r3) ++ vcmpequb. v6,v0,v18 /* 0xff if byte matches, 0x00 otherwise */ ++ bne cr6,L(tail3) ++ ++ lxv v0+32,48(r3) ++ vcmpequb. 
v6,v0,v18 /* 0xff if byte matches, 0x00 otherwise */ ++ bne cr6,L(tail4) ++ ++ addi r3,r3,64 ++ b L(loop) ++ ++L(tail1): ++ vctzlsbb r0,v6 ++ add r3,r3,r0 ++ blr ++ ++L(tail2): ++ vctzlsbb r0,v6 ++ add r3,r3,r0 ++ addi r3,r3,16 ++ blr ++ ++L(tail3): ++ vctzlsbb r0,v6 ++ add r3,r3,r0 ++ addi r3,r3,32 ++ blr ++ ++L(tail4): ++ vctzlsbb r0,v6 ++ add r3,r3,r0 ++ addi r3,r3,48 ++ blr ++ ++END (RAWMEMCHR) ++weak_alias (__rawmemchr,rawmemchr) ++libc_hidden_builtin_def (__rawmemchr) +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index cada6b19bf3c8fab..1a8ef5fb73c3b0db 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -32,7 +32,8 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ + strncase-power8 + + ifneq (,$(filter %le,$(config-machine))) +-sysdep_routines += strcmp-power9 strncmp-power9 strcpy-power9 stpcpy-power9 ++sysdep_routines += strcmp-power9 strncmp-power9 strcpy-power9 stpcpy-power9 \ ++ rawmemchr-power9 + endif + CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops + CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index b0abc6b61dc15f19..297935863e44c0e1 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -216,6 +216,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c. 
*/ + IFUNC_IMPL (i, name, rawmemchr, ++#ifdef __LITTLE_ENDIAN__ ++ IFUNC_IMPL_ADD (array, i, rawmemchr, ++ hwcap2 & PPC_FEATURE2_ARCH_3_00, ++ __rawmemchr_power9) ++#endif + IFUNC_IMPL_ADD (array, i, rawmemchr, + hwcap & PPC_FEATURE_HAS_VSX, + __rawmemchr_power7) +diff --git a/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power9.S b/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power9.S +new file mode 100644 +index 0000000000000000..bac0a9090e7a07f8 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power9.S +@@ -0,0 +1,21 @@ ++/* Optimized rawmemchr implementation for PowerPC64/POWER9. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define RAWMEMCHR __rawmemchr_power9 ++ ++#include <sysdeps/powerpc/powerpc64/le/power9/rawmemchr.S> +diff --git a/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c b/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c +index 02bac49b53d52411..2a7ae5a1ed02e556 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c +@@ -24,13 +24,21 @@ + + extern __typeof (__rawmemchr) __rawmemchr_ppc attribute_hidden; + extern __typeof (__rawmemchr) __rawmemchr_power7 attribute_hidden; ++# ifdef __LITTLE_ENDIAN__ ++extern __typeof (__rawmemchr) __rawmemchr_power9 attribute_hidden; ++# endif ++ + # undef __rawmemchr + + /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ + libc_ifunc_redirected (__redirect___rawmemchr, __rawmemchr, +- (hwcap & PPC_FEATURE_HAS_VSX) +- ? __rawmemchr_power7 ++# ifdef __LITTLE_ENDIAN__ ++ (hwcap2 & PPC_FEATURE2_ARCH_3_00) ++ ? __rawmemchr_power9 : ++# endif ++ (hwcap & PPC_FEATURE_HAS_VSX) ++ ? __rawmemchr_power7 + : __rawmemchr_ppc); + + weak_alias (__rawmemchr, rawmemchr) diff --git a/SOURCES/glibc-rh1871387-5.patch b/SOURCES/glibc-rh1871387-5.patch new file mode 100644 index 0000000..8190aed --- /dev/null +++ b/SOURCES/glibc-rh1871387-5.patch @@ -0,0 +1,300 @@ +commit a23bd00f9d810c28d9e83ce1d7cf53968375937d +Author: Paul E. Murphy +Date: Mon May 18 11:16:06 2020 -0500 + + powerpc64le: add optimized strlen for P9 + + This started as a trivial change to Anton's rawmemchr. I got + carried away. This is a hybrid of P8's asymptotically + faster 64B checks and extremely efficient small string checks, + e.g. <64B (and sometimes a little bit more depending on alignment). + + The second trick is to align to 64B by running a 48B checking loop + 16B at a time until we naturally align to 64B (i.e. checking 48/96/144 + bytes/iteration based on the alignment after the first 5 comparisons). + This alleviates the need to check page boundaries. 
+ + Finally, explicitly use the P7 strlen with the runtime loader when building + P9. We need to be cautious about vector/vsx extensions here on P9-only + builds. + +diff --git a/sysdeps/powerpc/powerpc64/le/power9/rtld-strlen.S b/sysdeps/powerpc/powerpc64/le/power9/rtld-strlen.S +new file mode 100644 +index 0000000000000000..e9d83323acacfbca +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/le/power9/rtld-strlen.S +@@ -0,0 +1 @@ ++#include <sysdeps/powerpc/powerpc64/power7/strlen.S> +diff --git a/sysdeps/powerpc/powerpc64/le/power9/strlen.S b/sysdeps/powerpc/powerpc64/le/power9/strlen.S +new file mode 100644 +index 0000000000000000..66a9b79647eebbd8 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/le/power9/strlen.S +@@ -0,0 +1,213 @@ ++/* Optimized strlen implementation for PowerPC64/POWER9. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++ ++#ifndef STRLEN ++# define STRLEN __strlen ++# define DEFINE_STRLEN_HIDDEN_DEF 1 ++#endif ++ ++/* Implements the function ++ ++ size_t [r3] strlen (const char *s [r3]) ++ ++ The implementation can load bytes past a matching byte, but only ++ up to the next 64B boundary, so it never crosses a page. 
*/ ++ ++.machine power9 ++ENTRY_TOCLESS (STRLEN, 4) ++ CALL_MCOUNT 2 ++ ++ vspltisb v18,0 ++ vspltisb v19,-1 ++ ++ neg r5,r3 ++ rldicl r9,r5,0,60 /* How many bytes to get source 16B aligned? */ ++ ++ /* Align data and fill bytes not loaded with non matching char. */ ++ lvx v0,0,r3 ++ lvsr v1,0,r3 ++ vperm v0,v19,v0,v1 ++ ++ vcmpequb. v6,v0,v18 ++ beq cr6,L(aligned) ++ ++ vctzlsbb r3,v6 ++ blr ++ ++ /* Test 64B 16B at a time. The 64B vector loop is optimized for ++ longer strings. Likewise, we check a multiple of 64B to avoid ++ breaking the alignment calculation below. */ ++L(aligned): ++ add r4,r3,r9 ++ rldicl. r5,r4,60,62 /* Determine the number of 48B loops needed for ++ alignment to 64B. And test for zero. */ ++ ++ lxv v0+32,0(r4) ++ vcmpequb. v6,v0,v18 ++ bne cr6,L(tail1) ++ ++ lxv v0+32,16(r4) ++ vcmpequb. v6,v0,v18 ++ bne cr6,L(tail2) ++ ++ lxv v0+32,32(r4) ++ vcmpequb. v6,v0,v18 ++ bne cr6,L(tail3) ++ ++ lxv v0+32,48(r4) ++ vcmpequb. v6,v0,v18 ++ bne cr6,L(tail4) ++ addi r4,r4,64 ++ ++ /* Speculatively generate a fake 16B aligned address to generate the ++ vector byte constant 0,1,..,15 using lvsl during reduction. */ ++ li r0,0 ++ ++ /* Skip the alignment if already 64B aligned. */ ++ beq L(loop_64b) ++ mtctr r5 ++ ++ /* Test 48B per iteration until 64B aligned. */ ++ .p2align 5 ++L(loop): ++ lxv v0+32,0(r4) ++ vcmpequb. v6,v0,v18 ++ bne cr6,L(tail1) ++ ++ lxv v0+32,16(r4) ++ vcmpequb. v6,v0,v18 ++ bne cr6,L(tail2) ++ ++ lxv v0+32,32(r4) ++ vcmpequb. v6,v0,v18 ++ bne cr6,L(tail3) ++ ++ addi r4,r4,48 ++ bdnz L(loop) ++ ++ .p2align 5 ++L(loop_64b): ++ lxv v1+32,0(r4) /* Load 4 quadwords. */ ++ lxv v2+32,16(r4) ++ lxv v3+32,32(r4) ++ lxv v4+32,48(r4) ++ vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ ++ vminub v6,v3,v4 ++ vminub v7,v5,v6 ++ vcmpequb. v7,v7,v18 /* Check for NULLs. */ ++ addi r4,r4,64 /* Adjust address for the next iteration. */ ++ bne cr6,L(vmx_zero) ++ ++ lxv v1+32,0(r4) /* Load 4 quadwords. 
*/ ++ lxv v2+32,16(r4) ++ lxv v3+32,32(r4) ++ lxv v4+32,48(r4) ++ vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ ++ vminub v6,v3,v4 ++ vminub v7,v5,v6 ++ vcmpequb. v7,v7,v18 /* Check for NULLs. */ ++ addi r4,r4,64 /* Adjust address for the next iteration. */ ++ bne cr6,L(vmx_zero) ++ ++ lxv v1+32,0(r4) /* Load 4 quadwords. */ ++ lxv v2+32,16(r4) ++ lxv v3+32,32(r4) ++ lxv v4+32,48(r4) ++ vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ ++ vminub v6,v3,v4 ++ vminub v7,v5,v6 ++ vcmpequb. v7,v7,v18 /* Check for NULLs. */ ++ addi r4,r4,64 /* Adjust address for the next iteration. */ ++ beq cr6,L(loop_64b) ++ ++L(vmx_zero): ++ /* OK, we found a null byte. Let's look for it in the current 64-byte ++ block and mark it in its corresponding VR. */ ++ vcmpequb v1,v1,v18 ++ vcmpequb v2,v2,v18 ++ vcmpequb v3,v3,v18 ++ vcmpequb v4,v4,v18 ++ ++ /* We will now 'compress' the result into a single doubleword, so it ++ can be moved to a GPR for the final calculation. First, we ++ generate an appropriate mask for vbpermq, so we can permute bits into ++ the first halfword. */ ++ vspltisb v10,3 ++ lvsl v11,0,r0 ++ vslb v10,v11,v10 ++ ++ /* Permute the first bit of each byte into bits 48-63. */ ++ vbpermq v1,v1,v10 ++ vbpermq v2,v2,v10 ++ vbpermq v3,v3,v10 ++ vbpermq v4,v4,v10 ++ ++ /* Shift each component into its correct position for merging. */ ++ vsldoi v2,v2,v2,2 ++ vsldoi v3,v3,v3,4 ++ vsldoi v4,v4,v4,6 ++ ++ /* Merge the results and move to a GPR. */ ++ vor v1,v2,v1 ++ vor v2,v3,v4 ++ vor v4,v1,v2 ++ mfvrd r10,v4 ++ ++ /* Adjust address to the beginning of the current 64-byte block. */ ++ addi r4,r4,-64 ++ ++ cnttzd r0,r10 /* Count trailing zeros before the match. */ ++ subf r5,r3,r4 ++ add r3,r5,r0 /* Compute final length. 
*/ ++ blr ++ ++L(tail1): ++ vctzlsbb r0,v6 ++ add r4,r4,r0 ++ subf r3,r3,r4 ++ blr ++ ++L(tail2): ++ vctzlsbb r0,v6 ++ add r4,r4,r0 ++ addi r4,r4,16 ++ subf r3,r3,r4 ++ blr ++ ++L(tail3): ++ vctzlsbb r0,v6 ++ add r4,r4,r0 ++ addi r4,r4,32 ++ subf r3,r3,r4 ++ blr ++ ++L(tail4): ++ vctzlsbb r0,v6 ++ add r4,r4,r0 ++ addi r4,r4,48 ++ subf r3,r3,r4 ++ blr ++ ++END (STRLEN) ++ ++#ifdef DEFINE_STRLEN_HIDDEN_DEF ++weak_alias (__strlen, strlen) ++libc_hidden_builtin_def (strlen) ++#endif +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 1a8ef5fb73c3b0db..6d5661d08257b7a0 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -33,7 +33,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ + + ifneq (,$(filter %le,$(config-machine))) + sysdep_routines += strcmp-power9 strncmp-power9 strcpy-power9 stpcpy-power9 \ +- rawmemchr-power9 ++ rawmemchr-power9 strlen-power9 + endif + CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops + CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index 297935863e44c0e1..daa30d3907395680 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -111,6 +111,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/strlen.c. 
*/ + IFUNC_IMPL (i, name, strlen, ++#ifdef __LITTLE_ENDIAN__ ++ IFUNC_IMPL_ADD (array, i, strcpy, hwcap2 & PPC_FEATURE2_ARCH_3_00, ++ __strlen_power9) ++#endif + IFUNC_IMPL_ADD (array, i, strlen, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strlen_power8) + IFUNC_IMPL_ADD (array, i, strlen, hwcap & PPC_FEATURE_HAS_VSX, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strlen-power9.S b/sysdeps/powerpc/powerpc64/multiarch/strlen-power9.S +new file mode 100644 +index 0000000000000000..68c8d54b5f5876a2 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strlen-power9.S +@@ -0,0 +1,2 @@ ++#define STRLEN __strlen_power9 ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strlen.c b/sysdeps/powerpc/powerpc64/multiarch/strlen.c +index 74810dab9929d505..b7f0fbb13fb97783 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strlen.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/strlen.c +@@ -30,8 +30,13 @@ extern __typeof (__redirect_strlen) __libc_strlen; + extern __typeof (__redirect_strlen) __strlen_ppc attribute_hidden; + extern __typeof (__redirect_strlen) __strlen_power7 attribute_hidden; + extern __typeof (__redirect_strlen) __strlen_power8 attribute_hidden; ++extern __typeof (__redirect_strlen) __strlen_power9 attribute_hidden; + + libc_ifunc (__libc_strlen, ++# ifdef __LITTLE_ENDIAN__ ++ (hwcap2 & PPC_FEATURE2_ARCH_3_00) ++ ? __strlen_power9 : ++# endif + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strlen_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) diff --git a/SOURCES/glibc-rh1871387-6.patch b/SOURCES/glibc-rh1871387-6.patch new file mode 100644 index 0000000..e05901b --- /dev/null +++ b/SOURCES/glibc-rh1871387-6.patch @@ -0,0 +1,31 @@ +commit 07f3ecdba69c5190180112c25757040c69041bb9 +Author: Raphael Moreira Zinsly +Date: Thu Sep 17 11:16:36 2020 -0300 + + powerpc: fix ifunc implementation list for POWER9 strlen and stpcpy + + __strlen_power9 and __stpcpy_power9 were added to their ifunc lists + using the wrong function names. 
+ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index daa30d3907395680..e622ab4d47548146 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -99,7 +99,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/powerpc/powerpc64/multiarch/stpcpy.c. */ + IFUNC_IMPL (i, name, stpcpy, + #ifdef __LITTLE_ENDIAN__ +- IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00, ++ IFUNC_IMPL_ADD (array, i, stpcpy, hwcap2 & PPC_FEATURE2_ARCH_3_00, + __stpcpy_power9) + #endif + IFUNC_IMPL_ADD (array, i, stpcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07, +@@ -112,7 +112,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/powerpc/powerpc64/multiarch/strlen.c. */ + IFUNC_IMPL (i, name, strlen, + #ifdef __LITTLE_ENDIAN__ +- IFUNC_IMPL_ADD (array, i, strcpy, hwcap2 & PPC_FEATURE2_ARCH_3_00, ++ IFUNC_IMPL_ADD (array, i, strlen, hwcap2 & PPC_FEATURE2_ARCH_3_00, + __strlen_power9) + #endif + IFUNC_IMPL_ADD (array, i, strlen, hwcap2 & PPC_FEATURE2_ARCH_2_07, diff --git a/SPECS/glibc.spec b/SPECS/glibc.spec index 40339bb..a341012 100644 --- a/SPECS/glibc.spec +++ b/SPECS/glibc.spec @@ -1,6 +1,6 @@ %define glibcsrcdir glibc-2.28 %define glibcversion 2.28 -%define glibcrelease 127%{?dist} +%define glibcrelease 129%{?dist} # Pre-release tarballs are pulled in from git using a command that is # effectively: # @@ -486,6 +486,15 @@ Patch352: glibc-rh1642150-4.patch Patch353: glibc-rh1836867.patch Patch354: glibc-rh1821531-1.patch Patch355: glibc-rh1821531-2.patch +Patch356: glibc-rh1845098-1.patch +Patch357: glibc-rh1845098-2.patch +Patch358: glibc-rh1845098-3.patch +Patch359: glibc-rh1871387-1.patch +Patch360: glibc-rh1871387-2.patch +Patch361: glibc-rh1871387-3.patch +Patch362: glibc-rh1871387-4.patch +Patch363: glibc-rh1871387-5.patch +Patch364: 
glibc-rh1871387-6.patch ############################################################################## # Continued list of core "glibc" package information: @@ -2384,6 +2393,12 @@ fi %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared %changelog +* Fri Sep 18 2020 Arjun Shankar - 2.28-129 +- Improve IBM POWER9 architecture performance (#1871387) + +* Thu Sep 17 2020 Arjun Shankar - 2.28-128 +- Enable glibc for POWER10 (#1845098) + * Tue Jun 09 2020 Carlos O'Donell - 2.28-127 - Improve performance of library strstr() function (#1821531)