diff --git a/SOURCES/glibc-rh1929928-1.patch b/SOURCES/glibc-rh1929928-1.patch new file mode 100644 index 0000000..5e04ea2 --- /dev/null +++ b/SOURCES/glibc-rh1929928-1.patch @@ -0,0 +1,38 @@ +This patch is a downstream-only variant of this upstream commit: + +commit 45b1e17e9150dbd9ac2d578579063fbfa8e1b327 +Author: Szabolcs Nagy +Date: Thu Dec 17 10:03:05 2020 +0000 + + aarch64: use PTR_ARG and SIZE_ARG instead of DELOUSE + + DELOUSE was added to asm code to make them compatible with non-LP64 + ABIs, but it is an unfortunate name and the code was not compatible + with ABIs where pointer and size_t are different. Glibc currently + only supports the LP64 ABI so these macros are not really needed or + tested, but for now the name is changed to be more meaningful instead + of removing them completely. + + Some DELOUSE macros were dropped: clone, strlen and strnlen used it + unnecessarily. + + The out of tree ILP32 patches are currently not maintained and will + likely need a rework to rebase them on top of the time64 changes. + +Keeping the DELOUSE macro avoids the need to update all string +functions. Lack of BTI markers and architecture variants cause many +conflicts in a full upstream backport. + +diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h +index 5b30709436d3acea..1bcf15d4f18586ba 100644 +--- a/sysdeps/aarch64/sysdep.h ++++ b/sysdeps/aarch64/sysdep.h +@@ -32,6 +32,8 @@ + # define PTR_LOG_SIZE 2 + # define DELOUSE(n) mov w##n, w##n + #endif ++#define PTR_ARG(n) DELOUSE(n) ++#define SIZE_ARG(n) DELOUSE(n) + + #define PTR_SIZE (1< +Date: Wed May 12 09:26:40 2021 +0000 + + config: Added HAVE_AARCH64_SVE_ASM for aarch64 + + This patch checks if assembler supports '-march=armv8.2-a+sve' to + generate SVE code or not, and then define HAVE_AARCH64_SVE_ASM macro. + +Conflicts: + config.h.in + (missing PAC+BTI support downstream, missing other ports) + +diff --git a/config.h.in b/config.h.in +index 8520b0fa8d4668fb..94d5ea367e10f849 100644 +--- a/config.h.in ++++ b/config.h.in +@@ -112,6 +112,11 @@ + /* AArch64 big endian ABI */ + #undef HAVE_AARCH64_BE + ++/* Assembler support ARMv8.2-A SVE. ++ This macro becomes obsolete when glibc increased the minimum ++ required version of GNU 'binutils' to 2.28 or later. */ ++#define HAVE_AARCH64_SVE_ASM 0 ++ + /* RISC-V integer ABI for ld.so. */ + #undef RISCV_ABI_XLEN + +diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure +index f78a79338aba1e34..9fb713155d4ee6d8 100644 +--- a/sysdeps/aarch64/configure ++++ b/sysdeps/aarch64/configure +@@ -212,3 +212,31 @@ fi + $as_echo "$libc_cv_aarch64_variant_pcs" >&6; } + config_vars="$config_vars + aarch64-variant-pcs = $libc_cv_aarch64_variant_pcs" ++ ++# Check if asm support armv8.2-a+sve ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SVE support in assembler" >&5 ++$as_echo_n "checking for SVE support in assembler... " >&6; } ++if ${libc_cv_asm_sve+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ cat > conftest.s <<\EOF ++ ptrue p0.b ++EOF ++if { ac_try='${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 ++ test $ac_status = 0; }; }; then ++ libc_cv_aarch64_sve_asm=yes ++else ++ libc_cv_aarch64_sve_asm=no ++fi ++rm -f conftest* ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_sve" >&5 ++$as_echo "$libc_cv_asm_sve" >&6; } ++if test $libc_cv_aarch64_sve_asm = yes; then ++ $as_echo "#define HAVE_AARCH64_SVE_ASM 1" >>confdefs.h ++ ++fi +diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac +index 7f13bfb93b60bfd7..0236cfcdf3c8d10d 100644 +--- a/sysdeps/aarch64/configure.ac ++++ b/sysdeps/aarch64/configure.ac +@@ -42,3 +42,18 @@ EOF + fi + rm -rf conftest.*]) + LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs]) ++ ++# Check if asm support armv8.2-a+sve ++AC_CACHE_CHECK(for SVE support in assembler, libc_cv_asm_sve, [dnl ++cat > conftest.s <<\EOF ++ ptrue p0.b ++EOF ++if AC_TRY_COMMAND(${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&AS_MESSAGE_LOG_FD); then ++ libc_cv_aarch64_sve_asm=yes ++else ++ libc_cv_aarch64_sve_asm=no ++fi ++rm -f conftest*]) ++if test $libc_cv_aarch64_sve_asm = yes; then ++ AC_DEFINE(HAVE_AARCH64_SVE_ASM) ++fi diff --git a/SOURCES/glibc-rh1929928-3.patch b/SOURCES/glibc-rh1929928-3.patch new file mode 100644 index 0000000..76ff1a2 --- /dev/null +++ b/SOURCES/glibc-rh1929928-3.patch @@ -0,0 +1,140 @@ +commit 38560563587ad8eafa700c56800ff844f18fbad1 +Author: Naohiro Tamura +Date: Thu May 20 07:34:37 2021 +0000 + + aarch64: Added Vector Length Set test helper script + + This patch is a test helper script to change Vector Length for child + process. This script can be used as test-wrapper for 'make check'. + + Usage examples: + + ~/build$ make check subdirs=string \ + test-wrapper='~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 16' + + ~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 16 \ + make test t=string/test-memcpy + + ~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 32 \ + ./debugglibc.sh string/test-memmove + + ~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 64 \ + ./testrun.sh string/test-memset + +diff --git a/INSTALL b/INSTALL +index 065565093bd76d5b..b3a4370f592c5047 100644 +--- a/INSTALL ++++ b/INSTALL +@@ -387,6 +387,10 @@ the same syntax as 'test-wrapper-env', the only difference in its + semantics being starting with an empty set of environment variables + rather than the ambient set. + ++ For AArch64 with SVE, when testing the GNU C Library, 'test-wrapper' ++may be set to "SRCDIR/sysdeps/unix/sysv/linux/aarch64/vltest.py ++VECTOR-LENGTH" to change Vector Length. ++ + Installing the C Library + ======================== + +diff --git a/manual/install.texi b/manual/install.texi +index 7e9f2be150e6f98a..c262fd56d0cef67b 100644 +--- a/manual/install.texi ++++ b/manual/install.texi +@@ -425,6 +425,9 @@ use has the same syntax as @samp{test-wrapper-env}, the only + difference in its semantics being starting with an empty set of + environment variables rather than the ambient set. + ++For AArch64 with SVE, when testing @theglibc{}, @samp{test-wrapper} ++may be set to "@var{srcdir}/sysdeps/unix/sysv/linux/aarch64/vltest.py ++@var{vector-length}" to change Vector Length. 
+ + @node Running make install + @appendixsec Installing the C Library +diff --git a/sysdeps/unix/sysv/linux/aarch64/vltest.py b/sysdeps/unix/sysv/linux/aarch64/vltest.py +new file mode 100755 +index 0000000000000000..bed62ad151e06868 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/aarch64/vltest.py +@@ -0,0 +1,82 @@ ++#!/usr/bin/python3 ++# Set Scalable Vector Length test helper ++# Copyright (C) 2021 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++"""Set Scalable Vector Length test helper. ++ ++Set Scalable Vector Length for child process. ++ ++examples: ++ ++~/build$ make check subdirs=string \ ++test-wrapper='~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 16' ++ ++~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 16 \ ++make test t=string/test-memcpy ++ ++~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 32 \ ++./debugglibc.sh string/test-memmove ++ ++~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 64 \ ++./testrun.sh string/test-memset ++""" ++import argparse ++from ctypes import cdll, CDLL ++import os ++import sys ++ ++EXIT_SUCCESS = 0 ++EXIT_FAILURE = 1 ++EXIT_UNSUPPORTED = 77 ++ ++AT_HWCAP = 16 ++HWCAP_SVE = (1 << 22) ++ ++PR_SVE_GET_VL = 51 ++PR_SVE_SET_VL = 50 ++PR_SVE_SET_VL_ONEXEC = (1 << 18) ++PR_SVE_VL_INHERIT = (1 << 17) ++PR_SVE_VL_LEN_MASK = 0xffff ++ ++def main(args): ++ libc = CDLL("libc.so.6") ++ if not libc.getauxval(AT_HWCAP) & HWCAP_SVE: ++ print("CPU doesn't support SVE") ++ sys.exit(EXIT_UNSUPPORTED) ++ ++ libc.prctl(PR_SVE_SET_VL, ++ args.vl[0] | PR_SVE_SET_VL_ONEXEC | PR_SVE_VL_INHERIT) ++ os.execvp(args.args[0], args.args) ++ print("exec system call failure") ++ sys.exit(EXIT_FAILURE) ++ ++if __name__ == '__main__': ++ parser = argparse.ArgumentParser(description= ++ "Set Scalable Vector Length test helper", ++ formatter_class=argparse.ArgumentDefaultsHelpFormatter) ++ ++ # positional argument ++ parser.add_argument("vl", nargs=1, type=int, ++ choices=range(16, 257, 16), ++ help=('vector length '\ ++ 'which is multiples of 16 from 16 to 256')) ++ # remainDer arguments ++ parser.add_argument('args', nargs=argparse.REMAINDER, ++ help=('args '\ ++ 'which is passed to child process')) ++ args = parser.parse_args() ++ main(args) diff --git a/SOURCES/glibc-rh1929928-4.patch b/SOURCES/glibc-rh1929928-4.patch new file mode 100644 index 0000000..f2ec8e3 --- /dev/null +++ b/SOURCES/glibc-rh1929928-4.patch @@ -0,0 +1,623 @@ +commit fa527f345cbbe852ec085932fbea979956c195b5 +Author: Naohiro Tamura +Date: Thu May 27 07:42:35 2021 +0000 + + aarch64: Added optimized memcpy and memmove for A64FX + + This patch optimizes the performance of memcpy/memmove for A64FX [1] + which implements ARMv8-A SVE and has L1 64KB cache per core and L2 8MB + cache per NUMA node. 
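+
+    A minimal sketch of the Vector Length Agnostic idea, using SVE ACLE
+    intrinsics rather than the hand-scheduled assembly added by this
+    patch (the helper name is hypothetical, for illustration only):
+
+      #include <arm_sve.h>
+      #include <stddef.h>
+      #include <stdint.h>
+
+      /* Copy n bytes.  The predicate masks off lanes past the end, so
+         the same loop handles the tail and runs at any vector length.  */
+      static void
+      sve_copy (uint8_t *dst, const uint8_t *src, size_t n)
+      {
+        for (uint64_t i = 0; i < n; i += svcntb ())
+          {
+            svbool_t pg = svwhilelt_b8_u64 (i, n);
+            svst1_u8 (pg, dst + i, svld1_u8 (pg, src + i));
+          }
+      }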
+ + The performance optimization makes use of Scalable Vector Register + with several techniques such as loop unrolling, memory access + alignment, cache zero fill, and software pipelining. + + SVE assembler code for memcpy/memmove is implemented as Vector Length + Agnostic code so theoretically it can be run on any SOC which supports + ARMv8-A SVE standard. + + We confirmed that all testcases have been passed by running 'make + check' and 'make xcheck' not only on A64FX but also on ThunderX2. + + And also we confirmed that the SVE 512 bit vector register performance + is roughly 4 times better than Advanced SIMD 128 bit register and 8 + times better than scalar 64 bit register by running 'make bench'. + + [1] https://github.com/fujitsu/A64FX + + Reviewed-by: Wilco Dijkstra + Reviewed-by: Szabolcs Nagy + +Conflicts: + manual/tunables.texi + sysdeps/aarch64/multiarch/Makefile + sysdeps/aarch64/multiarch/ifunc-impl-list.c + sysdeps/aarch64/multiarch/init-arch.h + sysdeps/aarch64/multiarch/memcpy.c + sysdeps/aarch64/multiarch/memmove.c + sysdeps/unix/sysv/linux/aarch64/cpu-features.c + sysdeps/unix/sysv/linux/aarch64/cpu-features.h + (all conflicts due to missing optimizations for other CPUs) + +diff --git a/manual/tunables.texi b/manual/tunables.texi +index bd737b5d57080462..07887981748bc44b 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -386,7 +386,7 @@ This tunable is specific to powerpc, powerpc64 and powerpc64le. + The @code{glibc.cpu.name=xxx} tunable allows the user to tell @theglibc{} to + assume that the CPU is @code{xxx} where xxx may have one of these values: + @code{generic}, @code{falkor}, @code{thunderxt88}, @code{thunderx2t99}, +-@code{thunderx2t99p1}. ++@code{thunderx2t99p1}, @code{a64fx}. + + This tunable is specific to aarch64. + @end deftp +diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile +index 57ffdf72382c0a44..5a19ba0308e80983 100644 +--- a/sysdeps/aarch64/multiarch/Makefile ++++ b/sysdeps/aarch64/multiarch/Makefile +@@ -1,4 +1,5 @@ + ifeq ($(subdir),string) + sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \ +- memcpy_falkor memmove_falkor memset_generic memset_falkor ++ memcpy_falkor memcpy_a64fx \ ++ memmove_falkor memset_generic memset_falkor + endif +diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +index e55be80103b948a2..f53db12acce37877 100644 +--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +@@ -25,7 +25,7 @@ + #include + + /* Maximum number of IFUNC implementations. 
*/ +-#define MAX_IFUNC 4 ++#define MAX_IFUNC 7 + + size_t + __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, +@@ -42,10 +42,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor) ++#if HAVE_AARCH64_SVE_ASM ++ IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_a64fx) ++#endif + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic)) + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor) ++#if HAVE_AARCH64_SVE_ASM ++ IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_a64fx) ++#endif + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) + IFUNC_IMPL (i, name, memset, + /* Enable this on non-falkor processors too so that other cores +diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h +index d1e5703cb25fdcff..65dc8f82ff23c754 100644 +--- a/sysdeps/aarch64/multiarch/init-arch.h ++++ b/sysdeps/aarch64/multiarch/init-arch.h +@@ -22,4 +22,6 @@ + uint64_t __attribute__((unused)) midr = \ + GLRO(dl_aarch64_cpu_features).midr_el1; \ + unsigned __attribute__((unused)) zva_size = \ +- GLRO(dl_aarch64_cpu_features).zva_size; ++ GLRO(dl_aarch64_cpu_features).zva_size; \ ++ bool __attribute__((unused)) sve = \ ++ GLRO(dl_aarch64_cpu_features).sve; +diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c +index 4a04a63b0fe0c84b..e0313c42e82a7b86 100644 +--- a/sysdeps/aarch64/multiarch/memcpy.c ++++ b/sysdeps/aarch64/multiarch/memcpy.c +@@ -32,6 +32,9 @@ extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden; ++# if HAVE_AARCH64_SVE_ASM ++extern __typeof (__redirect_memcpy) __memcpy_a64fx attribute_hidden; ++# endif + + libc_ifunc (__libc_memcpy, + (IS_THUNDERX (midr) +@@ -40,8 +43,13 @@ libc_ifunc (__libc_memcpy, + ? __memcpy_falkor + : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) + ? __memcpy_thunderx2 ++# if HAVE_AARCH64_SVE_ASM ++ : (IS_A64FX (midr) ++ ? __memcpy_a64fx ++ : __memcpy_generic))))); ++# else + : __memcpy_generic)))); +- ++# endif + # undef memcpy + strong_alias (__libc_memcpy, memcpy); + #endif +diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S +new file mode 100644 +index 0000000000000000..65528405bb123737 +--- /dev/null ++++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S +@@ -0,0 +1,406 @@ ++/* Optimized memcpy for Fujitsu A64FX processor. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++ ++/* Assumptions: ++ * ++ * ARMv8.2-a, AArch64, unaligned accesses, sve ++ * ++ */ ++ ++#define L2_SIZE (8*1024*1024)/2 // L2 8MB/2 ++#define CACHE_LINE_SIZE 256 ++#define ZF_DIST (CACHE_LINE_SIZE * 21) // Zerofill distance ++#define dest x0 ++#define src x1 ++#define n x2 // size ++#define tmp1 x3 ++#define tmp2 x4 ++#define tmp3 x5 ++#define rest x6 ++#define dest_ptr x7 ++#define src_ptr x8 ++#define vector_length x9 ++#define cl_remainder x10 // CACHE_LINE_SIZE remainder ++ ++#if HAVE_AARCH64_SVE_ASM ++# if IS_IN (libc) ++# define MEMCPY __memcpy_a64fx ++# define MEMMOVE __memmove_a64fx ++ ++ .arch armv8.2-a+sve ++ ++ .macro dc_zva times ++ dc zva, tmp1 ++ add tmp1, tmp1, CACHE_LINE_SIZE ++ .if \times-1 ++ dc_zva "(\times-1)" ++ .endif ++ .endm ++ ++ .macro ld1b_unroll8 ++ ld1b z0.b, p0/z, [src_ptr, #0, mul vl] ++ ld1b z1.b, p0/z, [src_ptr, #1, mul vl] ++ ld1b z2.b, p0/z, [src_ptr, #2, mul vl] ++ ld1b z3.b, p0/z, [src_ptr, #3, mul vl] ++ ld1b z4.b, p0/z, [src_ptr, #4, mul vl] ++ ld1b z5.b, p0/z, [src_ptr, #5, mul vl] ++ ld1b z6.b, p0/z, [src_ptr, #6, mul vl] ++ ld1b z7.b, p0/z, [src_ptr, #7, mul vl] ++ .endm ++ ++ .macro stld1b_unroll4a ++ st1b z0.b, p0, [dest_ptr, #0, mul vl] ++ st1b z1.b, p0, [dest_ptr, #1, mul vl] ++ ld1b z0.b, p0/z, [src_ptr, #0, mul vl] ++ ld1b z1.b, p0/z, [src_ptr, #1, mul vl] ++ st1b z2.b, p0, [dest_ptr, #2, mul vl] ++ st1b z3.b, p0, [dest_ptr, #3, mul vl] ++ ld1b z2.b, p0/z, [src_ptr, #2, mul vl] ++ ld1b z3.b, p0/z, [src_ptr, #3, mul vl] ++ .endm ++ ++ .macro stld1b_unroll4b ++ st1b z4.b, p0, [dest_ptr, #4, mul vl] ++ st1b z5.b, p0, [dest_ptr, #5, mul vl] ++ ld1b z4.b, p0/z, [src_ptr, #4, mul vl] ++ ld1b z5.b, p0/z, [src_ptr, #5, mul vl] ++ st1b z6.b, p0, [dest_ptr, #6, mul vl] ++ st1b z7.b, p0, [dest_ptr, #7, mul vl] ++ ld1b z6.b, p0/z, [src_ptr, #6, mul vl] ++ ld1b z7.b, p0/z, [src_ptr, #7, mul vl] ++ .endm ++ ++ .macro stld1b_unroll8 ++ stld1b_unroll4a ++ stld1b_unroll4b ++ .endm ++ ++ .macro st1b_unroll8 ++ st1b z0.b, p0, [dest_ptr, #0, mul vl] ++ st1b z1.b, p0, [dest_ptr, #1, mul vl] ++ st1b z2.b, p0, [dest_ptr, #2, mul vl] ++ st1b z3.b, p0, [dest_ptr, #3, mul vl] ++ st1b z4.b, p0, [dest_ptr, #4, mul vl] ++ st1b z5.b, p0, [dest_ptr, #5, mul vl] ++ st1b z6.b, p0, [dest_ptr, #6, mul vl] ++ st1b z7.b, p0, [dest_ptr, #7, mul vl] ++ .endm ++ ++ .macro shortcut_for_small_size exit ++ // if rest <= vector_length * 2 ++ whilelo p0.b, xzr, n ++ whilelo p1.b, vector_length, n ++ b.last 1f ++ ld1b z0.b, p0/z, [src, #0, mul vl] ++ ld1b z1.b, p1/z, [src, #1, mul vl] ++ st1b z0.b, p0, [dest, #0, mul vl] ++ st1b z1.b, p1, [dest, #1, mul vl] ++ ret ++1: // if rest > vector_length * 8 ++ cmp n, vector_length, lsl 3 // vector_length * 8 ++ b.hi \exit ++ // if rest <= vector_length * 4 ++ lsl tmp1, vector_length, 1 // vector_length * 2 ++ whilelo p2.b, tmp1, n ++ incb tmp1 ++ whilelo p3.b, tmp1, n ++ b.last 1f ++ ld1b z0.b, p0/z, [src, #0, mul vl] ++ ld1b z1.b, p1/z, [src, #1, mul vl] ++ ld1b z2.b, p2/z, [src, #2, mul vl] ++ ld1b z3.b, p3/z, [src, #3, mul vl] ++ st1b z0.b, p0, [dest, #0, mul vl] ++ st1b z1.b, p1, [dest, #1, mul vl] ++ st1b z2.b, p2, [dest, #2, mul vl] ++ st1b z3.b, p3, [dest, #3, mul vl] ++ ret ++1: // if rest <= vector_length * 8 ++ lsl tmp1, vector_length, 2 // vector_length * 4 ++ whilelo p4.b, tmp1, n ++ incb tmp1 ++ whilelo p5.b, tmp1, n ++ b.last 1f ++ ld1b z0.b, p0/z, [src, #0, mul vl] ++ ld1b z1.b, p1/z, 
[src, #1, mul vl] ++ ld1b z2.b, p2/z, [src, #2, mul vl] ++ ld1b z3.b, p3/z, [src, #3, mul vl] ++ ld1b z4.b, p4/z, [src, #4, mul vl] ++ ld1b z5.b, p5/z, [src, #5, mul vl] ++ st1b z0.b, p0, [dest, #0, mul vl] ++ st1b z1.b, p1, [dest, #1, mul vl] ++ st1b z2.b, p2, [dest, #2, mul vl] ++ st1b z3.b, p3, [dest, #3, mul vl] ++ st1b z4.b, p4, [dest, #4, mul vl] ++ st1b z5.b, p5, [dest, #5, mul vl] ++ ret ++1: lsl tmp1, vector_length, 2 // vector_length * 4 ++ incb tmp1 // vector_length * 5 ++ incb tmp1 // vector_length * 6 ++ whilelo p6.b, tmp1, n ++ incb tmp1 ++ whilelo p7.b, tmp1, n ++ ld1b z0.b, p0/z, [src, #0, mul vl] ++ ld1b z1.b, p1/z, [src, #1, mul vl] ++ ld1b z2.b, p2/z, [src, #2, mul vl] ++ ld1b z3.b, p3/z, [src, #3, mul vl] ++ ld1b z4.b, p4/z, [src, #4, mul vl] ++ ld1b z5.b, p5/z, [src, #5, mul vl] ++ ld1b z6.b, p6/z, [src, #6, mul vl] ++ ld1b z7.b, p7/z, [src, #7, mul vl] ++ st1b z0.b, p0, [dest, #0, mul vl] ++ st1b z1.b, p1, [dest, #1, mul vl] ++ st1b z2.b, p2, [dest, #2, mul vl] ++ st1b z3.b, p3, [dest, #3, mul vl] ++ st1b z4.b, p4, [dest, #4, mul vl] ++ st1b z5.b, p5, [dest, #5, mul vl] ++ st1b z6.b, p6, [dest, #6, mul vl] ++ st1b z7.b, p7, [dest, #7, mul vl] ++ ret ++ .endm ++ ++ENTRY (MEMCPY) ++ ++ PTR_ARG (0) ++ PTR_ARG (1) ++ SIZE_ARG (2) ++ ++L(memcpy): ++ cntb vector_length ++ // shortcut for less than vector_length * 8 ++ // gives a free ptrue to p0.b for n >= vector_length ++ shortcut_for_small_size L(vl_agnostic) ++ // end of shortcut ++ ++L(vl_agnostic): // VL Agnostic ++ mov rest, n ++ mov dest_ptr, dest ++ mov src_ptr, src ++ // if rest >= L2_SIZE && vector_length == 64 then L(L2) ++ mov tmp1, 64 ++ cmp rest, L2_SIZE ++ ccmp vector_length, tmp1, 0, cs ++ b.eq L(L2) ++ ++L(unroll8): // unrolling and software pipeline ++ lsl tmp1, vector_length, 3 // vector_length * 8 ++ .p2align 3 ++ cmp rest, tmp1 ++ b.cc L(last) ++ ld1b_unroll8 ++ add src_ptr, src_ptr, tmp1 ++ sub rest, rest, tmp1 ++ cmp rest, tmp1 ++ b.cc 2f ++ .p2align 3 ++1: stld1b_unroll8 ++ add dest_ptr, dest_ptr, tmp1 ++ add src_ptr, src_ptr, tmp1 ++ sub rest, rest, tmp1 ++ cmp rest, tmp1 ++ b.ge 1b ++2: st1b_unroll8 ++ add dest_ptr, dest_ptr, tmp1 ++ ++ .p2align 3 ++L(last): ++ whilelo p0.b, xzr, rest ++ whilelo p1.b, vector_length, rest ++ b.last 1f ++ ld1b z0.b, p0/z, [src_ptr, #0, mul vl] ++ ld1b z1.b, p1/z, [src_ptr, #1, mul vl] ++ st1b z0.b, p0, [dest_ptr, #0, mul vl] ++ st1b z1.b, p1, [dest_ptr, #1, mul vl] ++ ret ++1: lsl tmp1, vector_length, 1 // vector_length * 2 ++ whilelo p2.b, tmp1, rest ++ incb tmp1 ++ whilelo p3.b, tmp1, rest ++ b.last 1f ++ ld1b z0.b, p0/z, [src_ptr, #0, mul vl] ++ ld1b z1.b, p1/z, [src_ptr, #1, mul vl] ++ ld1b z2.b, p2/z, [src_ptr, #2, mul vl] ++ ld1b z3.b, p3/z, [src_ptr, #3, mul vl] ++ st1b z0.b, p0, [dest_ptr, #0, mul vl] ++ st1b z1.b, p1, [dest_ptr, #1, mul vl] ++ st1b z2.b, p2, [dest_ptr, #2, mul vl] ++ st1b z3.b, p3, [dest_ptr, #3, mul vl] ++ ret ++1: lsl tmp1, vector_length, 2 // vector_length * 4 ++ whilelo p4.b, tmp1, rest ++ incb tmp1 ++ whilelo p5.b, tmp1, rest ++ incb tmp1 ++ whilelo p6.b, tmp1, rest ++ incb tmp1 ++ whilelo p7.b, tmp1, rest ++ ld1b z0.b, p0/z, [src_ptr, #0, mul vl] ++ ld1b z1.b, p1/z, [src_ptr, #1, mul vl] ++ ld1b z2.b, p2/z, [src_ptr, #2, mul vl] ++ ld1b z3.b, p3/z, [src_ptr, #3, mul vl] ++ ld1b z4.b, p4/z, [src_ptr, #4, mul vl] ++ ld1b z5.b, p5/z, [src_ptr, #5, mul vl] ++ ld1b z6.b, p6/z, [src_ptr, #6, mul vl] ++ ld1b z7.b, p7/z, [src_ptr, #7, mul vl] ++ st1b z0.b, p0, [dest_ptr, #0, mul vl] ++ st1b z1.b, p1, [dest_ptr, #1, mul vl] ++ st1b z2.b, p2, 
[dest_ptr, #2, mul vl] ++ st1b z3.b, p3, [dest_ptr, #3, mul vl] ++ st1b z4.b, p4, [dest_ptr, #4, mul vl] ++ st1b z5.b, p5, [dest_ptr, #5, mul vl] ++ st1b z6.b, p6, [dest_ptr, #6, mul vl] ++ st1b z7.b, p7, [dest_ptr, #7, mul vl] ++ ret ++ ++L(L2): ++ // align dest address at CACHE_LINE_SIZE byte boundary ++ mov tmp1, CACHE_LINE_SIZE ++ ands tmp2, dest_ptr, CACHE_LINE_SIZE - 1 ++ // if cl_remainder == 0 ++ b.eq L(L2_dc_zva) ++ sub cl_remainder, tmp1, tmp2 ++ // process remainder until the first CACHE_LINE_SIZE boundary ++ whilelo p1.b, xzr, cl_remainder // keep p0.b all true ++ whilelo p2.b, vector_length, cl_remainder ++ b.last 1f ++ ld1b z1.b, p1/z, [src_ptr, #0, mul vl] ++ ld1b z2.b, p2/z, [src_ptr, #1, mul vl] ++ st1b z1.b, p1, [dest_ptr, #0, mul vl] ++ st1b z2.b, p2, [dest_ptr, #1, mul vl] ++ b 2f ++1: lsl tmp1, vector_length, 1 // vector_length * 2 ++ whilelo p3.b, tmp1, cl_remainder ++ incb tmp1 ++ whilelo p4.b, tmp1, cl_remainder ++ ld1b z1.b, p1/z, [src_ptr, #0, mul vl] ++ ld1b z2.b, p2/z, [src_ptr, #1, mul vl] ++ ld1b z3.b, p3/z, [src_ptr, #2, mul vl] ++ ld1b z4.b, p4/z, [src_ptr, #3, mul vl] ++ st1b z1.b, p1, [dest_ptr, #0, mul vl] ++ st1b z2.b, p2, [dest_ptr, #1, mul vl] ++ st1b z3.b, p3, [dest_ptr, #2, mul vl] ++ st1b z4.b, p4, [dest_ptr, #3, mul vl] ++2: add dest_ptr, dest_ptr, cl_remainder ++ add src_ptr, src_ptr, cl_remainder ++ sub rest, rest, cl_remainder ++ ++L(L2_dc_zva): ++ // zero fill ++ and tmp1, dest, 0xffffffffffffff ++ and tmp2, src, 0xffffffffffffff ++ subs tmp1, tmp1, tmp2 // diff ++ b.ge 1f ++ neg tmp1, tmp1 ++1: mov tmp3, ZF_DIST + CACHE_LINE_SIZE * 2 ++ cmp tmp1, tmp3 ++ b.lo L(unroll8) ++ mov tmp1, dest_ptr ++ dc_zva (ZF_DIST / CACHE_LINE_SIZE) - 1 ++ // unroll ++ ld1b_unroll8 // this line has to be after "b.lo L(unroll8)" ++ add src_ptr, src_ptr, CACHE_LINE_SIZE * 2 ++ sub rest, rest, CACHE_LINE_SIZE * 2 ++ mov tmp1, ZF_DIST ++ .p2align 3 ++1: stld1b_unroll4a ++ add tmp2, dest_ptr, tmp1 // dest_ptr + ZF_DIST ++ dc zva, tmp2 ++ stld1b_unroll4b ++ add tmp2, tmp2, CACHE_LINE_SIZE ++ dc zva, tmp2 ++ add dest_ptr, dest_ptr, CACHE_LINE_SIZE * 2 ++ add src_ptr, src_ptr, CACHE_LINE_SIZE * 2 ++ sub rest, rest, CACHE_LINE_SIZE * 2 ++ cmp rest, tmp3 // ZF_DIST + CACHE_LINE_SIZE * 2 ++ b.ge 1b ++ st1b_unroll8 ++ add dest_ptr, dest_ptr, CACHE_LINE_SIZE * 2 ++ b L(unroll8) ++ ++END (MEMCPY) ++libc_hidden_builtin_def (MEMCPY) ++ ++ ++ENTRY (MEMMOVE) ++ ++ PTR_ARG (0) ++ PTR_ARG (1) ++ SIZE_ARG (2) ++ ++ // remove tag address ++ // dest has to be immutable because it is the return value ++ // src has to be immutable because it is used in L(bwd_last) ++ and tmp2, dest, 0xffffffffffffff // save dest_notag into tmp2 ++ and tmp3, src, 0xffffffffffffff // save src_notag intp tmp3 ++ cmp n, 0 ++ ccmp tmp2, tmp3, 4, ne ++ b.ne 1f ++ ret ++1: cntb vector_length ++ // shortcut for less than vector_length * 8 ++ // gives a free ptrue to p0.b for n >= vector_length ++ // tmp2 and tmp3 should not be used in this macro to keep ++ // notag addresses ++ shortcut_for_small_size L(dispatch) ++ // end of shortcut ++ ++L(dispatch): ++ // tmp2 = dest_notag, tmp3 = src_notag ++ // diff = dest_notag - src_notag ++ sub tmp1, tmp2, tmp3 ++ // if diff <= 0 || diff >= n then memcpy ++ cmp tmp1, 0 ++ ccmp tmp1, n, 2, gt ++ b.cs L(vl_agnostic) ++ ++L(bwd_start): ++ mov rest, n ++ add dest_ptr, dest, n // dest_end ++ add src_ptr, src, n // src_end ++ ++L(bwd_unroll8): // unrolling and software pipeline ++ lsl tmp1, vector_length, 3 // vector_length * 8 ++ .p2align 3 ++ cmp rest, tmp1 ++ b.cc 
L(bwd_last) ++ sub src_ptr, src_ptr, tmp1 ++ ld1b_unroll8 ++ sub rest, rest, tmp1 ++ cmp rest, tmp1 ++ b.cc 2f ++ .p2align 3 ++1: sub src_ptr, src_ptr, tmp1 ++ sub dest_ptr, dest_ptr, tmp1 ++ stld1b_unroll8 ++ sub rest, rest, tmp1 ++ cmp rest, tmp1 ++ b.ge 1b ++2: sub dest_ptr, dest_ptr, tmp1 ++ st1b_unroll8 ++ ++L(bwd_last): ++ mov dest_ptr, dest ++ mov src_ptr, src ++ b L(last) ++ ++END (MEMMOVE) ++libc_hidden_builtin_def (MEMMOVE) ++# endif /* IS_IN (libc) */ ++#endif /* HAVE_AARCH64_SVE_ASM */ +diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c +index e69d8162910b938e..d96612b9cf7c3a4e 100644 +--- a/sysdeps/aarch64/multiarch/memmove.c ++++ b/sysdeps/aarch64/multiarch/memmove.c +@@ -31,14 +31,22 @@ extern __typeof (__redirect_memmove) __libc_memmove; + extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden; ++# if HAVE_AARCH64_SVE_ASM ++extern __typeof (__redirect_memmove) __memmove_a64fx attribute_hidden; ++# endif + + libc_ifunc (__libc_memmove, + (IS_THUNDERX (midr) + ? __memmove_thunderx + : (IS_FALKOR (midr) || IS_PHECDA (midr) + ? __memmove_falkor ++# if HAVE_AARCH64_SVE_ASM ++ : (IS_A64FX (midr) ++ ? __memmove_a64fx ++ : __memmove_generic)))); ++# else + : __memmove_generic))); +- ++# endif + # undef memmove + strong_alias (__libc_memmove, memmove); + #endif +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index b4f348509eb1c6b3..71e4355c972f1ffb 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -36,6 +36,7 @@ static struct cpu_list cpu_list[] = { + {"thunderx2t99", 0x431F0AF0}, + {"thunderx2t99p1", 0x420F5160}, + {"phecda", 0x680F0000}, ++ {"a64fx", 0x460F0010}, + {"generic", 0x0} + }; + +@@ -80,4 +81,7 @@ init_cpu_features (struct cpu_features *cpu_features) + + if ((dczid & DCZID_DZP_MASK) == 0) + cpu_features->zva_size = 4 << (dczid & DCZID_BS_MASK); ++ ++ /* Check if SVE is supported. */ ++ cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE; + } +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +index eb35adfbe9d429d5..5691aea6de3cb7f4 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +@@ -20,6 +20,7 @@ + #define _CPU_FEATURES_AARCH64_H + + #include ++#include + + #define MIDR_PARTNUM_SHIFT 4 + #define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT) +@@ -52,10 +53,14 @@ + #define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \ + && MIDR_PARTNUM(midr) == 0x000) + ++#define IS_A64FX(midr) (MIDR_IMPLEMENTOR(midr) == 'F' \ ++ && MIDR_PARTNUM(midr) == 0x001) ++ + struct cpu_features + { + uint64_t midr_el1; + unsigned zva_size; ++ bool sve; + }; + + #endif /* _CPU_FEATURES_AARCH64_H */ diff --git a/SOURCES/glibc-rh1929928-5.patch b/SOURCES/glibc-rh1929928-5.patch new file mode 100644 index 0000000..7b5da7d --- /dev/null +++ b/SOURCES/glibc-rh1929928-5.patch @@ -0,0 +1,371 @@ +commit 4f26956d5ba394eb3ade6c1c20b5c16864a00766 +Author: Naohiro Tamura +Date: Thu May 27 07:44:12 2021 +0000 + + aarch64: Added optimized memset for A64FX + + This patch optimizes the performance of memset for A64FX [1] which + implements ARMv8-A SVE and has L1 64KB cache per core and L2 8MB cache + per NUMA node. 
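+
+    The same Vector Length Agnostic idea, sketched with SVE ACLE
+    intrinsics (a hypothetical helper, not the assembly added below):
+
+      #include <arm_sve.h>
+      #include <stddef.h>
+      #include <stdint.h>
+
+      /* Fill n bytes with c; predication covers the tail at any VL.  */
+      static void
+      sve_set (uint8_t *dst, int c, size_t n)
+      {
+        svuint8_t v = svdup_n_u8 ((uint8_t) c);
+        for (uint64_t i = 0; i < n; i += svcntb ())
+          svst1_u8 (svwhilelt_b8_u64 (i, n), dst + i, v);
+      }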
+ + The performance optimization makes use of Scalable Vector Register + with several techniques such as loop unrolling, memory access + alignment, cache zero fill and prefetch. + + SVE assembler code for memset is implemented as Vector Length Agnostic + code so theoretically it can be run on any SOC which supports ARMv8-A + SVE standard. + + We confirmed that all testcases have been passed by running 'make + check' and 'make xcheck' not only on A64FX but also on ThunderX2. + + And also we confirmed that the SVE 512 bit vector register performance + is roughly 4 times better than Advanced SIMD 128 bit register and 8 + times better than scalar 64 bit register by running 'make bench'. + + [1] https://github.com/fujitsu/A64FX + + Reviewed-by: Wilco Dijkstra + Reviewed-by: Szabolcs Nagy + +Conflicts: + sysdeps/aarch64/multiarch/Makefile + sysdeps/aarch64/multiarch/ifunc-impl-list.c + sysdeps/aarch64/multiarch/memset.c + (all conflicts due to missing other CPU implementations downstream) + +diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile +index 5a19ba0308e80983..5ff883a8ad8e3067 100644 +--- a/sysdeps/aarch64/multiarch/Makefile ++++ b/sysdeps/aarch64/multiarch/Makefile +@@ -1,5 +1,6 @@ + ifeq ($(subdir),string) + sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \ + memcpy_falkor memcpy_a64fx \ +- memmove_falkor memset_generic memset_falkor ++ memmove_falkor memset_generic memset_falkor \ ++ memset_a64fx + endif +diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +index f53db12acce37877..53e3e162a1025e40 100644 +--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +@@ -37,7 +37,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + INIT_ARCH (); + +- /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c. */ ++ /* Support sysdeps/aarch64/multiarch/memcpy.c, memmove.c and memset.c. */ + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2) +@@ -57,6 +57,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Enable this on non-falkor processors too so that other cores + can do a comparative analysis with __memset_generic. */ + IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor) ++#if HAVE_AARCH64_SVE_ASM ++ IFUNC_IMPL_ADD (array, i, memset, sve, __memset_a64fx) ++#endif + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic)) + + return i; +diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c +index d74ed3a549a54b10..2c8cc72bb0b18474 100644 +--- a/sysdeps/aarch64/multiarch/memset.c ++++ b/sysdeps/aarch64/multiarch/memset.c +@@ -29,12 +29,21 @@ + extern __typeof (__redirect_memset) __libc_memset; + + extern __typeof (__redirect_memset) __memset_falkor attribute_hidden; ++# if HAVE_AARCH64_SVE_ASM ++extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden; ++# endif + extern __typeof (__redirect_memset) __memset_generic attribute_hidden; + + libc_ifunc (__libc_memset, + ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64 + ? __memset_falkor ++# if HAVE_AARCH64_SVE_ASM ++ : (IS_A64FX (midr) ++ ? 
__memset_a64fx ++ : __memset_generic))); ++# else + : __memset_generic)); ++# endif + + # undef memset + strong_alias (__libc_memset, memset); +diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S +new file mode 100644 +index 0000000000000000..ce54e5418b08c8bc +--- /dev/null ++++ b/sysdeps/aarch64/multiarch/memset_a64fx.S +@@ -0,0 +1,268 @@ ++/* Optimized memset for Fujitsu A64FX processor. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++ ++/* Assumptions: ++ * ++ * ARMv8.2-a, AArch64, unaligned accesses, sve ++ * ++ */ ++ ++#define L1_SIZE (64*1024) // L1 64KB ++#define L2_SIZE (8*1024*1024) // L2 8MB - 1MB ++#define CACHE_LINE_SIZE 256 ++#define PF_DIST_L1 (CACHE_LINE_SIZE * 16) // Prefetch distance L1 ++#define ZF_DIST (CACHE_LINE_SIZE * 21) // Zerofill distance ++#define rest x8 ++#define vector_length x9 ++#define vl_remainder x10 // vector_length remainder ++#define cl_remainder x11 // CACHE_LINE_SIZE remainder ++ ++#if HAVE_AARCH64_SVE_ASM ++# if IS_IN (libc) ++# define MEMSET __memset_a64fx ++ ++ .arch armv8.2-a+sve ++ ++ .macro dc_zva times ++ dc zva, tmp1 ++ add tmp1, tmp1, CACHE_LINE_SIZE ++ .if \times-1 ++ dc_zva "(\times-1)" ++ .endif ++ .endm ++ ++ .macro st1b_unroll first=0, last=7 ++ st1b z0.b, p0, [dst, #\first, mul vl] ++ .if \last-\first ++ st1b_unroll "(\first+1)", \last ++ .endif ++ .endm ++ ++ .macro shortcut_for_small_size exit ++ // if rest <= vector_length * 2 ++ whilelo p0.b, xzr, count ++ whilelo p1.b, vector_length, count ++ b.last 1f ++ st1b z0.b, p0, [dstin, #0, mul vl] ++ st1b z0.b, p1, [dstin, #1, mul vl] ++ ret ++1: // if rest > vector_length * 8 ++ cmp count, vector_length, lsl 3 // vector_length * 8 ++ b.hi \exit ++ // if rest <= vector_length * 4 ++ lsl tmp1, vector_length, 1 // vector_length * 2 ++ whilelo p2.b, tmp1, count ++ incb tmp1 ++ whilelo p3.b, tmp1, count ++ b.last 1f ++ st1b z0.b, p0, [dstin, #0, mul vl] ++ st1b z0.b, p1, [dstin, #1, mul vl] ++ st1b z0.b, p2, [dstin, #2, mul vl] ++ st1b z0.b, p3, [dstin, #3, mul vl] ++ ret ++1: // if rest <= vector_length * 8 ++ lsl tmp1, vector_length, 2 // vector_length * 4 ++ whilelo p4.b, tmp1, count ++ incb tmp1 ++ whilelo p5.b, tmp1, count ++ b.last 1f ++ st1b z0.b, p0, [dstin, #0, mul vl] ++ st1b z0.b, p1, [dstin, #1, mul vl] ++ st1b z0.b, p2, [dstin, #2, mul vl] ++ st1b z0.b, p3, [dstin, #3, mul vl] ++ st1b z0.b, p4, [dstin, #4, mul vl] ++ st1b z0.b, p5, [dstin, #5, mul vl] ++ ret ++1: lsl tmp1, vector_length, 2 // vector_length * 4 ++ incb tmp1 // vector_length * 5 ++ incb tmp1 // vector_length * 6 ++ whilelo p6.b, tmp1, count ++ incb tmp1 ++ whilelo p7.b, tmp1, count ++ st1b z0.b, p0, [dstin, #0, mul vl] ++ st1b z0.b, p1, [dstin, #1, mul vl] ++ st1b z0.b, p2, [dstin, #2, mul vl] ++ st1b z0.b, p3, [dstin, #3, mul vl] ++ st1b z0.b, 
p4, [dstin, #4, mul vl] ++ st1b z0.b, p5, [dstin, #5, mul vl] ++ st1b z0.b, p6, [dstin, #6, mul vl] ++ st1b z0.b, p7, [dstin, #7, mul vl] ++ ret ++ .endm ++ ++ENTRY (MEMSET) ++ ++ PTR_ARG (0) ++ SIZE_ARG (2) ++ ++ cbnz count, 1f ++ ret ++1: dup z0.b, valw ++ cntb vector_length ++ // shortcut for less than vector_length * 8 ++ // gives a free ptrue to p0.b for n >= vector_length ++ shortcut_for_small_size L(vl_agnostic) ++ // end of shortcut ++ ++L(vl_agnostic): // VL Agnostic ++ mov rest, count ++ mov dst, dstin ++ add dstend, dstin, count ++ // if rest >= L2_SIZE && vector_length == 64 then L(L2) ++ mov tmp1, 64 ++ cmp rest, L2_SIZE ++ ccmp vector_length, tmp1, 0, cs ++ b.eq L(L2) ++ // if rest >= L1_SIZE && vector_length == 64 then L(L1_prefetch) ++ cmp rest, L1_SIZE ++ ccmp vector_length, tmp1, 0, cs ++ b.eq L(L1_prefetch) ++ ++L(unroll32): ++ lsl tmp1, vector_length, 3 // vector_length * 8 ++ lsl tmp2, vector_length, 5 // vector_length * 32 ++ .p2align 3 ++1: cmp rest, tmp2 ++ b.cc L(unroll8) ++ st1b_unroll ++ add dst, dst, tmp1 ++ st1b_unroll ++ add dst, dst, tmp1 ++ st1b_unroll ++ add dst, dst, tmp1 ++ st1b_unroll ++ add dst, dst, tmp1 ++ sub rest, rest, tmp2 ++ b 1b ++ ++L(unroll8): ++ lsl tmp1, vector_length, 3 ++ .p2align 3 ++1: cmp rest, tmp1 ++ b.cc L(last) ++ st1b_unroll ++ add dst, dst, tmp1 ++ sub rest, rest, tmp1 ++ b 1b ++ ++L(last): ++ whilelo p0.b, xzr, rest ++ whilelo p1.b, vector_length, rest ++ b.last 1f ++ st1b z0.b, p0, [dst, #0, mul vl] ++ st1b z0.b, p1, [dst, #1, mul vl] ++ ret ++1: lsl tmp1, vector_length, 1 // vector_length * 2 ++ whilelo p2.b, tmp1, rest ++ incb tmp1 ++ whilelo p3.b, tmp1, rest ++ b.last 1f ++ st1b z0.b, p0, [dst, #0, mul vl] ++ st1b z0.b, p1, [dst, #1, mul vl] ++ st1b z0.b, p2, [dst, #2, mul vl] ++ st1b z0.b, p3, [dst, #3, mul vl] ++ ret ++1: lsl tmp1, vector_length, 2 // vector_length * 4 ++ whilelo p4.b, tmp1, rest ++ incb tmp1 ++ whilelo p5.b, tmp1, rest ++ incb tmp1 ++ whilelo p6.b, tmp1, rest ++ incb tmp1 ++ whilelo p7.b, tmp1, rest ++ st1b z0.b, p0, [dst, #0, mul vl] ++ st1b z0.b, p1, [dst, #1, mul vl] ++ st1b z0.b, p2, [dst, #2, mul vl] ++ st1b z0.b, p3, [dst, #3, mul vl] ++ st1b z0.b, p4, [dst, #4, mul vl] ++ st1b z0.b, p5, [dst, #5, mul vl] ++ st1b z0.b, p6, [dst, #6, mul vl] ++ st1b z0.b, p7, [dst, #7, mul vl] ++ ret ++ ++L(L1_prefetch): // if rest >= L1_SIZE ++ .p2align 3 ++1: st1b_unroll 0, 3 ++ prfm pstl1keep, [dst, PF_DIST_L1] ++ st1b_unroll 4, 7 ++ prfm pstl1keep, [dst, PF_DIST_L1 + CACHE_LINE_SIZE] ++ add dst, dst, CACHE_LINE_SIZE * 2 ++ sub rest, rest, CACHE_LINE_SIZE * 2 ++ cmp rest, L1_SIZE ++ b.ge 1b ++ cbnz rest, L(unroll32) ++ ret ++ ++L(L2): ++ // align dst address at vector_length byte boundary ++ sub tmp1, vector_length, 1 ++ ands tmp2, dst, tmp1 ++ // if vl_remainder == 0 ++ b.eq 1f ++ sub vl_remainder, vector_length, tmp2 ++ // process remainder until the first vector_length boundary ++ whilelt p2.b, xzr, vl_remainder ++ st1b z0.b, p2, [dst] ++ add dst, dst, vl_remainder ++ sub rest, rest, vl_remainder ++ // align dstin address at CACHE_LINE_SIZE byte boundary ++1: mov tmp1, CACHE_LINE_SIZE ++ ands tmp2, dst, CACHE_LINE_SIZE - 1 ++ // if cl_remainder == 0 ++ b.eq L(L2_dc_zva) ++ sub cl_remainder, tmp1, tmp2 ++ // process remainder until the first CACHE_LINE_SIZE boundary ++ mov tmp1, xzr // index ++2: whilelt p2.b, tmp1, cl_remainder ++ st1b z0.b, p2, [dst, tmp1] ++ incb tmp1 ++ cmp tmp1, cl_remainder ++ b.lo 2b ++ add dst, dst, cl_remainder ++ sub rest, rest, cl_remainder ++ ++L(L2_dc_zva): ++ // zero fill ++ mov 
tmp1, dst ++ dc_zva (ZF_DIST / CACHE_LINE_SIZE) - 1 ++ mov zva_len, ZF_DIST ++ add tmp1, zva_len, CACHE_LINE_SIZE * 2 ++ // unroll ++ .p2align 3 ++1: st1b_unroll 0, 3 ++ add tmp2, dst, zva_len ++ dc zva, tmp2 ++ st1b_unroll 4, 7 ++ add tmp2, tmp2, CACHE_LINE_SIZE ++ dc zva, tmp2 ++ add dst, dst, CACHE_LINE_SIZE * 2 ++ sub rest, rest, CACHE_LINE_SIZE * 2 ++ cmp rest, tmp1 // ZF_DIST + CACHE_LINE_SIZE * 2 ++ b.ge 1b ++ cbnz rest, L(unroll8) ++ ret ++ ++END (MEMSET) ++libc_hidden_builtin_def (MEMSET) ++ ++#endif /* IS_IN (libc) */ ++#endif /* HAVE_AARCH64_SVE_ASM */ diff --git a/SOURCES/glibc-rh1984802-1.patch b/SOURCES/glibc-rh1984802-1.patch new file mode 100644 index 0000000..47fcc60 --- /dev/null +++ b/SOURCES/glibc-rh1984802-1.patch @@ -0,0 +1,50 @@ +From 756c306502498f999fdd494477b9cea1b45e4faf Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Fri, 21 Aug 2020 11:23:17 +0200 +Subject: [PATCH] S390: Sync HWCAP names with kernel by adding aliases [BZ + #25971] + +Unfortunately some HWCAP names like HWCAP_S390_VX differs between +kernel (see /arch/s390/include/asm/elf.h) and glibc. + +Therefore, those HWCAP names from kernel are now introduced as alias +--- + sysdeps/s390/dl-procinfo.h | 3 +++ + sysdeps/unix/sysv/linux/s390/bits/hwcap.h | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h +index 0db4bc39c7..08eee109f7 100644 +--- a/sysdeps/s390/dl-procinfo.h ++++ b/sysdeps/s390/dl-procinfo.h +@@ -51,8 +51,11 @@ enum + HWCAP_S390_HIGH_GPRS = 1 << 9, + HWCAP_S390_TE = 1 << 10, + HWCAP_S390_VX = 1 << 11, ++ HWCAP_S390_VXRS = HWCAP_S390_VX, + HWCAP_S390_VXD = 1 << 12, ++ HWCAP_S390_VXRS_BCD = HWCAP_S390_VXD, + HWCAP_S390_VXE = 1 << 13, ++ HWCAP_S390_VXRS_EXT = HWCAP_S390_VXE, + HWCAP_S390_GS = 1 << 14, + HWCAP_S390_VXRS_EXT2 = 1 << 15, + HWCAP_S390_VXRS_PDE = 1 << 16, +diff --git a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h +index 6adbec018b..f2998ff131 100644 +--- a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h +@@ -36,8 +36,11 @@ + #define HWCAP_S390_HIGH_GPRS 512 + #define HWCAP_S390_TE 1024 + #define HWCAP_S390_VX 2048 ++#define HWCAP_S390_VXRS HWCAP_S390_VX + #define HWCAP_S390_VXD 4096 ++#define HWCAP_S390_VXRS_BCD HWCAP_S390_VXD + #define HWCAP_S390_VXE 8192 ++#define HWCAP_S390_VXRS_EXT HWCAP_S390_VXE + #define HWCAP_S390_GS 16384 + #define HWCAP_S390_VXRS_EXT2 32768 + #define HWCAP_S390_VXRS_PDE 65536 +-- +2.31.1 + diff --git a/SOURCES/glibc-rh1984802-2.patch b/SOURCES/glibc-rh1984802-2.patch new file mode 100644 index 0000000..4d7d4b5 --- /dev/null +++ b/SOURCES/glibc-rh1984802-2.patch @@ -0,0 +1,67 @@ +From 25251c0707fe34f30a27381a5fabc35435a96621 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Tue, 16 Feb 2021 16:18:56 +0100 +Subject: [PATCH] S390: Add new hwcap values. + +The new hwcap values indicate support for arch14 architecture. 
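+
+A minimal sketch of how a program might probe the new bits at run time,
+assuming headers that already carry this patch (an illustration, not
+part of the change):
+
+  #include <stdio.h>
+  #include <sys/auxv.h>
+
+  int
+  main (void)
+  {
+    unsigned long hwcap = getauxval (AT_HWCAP);
+    printf ("vxp2: %d nnpa: %d\n",
+            (hwcap & HWCAP_S390_VXRS_PDE2) != 0,
+            (hwcap & HWCAP_S390_NNPA) != 0);
+    return 0;
+  }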
+--- + sysdeps/s390/dl-procinfo.c | 5 +++-- + sysdeps/s390/dl-procinfo.h | 4 +++- + sysdeps/unix/sysv/linux/s390/bits/hwcap.h | 2 ++ + 3 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c +index 0c334a2551..c174e27b35 100644 +--- a/sysdeps/s390/dl-procinfo.c ++++ b/sysdeps/s390/dl-procinfo.c +@@ -46,12 +46,13 @@ + #if !defined PROCINFO_DECL && defined SHARED + ._dl_s390_cap_flags + #else +-PROCINFO_CLASS const char _dl_s390_cap_flags[19][9] ++PROCINFO_CLASS const char _dl_s390_cap_flags[21][9] + #endif + #ifndef PROCINFO_DECL + = { + "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh", +- "highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt" ++ "highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt", ++ "vxp2", "nnpa" + } + #endif + #if !defined SHARED || defined PROCINFO_DECL +diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h +index 9e1a8c7ba9..2d9c305808 100644 +--- a/sysdeps/s390/dl-procinfo.h ++++ b/sysdeps/s390/dl-procinfo.h +@@ -21,7 +21,7 @@ + #define _DL_PROCINFO_H 1 + #include + +-#define _DL_HWCAP_COUNT 19 ++#define _DL_HWCAP_COUNT 21 + + #define _DL_PLATFORMS_COUNT 10 + +@@ -61,6 +61,8 @@ enum + HWCAP_S390_VXRS_PDE = 1 << 16, + HWCAP_S390_SORT = 1 << 17, + HWCAP_S390_DFLT = 1 << 18, ++ HWCAP_S390_VXRS_PDE2 = 1 << 19, ++ HWCAP_S390_NNPA = 1 << 20, + }; + + #define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \ +diff --git a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h +index 696616e779..e9bd3684db 100644 +--- a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h +@@ -46,3 +46,5 @@ + #define HWCAP_S390_VXRS_PDE 65536 + #define HWCAP_S390_SORT 131072 + #define HWCAP_S390_DFLT 262144 ++#define HWCAP_S390_VXRS_PDE2 524288 ++#define HWCAP_S390_NNPA 1048576 +-- +2.31.1 + diff --git a/SOURCES/glibc-rh1984802-3.patch b/SOURCES/glibc-rh1984802-3.patch new file mode 100644 index 0000000..c0412b4 --- /dev/null +++ b/SOURCES/glibc-rh1984802-3.patch @@ -0,0 +1,88 @@ +From f2e06656d04a9fcb0603802a4f8ce7aa3a1f055e Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Tue, 5 Oct 2021 16:14:10 +0200 +Subject: [PATCH] S390: Add PCI_MIO and SIE HWCAPs + +Both new HWCAPs were introduced in these kernel commits: +- 7e8403ecaf884f307b627f3c371475913dd29292 + "s390: add HWCAP_S390_PCI_MIO to ELF hwcaps" +- 7e82523f2583e9813e4109df3656707162541297 + "s390/hwcaps: make sie capability regular hwcap" + +Also note that the kernel commit 511ad531afd4090625def4d9aba1f5227bd44b8e +"s390/hwcaps: shorten HWCAP defines" has shortened the prefix of the macros +from "HWCAP_S390_" to "HWCAP_". For compatibility reasons, we do not +change the prefix in public glibc header file. 
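+
+For programs built against headers that predate this patch, the bit
+values can be supplied as fallback defines (values taken from the hunks
+below; a sketch only):
+
+  #include <sys/auxv.h>
+
+  #ifndef HWCAP_S390_PCI_MIO
+  # define HWCAP_S390_PCI_MIO (1UL << 21)
+  #endif
+  #ifndef HWCAP_S390_SIE
+  # define HWCAP_S390_SIE (1UL << 22)
+  #endif
+
+  int
+  have_sie (void)
+  {
+    return (getauxval (AT_HWCAP) & HWCAP_S390_SIE) != 0;
+  }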
+--- + sysdeps/s390/dl-procinfo.c | 4 ++-- + sysdeps/s390/dl-procinfo.h | 4 +++- + sysdeps/unix/sysv/linux/s390/bits/hwcap.h | 7 +++++++ + 3 files changed, 12 insertions(+), 3 deletions(-) + +diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c +index 7314c31b15..97be34fe9d 100644 +--- a/sysdeps/s390/dl-procinfo.c ++++ b/sysdeps/s390/dl-procinfo.c +@@ -45,13 +45,13 @@ + #if !defined PROCINFO_DECL && defined SHARED + ._dl_s390_cap_flags + #else +-PROCINFO_CLASS const char _dl_s390_cap_flags[21][9] ++PROCINFO_CLASS const char _dl_s390_cap_flags[23][9] + #endif + #ifndef PROCINFO_DECL + = { + "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh", + "highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt", +- "vxp2", "nnpa" ++ "vxp2", "nnpa", "pcimio", "sie" + } + #endif + #if !defined SHARED || defined PROCINFO_DECL +diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h +index 2502dd2604..d9a3b264ff 100644 +--- a/sysdeps/s390/dl-procinfo.h ++++ b/sysdeps/s390/dl-procinfo.h +@@ -20,7 +20,7 @@ + #define _DL_PROCINFO_H 1 + #include + +-#define _DL_HWCAP_COUNT 21 ++#define _DL_HWCAP_COUNT 23 + + #define _DL_PLATFORMS_COUNT 10 + +@@ -62,6 +62,8 @@ enum + HWCAP_S390_DFLT = 1 << 18, + HWCAP_S390_VXRS_PDE2 = 1 << 19, + HWCAP_S390_NNPA = 1 << 20, ++ HWCAP_S390_PCI_MIO = 1 << 21, ++ HWCAP_S390_SIE = 1 << 22, + }; + + #define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \ +diff --git a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h +index e9bd3684db..00e73a3e3b 100644 +--- a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h +@@ -22,6 +22,11 @@ + + /* + * The following must match the kernels asm/elf.h. ++ * Note: The kernel commit 511ad531afd4090625def4d9aba1f5227bd44b8e ++ * "s390/hwcaps: shorten HWCAP defines" has shortened the prefix of the macros ++ * from "HWCAP_S390_" to "HWCAP_". For compatibility reasons, we do not ++ * change the prefix in public glibc header file. ++ * + * Note that these are *not* the same as the STORE FACILITY LIST bits. + */ + #define HWCAP_S390_ESAN3 1 +@@ -48,3 +53,5 @@ + #define HWCAP_S390_DFLT 262144 + #define HWCAP_S390_VXRS_PDE2 524288 + #define HWCAP_S390_NNPA 1048576 ++#define HWCAP_S390_PCI_MIO 2097152 ++#define HWCAP_S390_SIE 4194304 +-- +2.31.1 + diff --git a/SOURCES/glibc-rh1991001-1.patch b/SOURCES/glibc-rh1991001-1.patch new file mode 100644 index 0000000..6d09fa0 --- /dev/null +++ b/SOURCES/glibc-rh1991001-1.patch @@ -0,0 +1,30 @@ +commit ad78d702757a189b1fa552d607e8aaa22252a45f +Author: Florian Weimer +Date: Tue May 12 19:06:18 2020 +0200 + + elf: Remove redundant add_to_global_resize_failure call from dl_open_args + + The second call does not do anything because the data structures have + already been resized by the call that comes before the demarcation + point. Fixes commit a509eb117fac1d764b15eba64993f4bdb63d7f3c + ("Avoid late dlopen failure due to scope, TLS slotinfo updates + [BZ #25112]"). + + Reviewed-by: Carlos O'Donell + +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 3d49a84596e99bf6..b052bb0bc2cd17aa 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -769,11 +769,6 @@ dl_open_worker (void *a) + DL_STATIC_INIT (new); + #endif + +- /* Perform the necessary allocations for adding new global objects +- to the global scope below, via add_to_global_update. */ +- if (mode & RTLD_GLOBAL) +- add_to_global_resize (new); +- + /* Run the initializer functions of new objects. 
Temporarily + disable the exception handler, so that lazy binding failures are + fatal. */ diff --git a/SOURCES/glibc-rh1991001-10.patch b/SOURCES/glibc-rh1991001-10.patch new file mode 100644 index 0000000..7263cc7 --- /dev/null +++ b/SOURCES/glibc-rh1991001-10.patch @@ -0,0 +1,23 @@ +commit 52290d8c04569615fb011ee286d52dc5147afbd7 +Author: Szabolcs Nagy +Date: Thu Apr 15 09:57:10 2021 +0100 + + elf: Fix missing include in test case [BZ #27136] + + Broken test was introduced in + + commit 8f85075a2e9c26ff7486d4bbaf358999807d215c + elf: Add a DTV setup test [BZ #27136] + +diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c +index ac5f8c8d39b66dd6..9977ec803208b9c8 100644 +--- a/elf/tst-tls20.c ++++ b/elf/tst-tls20.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + #include + diff --git a/SOURCES/glibc-rh1991001-11.patch b/SOURCES/glibc-rh1991001-11.patch new file mode 100644 index 0000000..a704d26 --- /dev/null +++ b/SOURCES/glibc-rh1991001-11.patch @@ -0,0 +1,160 @@ +commit 2208066603a136f95cfb815ca9281262e6465784 +Author: Szabolcs Nagy +Date: Thu Feb 11 13:24:47 2021 +0000 + + elf: Remove lazy tlsdesc relocation related code + + Remove generic tlsdesc code related to lazy tlsdesc processing since + lazy tlsdesc relocation is no longer supported. This includes removing + GL(dl_load_lock) from _dl_make_tlsdesc_dynamic which is only called at + load time when that lock is already held. + + Added a documentation comment too. + + Reviewed-by: Adhemerval Zanella + +diff --git a/elf/tlsdeschtab.h b/elf/tlsdeschtab.h +index fea9eefe72edcd6b..c20857e5b4264f00 100644 +--- a/elf/tlsdeschtab.h ++++ b/elf/tlsdeschtab.h +@@ -78,6 +78,10 @@ map_generation (struct link_map *map) + return GL(dl_tls_generation) + 1; + } + ++/* Returns the data pointer for a given map and tls offset that is used ++ to fill in one of the GOT entries referenced by a TLSDESC relocation ++ when using dynamic TLS. This requires allocation, returns NULL on ++ allocation failure. */ + void * + _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset) + { +@@ -85,18 +89,12 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset) + void **entry; + struct tlsdesc_dynamic_arg *td, test; + +- /* FIXME: We could use a per-map lock here, but is it worth it? */ +- __rtld_lock_lock_recursive (GL(dl_load_lock)); +- + ht = map->l_mach.tlsdesc_table; + if (! ht) + { + ht = htab_create (); + if (! ht) +- { +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); +- return 0; +- } ++ return 0; + map->l_mach.tlsdesc_table = ht; + } + +@@ -104,15 +102,11 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset) + test.tlsinfo.ti_offset = ti_offset; + entry = htab_find_slot (ht, &test, 1, hash_tlsdesc, eq_tlsdesc); + if (! entry) +- { +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); +- return 0; +- } ++ return 0; + + if (*entry) + { + td = *entry; +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); + return td; + } + +@@ -122,44 +116,9 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset) + thread. */ + td->gen_count = map_generation (map); + td->tlsinfo = test.tlsinfo; +- +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); + return td; + } + + # endif /* SHARED */ + +-/* The idea of the following two functions is to stop multiple threads +- from attempting to resolve the same TLS descriptor without busy +- waiting. 
Ideally, we should be able to release the lock right +- after changing td->entry, and then using say a condition variable +- or a futex wake to wake up any waiting threads, but let's try to +- avoid introducing such dependencies. */ +- +-static int +-__attribute__ ((unused)) +-_dl_tlsdesc_resolve_early_return_p (struct tlsdesc volatile *td, void *caller) +-{ +- if (caller != atomic_load_relaxed (&td->entry)) +- return 1; +- +- __rtld_lock_lock_recursive (GL(dl_load_lock)); +- if (caller != atomic_load_relaxed (&td->entry)) +- { +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); +- return 1; +- } +- +- atomic_store_relaxed (&td->entry, _dl_tlsdesc_resolve_hold); +- +- return 0; +-} +- +-static void +-__attribute__ ((unused)) +-_dl_tlsdesc_wake_up_held_fixups (void) +-{ +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); +-} +- + #endif +diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c +index 357465f23d76e2bd..1ead73ab8250e29c 100644 +--- a/sysdeps/aarch64/tlsdesc.c ++++ b/sysdeps/aarch64/tlsdesc.c +@@ -22,7 +22,6 @@ + #include + #include + #include +-#define _dl_tlsdesc_resolve_hold 0 + #include + + /* Unmap the dynamic object, but also release its TLS descriptor table +diff --git a/sysdeps/arm/tlsdesc.c b/sysdeps/arm/tlsdesc.c +index d142d7a2c91e9adb..b78e3f65785bf587 100644 +--- a/sysdeps/arm/tlsdesc.c ++++ b/sysdeps/arm/tlsdesc.c +@@ -20,7 +20,6 @@ + #include + #include + #include +-#define _dl_tlsdesc_resolve_hold 0 + #include + + /* Unmap the dynamic object, but also release its TLS descriptor table +diff --git a/sysdeps/i386/tlsdesc.c b/sysdeps/i386/tlsdesc.c +index 1b4227c8381e1b3d..c242ffce726d50e4 100644 +--- a/sysdeps/i386/tlsdesc.c ++++ b/sysdeps/i386/tlsdesc.c +@@ -20,7 +20,6 @@ + #include + #include + #include +-#define _dl_tlsdesc_resolve_hold 0 + #include + + /* Unmap the dynamic object, but also release its TLS descriptor table +diff --git a/sysdeps/x86_64/tlsdesc.c b/sysdeps/x86_64/tlsdesc.c +index 61a19ae26944c84f..a9325827d0e5e31b 100644 +--- a/sysdeps/x86_64/tlsdesc.c ++++ b/sysdeps/x86_64/tlsdesc.c +@@ -20,7 +20,6 @@ + #include + #include + #include +-#define _dl_tlsdesc_resolve_hold 0 + #include + + /* Unmap the dynamic object, but also release its TLS descriptor table diff --git a/SOURCES/glibc-rh1991001-12.patch b/SOURCES/glibc-rh1991001-12.patch new file mode 100644 index 0000000..67f9bab --- /dev/null +++ b/SOURCES/glibc-rh1991001-12.patch @@ -0,0 +1,182 @@ +commit 1387ad6225c2222f027790e3f460e31aa5dd2c54 +Author: Szabolcs Nagy +Date: Wed Dec 30 19:19:37 2020 +0000 + + elf: Fix data races in pthread_create and TLS access [BZ #19329] + + DTV setup at thread creation (_dl_allocate_tls_init) is changed + to take the dlopen lock, GL(dl_load_lock). Avoiding data races + here without locks would require design changes: the map that is + accessed for static TLS initialization here may be concurrently + freed by dlclose. That use after free may be solved by only + locking around static TLS setup or by ensuring dlclose does not + free modules with static TLS, however currently every link map + with TLS has to be accessed at least to see if it needs static + TLS. And even if that's solved, still a lot of atomics would be + needed to synchronize DTV related globals without a lock. So fix + both bug 19329 and bug 27111 with a lock that prevents DTV setup + running concurrently with dlopen or dlclose. + + _dl_update_slotinfo at TLS access still does not use any locks + so CONCURRENCY NOTES are added to explain the synchronization. 
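+
+    The release/acquire pairing on the slotinfo list works like this,
+    sketched in C11 atomics (glibc itself uses its internal
+    atomic_store_release/atomic_load_acquire macros; names simplified):
+
+      #include <stdatomic.h>
+      #include <stddef.h>
+
+      struct node { struct node *_Atomic next; /* ... payload ... */ };
+
+      /* Writer (dlopen path, serialized by GL(dl_load_lock)): fill in
+         the node first, then publish it with a release store.  */
+      static void
+      publish (struct node *prevp, struct node *newp)
+      {
+        atomic_init (&newp->next, NULL);
+        /* ... initialize newp's payload ... */
+        atomic_store_explicit (&prevp->next, newp, memory_order_release);
+      }
+
+      /* Reader (lock-free TLS access): the acquire load makes every
+         write sequenced before the matching release store visible.  */
+      static struct node *
+      next_node (struct node *p)
+      {
+        return atomic_load_explicit (&p->next, memory_order_acquire);
+      }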
+ The early exit from the slotinfo walk when max_modid is reached + is not strictly necessary, but does not hurt either. + + An incorrect acquire load was removed from _dl_resize_dtv: it + did not synchronize with any release store or fence and + synchronization is now handled separately at thread creation + and TLS access time. + + There are still a number of racy read accesses to globals that + will be changed to relaxed MO atomics in a followup patch. This + should not introduce regressions compared to existing behaviour + and avoid cluttering the main part of the fix. + + Not all TLS access related data races got fixed here: there are + additional races at lazy tlsdesc relocations see bug 27137. + + Reviewed-by: Adhemerval Zanella + +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index 15ed01d795a8627a..da83cd6ae2ee6504 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -471,14 +471,11 @@ extern dtv_t _dl_static_dtv[]; + #endif + + static dtv_t * +-_dl_resize_dtv (dtv_t *dtv) ++_dl_resize_dtv (dtv_t *dtv, size_t max_modid) + { + /* Resize the dtv. */ + dtv_t *newp; +- /* Load GL(dl_tls_max_dtv_idx) atomically since it may be written to by +- other threads concurrently. */ +- size_t newsize +- = atomic_load_acquire (&GL(dl_tls_max_dtv_idx)) + DTV_SURPLUS; ++ size_t newsize = max_modid + DTV_SURPLUS; + size_t oldsize = dtv[-1].counter; + + if (dtv == GL(dl_initial_dtv)) +@@ -524,11 +521,14 @@ _dl_allocate_tls_init (void *result) + size_t total = 0; + size_t maxgen = 0; + ++ /* Protects global dynamic TLS related state. */ ++ __rtld_lock_lock_recursive (GL(dl_load_lock)); ++ + /* Check if the current dtv is big enough. */ + if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) + { + /* Resize the dtv. */ +- dtv = _dl_resize_dtv (dtv); ++ dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx)); + + /* Install this new dtv in the thread data structures. */ + INSTALL_DTV (result, &dtv[-1]); +@@ -596,6 +596,7 @@ _dl_allocate_tls_init (void *result) + listp = listp->next; + assert (listp != NULL); + } ++ __rtld_lock_unlock_recursive (GL(dl_load_lock)); + + /* The DTV version is up-to-date now. */ + dtv[0].counter = maxgen; +@@ -730,12 +731,29 @@ _dl_update_slotinfo (unsigned long int req_modid) + + if (dtv[0].counter < listp->slotinfo[idx].gen) + { +- /* The generation counter for the slot is higher than what the +- current dtv implements. We have to update the whole dtv but +- only those entries with a generation counter <= the one for +- the entry we need. */ ++ /* CONCURRENCY NOTES: ++ ++ Here the dtv needs to be updated to new_gen generation count. ++ ++ This code may be called during TLS access when GL(dl_load_lock) ++ is not held. In that case the user code has to synchronize with ++ dlopen and dlclose calls of relevant modules. A module m is ++ relevant if the generation of m <= new_gen and dlclose of m is ++ synchronized: a memory access here happens after the dlopen and ++ before the dlclose of relevant modules. The dtv entries for ++ relevant modules need to be updated, other entries can be ++ arbitrary. ++ ++ This e.g. means that the first part of the slotinfo list can be ++ accessed race free, but the tail may be concurrently extended. ++ Similarly relevant slotinfo entries can be read race free, but ++ other entries are racy. However updating a non-relevant dtv ++ entry does not affect correctness. For a relevant module m, ++ max_modid >= modid of m. 
*/ + size_t new_gen = listp->slotinfo[idx].gen; + size_t total = 0; ++ size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx)); ++ assert (max_modid >= req_modid); + + /* We have to look through the entire dtv slotinfo list. */ + listp = GL(dl_tls_dtv_slotinfo_list); +@@ -745,12 +763,14 @@ _dl_update_slotinfo (unsigned long int req_modid) + { + size_t modid = total + cnt; + ++ /* Later entries are not relevant. */ ++ if (modid > max_modid) ++ break; ++ + size_t gen = listp->slotinfo[cnt].gen; + + if (gen > new_gen) +- /* This is a slot for a generation younger than the +- one we are handling now. It might be incompletely +- set up so ignore it. */ ++ /* Not relevant. */ + continue; + + /* If the entry is older than the current dtv layout we +@@ -767,7 +787,7 @@ _dl_update_slotinfo (unsigned long int req_modid) + continue; + + /* Resize the dtv. */ +- dtv = _dl_resize_dtv (dtv); ++ dtv = _dl_resize_dtv (dtv, max_modid); + + assert (modid <= dtv[-1].counter); + +@@ -789,8 +809,17 @@ _dl_update_slotinfo (unsigned long int req_modid) + } + + total += listp->len; ++ if (total > max_modid) ++ break; ++ ++ /* Synchronize with _dl_add_to_slotinfo. Ideally this would ++ be consume MO since we only need to order the accesses to ++ the next node after the read of the address and on most ++ hardware (other than alpha) a normal load would do that ++ because of the address dependency. */ ++ listp = atomic_load_acquire (&listp->next); + } +- while ((listp = listp->next) != NULL); ++ while (listp != NULL); + + /* This will be the new maximum generation counter. */ + dtv[0].counter = new_gen; +@@ -982,7 +1011,7 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add) + the first slot. */ + assert (idx == 0); + +- listp = prevp->next = (struct dtv_slotinfo_list *) ++ listp = (struct dtv_slotinfo_list *) + malloc (sizeof (struct dtv_slotinfo_list) + + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); + if (listp == NULL) +@@ -996,6 +1025,8 @@ cannot create TLS data structures")); + listp->next = NULL; + memset (listp->slotinfo, '\0', + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); ++ /* Synchronize with _dl_update_slotinfo. */ ++ atomic_store_release (&prevp->next, listp); + } + + /* Add the information into the slotinfo data structure. */ diff --git a/SOURCES/glibc-rh1991001-13.patch b/SOURCES/glibc-rh1991001-13.patch new file mode 100644 index 0000000..524eb3c --- /dev/null +++ b/SOURCES/glibc-rh1991001-13.patch @@ -0,0 +1,193 @@ +commit f4f8f4d4e0f92488431b268c8cd9555730b9afe9 +Author: Szabolcs Nagy +Date: Wed Dec 30 19:19:37 2020 +0000 + + elf: Use relaxed atomics for racy accesses [BZ #19329] + + This is a follow up patch to the fix for bug 19329. This adds relaxed + MO atomics to accesses that were previously data races but are now + race conditions, and where relaxed MO is sufficient. + + The race conditions all follow the pattern that the write is behind the + dlopen lock, but a read can happen concurrently (e.g. during tls access) + without holding the lock. For slotinfo entries the read value only + matters if it reads from a synchronized write in dlopen or dlclose, + otherwise the related dtv entry is not valid to access so it is fine + to leave it in an inconsistent state. The same applies for + GL(dl_tls_max_dtv_idx) and GL(dl_tls_generation), but there the + algorithm relies on the fact that the read of the last synchronized + write is an increasing value. 
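To make the pattern concrete, here is a minimal C11 sketch of the writer/reader split described above (an illustrative sketch with invented names: glibc itself uses its internal atomic_store_relaxed/atomic_load_relaxed wrappers on the GL() globals, not _Atomic fields):

  #include <stdatomic.h>
  #include <stddef.h>

  /* Writer side: only runs with the dlopen lock held, so writers are
     serialized; the relaxed store merely makes the concurrent
     lock-free readers well defined instead of a data race.  */
  static _Atomic size_t generation;

  static void
  bump_generation (void)
  {
    size_t g = atomic_load_explicit (&generation, memory_order_relaxed);
    atomic_store_explicit (&generation, g + 1, memory_order_relaxed);
  }

  /* Reader side: may run without the lock, e.g. during TLS access.
     A stale value is harmless because the caller only acts on modules
     whose dlopen it has already synchronized with, and any value read
     from a synchronized write is monotonically increasing.  */
  static size_t
  snapshot_generation (void)
  {
    return atomic_load_explicit (&generation, memory_order_relaxed);
  }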
+ + Reviewed-by: Adhemerval Zanella + +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 1ece0ae1dd062d1e..7d2dc2272cd643f5 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -79,9 +79,10 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp, + { + assert (old_map->l_tls_modid == idx); + +- /* Mark the entry as unused. */ +- listp->slotinfo[idx - disp].gen = GL(dl_tls_generation) + 1; +- listp->slotinfo[idx - disp].map = NULL; ++ /* Mark the entry as unused. These can be read concurrently. */ ++ atomic_store_relaxed (&listp->slotinfo[idx - disp].gen, ++ GL(dl_tls_generation) + 1); ++ atomic_store_relaxed (&listp->slotinfo[idx - disp].map, NULL); + } + + /* If this is not the last currently used entry no need to look +@@ -96,8 +97,8 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp, + + if (listp->slotinfo[idx - disp].map != NULL) + { +- /* Found a new last used index. */ +- GL(dl_tls_max_dtv_idx) = idx; ++ /* Found a new last used index. This can be read concurrently. */ ++ atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), idx); + return true; + } + } +@@ -571,7 +572,9 @@ _dl_close_worker (struct link_map *map, bool force) + GL(dl_tls_dtv_slotinfo_list), 0, + imap->l_init_called)) + /* All dynamically loaded modules with TLS are unloaded. */ +- GL(dl_tls_max_dtv_idx) = GL(dl_tls_static_nelem); ++ /* Can be read concurrently. */ ++ atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), ++ GL(dl_tls_static_nelem)); + + if (imap->l_tls_offset != NO_TLS_OFFSET + && imap->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET) +@@ -769,8 +772,11 @@ _dl_close_worker (struct link_map *map, bool force) + /* If we removed any object which uses TLS bump the generation counter. */ + if (any_tls) + { +- if (__glibc_unlikely (++GL(dl_tls_generation) == 0)) ++ size_t newgen = GL(dl_tls_generation) + 1; ++ if (__glibc_unlikely (newgen == 0)) + _dl_fatal_printf ("TLS generation counter wrapped! Please report as described in "REPORT_BUGS_TO".\n"); ++ /* Can be read concurrently. */ ++ atomic_store_relaxed (&GL(dl_tls_generation), newgen); + + if (tls_free_end == GL(dl_tls_static_used)) + GL(dl_tls_static_used) = tls_free_start; +diff --git a/elf/dl-open.c b/elf/dl-open.c +index b052bb0bc2cd17aa..a67fb3aee40860e1 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -395,9 +395,12 @@ update_tls_slotinfo (struct link_map *new) + } + } + +- if (__builtin_expect (++GL(dl_tls_generation) == 0, 0)) ++ size_t newgen = GL(dl_tls_generation) + 1; ++ if (__glibc_unlikely (newgen == 0)) + _dl_fatal_printf (N_("\ + TLS generation counter wrapped! Please report this.")); ++ /* Can be read concurrently. */ ++ atomic_store_relaxed (&GL(dl_tls_generation), newgen); + + /* We need a second pass for static tls data, because + _dl_update_slotinfo must not be run while calls to +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index da83cd6ae2ee6504..801eafad3961573c 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -175,7 +175,9 @@ _dl_next_tls_modid (void) + /* No gaps, allocate a new entry. */ + nogaps: + +- result = ++GL(dl_tls_max_dtv_idx); ++ result = GL(dl_tls_max_dtv_idx) + 1; ++ /* Can be read concurrently. */ ++ atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result); + } + + return result; +@@ -359,10 +361,12 @@ allocate_dtv (void *result) + dtv_t *dtv; + size_t dtv_length; + ++ /* Relaxed MO, because the dtv size is later rechecked, not relied on. 
*/ ++ size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx)); + /* We allocate a few more elements in the dtv than are needed for the + initial set of modules. This should avoid in most cases expansions + of the dtv. */ +- dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS; ++ dtv_length = max_modid + DTV_SURPLUS; + dtv = calloc (dtv_length + 2, sizeof (dtv_t)); + if (dtv != NULL) + { +@@ -767,7 +771,7 @@ _dl_update_slotinfo (unsigned long int req_modid) + if (modid > max_modid) + break; + +- size_t gen = listp->slotinfo[cnt].gen; ++ size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen); + + if (gen > new_gen) + /* Not relevant. */ +@@ -779,7 +783,8 @@ _dl_update_slotinfo (unsigned long int req_modid) + continue; + + /* If there is no map this means the entry is empty. */ +- struct link_map *map = listp->slotinfo[cnt].map; ++ struct link_map *map ++ = atomic_load_relaxed (&listp->slotinfo[cnt].map); + /* Check whether the current dtv array is large enough. */ + if (dtv[-1].counter < modid) + { +@@ -923,7 +928,12 @@ __tls_get_addr (GET_ADDR_ARGS) + { + dtv_t *dtv = THREAD_DTV (); + +- if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation))) ++ /* Update is needed if dtv[0].counter < the generation of the accessed ++ module. The global generation counter is used here as it is easier ++ to check. Synchronization for the relaxed MO access is guaranteed ++ by user code, see CONCURRENCY NOTES in _dl_update_slotinfo. */ ++ size_t gen = atomic_load_relaxed (&GL(dl_tls_generation)); ++ if (__glibc_unlikely (dtv[0].counter != gen)) + return update_get_addr (GET_ADDR_PARAM); + + void *p = dtv[GET_ADDR_MODULE].pointer.val; +@@ -946,7 +956,10 @@ _dl_tls_get_addr_soft (struct link_map *l) + return NULL; + + dtv_t *dtv = THREAD_DTV (); +- if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation))) ++ /* This may be called without holding the GL(dl_load_lock). Reading ++ arbitrary gen value is fine since this is best effort code. */ ++ size_t gen = atomic_load_relaxed (&GL(dl_tls_generation)); ++ if (__glibc_unlikely (dtv[0].counter != gen)) + { + /* This thread's DTV is not completely current, + but it might already cover this module. */ +@@ -1032,7 +1045,9 @@ cannot create TLS data structures")); + /* Add the information into the slotinfo data structure. */ + if (do_add) + { +- listp->slotinfo[idx].map = l; +- listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1; ++ /* Can be read concurrently. See _dl_update_slotinfo. 
*/ ++ atomic_store_relaxed (&listp->slotinfo[idx].map, l); ++ atomic_store_relaxed (&listp->slotinfo[idx].gen, ++ GL(dl_tls_generation) + 1); + } + } +diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c +index 533ee2b3a6e85ad8..bc543dcc264ea361 100644 +--- a/sysdeps/x86_64/dl-tls.c ++++ b/sysdeps/x86_64/dl-tls.c +@@ -40,7 +40,8 @@ __tls_get_addr_slow (GET_ADDR_ARGS) + { + dtv_t *dtv = THREAD_DTV (); + +- if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation))) ++ size_t gen = atomic_load_relaxed (&GL(dl_tls_generation)); ++ if (__glibc_unlikely (dtv[0].counter != gen)) + return update_get_addr (GET_ADDR_PARAM); + + return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL); diff --git a/SOURCES/glibc-rh1991001-14.patch b/SOURCES/glibc-rh1991001-14.patch new file mode 100644 index 0000000..3467532 --- /dev/null +++ b/SOURCES/glibc-rh1991001-14.patch @@ -0,0 +1,133 @@ +commit 9d0e30329c23b5ad736fda3f174208c25970dbce +Author: Szabolcs Nagy +Date: Tue Dec 13 12:28:41 2016 +0000 + + elf: Add test case for [BZ #19329] + + Test concurrent dlopen and pthread_create when the loaded modules have + TLS. This triggers dl-tls assertion failures more reliably than the + nptl/tst-stack4 test. + + The dlopened module has 100 DT_NEEDED dependencies with TLS, they were + reused from an existing TLS test. The number of created threads during + dlopen depends on filesystem speed and hardware, but at most 3 threads + are alive at a time to limit resource usage. + + Reviewed-by: Adhemerval Zanella + +Conflicts: + elf/Makefile + (usual testing differences) + +diff --git a/elf/Makefile b/elf/Makefile +index 0995d810b57d0dda..be40e3761cf91c4a 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -210,7 +210,7 @@ tests += restest1 preloadtest loadfail multiload origtest resolvfail \ + tst-tls-ie tst-tls-ie-dlmopen \ + argv0test \ + tst-glibc-hwcaps tst-glibc-hwcaps-prepend tst-glibc-hwcaps-mask \ +- tst-tls20 ++ tst-tls20 tst-tls21 + # reldep9 + tests-internal += loadtest unload unload2 circleload1 \ + neededtest neededtest2 neededtest3 neededtest4 \ +@@ -333,7 +333,7 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \ + libmarkermod2-1 libmarkermod2-2 \ + libmarkermod3-1 libmarkermod3-2 libmarkermod3-3 \ + libmarkermod4-1 libmarkermod4-2 libmarkermod4-3 libmarkermod4-4 \ +- tst-tls20mod-bad ++ tst-tls20mod-bad tst-tls21mod \ + + # Most modules build with _ISOMAC defined, but those filtered out + # depend on internal headers. +@@ -1836,3 +1836,8 @@ tst-tls20mod-bad.so-no-z-defs = yes + $(objpfx)tst-tls20: $(libdl) $(shared-thread-library) + $(objpfx)tst-tls20.out: $(objpfx)tst-tls20mod-bad.so \ + $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so) ++ ++# Reuses tst-tls-many-dynamic-modules ++$(objpfx)tst-tls21: $(libdl) $(shared-thread-library) ++$(objpfx)tst-tls21.out: $(objpfx)tst-tls21mod.so ++$(objpfx)tst-tls21mod.so: $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so) +diff --git a/elf/tst-tls21.c b/elf/tst-tls21.c +new file mode 100644 +index 0000000000000000..560bf5813a746417 +--- /dev/null ++++ b/elf/tst-tls21.c +@@ -0,0 +1,68 @@ ++/* Test concurrent dlopen and pthread_create: BZ 19329. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define THREADS 10000 ++ ++static atomic_int done; ++ ++static void * ++start (void *a) ++{ ++ /* Load a module with many dependencies that each have TLS. */ ++ xdlopen ("tst-tls21mod.so", RTLD_LAZY); ++ atomic_store_explicit (&done, 1, memory_order_release); ++ return 0; ++} ++ ++static void * ++nop (void *a) ++{ ++ return 0; ++} ++ ++static int ++do_test (void) ++{ ++ pthread_t t1, t2; ++ int i; ++ ++ /* Load a module with lots of dependencies and TLS. */ ++ t1 = xpthread_create (0, start, 0); ++ ++ /* Concurrently create lots of threads until dlopen is observably done. */ ++ for (i = 0; i < THREADS; i++) ++ { ++ if (atomic_load_explicit (&done, memory_order_acquire) != 0) ++ break; ++ t2 = xpthread_create (0, nop, 0); ++ xpthread_join (t2); ++ } ++ ++ xpthread_join (t1); ++ printf ("threads created during dlopen: %d\n", i); ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-tls21mod.c b/elf/tst-tls21mod.c +new file mode 100644 +index 0000000000000000..206ece4fb34622a9 +--- /dev/null ++++ b/elf/tst-tls21mod.c +@@ -0,0 +1 @@ ++int __thread x; diff --git a/SOURCES/glibc-rh1991001-15.patch b/SOURCES/glibc-rh1991001-15.patch new file mode 100644 index 0000000..553ecfb --- /dev/null +++ b/SOURCES/glibc-rh1991001-15.patch @@ -0,0 +1,81 @@ +commit 572bd547d57a39b6cf0ea072545dc4048921f4c3 +Author: Szabolcs Nagy +Date: Thu Dec 31 13:59:38 2020 +0000 + + elf: Fix DTV gap reuse logic [BZ #27135] + + For some reason only dlopen failure caused dtv gaps to be reused. + + It is possible that the intent was to never reuse modids for a + different module, but after dlopen failure all gaps are reused + not just the ones caused by the unfinished dlopened. + + So the code has to handle reused modids already which seems to + work, however the data races at thread creation and tls access + (see bug 19329 and bug 27111) may be more severe if slots are + reused so this is scheduled after those fixes. I think fixing + the races are not simpler if reuse is disallowed and reuse has + other benefits, so set GL(dl_tls_dtv_gaps) whenever entries are + removed from the middle of the slotinfo list. The value does + not have to be correct: incorrect true value causes the next + modid query to do a slotinfo walk, incorrect false will leave + gaps and new entries are added at the end. + + Fixes bug 27135. + + Reviewed-by: Adhemerval Zanella + +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 7d2dc2272cd643f5..41cb6c58491c364b 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -88,7 +88,11 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp, + /* If this is not the last currently used entry no need to look + further. */ + if (idx != GL(dl_tls_max_dtv_idx)) +- return true; ++ { ++ /* There is an unused dtv entry in the middle. */ ++ GL(dl_tls_dtv_gaps) = true; ++ return true; ++ } + } + + while (idx - disp > (disp == 0 ? 
1 + GL(dl_tls_static_nelem) : 0)) +diff --git a/elf/dl-open.c b/elf/dl-open.c +index a67fb3aee40860e1..54727402750f4c0c 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -896,16 +896,6 @@ no more namespaces available for dlmopen()")); + state if relocation failed, for example. */ + if (args.map) + { +- /* Maybe some of the modules which were loaded use TLS. +- Since it will be removed in the following _dl_close call +- we have to mark the dtv array as having gaps to fill the +- holes. This is a pessimistic assumption which won't hurt +- if not true. There is no need to do this when we are +- loading the auditing DSOs since TLS has not yet been set +- up. */ +- if ((mode & __RTLD_AUDIT) == 0) +- GL(dl_tls_dtv_gaps) = true; +- + _dl_close_worker (args.map, true); + + /* All l_nodelete_pending objects should have been deleted +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index 801eafad3961573c..bacb4101e2e2c4e5 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -187,7 +187,10 @@ _dl_next_tls_modid (void) + size_t + _dl_count_modids (void) + { +- /* It is rare that we have gaps; see elf/dl-open.c (_dl_open) where +- we fail to load a module and unload it leaving a gap. If we don't +- have gaps then the number of modids is the current maximum so +- return that. */ ++ /* The count is the max unless dlclose or failed dlopen created gaps. */ + if (__glibc_likely (!GL(dl_tls_dtv_gaps))) + return GL(dl_tls_max_dtv_idx); + diff --git a/SOURCES/glibc-rh1991001-16.patch b/SOURCES/glibc-rh1991001-16.patch new file mode 100644 index 0000000..d1902a7 --- /dev/null +++ b/SOURCES/glibc-rh1991001-16.patch @@ -0,0 +1,71 @@ +commit 40ebfd016ad284872f434bdd76dbe9c708db4d6b +Author: Florian Weimer +Date: Fri Jun 25 08:09:08 2021 +0200 + + elf: Disable most of TLS modid gaps processing [BZ #27135] + + Revert "elf: Fix DTV gap reuse logic [BZ #27135]" + + This reverts commit 572bd547d57a39b6cf0ea072545dc4048921f4c3. + + It turns out that the _dl_next_tls_modid in _dl_map_object_from_fd keeps + returning the same modid over and over again if there is a gap and + more than one TLS-using module is loaded in one dlopen call. This corrupts + TLS data structures. The bug is still present after a revert, but + empirically it is much more difficult to trigger (because it involves a + dlopen failure). + +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 41cb6c58491c364b..7d2dc2272cd643f5 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -88,11 +88,7 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp, + /* If this is not the last currently used entry no need to look + further. */ + if (idx != GL(dl_tls_max_dtv_idx)) +- { +- /* There is an unused dtv entry in the middle. */ +- GL(dl_tls_dtv_gaps) = true; +- return true; +- } ++ return true; + } + + while (idx - disp > (disp == 0 ? 1 + GL(dl_tls_static_nelem) : 0)) +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 54727402750f4c0c..a67fb3aee40860e1 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -896,6 +896,16 @@ no more namespaces available for dlmopen()")); + state if relocation failed, for example. */ + if (args.map) + { ++ /* Maybe some of the modules which were loaded use TLS. ++ Since it will be removed in the following _dl_close call ++ we have to mark the dtv array as having gaps to fill the ++ holes. This is a pessimistic assumption which won't hurt ++ if not true. There is no need to do this when we are ++ loading the auditing DSOs since TLS has not yet been set ++ up.
*/ ++ if ((mode & __RTLD_AUDIT) == 0) ++ GL(dl_tls_dtv_gaps) = true; ++ + _dl_close_worker (args.map, true); + + /* All l_nodelete_pending objects should have been deleted +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index bacb4101e2e2c4e5..801eafad3961573c 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -187,7 +187,10 @@ _dl_next_tls_modid (void) + size_t + _dl_count_modids (void) + { +- /* The count is the max unless dlclose or failed dlopen created gaps. */ ++ /* It is rare that we have gaps; see elf/dl-open.c (_dl_open) where ++ we fail to load a module and unload it leaving a gap. If we don't ++ have gaps then the number of modids is the current maximum so ++ return that. */ + if (__glibc_likely (!GL(dl_tls_dtv_gaps))) + return GL(dl_tls_max_dtv_idx); + diff --git a/SOURCES/glibc-rh1991001-17.patch b/SOURCES/glibc-rh1991001-17.patch new file mode 100644 index 0000000..cb1a041 --- /dev/null +++ b/SOURCES/glibc-rh1991001-17.patch @@ -0,0 +1,585 @@ +commit ba33937be210da5d07f7f01709323743f66011ce +Author: Adhemerval Zanella +Date: Fri Jun 25 10:54:12 2021 -0300 + + elf: Fix DTV gap reuse logic (BZ #27135) + + This is an updated version of 572bd547d57a (reverted by 40ebfd016ad2) + that fixes the _dl_next_tls_modid issues. + + The issue with the 572bd547d57a patch is that the DTV entry is only + updated in dl_open_worker() with the update_tls_slotinfo() call after + all dependencies have been processed by _dl_map_object_deps(). However + _dl_map_object_deps() itself might call _dl_next_tls_modid(), and since + the _dl_tls_dtv_slotinfo_list::map is not yet set the entry will be + wrongly reused. + + This patch fixes it by renaming the _dl_next_tls_modid() function to + _dl_assign_tls_modid() and by passing the link_map so it can set + the slotinfo value, so a subsequent _dl_next_tls_modid() call will + see the entry as allocated. + + The intermediary value is cleared up in remove_slotinfo() for the case + where a library fails to load with RTLD_NOW. + + This patch fixes BZ #27135. + + Checked on x86_64-linux-gnu.
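The window closed by this commit can be pictured with a toy allocator (an illustrative sketch with invented names, not the glibc code): when finding a free slot and recording its owner are two separate steps, a nested request issued in between, as _dl_map_object_deps() may issue one, picks the same slot again. Publishing the owner at assignment time closes the window:

  #include <stddef.h>

  struct slot { void *map; };
  static struct slot slots[64];

  /* Assign the first free slot and mark it as used in the same step by
     storing the owner, so a nested call cannot pick the same index.
     Assumed to run under the dlopen lock; returns (size_t) -1 when
     full.  */
  static size_t
  assign_slot (void *owner)
  {
    for (size_t i = 0; i < 64; i++)
      if (slots[i].map == NULL)
        {
          slots[i].map = owner;
          return i;
        }
    return (size_t) -1;
  }

On failure the caller must clear the entry again, which is the job remove_slotinfo() performs here for the RTLD_NOW case.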
+ + Reviewed-by: Szabolcs Nagy + +Conflicts: + elf/Makefile + (testing differences; libdl removal upstream) + +diff --git a/elf/Makefile b/elf/Makefile +index be40e3761cf91c4a..3e71939d3234c4c3 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -242,6 +242,13 @@ one-hundred = $(foreach x,0 1 2 3 4 5 6 7 8 9, \ + 0$x 1$x 2$x 3$x 4$x 5$x 6$x 7$x 8$x 9$x) + tst-tls-many-dynamic-modules := \ + $(foreach n,$(one-hundred),tst-tls-manydynamic$(n)mod) ++tst-tls-many-dynamic-modules-dep-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 \ ++ 14 15 16 17 18 19 ++tst-tls-many-dynamic-modules-dep = \ ++ $(foreach n,$(tst-tls-many-dynamic-modules-dep-suffixes),tst-tls-manydynamic$(n)mod-dep) ++tst-tls-many-dynamic-modules-dep-bad-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ++tst-tls-many-dynamic-modules-dep-bad = \ ++ $(foreach n,$(tst-tls-many-dynamic-modules-dep-bad-suffixes),tst-tls-manydynamic$(n)mod-dep-bad) + extra-test-objs += $(tlsmod17a-modules:=.os) $(tlsmod18a-modules:=.os) \ + tst-tlsalign-vars.o + test-extras += tst-tlsmod17a tst-tlsmod18a tst-tlsalign-vars +@@ -314,6 +321,8 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \ + tst-audit11mod1 tst-audit11mod2 tst-auditmod11 \ + tst-audit12mod1 tst-audit12mod2 tst-audit12mod3 tst-auditmod12 \ + tst-latepthreadmod $(tst-tls-many-dynamic-modules) \ ++ $(tst-tls-many-dynamic-modules-dep) \ ++ $(tst-tls-many-dynamic-modules-dep-bad) \ + tst-nodelete-dlclose-dso tst-nodelete-dlclose-plugin \ + tst-main1mod tst-libc_dlvsym-dso tst-absolute-sym-lib \ + tst-absolute-zero-lib tst-big-note-lib \ +@@ -1832,10 +1841,63 @@ $(objpfx)tst-rtld-help.out: $(objpfx)ld.so + $(evaluate-test) + + # Reuses tst-tls-many-dynamic-modules ++$(patsubst %,$(objpfx)%.os,$(tst-tls-many-dynamic-modules-dep)): \ ++ $(objpfx)tst-tls-manydynamic%mod-dep.os : tst-tls-manydynamicmod.c ++ $(compile-command.c) \ ++ -DNAME=tls_global_$* -DSETTER=set_value_$* -DGETTER=get_value_$* ++$(patsubst %,$(objpfx)%.os,$(tst-tls-many-dynamic-modules-dep-bad)): \ ++ $(objpfx)tst-tls-manydynamic%mod-dep-bad.os : tst-tls-manydynamicmod.c ++ $(compile-command.c) \ ++ -DNAME=tls_global_$* -DSETTER=set_value_$* -DGETTER=get_value_$* + tst-tls20mod-bad.so-no-z-defs = yes ++# Single dependency. ++$(objpfx)tst-tls-manydynamic0mod-dep.so: $(objpfx)tst-tls-manydynamic1mod-dep.so ++# Double dependencies. ++$(objpfx)tst-tls-manydynamic2mod-dep.so: $(objpfx)tst-tls-manydynamic3mod-dep.so \ ++ $(objpfx)tst-tls-manydynamic4mod-dep.so ++# Double dependencies with each dependency depent of another module. 
++$(objpfx)tst-tls-manydynamic5mod-dep.so: $(objpfx)tst-tls-manydynamic6mod-dep.so \ ++ $(objpfx)tst-tls-manydynamic7mod-dep.so ++$(objpfx)tst-tls-manydynamic6mod-dep.so: $(objpfx)tst-tls-manydynamic8mod-dep.so ++$(objpfx)tst-tls-manydynamic7mod-dep.so: $(objpfx)tst-tls-manydynamic8mod-dep.so ++# Long chain with one double dependency in the middle ++$(objpfx)tst-tls-manydynamic9mod-dep.so: $(objpfx)tst-tls-manydynamic10mod-dep.so \ ++ $(objpfx)tst-tls-manydynamic11mod-dep.so ++$(objpfx)tst-tls-manydynamic10mod-dep.so: $(objpfx)tst-tls-manydynamic12mod-dep.so ++$(objpfx)tst-tls-manydynamic12mod-dep.so: $(objpfx)tst-tls-manydynamic13mod-dep.so ++# Long chain with two double depedencies in the middle ++$(objpfx)tst-tls-manydynamic14mod-dep.so: $(objpfx)tst-tls-manydynamic15mod-dep.so ++$(objpfx)tst-tls-manydynamic15mod-dep.so: $(objpfx)tst-tls-manydynamic16mod-dep.so \ ++ $(objpfx)tst-tls-manydynamic17mod-dep.so ++$(objpfx)tst-tls-manydynamic16mod-dep.so: $(objpfx)tst-tls-manydynamic18mod-dep.so \ ++ $(objpfx)tst-tls-manydynamic19mod-dep.so ++# Same but with an invalid module. ++# Single dependency. ++$(objpfx)tst-tls-manydynamic0mod-dep-bad.so: $(objpfx)tst-tls20mod-bad.so ++# Double dependencies. ++$(objpfx)tst-tls-manydynamic1mod-dep-bad.so: $(objpfx)tst-tls-manydynamic2mod-dep-bad.so \ ++ $(objpfx)tst-tls20mod-bad.so ++# Double dependencies with each dependency depent of another module. ++$(objpfx)tst-tls-manydynamic3mod-dep-bad.so: $(objpfx)tst-tls-manydynamic4mod-dep-bad.so \ ++ $(objpfx)tst-tls-manydynamic5mod-dep-bad.so ++$(objpfx)tst-tls-manydynamic4mod-dep-bad.so: $(objpfx)tst-tls20mod-bad.so ++$(objpfx)tst-tls-manydynamic5mod-dep-bad.so: $(objpfx)tst-tls20mod-bad.so ++# Long chain with one double dependency in the middle ++$(objpfx)tst-tls-manydynamic6mod-dep-bad.so: $(objpfx)tst-tls-manydynamic7mod-dep-bad.so \ ++ $(objpfx)tst-tls-manydynamic8mod-dep-bad.so ++$(objpfx)tst-tls-manydynamic7mod-dep-bad.so: $(objpfx)tst-tls-manydynamic9mod-dep-bad.so ++$(objpfx)tst-tls-manydynamic9mod-dep-bad.so: $(objpfx)tst-tls20mod-bad.so ++# Long chain with two double depedencies in the middle ++$(objpfx)tst-tls-manydynamic10mod-dep-bad.so: $(objpfx)tst-tls-manydynamic11mod-dep-bad.so ++$(objpfx)tst-tls-manydynamic11mod-dep-bad.so: $(objpfx)tst-tls-manydynamic12mod-dep-bad.so \ ++ $(objpfx)tst-tls-manydynamic13mod-dep-bad.so ++$(objpfx)tst-tls-manydynamic12mod-dep-bad.so: $(objpfx)tst-tls-manydynamic14mod-dep-bad.so \ ++ $(objpfx)tst-tls20mod-bad.so + $(objpfx)tst-tls20: $(libdl) $(shared-thread-library) + $(objpfx)tst-tls20.out: $(objpfx)tst-tls20mod-bad.so \ +- $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so) ++ $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so) \ ++ $(tst-tls-many-dynamic-modules-dep:%=$(objpfx)%.so) \ ++ $(tst-tls-many-dynamic-modules-dep-bad:%=$(objpfx)%.so) \ + + # Reuses tst-tls-many-dynamic-modules + $(objpfx)tst-tls21: $(libdl) $(shared-thread-library) +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 7d2dc2272cd643f5..18227fe992029364 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -77,8 +77,6 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp, + object that wasn't fully set up. */ + if (__glibc_likely (old_map != NULL)) + { +- assert (old_map->l_tls_modid == idx); +- + /* Mark the entry as unused. These can be read concurrently. 
*/ + atomic_store_relaxed (&listp->slotinfo[idx - disp].gen, + GL(dl_tls_generation) + 1); +@@ -88,7 +86,11 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp, + /* If this is not the last currently used entry no need to look + further. */ + if (idx != GL(dl_tls_max_dtv_idx)) +- return true; ++ { ++ /* There is an unused dtv entry in the middle. */ ++ GL(dl_tls_dtv_gaps) = true; ++ return true; ++ } + } + + while (idx - disp > (disp == 0 ? 1 + GL(dl_tls_static_nelem) : 0)) +diff --git a/elf/dl-load.c b/elf/dl-load.c +index 80fc38041a936c3c..cdb5d4b5b67f1ca1 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1419,7 +1419,7 @@ cannot enable executable stack as shared object requires"); + not set up TLS data structures, so don't use them now. */ + || __glibc_likely (GL(dl_tls_dtv_slotinfo_list) != NULL))) + /* Assign the next available module ID. */ +- l->l_tls_modid = _dl_next_tls_modid (); ++ _dl_assign_tls_modid (l); + + #ifdef DL_AFTER_LOAD + DL_AFTER_LOAD (l); +diff --git a/elf/dl-open.c b/elf/dl-open.c +index a67fb3aee40860e1..54727402750f4c0c 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -896,16 +896,6 @@ no more namespaces available for dlmopen()")); + state if relocation failed, for example. */ + if (args.map) + { +- /* Maybe some of the modules which were loaded use TLS. +- Since it will be removed in the following _dl_close call +- we have to mark the dtv array as having gaps to fill the +- holes. This is a pessimistic assumption which won't hurt +- if not true. There is no need to do this when we are +- loading the auditing DSOs since TLS has not yet been set +- up. */ +- if ((mode & __RTLD_AUDIT) == 0) +- GL(dl_tls_dtv_gaps) = true; +- + _dl_close_worker (args.map, true); + + /* All l_nodelete_pending objects should have been deleted +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index 801eafad3961573c..8c0f9e972d7a0eac 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -122,8 +122,8 @@ oom (void) + } + + +-size_t +-_dl_next_tls_modid (void) ++void ++_dl_assign_tls_modid (struct link_map *l) + { + size_t result; + +@@ -153,7 +153,11 @@ _dl_next_tls_modid (void) + } + + if (result - disp < runp->len) +- break; ++ { ++ /* Mark the entry as used, so any dependency see it. */ ++ atomic_store_relaxed (&runp->slotinfo[result - disp].map, l); ++ break; ++ } + + disp += runp->len; + } +@@ -180,17 +184,14 @@ _dl_next_tls_modid (void) + atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result); + } + +- return result; ++ l->l_tls_modid = result; + } + + + size_t + _dl_count_modids (void) + { +- /* It is rare that we have gaps; see elf/dl-open.c (_dl_open) where +- we fail to load a module and unload it leaving a gap. If we don't +- have gaps then the number of modids is the current maximum so +- return that. */ ++ /* The count is the max unless dlclose or failed dlopen created gaps. */ + if (__glibc_likely (!GL(dl_tls_dtv_gaps))) + return GL(dl_tls_max_dtv_idx); + +diff --git a/elf/rtld.c b/elf/rtld.c +index 992f825ba00762a7..118c454a2329573f 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1693,7 +1693,7 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); + /* Add the dynamic linker to the TLS list if it also uses TLS. */ + if (GL(dl_rtld_map).l_tls_blocksize != 0) + /* Assign a module ID. Do this before loading any audit modules. 
*/ +- GL(dl_rtld_map).l_tls_modid = _dl_next_tls_modid (); ++ _dl_assign_tls_modid (&GL(dl_rtld_map)); + + audit_list_add_dynamic_tag (&state.audit_list, main_map, DT_AUDIT); + audit_list_add_dynamic_tag (&state.audit_list, main_map, DT_DEPAUDIT); +diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c +index 9977ec803208b9c8..d8d04fe574597f35 100644 +--- a/elf/tst-tls20.c ++++ b/elf/tst-tls20.c +@@ -16,12 +16,14 @@ + License along with the GNU C Library; if not, see + . */ + ++#include + #include + #include + #include + #include + #include + #include ++#include + #include + #include + +@@ -59,28 +61,75 @@ access (int i) + char *buf = xasprintf ("tls_global_%02d", i); + dlerror (); + int *p = dlsym (mod[i], buf); +- printf ("mod[%d]: &tls = %p\n", i, p); ++ if (test_verbose) ++ printf ("mod[%d]: &tls = %p\n", i, p); + if (p == NULL) + FAIL_EXIT1 ("dlsym failed: %s\n", dlerror ()); ++ TEST_COMPARE (*p, 0); + ++*p; + free (buf); + } + ++static void ++access_mod (const char *modname, void *mod, int i) ++{ ++ char *modsym = xasprintf ("tls_global_%d", i); ++ dlerror (); ++ int *p = dlsym (mod, modsym); ++ if (test_verbose) ++ printf ("%s: &tls = %p\n", modname, p); ++ if (p == NULL) ++ FAIL_EXIT1 ("dlsym failed: %s\n", dlerror ()); ++ TEST_COMPARE (*p, 0); ++ ++*p; ++ free (modsym); ++} ++ ++static void ++access_dep (int i) ++{ ++ char *modname = xasprintf ("tst-tls-manydynamic%dmod-dep.so", i); ++ void *moddep = xdlopen (modname, RTLD_LAZY); ++ access_mod (modname, moddep, i); ++ free (modname); ++ xdlclose (moddep); ++} ++ ++struct start_args ++{ ++ const char *modname; ++ void *mod; ++ int modi; ++ int ndeps; ++ const int *deps; ++}; ++ + static void * + start (void *a) + { ++ struct start_args *args = a; ++ + for (int i = 0; i < NMOD; i++) + if (mod[i] != NULL) + access (i); ++ ++ if (args != NULL) ++ { ++ access_mod (args->modname, args->mod, args->modi); ++ for (int n = 0; n < args->ndeps; n++) ++ access_dep (args->deps[n]); ++ } ++ + return 0; + } + +-static int +-do_test (void) ++/* This test gaps with shared libraries with dynamic TLS that has no ++ dependencies. The DTV gap is set with by trying to load an invalid ++ module, the entry should be used on the dlopen. */ ++static void ++do_test_no_depedency (void) + { +- int i; +- +- for (i = 0; i < NMOD; i++) ++ for (int i = 0; i < NMOD; i++) + { + load_mod (i); + /* Bump the generation of mod[0] without using new dtv slot. */ +@@ -91,8 +140,220 @@ do_test (void) + pthread_t t = xpthread_create (0, start, 0); + xpthread_join (t); + } +- for (i = 0; i < NMOD; i++) ++ for (int i = 0; i < NMOD; i++) + unload_mod (i); ++} ++ ++/* The following test check DTV gaps handling with shared libraries that has ++ dependencies. It defines 5 different sets: ++ ++ 1. Single dependency: ++ mod0 -> mod1 ++ 2. Double dependency: ++ mod2 -> [mod3,mod4] ++ 3. Double dependency with each dependency depent of another module: ++ mod5 -> [mod6,mod7] -> mod8 ++ 4. Long chain with one double dependency in the middle: ++ mod9 -> [mod10, mod11] -> mod12 -> mod13 ++ 5. Long chain with two double depedencies in the middle: ++ mod14 -> mod15 -> [mod16, mod17] ++ mod15 -> [mod18, mod19] ++ ++ This does not cover all the possible gaps and configuration, but it ++ should check if different dynamic shared sets are placed correctly in ++ different gaps configurations. 
*/ ++ ++static int ++nmodules (uint32_t v) ++{ ++ unsigned int r = 0; ++ while (v >>= 1) ++ r++; ++ return r + 1; ++} ++ ++static inline bool ++is_mod_set (uint32_t g, uint32_t n) ++{ ++ return (1U << (n - 1)) & g; ++} ++ ++static void ++print_gap (uint32_t g) ++{ ++ if (!test_verbose) ++ return; ++ printf ("gap: "); ++ int nmods = nmodules (g); ++ for (int n = 1; n <= nmods; n++) ++ printf ("%c", ((1 << (n - 1)) & g) == 0 ? 'G' : 'M'); ++ printf ("\n"); ++} ++ ++static void ++do_test_dependency (void) ++{ ++ /* Maps the module and its dependencies, use thread to access the TLS on ++ each loaded module. */ ++ static const int tlsmanydeps0[] = { 1 }; ++ static const int tlsmanydeps1[] = { 3, 4 }; ++ static const int tlsmanydeps2[] = { 6, 7, 8 }; ++ static const int tlsmanydeps3[] = { 10, 11, 12 }; ++ static const int tlsmanydeps4[] = { 15, 16, 17, 18, 19 }; ++ static const struct tlsmanydeps_t ++ { ++ int modi; ++ int ndeps; ++ const int *deps; ++ } tlsmanydeps[] = ++ { ++ { 0, array_length (tlsmanydeps0), tlsmanydeps0 }, ++ { 2, array_length (tlsmanydeps1), tlsmanydeps1 }, ++ { 5, array_length (tlsmanydeps2), tlsmanydeps2 }, ++ { 9, array_length (tlsmanydeps3), tlsmanydeps3 }, ++ { 14, array_length (tlsmanydeps4), tlsmanydeps4 }, ++ }; ++ ++ /* The gap configuration is defined as a bitmap: the bit set represents a ++ loaded module prior the tests execution, while a bit unsed is a module ++ unloaded. Not all permtation will show gaps, but it is simpler than ++ define each one independently. */ ++ for (uint32_t g = 0; g < 64; g++) ++ { ++ print_gap (g); ++ int nmods = nmodules (g); ++ ++ int mods[nmods]; ++ /* We use '0' as indication for a gap, to avoid the dlclose on iteration ++ cleanup. */ ++ for (int n = 1; n <= nmods; n++) ++ { ++ load_mod (n); ++ mods[n] = n; ++ } ++ for (int n = 1; n <= nmods; n++) ++ { ++ if (!is_mod_set (g, n)) ++ { ++ unload_mod (n); ++ mods[n] = 0; ++ } ++ } ++ ++ for (int t = 0; t < array_length (tlsmanydeps); t++) ++ { ++ char *moddepname = xasprintf ("tst-tls-manydynamic%dmod-dep.so", ++ tlsmanydeps[t].modi); ++ void *moddep = xdlopen (moddepname, RTLD_LAZY); ++ ++ /* Access TLS in all loaded modules. */ ++ struct start_args args = ++ { ++ moddepname, ++ moddep, ++ tlsmanydeps[t].modi, ++ tlsmanydeps[t].ndeps, ++ tlsmanydeps[t].deps ++ }; ++ pthread_t t = xpthread_create (0, start, &args); ++ xpthread_join (t); ++ ++ free (moddepname); ++ xdlclose (moddep); ++ } ++ ++ for (int n = 1; n <= nmods; n++) ++ if (mods[n] != 0) ++ unload_mod (n); ++ } ++} ++ ++/* The following test check DTV gaps handling with shared libraries that has ++ invalid dependencies. It defines 5 different sets: ++ ++ 1. Single dependency: ++ mod0 -> invalid ++ 2. Double dependency: ++ mod1 -> [mod2,invalid] ++ 3. Double dependency with each dependency depent of another module: ++ mod3 -> [mod4,mod5] -> invalid ++ 4. Long chain with one double dependency in the middle: ++ mod6 -> [mod7, mod8] -> mod12 -> invalid ++ 5. Long chain with two double depedencies in the middle: ++ mod10 -> mod11 -> [mod12, mod13] ++ mod12 -> [mod14, invalid] ++ ++ This does not cover all the possible gaps and configuration, but it ++ should check if different dynamic shared sets are placed correctly in ++ different gaps configurations. 
*/ ++ ++static void ++do_test_invalid_dependency (bool bind_now) ++{ ++ static const int tlsmanydeps[] = { 0, 1, 3, 6, 10 }; ++ ++ /* The gap configuration is defined as a bitmap: the bit set represents a ++ loaded module prior the tests execution, while a bit unsed is a module ++ unloaded. Not all permtation will show gaps, but it is simpler than ++ define each one independently. */ ++ for (uint32_t g = 0; g < 64; g++) ++ { ++ print_gap (g); ++ int nmods = nmodules (g); ++ ++ int mods[nmods]; ++ /* We use '0' as indication for a gap, to avoid the dlclose on iteration ++ cleanup. */ ++ for (int n = 1; n <= nmods; n++) ++ { ++ load_mod (n); ++ mods[n] = n; ++ } ++ for (int n = 1; n <= nmods; n++) ++ { ++ if (!is_mod_set (g, n)) ++ { ++ unload_mod (n); ++ mods[n] = 0; ++ } ++ } ++ ++ for (int t = 0; t < array_length (tlsmanydeps); t++) ++ { ++ char *moddepname = xasprintf ("tst-tls-manydynamic%dmod-dep-bad.so", ++ tlsmanydeps[t]); ++ void *moddep; ++ if (bind_now) ++ { ++ moddep = dlopen (moddepname, RTLD_NOW); ++ TEST_VERIFY (moddep == 0); ++ } ++ else ++ moddep = dlopen (moddepname, RTLD_LAZY); ++ ++ /* Access TLS in all loaded modules. */ ++ pthread_t t = xpthread_create (0, start, NULL); ++ xpthread_join (t); ++ ++ free (moddepname); ++ if (!bind_now) ++ xdlclose (moddep); ++ } ++ ++ for (int n = 1; n <= nmods; n++) ++ if (mods[n] != 0) ++ unload_mod (n); ++ } ++} ++ ++static int ++do_test (void) ++{ ++ do_test_no_depedency (); ++ do_test_dependency (); ++ do_test_invalid_dependency (true); ++ do_test_invalid_dependency (false); ++ + return 0; + } + +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 6cbbaa808a596f77..0138353ccb41c5f1 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1111,8 +1111,8 @@ extern ElfW(Addr) _dl_sysdep_start (void **start_argptr, + extern void _dl_sysdep_start_cleanup (void) attribute_hidden; + + +-/* Determine next available module ID. */ +-extern size_t _dl_next_tls_modid (void) attribute_hidden; ++/* Determine next available module ID and set the L l_tls_modid. */ ++extern void _dl_assign_tls_modid (struct link_map *l) attribute_hidden; + + /* Count the modules with TLS segments. */ + extern size_t _dl_count_modids (void) attribute_hidden; diff --git a/SOURCES/glibc-rh1991001-18.patch b/SOURCES/glibc-rh1991001-18.patch new file mode 100644 index 0000000..340b345 --- /dev/null +++ b/SOURCES/glibc-rh1991001-18.patch @@ -0,0 +1,42 @@ +commit 881b68e45c3a518319dcf5a3c4a2b3ec59e1c1e5 +Author: Adhemerval Zanella +Date: Fri Jul 16 08:32:05 2021 -0300 + + elf: Fix a wrong array access on tst-tls20 + + Check on x86_64-linux-gnu with --enable-stack-protector=all. + +diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c +index d8d04fe574597f35..831c3336c914790d 100644 +--- a/elf/tst-tls20.c ++++ b/elf/tst-tls20.c +@@ -226,12 +226,12 @@ do_test_dependency (void) + int mods[nmods]; + /* We use '0' as indication for a gap, to avoid the dlclose on iteration + cleanup. */ +- for (int n = 1; n <= nmods; n++) ++ for (int n = 1; n < nmods; n++) + { + load_mod (n); + mods[n] = n; + } +- for (int n = 1; n <= nmods; n++) ++ for (int n = 1; n < nmods; n++) + { + if (!is_mod_set (g, n)) + { +@@ -304,12 +304,12 @@ do_test_invalid_dependency (bool bind_now) + int mods[nmods]; + /* We use '0' as indication for a gap, to avoid the dlclose on iteration + cleanup. 
*/ +- for (int n = 1; n <= nmods; n++) ++ for (int n = 1; n < nmods; n++) + { + load_mod (n); + mods[n] = n; + } +- for (int n = 1; n <= nmods; n++) ++ for (int n = 1; n < nmods; n++) + { + if (!is_mod_set (g, n)) + { diff --git a/SOURCES/glibc-rh1991001-19.patch b/SOURCES/glibc-rh1991001-19.patch new file mode 100644 index 0000000..48995ad --- /dev/null +++ b/SOURCES/glibc-rh1991001-19.patch @@ -0,0 +1,468 @@ +commit 83b5323261bb72313bffcf37476c1b8f0847c736 +Author: Szabolcs Nagy +Date: Wed Sep 15 15:16:19 2021 +0100 + + elf: Avoid deadlock between pthread_create and ctors [BZ #28357] + + The fix for bug 19329 caused a regression such that pthread_create can + deadlock when concurrent ctors from dlopen are waiting for it to finish. + Use a new GL(dl_load_tls_lock) in pthread_create that is not taken + around ctors in dlopen. + + The new lock is also used in __tls_get_addr instead of GL(dl_load_lock). + + The new lock is held in _dl_open_worker and _dl_close_worker around + most of the logic before/after the init/fini routines. When init/fini + routines are running then TLS is in a consistent, usable state. + In _dl_open_worker the new lock requires catching and reraising dlopen + failures that happen in the critical section. + + The new lock is reinitialized in a fork child, to keep the existing + behaviour and it is kept recursive in case malloc interposition or TLS + access from signal handlers can retake it. It is not obvious if this + is necessary or helps, but avoids changing the preexisting behaviour. + + The new lock may be more appropriate for dl_iterate_phdr too than + GL(dl_load_write_lock), since TLS state of an incompletely loaded + module may be accessed. If the new lock can replace the old one, + that can be a separate change. + + Fixes bug 28357. + + Reviewed-by: Adhemerval Zanella + +Conflicts: + posix/fork.c + (reworked due to file rename upstream and libpthread integration) + sysdeps/pthread/Makefile + (htl testing support was missing downstream, reconstituted here; + added $(libdl) required downstream) + +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 18227fe992029364..7fe91bdd9aaf694e 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -549,6 +549,9 @@ _dl_close_worker (struct link_map *map, bool force) + size_t tls_free_end; + tls_free_start = tls_free_end = NO_TLS_OFFSET; + ++ /* Protects global and module specitic TLS state. */ ++ __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); ++ + /* We modify the list of loaded objects. */ + __rtld_lock_lock_recursive (GL(dl_load_write_lock)); + +@@ -784,6 +787,9 @@ _dl_close_worker (struct link_map *map, bool force) + GL(dl_tls_static_used) = tls_free_start; + } + ++ /* TLS is cleaned up for the unloaded modules. */ ++ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); ++ + #ifdef SHARED + /* Auditing checkpoint: we have deleted all objects. */ + if (__glibc_unlikely (do_audit)) +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 54727402750f4c0c..736df62ce6e46d34 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -65,6 +65,9 @@ struct dl_open_args + libc_map value in the namespace in case of a dlopen failure. */ + bool libc_already_loaded; + ++ /* Set to true if the end of dl_open_worker_begin was reached. */ ++ bool worker_continue; ++ + /* Original parameters to the program and the current environment. 
*/ + int argc; + char **argv; +@@ -481,7 +484,7 @@ call_dl_init (void *closure) + } + + static void +-dl_open_worker (void *a) ++dl_open_worker_begin (void *a) + { + struct dl_open_args *args = a; + const char *file = args->file; +@@ -772,6 +775,36 @@ dl_open_worker (void *a) + DL_STATIC_INIT (new); + #endif + ++ args->worker_continue = true; ++} ++ ++static void ++dl_open_worker (void *a) ++{ ++ struct dl_open_args *args = a; ++ ++ args->worker_continue = false; ++ ++ { ++ /* Protects global and module specific TLS state. */ ++ __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); ++ ++ struct dl_exception ex; ++ int err = _dl_catch_exception (&ex, dl_open_worker_begin, args); ++ ++ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); ++ ++ if (__glibc_unlikely (ex.errstring != NULL)) ++ /* Reraise the error. */ ++ _dl_signal_exception (err, &ex, NULL); ++ } ++ ++ if (!args->worker_continue) ++ return; ++ ++ int mode = args->mode; ++ struct link_map *new = args->map; ++ + /* Run the initializer functions of new objects. Temporarily + disable the exception handler, so that lazy binding failures are + fatal. */ +diff --git a/elf/dl-support.c b/elf/dl-support.c +index 34be8e5babfb6af3..3e5531138eaa18f8 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -212,6 +212,13 @@ __rtld_lock_define_initialized_recursive (, _dl_load_lock) + list of loaded objects while an object is added to or removed from + that list. */ + __rtld_lock_define_initialized_recursive (, _dl_load_write_lock) ++ /* This lock protects global and module specific TLS related data. ++ E.g. it is held in dlopen and dlclose when GL(dl_tls_generation), ++ GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are ++ accessed and when TLS related relocations are processed for a ++ module. It was introduced to keep pthread_create accessing TLS ++ state that is being set up. */ ++__rtld_lock_define_initialized_recursive (, _dl_load_tls_lock) + + + #ifdef HAVE_AUX_VECTOR +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index 8c0f9e972d7a0eac..7865fc390c3f3f0a 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -527,7 +527,7 @@ _dl_allocate_tls_init (void *result) + size_t maxgen = 0; + + /* Protects global dynamic TLS related state. */ +- __rtld_lock_lock_recursive (GL(dl_load_lock)); ++ __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); + + /* Check if the current dtv is big enough. */ + if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) +@@ -601,7 +601,7 @@ _dl_allocate_tls_init (void *result) + listp = listp->next; + assert (listp != NULL); + } +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); ++ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + + /* The DTV version is up-to-date now. */ + dtv[0].counter = maxgen; +@@ -740,7 +740,7 @@ _dl_update_slotinfo (unsigned long int req_modid) + + Here the dtv needs to be updated to new_gen generation count. + +- This code may be called during TLS access when GL(dl_load_lock) ++ This code may be called during TLS access when GL(dl_load_tls_lock) + is not held. In that case the user code has to synchronize with + dlopen and dlclose calls of relevant modules. 
A module m is + relevant if the generation of m <= new_gen and dlclose of m is +@@ -862,11 +862,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map) + if (__glibc_unlikely (the_map->l_tls_offset + != FORCED_DYNAMIC_TLS_OFFSET)) + { +- __rtld_lock_lock_recursive (GL(dl_load_lock)); ++ __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); + if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET)) + { + the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET; +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); ++ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + } + else if (__glibc_likely (the_map->l_tls_offset + != FORCED_DYNAMIC_TLS_OFFSET)) +@@ -878,7 +878,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map) + #else + # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" + #endif +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); ++ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + + dtv[GET_ADDR_MODULE].pointer.to_free = NULL; + dtv[GET_ADDR_MODULE].pointer.val = p; +@@ -886,7 +886,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map) + return (char *) p + GET_ADDR_OFFSET; + } + else +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); ++ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + } + struct dtv_pointer result = allocate_and_init (the_map); + dtv[GET_ADDR_MODULE].pointer = result; +@@ -957,7 +957,7 @@ _dl_tls_get_addr_soft (struct link_map *l) + return NULL; + + dtv_t *dtv = THREAD_DTV (); +- /* This may be called without holding the GL(dl_load_lock). Reading ++ /* This may be called without holding the GL(dl_load_tls_lock). Reading + arbitrary gen value is fine since this is best effort code. */ + size_t gen = atomic_load_relaxed (&GL(dl_tls_generation)); + if (__glibc_unlikely (dtv[0].counter != gen)) +diff --git a/elf/rtld.c b/elf/rtld.c +index 118c454a2329573f..9e09896da078274d 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -317,6 +317,7 @@ struct rtld_global _rtld_global = + #ifdef _LIBC_REENTRANT + ._dl_load_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, + ._dl_load_write_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, ++ ._dl_load_tls_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, + #endif + ._dl_nns = 1, + ._dl_ns = +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 0138353ccb41c5f1..7b0a667629ddc06a 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -373,6 +373,13 @@ struct rtld_global + list of loaded objects while an object is added to or removed + from that list. */ + __rtld_lock_define_recursive (EXTERN, _dl_load_write_lock) ++ /* This lock protects global and module specific TLS related data. ++ E.g. it is held in dlopen and dlclose when GL(dl_tls_generation), ++ GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are ++ accessed and when TLS related relocations are processed for a ++ module. It was introduced to keep pthread_create accessing TLS ++ state that is being set up. */ ++ __rtld_lock_define_recursive (EXTERN, _dl_load_tls_lock) + + /* Incremented whenever something may have been added to dl_loaded. */ + EXTERN unsigned long long _dl_load_adds; +@@ -1192,7 +1199,7 @@ extern int _dl_scope_free (void *) attribute_hidden; + + /* Add module to slot information data. If DO_ADD is false, only the + required memory is allocated. Must be called with GL +- (dl_load_lock) acquired. If the function has already been called ++ (dl_load_tls_lock) acquired. 
If the function has already been called + for the link map L with !do_add, then this function will not raise + an exception, otherwise it is possible that it encounters a memory + allocation failure. */ +diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c +index 37db30f3d1e846b6..b4d20fa652f4ba3b 100644 +--- a/sysdeps/nptl/fork.c ++++ b/sysdeps/nptl/fork.c +@@ -125,6 +125,9 @@ __libc_fork (void) + /* Reset the lock the dynamic loader uses to protect its data. */ + __rtld_lock_initialize (GL(dl_load_lock)); + ++ /* Reset the lock protecting dynamic TLS related data. */ ++ __rtld_lock_initialize (GL(dl_load_tls_lock)); ++ + /* Run the handlers registered for the child. */ + __run_fork_handlers (atfork_run_child, multiple_threads); + } +diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile +index ea4f8894891b2636..98a92f8d6bb119ba 100644 +--- a/sysdeps/pthread/Makefile ++++ b/sysdeps/pthread/Makefile +@@ -25,3 +25,24 @@ $(objpfx)tst-timer: $(objpfx)librt.a $(static-thread-library) + endif + + endif ++ ++ifneq (,$(filter $(subdir),htl nptl)) ++ifeq ($(build-shared),yes) ++tests += tst-create1 ++endif ++ ++tst-create1mod.so-no-z-defs = yes ++ ++ifeq ($(build-shared),yes) ++# Build all the modules even when not actually running test programs. ++tests: $(test-modules) ++endif ++ ++modules-names += tst-create1mod ++test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names))) ++ ++LDFLAGS-tst-create1 = -Wl,-export-dynamic ++$(objpfx)tst-create1: $(libdl) $(shared-thread-library) ++$(objpfx)tst-create1.out: $(objpfx)tst-create1mod.so ++ ++endif +diff --git a/sysdeps/pthread/tst-create1.c b/sysdeps/pthread/tst-create1.c +new file mode 100644 +index 0000000000000000..932586c30990d1d4 +--- /dev/null ++++ b/sysdeps/pthread/tst-create1.c +@@ -0,0 +1,119 @@ ++/* Verify that pthread_create does not deadlock when ctors take locks. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++/* ++Check if ctor and pthread_create deadlocks in ++ ++thread 1: dlopen -> ctor -> lock(user_lock) ++thread 2: lock(user_lock) -> pthread_create ++ ++or in ++ ++thread 1: dlclose -> dtor -> lock(user_lock) ++thread 2: lock(user_lock) -> pthread_create ++*/ ++ ++static pthread_barrier_t bar_ctor; ++static pthread_barrier_t bar_dtor; ++static pthread_mutex_t user_lock = PTHREAD_MUTEX_INITIALIZER; ++ ++void ++ctor (void) ++{ ++ xpthread_barrier_wait (&bar_ctor); ++ dprintf (1, "thread 1: in ctor: started.\n"); ++ xpthread_mutex_lock (&user_lock); ++ dprintf (1, "thread 1: in ctor: locked user_lock.\n"); ++ xpthread_mutex_unlock (&user_lock); ++ dprintf (1, "thread 1: in ctor: unlocked user_lock.\n"); ++ dprintf (1, "thread 1: in ctor: done.\n"); ++} ++ ++void ++dtor (void) ++{ ++ xpthread_barrier_wait (&bar_dtor); ++ dprintf (1, "thread 1: in dtor: started.\n"); ++ xpthread_mutex_lock (&user_lock); ++ dprintf (1, "thread 1: in dtor: locked user_lock.\n"); ++ xpthread_mutex_unlock (&user_lock); ++ dprintf (1, "thread 1: in dtor: unlocked user_lock.\n"); ++ dprintf (1, "thread 1: in dtor: done.\n"); ++} ++ ++static void * ++thread3 (void *a) ++{ ++ dprintf (1, "thread 3: started.\n"); ++ dprintf (1, "thread 3: done.\n"); ++ return 0; ++} ++ ++static void * ++thread2 (void *a) ++{ ++ pthread_t t3; ++ dprintf (1, "thread 2: started.\n"); ++ ++ xpthread_mutex_lock (&user_lock); ++ dprintf (1, "thread 2: locked user_lock.\n"); ++ xpthread_barrier_wait (&bar_ctor); ++ t3 = xpthread_create (0, thread3, 0); ++ xpthread_mutex_unlock (&user_lock); ++ dprintf (1, "thread 2: unlocked user_lock.\n"); ++ xpthread_join (t3); ++ ++ xpthread_mutex_lock (&user_lock); ++ dprintf (1, "thread 2: locked user_lock.\n"); ++ xpthread_barrier_wait (&bar_dtor); ++ t3 = xpthread_create (0, thread3, 0); ++ xpthread_mutex_unlock (&user_lock); ++ dprintf (1, "thread 2: unlocked user_lock.\n"); ++ xpthread_join (t3); ++ ++ dprintf (1, "thread 2: done.\n"); ++ return 0; ++} ++ ++static void ++thread1 (void) ++{ ++ dprintf (1, "thread 1: started.\n"); ++ xpthread_barrier_init (&bar_ctor, NULL, 2); ++ xpthread_barrier_init (&bar_dtor, NULL, 2); ++ pthread_t t2 = xpthread_create (0, thread2, 0); ++ void *p = xdlopen ("tst-create1mod.so", RTLD_NOW | RTLD_GLOBAL); ++ dprintf (1, "thread 1: dlopen done.\n"); ++ xdlclose (p); ++ dprintf (1, "thread 1: dlclose done.\n"); ++ xpthread_join (t2); ++ dprintf (1, "thread 1: done.\n"); ++} ++ ++static int ++do_test (void) ++{ ++ thread1 (); ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/pthread/tst-create1mod.c b/sysdeps/pthread/tst-create1mod.c +new file mode 100644 +index 0000000000000000..62c9006961683177 +--- /dev/null ++++ b/sysdeps/pthread/tst-create1mod.c +@@ -0,0 +1,41 @@ ++/* Verify that pthread_create does not deadlock when ctors take locks. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdio.h>
++
++/* Require TLS setup for the module.  */
++__thread int tlsvar;
++
++void ctor (void);
++void dtor (void);
++
++static void __attribute__ ((constructor))
++do_init (void)
++{
++  dprintf (1, "constructor started: %d.\n", tlsvar++);
++  ctor ();
++  dprintf (1, "constructor done: %d.\n", tlsvar++);
++}
++
++static void __attribute__ ((destructor))
++do_end (void)
++{
++  dprintf (1, "destructor started: %d.\n", tlsvar++);
++  dtor ();
++  dprintf (1, "destructor done: %d.\n", tlsvar++);
++}
diff --git a/SOURCES/glibc-rh1991001-2.patch b/SOURCES/glibc-rh1991001-2.patch
new file mode 100644
index 0000000..468fc03
--- /dev/null
+++ b/SOURCES/glibc-rh1991001-2.patch
@@ -0,0 +1,28 @@
+commit d2b997c7172e9a00895a9deb379f8782fbd2e36f
+Author: Szabolcs Nagy
+Date:   Wed Dec 30 23:40:14 2020 +0000
+
+    elf: Fix a DTV setup issue [BZ #27136]
+
+    The max modid is a valid index in the dtv, it should not be skipped.
+
+    The bug is observable if the last module has modid == 64 and its
+    generation is same or less than the max generation of the previous
+    modules.  Then dtv[0].counter implies dtv[64] is initialized but
+    it isn't.  Fixes bug 27136.
+
+    Reviewed-by: Adhemerval Zanella
+
+diff --git a/elf/dl-tls.c b/elf/dl-tls.c
+index cccf74b33481b866..0b96b1dceed99d58 100644
+--- a/elf/dl-tls.c
++++ b/elf/dl-tls.c
+@@ -590,7 +590,7 @@ _dl_allocate_tls_init (void *result)
+ 	}
+ 
+       total += cnt;
+-      if (total >= GL(dl_tls_max_dtv_idx))
++      if (total > GL(dl_tls_max_dtv_idx))
+ 	break;
+ 
+       listp = listp->next;
diff --git a/SOURCES/glibc-rh1991001-20.patch b/SOURCES/glibc-rh1991001-20.patch
new file mode 100644
index 0000000..d8aedcc
--- /dev/null
+++ b/SOURCES/glibc-rh1991001-20.patch
@@ -0,0 +1,20 @@
+commit 3c7c5117826816021f9d3f352f49e0dd0236cbad
+Author: Florian Weimer
+Date:   Tue Nov 30 14:35:54 2021 +0100
+
+    elf: Include <stdint.h> in tst-tls20.c
+
+    The test uses standard integer types.
+
+diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c
+index 831c3336c914790d..18067e6b0a6093f9 100644
+--- a/elf/tst-tls20.c
++++ b/elf/tst-tls20.c
+@@ -19,6 +19,7 @@
+ #include
+ #include
+ #include
++#include <stdint.h>
+ #include
+ #include
+ #include
diff --git a/SOURCES/glibc-rh1991001-21.patch b/SOURCES/glibc-rh1991001-21.patch
new file mode 100644
index 0000000..c4e1316
--- /dev/null
+++ b/SOURCES/glibc-rh1991001-21.patch
@@ -0,0 +1,20 @@
+commit df4cb2280e32187380520f71bd27ab32252cbc85
+Author: Florian Weimer
+Date:   Tue Nov 30 15:39:17 2021 +0100
+
+    elf: Include <stdbool.h> in tst-tls20.c
+
+    The test uses the bool type.
+
+diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c
+index 18067e6b0a6093f9..200dacb748af21a8 100644
+--- a/elf/tst-tls20.c
++++ b/elf/tst-tls20.c
+@@ -19,6 +19,7 @@
+ #include
+ #include
+ #include
++#include <stdbool.h>
+ #include
+ #include
+ #include
diff --git a/SOURCES/glibc-rh1991001-22.patch b/SOURCES/glibc-rh1991001-22.patch
new file mode 100644
index 0000000..490e909
--- /dev/null
+++ b/SOURCES/glibc-rh1991001-22.patch
@@ -0,0 +1,62 @@
+commit 5cc338565479a620244c2f8ff35956629c4dbf81
+Author: Florian Weimer
+Date:   Fri Dec 10 05:14:24 2021 +0100
+
+    nptl: Add one more barrier to nptl/tst-create1
+
+    Without the bar_ctor_finish barrier, it was possible that thread2
+    re-locked user_lock before ctor had a chance to lock it.  ctor then
+    blocked in its locking operation, xdlopen from the main thread
+    did not return, and thread2 was stuck waiting in bar_dtor:
+
+    thread 1: started.
+    thread 2: started.
+ thread 2: locked user_lock. + constructor started: 0. + thread 1: in ctor: started. + thread 3: started. + thread 3: done. + thread 2: unlocked user_lock. + thread 2: locked user_lock. + + Fixes the test in commit 83b5323261bb72313bffcf37476c1b8f0847c736 + ("elf: Avoid deadlock between pthread_create and ctors [BZ #28357]"). + + Reviewed-by: Szabolcs Nagy + +diff --git a/sysdeps/pthread/tst-create1.c b/sysdeps/pthread/tst-create1.c +index 932586c30990d1d4..763ded8d7956f943 100644 +--- a/sysdeps/pthread/tst-create1.c ++++ b/sysdeps/pthread/tst-create1.c +@@ -33,6 +33,7 @@ thread 2: lock(user_lock) -> pthread_create + */ + + static pthread_barrier_t bar_ctor; ++static pthread_barrier_t bar_ctor_finish; + static pthread_barrier_t bar_dtor; + static pthread_mutex_t user_lock = PTHREAD_MUTEX_INITIALIZER; + +@@ -46,6 +47,7 @@ ctor (void) + xpthread_mutex_unlock (&user_lock); + dprintf (1, "thread 1: in ctor: unlocked user_lock.\n"); + dprintf (1, "thread 1: in ctor: done.\n"); ++ xpthread_barrier_wait (&bar_ctor_finish); + } + + void +@@ -81,6 +83,7 @@ thread2 (void *a) + xpthread_mutex_unlock (&user_lock); + dprintf (1, "thread 2: unlocked user_lock.\n"); + xpthread_join (t3); ++ xpthread_barrier_wait (&bar_ctor_finish); + + xpthread_mutex_lock (&user_lock); + dprintf (1, "thread 2: locked user_lock.\n"); +@@ -99,6 +102,7 @@ thread1 (void) + { + dprintf (1, "thread 1: started.\n"); + xpthread_barrier_init (&bar_ctor, NULL, 2); ++ xpthread_barrier_init (&bar_ctor_finish, NULL, 2); + xpthread_barrier_init (&bar_dtor, NULL, 2); + pthread_t t2 = xpthread_create (0, thread2, 0); + void *p = xdlopen ("tst-create1mod.so", RTLD_NOW | RTLD_GLOBAL); diff --git a/SOURCES/glibc-rh1991001-3.patch b/SOURCES/glibc-rh1991001-3.patch new file mode 100644 index 0000000..85a9cd7 --- /dev/null +++ b/SOURCES/glibc-rh1991001-3.patch @@ -0,0 +1,163 @@ +commit 8f85075a2e9c26ff7486d4bbaf358999807d215c +Author: Szabolcs Nagy +Date: Thu Dec 31 12:24:38 2020 +0000 + + elf: Add a DTV setup test [BZ #27136] + + The test dlopens a large number of modules with TLS, they are reused + from an existing test. + + The test relies on the reuse of slotinfo entries after dlclose, without + bug 27135 fixed this needs a failing dlopen. With a slotinfo list that + has non-monotone increasing generation counters, bug 27136 can trigger. + + Reviewed-by: Adhemerval Zanella + +Conflicts: + elf/Makefile + (usual test differences) + +diff --git a/elf/Makefile b/elf/Makefile +index 82fb019a634caf81..0995d810b57d0dda 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -209,7 +209,8 @@ tests += restest1 preloadtest loadfail multiload origtest resolvfail \ + tst-audit14 tst-audit15 tst-audit16 \ + tst-tls-ie tst-tls-ie-dlmopen \ + argv0test \ +- tst-glibc-hwcaps tst-glibc-hwcaps-prepend tst-glibc-hwcaps-mask ++ tst-glibc-hwcaps tst-glibc-hwcaps-prepend tst-glibc-hwcaps-mask \ ++ tst-tls20 + # reldep9 + tests-internal += loadtest unload unload2 circleload1 \ + neededtest neededtest2 neededtest3 neededtest4 \ +@@ -332,6 +333,7 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \ + libmarkermod2-1 libmarkermod2-2 \ + libmarkermod3-1 libmarkermod3-2 libmarkermod3-3 \ + libmarkermod4-1 libmarkermod4-2 libmarkermod4-3 libmarkermod4-4 \ ++ tst-tls20mod-bad + + # Most modules build with _ISOMAC defined, but those filtered out + # depend on internal headers. 
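Before the remaining test wiring below, the boundary condition this DTV test targets can be modeled in a few lines. The following is a hedged, standalone toy, not glibc code: the 64-entry block size and the names blocks_visited/fixed_bound are illustrative assumptions.

#include <stdio.h>

/* Toy model of the _dl_allocate_tls_init loop bound fixed in
   glibc-rh1991001-2: slotinfo blocks are assumed to cover 64 module
   IDs each, and the highest assigned module ID (max_modid) is itself
   a valid dtv index, so the block containing it must be visited.  */
static int
blocks_visited (unsigned int max_modid, int fixed_bound)
{
  unsigned int total = 0;   /* First module ID covered by this block.  */
  int blocks = 0;
  for (;;)
    {
      ++blocks;             /* "Initialize" dtv slots total..total+63.  */
      total += 64;
      if (fixed_bound ? total > max_modid : total >= max_modid)
        break;
    }
  return blocks;
}

int
main (void)
{
  /* Module ID 64 lives at offset 0 of the second block.  The old '>='
     bound stopped after one block and left dtv[64] untouched even
     though dtv[0].counter claimed it was initialized.  */
  printf ("max modid 64: '>=' visits %d block(s), '>' visits %d\n",
          blocks_visited (64, 0), blocks_visited (64, 1));
  return 0;
}

Running the toy prints 1 block for the old bound and 2 for the fixed one, which is exactly the uninitialized-slot scenario the commit message describes.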
+@@ -1828,3 +1830,9 @@ $(objpfx)tst-rtld-help.out: $(objpfx)ld.so + fi; \ + (exit $$status); \ + $(evaluate-test) ++ ++# Reuses tst-tls-many-dynamic-modules ++tst-tls20mod-bad.so-no-z-defs = yes ++$(objpfx)tst-tls20: $(libdl) $(shared-thread-library) ++$(objpfx)tst-tls20.out: $(objpfx)tst-tls20mod-bad.so \ ++ $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so) +diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c +new file mode 100644 +index 0000000000000000..ac5f8c8d39b66dd6 +--- /dev/null ++++ b/elf/tst-tls20.c +@@ -0,0 +1,98 @@ ++/* Test dtv setup if entries don't have monotone increasing generation. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define NMOD 100 ++static void *mod[NMOD]; ++ ++static void ++load_fail (void) ++{ ++ /* Expected to fail because of a missing symbol. */ ++ void *m = dlopen ("tst-tls20mod-bad.so", RTLD_NOW); ++ if (m != NULL) ++ FAIL_EXIT1 ("dlopen of tst-tls20mod-bad.so succeeded\n"); ++} ++ ++static void ++load_mod (int i) ++{ ++ char *buf = xasprintf ("tst-tls-manydynamic%02dmod.so", i); ++ mod[i] = xdlopen (buf, RTLD_LAZY); ++ free (buf); ++} ++ ++static void ++unload_mod (int i) ++{ ++ if (mod[i] != NULL) ++ xdlclose (mod[i]); ++ mod[i] = NULL; ++} ++ ++static void ++access (int i) ++{ ++ char *buf = xasprintf ("tls_global_%02d", i); ++ dlerror (); ++ int *p = dlsym (mod[i], buf); ++ printf ("mod[%d]: &tls = %p\n", i, p); ++ if (p == NULL) ++ FAIL_EXIT1 ("dlsym failed: %s\n", dlerror ()); ++ ++*p; ++ free (buf); ++} ++ ++static void * ++start (void *a) ++{ ++ for (int i = 0; i < NMOD; i++) ++ if (mod[i] != NULL) ++ access (i); ++ return 0; ++} ++ ++static int ++do_test (void) ++{ ++ int i; ++ ++ for (i = 0; i < NMOD; i++) ++ { ++ load_mod (i); ++ /* Bump the generation of mod[0] without using new dtv slot. */ ++ unload_mod (0); ++ load_fail (); /* Ensure GL(dl_tls_dtv_gaps) is true: see bug 27135. */ ++ load_mod (0); ++ /* Access TLS in all loaded modules. 
*/ ++ pthread_t t = xpthread_create (0, start, 0); ++ xpthread_join (t); ++ } ++ for (i = 0; i < NMOD; i++) ++ unload_mod (i); ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-tls20mod-bad.c b/elf/tst-tls20mod-bad.c +new file mode 100644 +index 0000000000000000..c1aed8ea7deffd22 +--- /dev/null ++++ b/elf/tst-tls20mod-bad.c +@@ -0,0 +1,2 @@ ++void missing_symbol (void); ++void f (void) {missing_symbol ();} diff --git a/SOURCES/glibc-rh1991001-4.patch b/SOURCES/glibc-rh1991001-4.patch new file mode 100644 index 0000000..c8650bb --- /dev/null +++ b/SOURCES/glibc-rh1991001-4.patch @@ -0,0 +1,41 @@ +commit c489c35054c39d7f2437ca61b369e3ede448f022 +Author: Szabolcs Nagy +Date: Wed Nov 30 11:44:25 2016 +0000 + + elf: Fix comments and logic in _dl_add_to_slotinfo + + Since + + commit a509eb117fac1d764b15eba64993f4bdb63d7f3c + Avoid late dlopen failure due to scope, TLS slotinfo updates [BZ #25112] + + the generation counter update is not needed in the failure path. + That commit ensures allocation in _dl_add_to_slotinfo happens before + the demarcation point in dlopen (it is called twice, first time is for + allocation only where dlopen can still be reverted on failure, then + second time actual dtv updates are done which then cannot fail). + + Reviewed-by: Adhemerval Zanella + +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index 0b96b1dceed99d58..9375650a3ab5247d 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -998,16 +998,7 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add) + + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); + if (listp == NULL) + { +- /* We ran out of memory. We will simply fail this +- call but don't undo anything we did so far. The +- application will crash or be terminated anyway very +- soon. */ +- +- /* We have to do this since some entries in the dtv +- slotinfo array might already point to this +- generation. */ +- ++GL(dl_tls_generation); +- ++ /* We ran out of memory while resizing the dtv slotinfo list. */ + _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\ + cannot create TLS data structures")); + } diff --git a/SOURCES/glibc-rh1991001-5.patch b/SOURCES/glibc-rh1991001-5.patch new file mode 100644 index 0000000..735e348 --- /dev/null +++ b/SOURCES/glibc-rh1991001-5.patch @@ -0,0 +1,58 @@ +commit c0669ae1a629e16b536bf11cdd0865e0dbcf4bee +Author: Szabolcs Nagy +Date: Wed Dec 30 21:52:38 2020 +0000 + + elf: Refactor _dl_update_slotinfo to avoid use after free + + map is not valid to access here because it can be freed by a concurrent + dlclose: during tls access (via __tls_get_addr) _dl_update_slotinfo is + called without holding dlopen locks. So don't check the modid of map. + + The map == 0 and map != 0 code paths can be shared (avoiding the dtv + resize in case of map == 0 is just an optimization: larger dtv than + necessary would be fine too). + + Reviewed-by: Adhemerval Zanella + +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index 9375650a3ab5247d..15ed01d795a8627a 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -743,6 +743,8 @@ _dl_update_slotinfo (unsigned long int req_modid) + { + for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt) + { ++ size_t modid = total + cnt; ++ + size_t gen = listp->slotinfo[cnt].gen; + + if (gen > new_gen) +@@ -758,25 +760,12 @@ _dl_update_slotinfo (unsigned long int req_modid) + + /* If there is no map this means the entry is empty. 
*/ + struct link_map *map = listp->slotinfo[cnt].map; +- if (map == NULL) +- { +- if (dtv[-1].counter >= total + cnt) +- { +- /* If this modid was used at some point the memory +- might still be allocated. */ +- free (dtv[total + cnt].pointer.to_free); +- dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED; +- dtv[total + cnt].pointer.to_free = NULL; +- } +- +- continue; +- } +- + /* Check whether the current dtv array is large enough. */ +- size_t modid = map->l_tls_modid; +- assert (total + cnt == modid); + if (dtv[-1].counter < modid) + { ++ if (map == NULL) ++ continue; ++ + /* Resize the dtv. */ + dtv = _dl_resize_dtv (dtv); + diff --git a/SOURCES/glibc-rh1991001-6.patch b/SOURCES/glibc-rh1991001-6.patch new file mode 100644 index 0000000..03505b0 --- /dev/null +++ b/SOURCES/glibc-rh1991001-6.patch @@ -0,0 +1,48 @@ +commit 8f7e09f4dbdb5c815a18b8285fbc5d5d7bc17d86 +Author: Szabolcs Nagy +Date: Thu Feb 11 11:29:23 2021 +0000 + + x86_64: Avoid lazy relocation of tlsdesc [BZ #27137] + + Lazy tlsdesc relocation is racy because the static tls optimization and + tlsdesc management operations are done without holding the dlopen lock. + + This similar to the commit b7cf203b5c17dd6d9878537d41e0c7cc3d270a67 + for aarch64, but it fixes a different race: bug 27137. + + Another issue is that ld auditing ignores DT_BIND_NOW and thus tries to + relocate tlsdesc lazily, but that does not work in a BIND_NOW module + due to missing DT_TLSDESC_PLT. Unconditionally relocating tlsdesc at + load time fixes this bug 27721 too. + +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index e308b662d245cc63..ef5740ba281c7282 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -563,12 +563,21 @@ elf_machine_lazy_rel (struct link_map *map, + } + else if (__glibc_likely (r_type == R_X86_64_TLSDESC)) + { +- struct tlsdesc volatile * __attribute__((__unused__)) td = +- (struct tlsdesc volatile *)reloc_addr; ++ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); ++ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); ++ const ElfW (Sym) *sym = &symtab[symndx]; ++ const struct r_found_version *version = NULL; + +- td->arg = (void*)reloc; +- td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)]) +- + map->l_addr); ++ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) ++ { ++ const ElfW (Half) *vernum = ++ (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); ++ version = &map->l_versions[vernum[symndx] & 0x7fff]; ++ } ++ ++ /* Always initialize TLS descriptors completely at load time, in ++ case static TLS is allocated for it that requires locking. */ ++ elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc); + } + else if (__glibc_unlikely (r_type == R_X86_64_IRELATIVE)) + { diff --git a/SOURCES/glibc-rh1991001-7.patch b/SOURCES/glibc-rh1991001-7.patch new file mode 100644 index 0000000..0d7da7f --- /dev/null +++ b/SOURCES/glibc-rh1991001-7.patch @@ -0,0 +1,116 @@ +commit ddcacd91cc10ff92d6201eda87047d029c14158d +Author: Szabolcs Nagy +Date: Thu Feb 11 11:40:11 2021 +0000 + + i386: Avoid lazy relocation of tlsdesc [BZ #27137] + + Lazy tlsdesc relocation is racy because the static tls optimization and + tlsdesc management operations are done without holding the dlopen lock. + + This similar to the commit b7cf203b5c17dd6d9878537d41e0c7cc3d270a67 + for aarch64, but it fixes a different race: bug 27137. + + On i386 the code is a bit more complicated than on x86_64 because both + rel and rela relocs are supported. 
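To make the race concrete: a TLS descriptor is published as an {entry, arg} pair that any thread may call through as entry(td), so a lazy resolver that rewrites both fields on first use races with concurrent callers and with dlclose. The sketch below models the eager setup the patch switches to; it is a toy under stated assumptions, with illustrative names (toy_tlsdesc, toy_loader_lock), not glibc's actual structures.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Toy descriptor: callers do td->entry (td) to obtain a TP offset.  */
struct toy_tlsdesc
{
  ptrdiff_t (*entry) (struct toy_tlsdesc *);
  void *arg;
};

static ptrdiff_t
toy_resolved_entry (struct toy_tlsdesc *td)
{
  /* Fully resolved: arg already holds the offset.  */
  return (ptrdiff_t) (intptr_t) td->arg;
}

static pthread_mutex_t toy_loader_lock = PTHREAD_MUTEX_INITIALIZER;

/* Eager relocation: the descriptor is completely initialized under
   the loader lock, before any other thread can call through it.  The
   lazy variant instead rewrote entry/arg from the first caller,
   without the dlopen lock, which is the race being removed.  */
static void
toy_relocate_eagerly (struct toy_tlsdesc *td, ptrdiff_t tp_offset)
{
  pthread_mutex_lock (&toy_loader_lock);
  td->arg = (void *) (intptr_t) tp_offset;
  td->entry = toy_resolved_entry;
  pthread_mutex_unlock (&toy_loader_lock);
}

int
main (void)
{
  struct toy_tlsdesc td;
  toy_relocate_eagerly (&td, 0x10);
  printf ("resolved TP offset: %ld\n", (long) td.entry (&td));
  return 0;
}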
+ +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index e5776ef7bc8ad749..3a30671591284d79 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -679,50 +679,32 @@ elf_machine_lazy_rel (struct link_map *map, + } + else if (__glibc_likely (r_type == R_386_TLS_DESC)) + { +- struct tlsdesc volatile * __attribute__((__unused__)) td = +- (struct tlsdesc volatile *)reloc_addr; +- +- /* Handle relocations that reference the local *ABS* in a simple +- way, so as to preserve a potential addend. */ +- if (ELF32_R_SYM (reloc->r_info) == 0) +- td->entry = _dl_tlsdesc_resolve_abs_plus_addend; +- /* Given a known-zero addend, we can store a pointer to the +- reloc in the arg position. */ +- else if (td->arg == 0) +- { +- td->arg = (void*)reloc; +- td->entry = _dl_tlsdesc_resolve_rel; +- } +- else +- { +- /* We could handle non-*ABS* relocations with non-zero addends +- by allocating dynamically an arg to hold a pointer to the +- reloc, but that sounds pointless. */ +- const Elf32_Rel *const r = reloc; +- /* The code below was borrowed from elf_dynamic_do_rel(). */ +- const ElfW(Sym) *const symtab = +- (const void *) D_PTR (map, l_info[DT_SYMTAB]); ++ const Elf32_Rel *const r = reloc; ++ /* The code below was borrowed from elf_dynamic_do_rel(). */ ++ const ElfW(Sym) *const symtab = ++ (const void *) D_PTR (map, l_info[DT_SYMTAB]); + ++ /* Always initialize TLS descriptors completely at load time, in ++ case static TLS is allocated for it that requires locking. */ + # ifdef RTLD_BOOTSTRAP +- /* The dynamic linker always uses versioning. */ +- assert (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL); ++ /* The dynamic linker always uses versioning. */ ++ assert (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL); + # else +- if (map->l_info[VERSYMIDX (DT_VERSYM)]) ++ if (map->l_info[VERSYMIDX (DT_VERSYM)]) + # endif +- { +- const ElfW(Half) *const version = +- (const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); +- ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], +- &map->l_versions[ndx], +- (void *) (l_addr + r->r_offset), skip_ifunc); +- } ++ { ++ const ElfW(Half) *const version = ++ (const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); ++ ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; ++ elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], ++ &map->l_versions[ndx], ++ (void *) (l_addr + r->r_offset), skip_ifunc); ++ } + # ifndef RTLD_BOOTSTRAP +- else +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, +- (void *) (l_addr + r->r_offset), skip_ifunc); ++ else ++ elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, ++ (void *) (l_addr + r->r_offset), skip_ifunc); + # endif +- } + } + else if (__glibc_unlikely (r_type == R_386_IRELATIVE)) + { +@@ -749,11 +731,21 @@ elf_machine_lazy_rela (struct link_map *map, + ; + else if (__glibc_likely (r_type == R_386_TLS_DESC)) + { +- struct tlsdesc volatile * __attribute__((__unused__)) td = +- (struct tlsdesc volatile *)reloc_addr; ++ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); ++ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); ++ const ElfW (Sym) *sym = &symtab[symndx]; ++ const struct r_found_version *version = NULL; ++ ++ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) ++ { ++ const ElfW (Half) *vernum = ++ (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); ++ version = &map->l_versions[vernum[symndx] & 0x7fff]; ++ } + +- td->arg = (void*)reloc; +- td->entry = 
_dl_tlsdesc_resolve_rela; ++ /* Always initialize TLS descriptors completely at load time, in ++ case static TLS is allocated for it that requires locking. */ ++ elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc); + } + else if (__glibc_unlikely (r_type == R_386_IRELATIVE)) + { diff --git a/SOURCES/glibc-rh1991001-8.patch b/SOURCES/glibc-rh1991001-8.patch new file mode 100644 index 0000000..3a1b147 --- /dev/null +++ b/SOURCES/glibc-rh1991001-8.patch @@ -0,0 +1,277 @@ +commit 55c9f3238080e9aba733bc0902779c46cfa16446 +Author: Szabolcs Nagy +Date: Thu Feb 11 11:52:24 2021 +0000 + + x86_64: Remove lazy tlsdesc relocation related code + + _dl_tlsdesc_resolve_rela and _dl_tlsdesc_resolve_hold are only used for + lazy tlsdesc relocation processing which is no longer supported. + +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index ef5740ba281c7282..b94d3b39ec1dca64 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -127,10 +127,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + } + } + +- if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy) +- *(ElfW(Addr)*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr) +- = (ElfW(Addr)) &_dl_tlsdesc_resolve_rela; +- + return lazy; + } + +diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S +index 80d771cd887dd626..77e78cf0a6d8babc 100644 +--- a/sysdeps/x86_64/dl-tlsdesc.S ++++ b/sysdeps/x86_64/dl-tlsdesc.S +@@ -148,107 +148,3 @@ _dl_tlsdesc_dynamic: + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + #endif /* SHARED */ +- +- /* This function is a wrapper for a lazy resolver for TLS_DESC +- RELA relocations. The incoming 0(%rsp) points to the caller's +- link map, pushed by the dynamic object's internal lazy TLS +- resolver front-end before tail-calling us. We need to pop it +- ourselves. %rax points to a TLS descriptor, such that 0(%rax) +- holds the address of the internal resolver front-end (unless +- some other thread beat us to resolving it) and 8(%rax) holds a +- pointer to the relocation. +- +- When the actual resolver returns, it will have adjusted the +- TLS descriptor such that we can tail-call it for it to return +- the TP offset of the symbol. */ +- +- .hidden _dl_tlsdesc_resolve_rela +- .global _dl_tlsdesc_resolve_rela +- .type _dl_tlsdesc_resolve_rela,@function +- cfi_startproc +- .align 16 +- /* The PLT entry will have pushed the link_map pointer. */ +-_dl_tlsdesc_resolve_rela: +- _CET_ENDBR +- cfi_adjust_cfa_offset (8) +- /* Save all call-clobbered registers. Add 8 bytes for push in +- the PLT entry to align the stack. */ +- subq $80, %rsp +- cfi_adjust_cfa_offset (80) +- movq %rax, (%rsp) +- movq %rdi, 8(%rsp) +- movq %rax, %rdi /* Pass tlsdesc* in %rdi. */ +- movq %rsi, 16(%rsp) +- movq 80(%rsp), %rsi /* Pass link_map* in %rsi. */ +- movq %r8, 24(%rsp) +- movq %r9, 32(%rsp) +- movq %r10, 40(%rsp) +- movq %r11, 48(%rsp) +- movq %rdx, 56(%rsp) +- movq %rcx, 64(%rsp) +- call _dl_tlsdesc_resolve_rela_fixup +- movq (%rsp), %rax +- movq 8(%rsp), %rdi +- movq 16(%rsp), %rsi +- movq 24(%rsp), %r8 +- movq 32(%rsp), %r9 +- movq 40(%rsp), %r10 +- movq 48(%rsp), %r11 +- movq 56(%rsp), %rdx +- movq 64(%rsp), %rcx +- addq $88, %rsp +- cfi_adjust_cfa_offset (-88) +- jmp *(%rax) +- cfi_endproc +- .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela +- +- /* This function is a placeholder for lazy resolving of TLS +- relocations. 
Once some thread starts resolving a TLS +- relocation, it sets up the TLS descriptor to use this +- resolver, such that other threads that would attempt to +- resolve it concurrently may skip the call to the original lazy +- resolver and go straight to a condition wait. +- +- When the actual resolver returns, it will have adjusted the +- TLS descriptor such that we can tail-call it for it to return +- the TP offset of the symbol. */ +- +- .hidden _dl_tlsdesc_resolve_hold +- .global _dl_tlsdesc_resolve_hold +- .type _dl_tlsdesc_resolve_hold,@function +- cfi_startproc +- .align 16 +-_dl_tlsdesc_resolve_hold: +-0: +- _CET_ENDBR +- /* Save all call-clobbered registers. */ +- subq $72, %rsp +- cfi_adjust_cfa_offset (72) +- movq %rax, (%rsp) +- movq %rdi, 8(%rsp) +- movq %rax, %rdi /* Pass tlsdesc* in %rdi. */ +- movq %rsi, 16(%rsp) +- /* Pass _dl_tlsdesc_resolve_hold's address in %rsi. */ +- leaq . - _dl_tlsdesc_resolve_hold(%rip), %rsi +- movq %r8, 24(%rsp) +- movq %r9, 32(%rsp) +- movq %r10, 40(%rsp) +- movq %r11, 48(%rsp) +- movq %rdx, 56(%rsp) +- movq %rcx, 64(%rsp) +- call _dl_tlsdesc_resolve_hold_fixup +-1: +- movq (%rsp), %rax +- movq 8(%rsp), %rdi +- movq 16(%rsp), %rsi +- movq 24(%rsp), %r8 +- movq 32(%rsp), %r9 +- movq 40(%rsp), %r10 +- movq 48(%rsp), %r11 +- movq 56(%rsp), %rdx +- movq 64(%rsp), %rcx +- addq $72, %rsp +- cfi_adjust_cfa_offset (-72) +- jmp *(%rax) +- cfi_endproc +- .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold +diff --git a/sysdeps/x86_64/dl-tlsdesc.h b/sysdeps/x86_64/dl-tlsdesc.h +index 66e659bb5c7ede74..1cde1ee9664f4908 100644 +--- a/sysdeps/x86_64/dl-tlsdesc.h ++++ b/sysdeps/x86_64/dl-tlsdesc.h +@@ -55,9 +55,7 @@ struct tlsdesc_dynamic_arg + + extern ptrdiff_t attribute_hidden + _dl_tlsdesc_return(struct tlsdesc *on_rax), +- _dl_tlsdesc_undefweak(struct tlsdesc *on_rax), +- _dl_tlsdesc_resolve_rela(struct tlsdesc *on_rax), +- _dl_tlsdesc_resolve_hold(struct tlsdesc *on_rax); ++ _dl_tlsdesc_undefweak(struct tlsdesc *on_rax); + + # ifdef SHARED + extern void *_dl_make_tlsdesc_dynamic (struct link_map *map, +diff --git a/sysdeps/x86_64/tlsdesc.c b/sysdeps/x86_64/tlsdesc.c +index 302d097dbb0c4f1e..61a19ae26944c84f 100644 +--- a/sysdeps/x86_64/tlsdesc.c ++++ b/sysdeps/x86_64/tlsdesc.c +@@ -16,120 +16,13 @@ + License along with the GNU C Library; if not, see + . */ + +-#include + #include +-#include + #include + #include + #include ++#define _dl_tlsdesc_resolve_hold 0 + #include + +-/* The following 2 functions take a caller argument, that contains the +- address expected to be in the TLS descriptor. If it's changed, we +- want to return immediately. */ +- +-/* This function is used to lazily resolve TLS_DESC RELA relocations. +- The argument location is used to hold a pointer to the relocation. */ +- +-void +-attribute_hidden +-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td, +- struct link_map *l) +-{ +- const ElfW(Rela) *reloc = td->arg; +- +- if (_dl_tlsdesc_resolve_early_return_p +- (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr))) +- return; +- +- /* The code below was borrowed from _dl_fixup(). */ +- const ElfW(Sym) *const symtab +- = (const void *) D_PTR (l, l_info[DT_SYMTAB]); +- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]); +- const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; +- lookup_t result; +- +- /* Look up the target symbol. If the normal lookup rules are not +- used don't look in the global scope. 
*/ +- if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL +- && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0) +- { +- const struct r_found_version *version = NULL; +- +- if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL) +- { +- const ElfW(Half) *vernum = +- (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]); +- ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff; +- version = &l->l_versions[ndx]; +- if (version->hash == 0) +- version = NULL; +- } +- +- result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, +- l->l_scope, version, ELF_RTYPE_CLASS_PLT, +- DL_LOOKUP_ADD_DEPENDENCY, NULL); +- } +- else +- { +- /* We already found the symbol. The module (and therefore its load +- address) is also known. */ +- result = l; +- } +- +- if (! sym) +- { +- td->arg = (void*)reloc->r_addend; +- td->entry = _dl_tlsdesc_undefweak; +- } +- else +- { +-# ifndef SHARED +- CHECK_STATIC_TLS (l, result); +-# else +- if (!TRY_STATIC_TLS (l, result)) +- { +- td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value +- + reloc->r_addend); +- td->entry = _dl_tlsdesc_dynamic; +- } +- else +-# endif +- { +- td->arg = (void*)(sym->st_value - result->l_tls_offset +- + reloc->r_addend); +- td->entry = _dl_tlsdesc_return; +- } +- } +- +- _dl_tlsdesc_wake_up_held_fixups (); +-} +- +-/* This function is used to avoid busy waiting for other threads to +- complete the lazy relocation. Once another thread wins the race to +- relocate a TLS descriptor, it sets the descriptor up such that this +- function is called to wait until the resolver releases the +- lock. */ +- +-void +-attribute_hidden +-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td, +- void *caller) +-{ +- /* Maybe we're lucky and can return early. */ +- if (caller != td->entry) +- return; +- +- /* Locking here will stop execution until the running resolver runs +- _dl_tlsdesc_wake_up_held_fixups(), releasing the lock. +- +- FIXME: We'd be better off waiting on a condition variable, such +- that we didn't have to hold the lock throughout the relocation +- processing. */ +- __rtld_lock_lock_recursive (GL(dl_load_lock)); +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); +-} +- + /* Unmap the dynamic object, but also release its TLS descriptor table + if there is one. */ + diff --git a/SOURCES/glibc-rh1991001-9.patch b/SOURCES/glibc-rh1991001-9.patch new file mode 100644 index 0000000..116739a --- /dev/null +++ b/SOURCES/glibc-rh1991001-9.patch @@ -0,0 +1,443 @@ +commit a75a02a696f9f869d77b17b99964823aa8833a8b +Author: Szabolcs Nagy +Date: Thu Feb 11 11:58:20 2021 +0000 + + i386: Remove lazy tlsdesc relocation related code + + Like in commit e75711ebfa976d5468ec292282566a18b07e4d67 for x86_64, + remove unused lazy tlsdesc relocation processing code: + + _dl_tlsdesc_resolve_abs_plus_addend + _dl_tlsdesc_resolve_rel + _dl_tlsdesc_resolve_rela + _dl_tlsdesc_resolve_hold + +diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S +index 128f0af3188f46bb..22ecb2c6adc6cc6e 100644 +--- a/sysdeps/i386/dl-tlsdesc.S ++++ b/sysdeps/i386/dl-tlsdesc.S +@@ -138,159 +138,3 @@ _dl_tlsdesc_dynamic: + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + #endif /* SHARED */ +- +- /* This function is a wrapper for a lazy resolver for TLS_DESC +- REL relocations that reference the *ABS* segment in their own +- link maps. %ebx points to the caller's GOT. 
%eax points to a +- TLS descriptor, such that 0(%eax) holds the address of the +- resolver wrapper itself (unless some other thread beat us to +- it) and 4(%eax) holds the addend in the relocation. +- +- When the actual resolver returns, it will have adjusted the +- TLS descriptor such that we can tail-call it for it to return +- the TP offset of the symbol. */ +- +- .hidden _dl_tlsdesc_resolve_abs_plus_addend +- .global _dl_tlsdesc_resolve_abs_plus_addend +- .type _dl_tlsdesc_resolve_abs_plus_addend,@function +- cfi_startproc +- .align 16 +-_dl_tlsdesc_resolve_abs_plus_addend: +-0: +- _CET_ENDBR +- pushl %eax +- cfi_adjust_cfa_offset (4) +- pushl %ecx +- cfi_adjust_cfa_offset (4) +- pushl %edx +- cfi_adjust_cfa_offset (4) +- movl $1f - 0b, %ecx +- movl 4(%ebx), %edx +- call _dl_tlsdesc_resolve_abs_plus_addend_fixup +-1: +- popl %edx +- cfi_adjust_cfa_offset (-4) +- popl %ecx +- cfi_adjust_cfa_offset (-4) +- popl %eax +- cfi_adjust_cfa_offset (-4) +- jmp *(%eax) +- cfi_endproc +- .size _dl_tlsdesc_resolve_abs_plus_addend, .-_dl_tlsdesc_resolve_abs_plus_addend +- +- /* This function is a wrapper for a lazy resolver for TLS_DESC +- REL relocations that had zero addends. %ebx points to the +- caller's GOT. %eax points to a TLS descriptor, such that +- 0(%eax) holds the address of the resolver wrapper itself +- (unless some other thread beat us to it) and 4(%eax) holds a +- pointer to the relocation. +- +- When the actual resolver returns, it will have adjusted the +- TLS descriptor such that we can tail-call it for it to return +- the TP offset of the symbol. */ +- +- .hidden _dl_tlsdesc_resolve_rel +- .global _dl_tlsdesc_resolve_rel +- .type _dl_tlsdesc_resolve_rel,@function +- cfi_startproc +- .align 16 +-_dl_tlsdesc_resolve_rel: +-0: +- _CET_ENDBR +- pushl %eax +- cfi_adjust_cfa_offset (4) +- pushl %ecx +- cfi_adjust_cfa_offset (4) +- pushl %edx +- cfi_adjust_cfa_offset (4) +- movl $1f - 0b, %ecx +- movl 4(%ebx), %edx +- call _dl_tlsdesc_resolve_rel_fixup +-1: +- popl %edx +- cfi_adjust_cfa_offset (-4) +- popl %ecx +- cfi_adjust_cfa_offset (-4) +- popl %eax +- cfi_adjust_cfa_offset (-4) +- jmp *(%eax) +- cfi_endproc +- .size _dl_tlsdesc_resolve_rel, .-_dl_tlsdesc_resolve_rel +- +- /* This function is a wrapper for a lazy resolver for TLS_DESC +- RELA relocations. %ebx points to the caller's GOT. %eax +- points to a TLS descriptor, such that 0(%eax) holds the +- address of the resolver wrapper itself (unless some other +- thread beat us to it) and 4(%eax) holds a pointer to the +- relocation. +- +- When the actual resolver returns, it will have adjusted the +- TLS descriptor such that we can tail-call it for it to return +- the TP offset of the symbol. */ +- +- .hidden _dl_tlsdesc_resolve_rela +- .global _dl_tlsdesc_resolve_rela +- .type _dl_tlsdesc_resolve_rela,@function +- cfi_startproc +- .align 16 +-_dl_tlsdesc_resolve_rela: +-0: +- _CET_ENDBR +- pushl %eax +- cfi_adjust_cfa_offset (4) +- pushl %ecx +- cfi_adjust_cfa_offset (4) +- pushl %edx +- cfi_adjust_cfa_offset (4) +- movl $1f - 0b, %ecx +- movl 4(%ebx), %edx +- call _dl_tlsdesc_resolve_rela_fixup +-1: +- popl %edx +- cfi_adjust_cfa_offset (-4) +- popl %ecx +- cfi_adjust_cfa_offset (-4) +- popl %eax +- cfi_adjust_cfa_offset (-4) +- jmp *(%eax) +- cfi_endproc +- .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela +- +- /* This function is a placeholder for lazy resolving of TLS +- relocations. 
Once some thread starts resolving a TLS +- relocation, it sets up the TLS descriptor to use this +- resolver, such that other threads that would attempt to +- resolve it concurrently may skip the call to the original lazy +- resolver and go straight to a condition wait. +- +- When the actual resolver returns, it will have adjusted the +- TLS descriptor such that we can tail-call it for it to return +- the TP offset of the symbol. */ +- +- .hidden _dl_tlsdesc_resolve_hold +- .global _dl_tlsdesc_resolve_hold +- .type _dl_tlsdesc_resolve_hold,@function +- cfi_startproc +- .align 16 +-_dl_tlsdesc_resolve_hold: +-0: +- _CET_ENDBR +- pushl %eax +- cfi_adjust_cfa_offset (4) +- pushl %ecx +- cfi_adjust_cfa_offset (4) +- pushl %edx +- cfi_adjust_cfa_offset (4) +- movl $1f - 0b, %ecx +- movl 4(%ebx), %edx +- call _dl_tlsdesc_resolve_hold_fixup +-1: +- popl %edx +- cfi_adjust_cfa_offset (-4) +- popl %ecx +- cfi_adjust_cfa_offset (-4) +- popl %eax +- cfi_adjust_cfa_offset (-4) +- jmp *(%eax) +- cfi_endproc +- .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold +diff --git a/sysdeps/i386/dl-tlsdesc.h b/sysdeps/i386/dl-tlsdesc.h +index c8a1e056150dc418..1a1a22c303baf85b 100644 +--- a/sysdeps/i386/dl-tlsdesc.h ++++ b/sysdeps/i386/dl-tlsdesc.h +@@ -43,11 +43,7 @@ struct tlsdesc_dynamic_arg + + extern ptrdiff_t attribute_hidden __attribute__ ((regparm (1))) + _dl_tlsdesc_return (struct tlsdesc *), +- _dl_tlsdesc_undefweak (struct tlsdesc *), +- _dl_tlsdesc_resolve_abs_plus_addend (struct tlsdesc *), +- _dl_tlsdesc_resolve_rel (struct tlsdesc *), +- _dl_tlsdesc_resolve_rela (struct tlsdesc *), +- _dl_tlsdesc_resolve_hold (struct tlsdesc *); ++ _dl_tlsdesc_undefweak (struct tlsdesc *); + + # ifdef SHARED + extern void *_dl_make_tlsdesc_dynamic (struct link_map *map, +diff --git a/sysdeps/i386/tlsdesc.c b/sysdeps/i386/tlsdesc.c +index 82fa8a1d35fd1912..1b4227c8381e1b3d 100644 +--- a/sysdeps/i386/tlsdesc.c ++++ b/sysdeps/i386/tlsdesc.c +@@ -16,242 +16,13 @@ + License along with the GNU C Library; if not, see + . */ + +-#include + #include +-#include + #include + #include + #include ++#define _dl_tlsdesc_resolve_hold 0 + #include + +-/* The following 4 functions take an entry_check_offset argument. +- It's computed by the caller as an offset between its entry point +- and the call site, such that by adding the built-in return address +- that is implicitly passed to the function with this offset, we can +- easily obtain the caller's entry point to compare with the entry +- point given in the TLS descriptor. If it's changed, we want to +- return immediately. */ +- +-/* This function is used to lazily resolve TLS_DESC REL relocations +- that reference the *ABS* segment in their own link maps. The +- argument is the addend originally stored there. 
*/ +- +-void +-__attribute__ ((regparm (3))) attribute_hidden +-_dl_tlsdesc_resolve_abs_plus_addend_fixup (struct tlsdesc volatile *td, +- struct link_map *l, +- ptrdiff_t entry_check_offset) +-{ +- ptrdiff_t addend = (ptrdiff_t) td->arg; +- +- if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0) +- - entry_check_offset)) +- return; +- +-#ifndef SHARED +- CHECK_STATIC_TLS (l, l); +-#else +- if (!TRY_STATIC_TLS (l, l)) +- { +- td->arg = _dl_make_tlsdesc_dynamic (l, addend); +- td->entry = _dl_tlsdesc_dynamic; +- } +- else +-#endif +- { +- td->arg = (void*) (addend - l->l_tls_offset); +- td->entry = _dl_tlsdesc_return; +- } +- +- _dl_tlsdesc_wake_up_held_fixups (); +-} +- +-/* This function is used to lazily resolve TLS_DESC REL relocations +- that originally had zero addends. The argument location, that +- originally held the addend, is used to hold a pointer to the +- relocation, but it has to be restored before we call the function +- that applies relocations. */ +- +-void +-__attribute__ ((regparm (3))) attribute_hidden +-_dl_tlsdesc_resolve_rel_fixup (struct tlsdesc volatile *td, +- struct link_map *l, +- ptrdiff_t entry_check_offset) +-{ +- const ElfW(Rel) *reloc = td->arg; +- +- if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0) +- - entry_check_offset)) +- return; +- +- /* The code below was borrowed from _dl_fixup(), +- except for checking for STB_LOCAL. */ +- const ElfW(Sym) *const symtab +- = (const void *) D_PTR (l, l_info[DT_SYMTAB]); +- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]); +- const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; +- lookup_t result; +- +- /* Look up the target symbol. If the normal lookup rules are not +- used don't look in the global scope. */ +- if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL +- && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0) +- { +- const struct r_found_version *version = NULL; +- +- if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL) +- { +- const ElfW(Half) *vernum = +- (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]); +- ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff; +- version = &l->l_versions[ndx]; +- if (version->hash == 0) +- version = NULL; +- } +- +- result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, +- l->l_scope, version, ELF_RTYPE_CLASS_PLT, +- DL_LOOKUP_ADD_DEPENDENCY, NULL); +- } +- else +- { +- /* We already found the symbol. The module (and therefore its load +- address) is also known. */ +- result = l; +- } +- +- if (!sym) +- { +- td->arg = 0; +- td->entry = _dl_tlsdesc_undefweak; +- } +- else +- { +-# ifndef SHARED +- CHECK_STATIC_TLS (l, result); +-# else +- if (!TRY_STATIC_TLS (l, result)) +- { +- td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value); +- td->entry = _dl_tlsdesc_dynamic; +- } +- else +-# endif +- { +- td->arg = (void*)(sym->st_value - result->l_tls_offset); +- td->entry = _dl_tlsdesc_return; +- } +- } +- +- _dl_tlsdesc_wake_up_held_fixups (); +-} +- +-/* This function is used to lazily resolve TLS_DESC RELA relocations. +- The argument location is used to hold a pointer to the relocation. 
*/ +- +-void +-__attribute__ ((regparm (3))) attribute_hidden +-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td, +- struct link_map *l, +- ptrdiff_t entry_check_offset) +-{ +- const ElfW(Rela) *reloc = td->arg; +- +- if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0) +- - entry_check_offset)) +- return; +- +- /* The code below was borrowed from _dl_fixup(), +- except for checking for STB_LOCAL. */ +- const ElfW(Sym) *const symtab +- = (const void *) D_PTR (l, l_info[DT_SYMTAB]); +- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]); +- const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; +- lookup_t result; +- +- /* Look up the target symbol. If the normal lookup rules are not +- used don't look in the global scope. */ +- if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL +- && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0) +- { +- const struct r_found_version *version = NULL; +- +- if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL) +- { +- const ElfW(Half) *vernum = +- (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]); +- ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff; +- version = &l->l_versions[ndx]; +- if (version->hash == 0) +- version = NULL; +- } +- +- result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, +- l->l_scope, version, ELF_RTYPE_CLASS_PLT, +- DL_LOOKUP_ADD_DEPENDENCY, NULL); +- } +- else +- { +- /* We already found the symbol. The module (and therefore its load +- address) is also known. */ +- result = l; +- } +- +- if (!sym) +- { +- td->arg = (void*) reloc->r_addend; +- td->entry = _dl_tlsdesc_undefweak; +- } +- else +- { +-# ifndef SHARED +- CHECK_STATIC_TLS (l, result); +-# else +- if (!TRY_STATIC_TLS (l, result)) +- { +- td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value +- + reloc->r_addend); +- td->entry = _dl_tlsdesc_dynamic; +- } +- else +-# endif +- { +- td->arg = (void*) (sym->st_value - result->l_tls_offset +- + reloc->r_addend); +- td->entry = _dl_tlsdesc_return; +- } +- } +- +- _dl_tlsdesc_wake_up_held_fixups (); +-} +- +-/* This function is used to avoid busy waiting for other threads to +- complete the lazy relocation. Once another thread wins the race to +- relocate a TLS descriptor, it sets the descriptor up such that this +- function is called to wait until the resolver releases the +- lock. */ +- +-void +-__attribute__ ((regparm (3))) attribute_hidden +-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td, +- struct link_map *l __attribute__((__unused__)), +- ptrdiff_t entry_check_offset) +-{ +- /* Maybe we're lucky and can return early. */ +- if (__builtin_return_address (0) - entry_check_offset != td->entry) +- return; +- +- /* Locking here will stop execution until the running resolver runs +- _dl_tlsdesc_wake_up_held_fixups(), releasing the lock. +- +- FIXME: We'd be better off waiting on a condition variable, such +- that we didn't have to hold the lock throughout the relocation +- processing. */ +- __rtld_lock_lock_recursive (GL(dl_load_lock)); +- __rtld_lock_unlock_recursive (GL(dl_load_lock)); +-} +- +- + /* Unmap the dynamic object, but also release its TLS descriptor table + if there is one. */ + diff --git a/SOURCES/glibc-rh2023420-1.patch b/SOURCES/glibc-rh2023420-1.patch new file mode 100644 index 0000000..3b8d299 --- /dev/null +++ b/SOURCES/glibc-rh2023420-1.patch @@ -0,0 +1,304 @@ +commit 86f65dffc2396d408beb628f1cad2b8f63e197bd +Author: H.J. 
Lu +Date: Sun Jul 12 06:04:53 2020 -0700 + + ld.so: Add --list-tunables to print tunable values + + Pass --list-tunables to ld.so to print tunables with min and max values. + + Reviewed-by: Adhemerval Zanella + +Conflicts: + elf/Makefile + (different backporting order) + +diff --git a/elf/Makefile b/elf/Makefile +index 3e71939d3234c4c3..aa65ec59f143bccf 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -44,6 +44,10 @@ dl-routines += dl-tunables + tunables-type = $(addprefix TUNABLES_FRONTEND_,$(have-tunables)) + CPPFLAGS-dl-tunables.c += -DTUNABLES_FRONTEND=$(tunables-type) + ++ifeq (yesyes,$(build-shared)$(run-built-tests)) ++tests-special += $(objpfx)list-tunables.out ++endif ++ + # Make sure that the compiler does not insert any library calls in tunables + # code paths. + ifeq (yes,$(have-loop-to-function)) +@@ -1825,6 +1829,13 @@ $(objpfx)tst-glibc-hwcaps-mask.out: \ + # tst-glibc-hwcaps-cache. + $(objpfx)tst-glibc-hwcaps-cache.out: $(objpfx)tst-glibc-hwcaps + ++$(objpfx)list-tunables.out: tst-rtld-list-tunables.sh $(objpfx)ld.so ++ $(SHELL) $< $(objpfx)ld.so '$(test-wrapper-env)' \ ++ '$(run_program_env)' > $(objpfx)/tst-rtld-list-tunables.out ++ cmp tst-rtld-list-tunables.exp \ ++ $(objpfx)/tst-rtld-list-tunables.out > $@; \ ++ $(evaluate-test) ++ + tst-dst-static-ENV = LD_LIBRARY_PATH='$$ORIGIN' + + $(objpfx)tst-rtld-help.out: $(objpfx)ld.so +diff --git a/elf/dl-main.h b/elf/dl-main.h +index 566713a0d10cfdb7..9e7b51d8f010e904 100644 +--- a/elf/dl-main.h ++++ b/elf/dl-main.h +@@ -63,7 +63,7 @@ struct audit_list + enum rtld_mode + { + rtld_mode_normal, rtld_mode_list, rtld_mode_verify, rtld_mode_trace, +- rtld_mode_help, ++ rtld_mode_list_tunables, rtld_mode_help, + }; + + /* Aggregated state information extracted from environment variables +diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c +index bbc3679e3564a766..3c84809d44381241 100644 +--- a/elf/dl-tunables.c ++++ b/elf/dl-tunables.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #define TUNABLES_INTERNAL 1 + #include "dl-tunables.h" +@@ -359,6 +360,48 @@ __tunables_init (char **envp) + } + } + ++void ++__tunables_print (void) ++{ ++ for (int i = 0; i < array_length (tunable_list); i++) ++ { ++ const tunable_t *cur = &tunable_list[i]; ++ if (cur->type.type_code == TUNABLE_TYPE_STRING ++ && cur->val.strval == NULL) ++ _dl_printf ("%s:\n", cur->name); ++ else ++ { ++ _dl_printf ("%s: ", cur->name); ++ switch (cur->type.type_code) ++ { ++ case TUNABLE_TYPE_INT_32: ++ _dl_printf ("%d (min: %d, max: %d)\n", ++ (int) cur->val.numval, ++ (int) cur->type.min, ++ (int) cur->type.max); ++ break; ++ case TUNABLE_TYPE_UINT_64: ++ _dl_printf ("0x%lx (min: 0x%lx, max: 0x%lx)\n", ++ (long int) cur->val.numval, ++ (long int) cur->type.min, ++ (long int) cur->type.max); ++ break; ++ case TUNABLE_TYPE_SIZE_T: ++ _dl_printf ("0x%Zx (min: 0x%Zx, max: 0x%Zx)\n", ++ (size_t) cur->val.numval, ++ (size_t) cur->type.min, ++ (size_t) cur->type.max); ++ break; ++ case TUNABLE_TYPE_STRING: ++ _dl_printf ("%s\n", cur->val.strval); ++ break; ++ default: ++ __builtin_unreachable (); ++ } ++ } ++ } ++} ++ + /* Set the tunable value. This is called by the module that the tunable exists + in. 
*/ + void +diff --git a/elf/dl-tunables.h b/elf/dl-tunables.h +index 7f181f3316cd9fc1..f4f2cfaeb9828599 100644 +--- a/elf/dl-tunables.h ++++ b/elf/dl-tunables.h +@@ -69,9 +69,11 @@ typedef struct _tunable tunable_t; + # include "dl-tunable-list.h" + + extern void __tunables_init (char **); ++extern void __tunables_print (void); + extern void __tunable_get_val (tunable_id_t, void *, tunable_callback_t); + extern void __tunable_set_val (tunable_id_t, void *); + rtld_hidden_proto (__tunables_init) ++rtld_hidden_proto (__tunables_print) + rtld_hidden_proto (__tunable_get_val) + + /* Define TUNABLE_GET and TUNABLE_SET in short form if TOP_NAMESPACE and +diff --git a/elf/dl-usage.c b/elf/dl-usage.c +index e22a9c39427187d1..908b4894b3014b2d 100644 +--- a/elf/dl-usage.c ++++ b/elf/dl-usage.c +@@ -255,7 +255,12 @@ setting environment variables (which would be inherited by subprocesses).\n\ + in LIST\n\ + --audit LIST use objects named in LIST as auditors\n\ + --preload LIST preload objects named in LIST\n\ +- --argv0 STRING set argv[0] to STRING before running\n\ ++ --argv0 STRING set argv[0] to STRING before running\n" ++#if HAVE_TUNABLES ++"\ ++ --list-tunables list all tunables with minimum and maximum values\n" ++#endif ++"\ + --help display this help and exit\n\ + --version output version information and exit\n\ + \n\ +diff --git a/elf/rtld.c b/elf/rtld.c +index 9e09896da078274d..54b621ec5ca014fa 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + + #include + +@@ -1262,6 +1263,16 @@ dl_main (const ElfW(Phdr) *phdr, + _dl_argc -= 2; + _dl_argv += 2; + } ++#if HAVE_TUNABLES ++ else if (! strcmp (_dl_argv[1], "--list-tunables")) ++ { ++ state.mode = rtld_mode_list_tunables; ++ ++ ++_dl_skip_args; ++ --_dl_argc; ++ ++_dl_argv; ++ } ++#endif + else if (strcmp (_dl_argv[1], "--help") == 0) + { + state.mode = rtld_mode_help; +@@ -1282,6 +1293,14 @@ dl_main (const ElfW(Phdr) *phdr, + else + break; + ++#if HAVE_TUNABLES ++ if (__glibc_unlikely (state.mode == rtld_mode_list_tunables)) ++ { ++ __tunables_print (); ++ _exit (0); ++ } ++#endif ++ + /* If we have no further argument the program was called incorrectly. + Grant the user some education. */ + if (_dl_argc < 2) +diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp +new file mode 100644 +index 0000000000000000..4f3f7ee4e30a2b42 +--- /dev/null ++++ b/elf/tst-rtld-list-tunables.exp +@@ -0,0 +1,14 @@ ++glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+) ++glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+) ++glibc.malloc.check: 0 (min: 0, max: 3) ++glibc.malloc.mmap_max: 0 (min: -2147483648, max: 2147483647) ++glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+) ++glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+) ++glibc.malloc.perturb: 0 (min: 0, max: 255) ++glibc.malloc.tcache_count: 0x0 (min: 0x0, max: 0x[f]+) ++glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0x[f]+) ++glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0x[f]+) ++glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0x[f]+) ++glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0x[f]+) ++glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10) ++glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0x[f]+) +diff --git a/elf/tst-rtld-list-tunables.sh b/elf/tst-rtld-list-tunables.sh +new file mode 100755 +index 0000000000000000..e7bbdde94952b872 +--- /dev/null ++++ b/elf/tst-rtld-list-tunables.sh +@@ -0,0 +1,34 @@ ++#!/bin/sh ++# Test for --list-tunables option ld.so. ++# Copyright (C) 2021 Free Software Foundation, Inc. 
++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++set -e ++ ++rtld=$1 ++test_wrapper_env=$2 ++run_program_env=$3 ++ ++LC_ALL=C ++export LC_ALL ++ ++${test_wrapper_env} \ ++${run_program_env} \ ++$rtld --list-tunables \ ++| sort -u \ ++| egrep "(rtld|malloc)" \ ++| sed -e "s/0xf\+/0x[f]+/" +diff --git a/manual/tunables.texi b/manual/tunables.texi +index 07887981748bc44b..43272cf885d1e3e6 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -28,6 +28,44 @@ Finally, the set of tunables available may vary between distributions as + the tunables feature allows distributions to add their own tunables under + their own namespace. + ++Passing @option{--list-tunables} to the dynamic loader to print all ++tunables with minimum and maximum values: ++ ++@example ++$ /lib64/ld-linux-x86-64.so.2 --list-tunables ++glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10) ++glibc.elision.skip_lock_after_retries: 3 (min: -2147483648, max: 2147483647) ++glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0xffffffffffffffff) ++glibc.malloc.perturb: 0 (min: 0, max: 255) ++glibc.cpu.x86_shared_cache_size: 0x100000 (min: 0x0, max: 0xffffffffffffffff) ++glibc.mem.tagging: 0 (min: 0, max: 255) ++glibc.elision.tries: 3 (min: -2147483648, max: 2147483647) ++glibc.elision.enable: 0 (min: 0, max: 1) ++glibc.cpu.x86_rep_movsb_threshold: 0x1000 (min: 0x100, max: 0xffffffffffffffff) ++glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0xffffffffffffffff) ++glibc.elision.skip_lock_busy: 3 (min: -2147483648, max: 2147483647) ++glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0xffffffffffffffff) ++glibc.cpu.x86_rep_stosb_threshold: 0x800 (min: 0x1, max: 0xffffffffffffffff) ++glibc.cpu.x86_non_temporal_threshold: 0xc0000 (min: 0x0, max: 0xffffffffffffffff) ++glibc.cpu.x86_shstk: ++glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff) ++glibc.malloc.mmap_max: 0 (min: -2147483648, max: 2147483647) ++glibc.elision.skip_trylock_internal_abort: 3 (min: -2147483648, max: 2147483647) ++glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff) ++glibc.cpu.x86_ibt: ++glibc.cpu.hwcaps: ++glibc.elision.skip_lock_internal_abort: 3 (min: -2147483648, max: 2147483647) ++glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0xffffffffffffffff) ++glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0xffffffffffffffff) ++glibc.cpu.x86_data_cache_size: 0x8000 (min: 0x0, max: 0xffffffffffffffff) ++glibc.malloc.tcache_count: 0x0 (min: 0x0, max: 0xffffffffffffffff) ++glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0xffffffffffffffff) ++glibc.pthread.mutex_spin_count: 100 (min: 0, max: 32767) ++glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0xffffffffffffffff) ++glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0xffffffffffffffff) ++glibc.malloc.check: 0 (min: 0, max: 3) ++@end example ++ + @menu + * Tunable names:: The structure of a tunable name + * Memory Allocation Tunables:: Tunables in the 
memory allocation subsystem diff --git a/SOURCES/glibc-rh2023420-2.patch b/SOURCES/glibc-rh2023420-2.patch new file mode 100644 index 0000000..23e3da5 --- /dev/null +++ b/SOURCES/glibc-rh2023420-2.patch @@ -0,0 +1,30 @@ +commit d2d12c7a988a9a04aec23b5e4af549db61b0a005 +Author: H.J. Lu +Date: Tue Feb 2 09:31:56 2021 -0800 + + tst-rtld-list-tunables.sh: Unset glibc tunables + + Unset glibc tunables and their aliases for --list-tunables test. + +diff --git a/elf/tst-rtld-list-tunables.sh b/elf/tst-rtld-list-tunables.sh +index e7bbdde94952b872..78f4ed2ebbd3db2c 100755 +--- a/elf/tst-rtld-list-tunables.sh ++++ b/elf/tst-rtld-list-tunables.sh +@@ -26,6 +26,17 @@ run_program_env=$3 + LC_ALL=C + export LC_ALL + ++# Unset tunables and their aliases. ++GLIBC_TUNABLES= ++MALLOC_ARENA_MAX= ++MALLOC_ARENA_TEST= ++MALLOC_CHECK_= ++MALLOC_MMAP_MAX_= ++MALLOC_MMAP_THRESHOLD_= ++MALLOC_PERTURB_= ++MALLOC_TOP_PAD_= ++MALLOC_TRIM_THRESHOLD_= ++ + ${test_wrapper_env} \ + ${run_program_env} \ + $rtld --list-tunables \ diff --git a/SOURCES/glibc-rh2023420-3.patch b/SOURCES/glibc-rh2023420-3.patch new file mode 100644 index 0000000..1e8307c --- /dev/null +++ b/SOURCES/glibc-rh2023420-3.patch @@ -0,0 +1,578 @@ +commit 851f32cf7bf7067f73b991610778915edd57d7b4 +Author: Florian Weimer +Date: Tue Mar 2 14:38:42 2021 +0100 + + ld.so: Implement the --list-diagnostics option + +diff --git a/elf/Makefile b/elf/Makefile +index aa65ec59f143bccf..d246f1c0d9e019fd 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -64,7 +64,7 @@ elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \ + # interpreter and operating independent of libc. + rtld-routines = rtld $(all-dl-routines) dl-sysdep dl-environ dl-minimal \ + dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \ +- dl-usage ++ dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu + all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines) + + CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables +@@ -672,6 +672,9 @@ CFLAGS-cache.c += $(SYSCONF-FLAGS) + CFLAGS-rtld.c += $(SYSCONF-FLAGS) + CFLAGS-dl-usage.c += $(SYSCONF-FLAGS) \ + -D'RTLD="$(rtlddir)/$(rtld-installed-name)"' ++CFLAGS-dl-diagnostics.c += $(SYSCONF-FLAGS) \ ++ -D'PREFIX="$(prefix)"' \ ++ -D'RTLD="$(rtlddir)/$(rtld-installed-name)"' + + cpp-srcs-left := $(all-rtld-routines:=.os) + lib := rtld +diff --git a/elf/dl-diagnostics-cpu.c b/elf/dl-diagnostics-cpu.c +new file mode 100644 +index 0000000000000000..f7d149764bcb35a1 +--- /dev/null ++++ b/elf/dl-diagnostics-cpu.c +@@ -0,0 +1,24 @@ ++/* Print CPU diagnostics data in ld.so. Stub version. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/
++
++#include <dl-diagnostics.h>
++
++void
++_dl_diagnostics_cpu (void)
++{
++}
+diff --git a/elf/dl-diagnostics-kernel.c b/elf/dl-diagnostics-kernel.c
+new file mode 100644
+index 0000000000000000..831c358f1463cbf4
+--- /dev/null
++++ b/elf/dl-diagnostics-kernel.c
+@@ -0,0 +1,24 @@
++/* Print kernel diagnostics data in ld.so.  Stub version.
++   Copyright (C) 2021 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <dl-diagnostics.h>
++
++void
++_dl_diagnostics_kernel (void)
++{
++}
+diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c
+new file mode 100644
+index 0000000000000000..bef224b36cbf5fc3
+--- /dev/null
++++ b/elf/dl-diagnostics.c
+@@ -0,0 +1,265 @@
++/* Print diagnostics data in ld.so.
++   Copyright (C) 2021 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include 
++#include 
++#include 
++#include 
++
++#include 
++#include 
++#include 
++#include 
++#include 
++#include 
++#include "trusted-dirs.h"
++#include "version.h"
++
++/* Write CH to standard output.  */
++static void
++_dl_putc (char ch)
++{
++  _dl_write (STDOUT_FILENO, &ch, 1);
++}
++
++/* Print CH to standard output, quoting it if necessary.  */
++static void
++print_quoted_char (char ch)
++{
++  if (ch < ' ' || ch > '~')
++    {
++      char buf[4];
++      buf[0] = '\\';
++      buf[1] = '0' + ((ch >> 6) & 7);
++      buf[2] = '0' + ((ch >> 3) & 7);
++      buf[3] = '0' + (ch & 7);
++      _dl_write (STDOUT_FILENO, buf, 4);
++    }
++  else
++    {
++      if (ch == '\\' || ch == '"')
++        _dl_putc ('\\');
++      _dl_putc (ch);
++    }
++}
++
++/* Print S of LEN bytes to standard output, quoting characters as
++   needed.
*/ ++static void ++print_string_length (const char *s, size_t len) ++{ ++ _dl_putc ('"'); ++ for (size_t i = 0; i < len; ++i) ++ print_quoted_char (s[i]); ++ _dl_putc ('"'); ++} ++ ++void ++_dl_diagnostics_print_string (const char *s) ++{ ++ if (s == NULL) ++ { ++ _dl_printf ("0x0"); ++ return; ++ } ++ ++ _dl_putc ('"'); ++ while (*s != '\0') ++ { ++ print_quoted_char (*s); ++ ++s; ++ } ++ _dl_putc ('"'); ++} ++ ++void ++_dl_diagnostics_print_labeled_string (const char *label, const char *s) ++{ ++ _dl_printf ("%s=", label); ++ _dl_diagnostics_print_string (s); ++ _dl_putc ('\n'); ++} ++ ++void ++_dl_diagnostics_print_labeled_value (const char *label, uint64_t value) ++{ ++ if (sizeof (value) == sizeof (unsigned long int)) ++ /* _dl_printf can print 64-bit values directly. */ ++ _dl_printf ("%s=0x%lx\n", label, (unsigned long int) value); ++ else ++ { ++ uint32_t high = value >> 32; ++ uint32_t low = value; ++ if (high == 0) ++ _dl_printf ("%s=0x%x\n", label, low); ++ else ++ _dl_printf ("%s=0x%x%08x\n", label, high, low); ++ } ++} ++ ++/* Return true if ENV is an unfiltered environment variable. */ ++static bool ++unfiltered_envvar (const char *env, size_t *name_length) ++{ ++ char *env_equal = strchr (env, '='); ++ if (env_equal == NULL) ++ { ++ /* Always dump malformed entries. */ ++ *name_length = strlen (env); ++ return true; ++ } ++ size_t envname_length = env_equal - env; ++ *name_length = envname_length; ++ ++ /* LC_ and LD_ variables. */ ++ if (env[0] == 'L' && (env[1] == 'C' || env[1] == 'D') ++ && env[2] == '_') ++ return true; ++ ++ /* MALLOC_ variables. */ ++ if (strncmp (env, "MALLOC_", strlen ("MALLOC_")) == 0) ++ return true; ++ ++ static const char unfiltered[] = ++ "DATEMSK\0" ++ "GCONV_PATH\0" ++ "GETCONF_DIR\0" ++ "GETCONF_DIR\0" ++ "GLIBC_TUNABLES\0" ++ "GMON_OUTPUT_PREFIX\0" ++ "HESIOD_CONFIG\0" ++ "HES_DOMAIN\0" ++ "HOSTALIASES\0" ++ "I18NPATH\0" ++ "IFS\0" ++ "LANG\0" ++ "LOCALDOMAIN\0" ++ "LOCPATH\0" ++ "MSGVERB\0" ++ "NIS_DEFAULTS\0" ++ "NIS_GROUP\0" ++ "NIS_PATH\0" ++ "NLSPATH\0" ++ "PATH\0" ++ "POSIXLY_CORRECT\0" ++ "RESOLV_HOST_CONF\0" ++ "RES_OPTIONS\0" ++ "SEV_LEVEL\0" ++ "TMPDIR\0" ++ "TZ\0" ++ "TZDIR\0" ++ /* Two null bytes at the end to mark the end of the list via an ++ empty substring. */ ++ ; ++ for (const char *candidate = unfiltered; *candidate != '\0'; ) ++ { ++ size_t candidate_length = strlen (candidate); ++ if (candidate_length == envname_length ++ && memcmp (candidate, env, candidate_length) == 0) ++ return true; ++ candidate += candidate_length + 1; ++ } ++ ++ return false; ++} ++ ++/* Dump the process environment. */ ++static void ++print_environ (char **environ) ++{ ++ unsigned int index = 0; ++ for (char **envp = environ; *envp != NULL; ++envp) ++ { ++ char *env = *envp; ++ size_t name_length; ++ bool unfiltered = unfiltered_envvar (env, &name_length); ++ _dl_printf ("env%s[0x%x]=", ++ unfiltered ? "" : "_filtered", index); ++ if (unfiltered) ++ _dl_diagnostics_print_string (env); ++ else ++ print_string_length (env, name_length); ++ _dl_putc ('\n'); ++ ++index; ++ } ++} ++ ++/* Print configured paths and the built-in search path. 
*/ ++static void ++print_paths (void) ++{ ++ _dl_diagnostics_print_labeled_string ("path.prefix", PREFIX); ++ _dl_diagnostics_print_labeled_string ("path.rtld", RTLD); ++ _dl_diagnostics_print_labeled_string ("path.sysconfdir", SYSCONFDIR); ++ ++ unsigned int index = 0; ++ static const char *system_dirs = SYSTEM_DIRS "\0"; ++ for (const char *e = system_dirs; *e != '\0'; ) ++ { ++ size_t len = strlen (e); ++ _dl_printf ("path.system_dirs[0x%x]=", index); ++ print_string_length (e, len); ++ _dl_putc ('\n'); ++ ++index; ++ e += len + 1; ++ } ++} ++ ++/* Print information about the glibc version. */ ++static void ++print_version (void) ++{ ++ _dl_diagnostics_print_labeled_string ("version.release", RELEASE); ++ _dl_diagnostics_print_labeled_string ("version.version", VERSION); ++} ++ ++void ++_dl_print_diagnostics (char **environ) ++{ ++#ifdef HAVE_DL_DISCOVER_OSVERSION ++ _dl_diagnostics_print_labeled_value ++ ("dl_discover_osversion", _dl_discover_osversion ()); ++#endif ++ _dl_diagnostics_print_labeled_string ("dl_dst_lib", DL_DST_LIB); ++ _dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap)); ++ _dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT); ++ _dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2)); ++ _dl_diagnostics_print_labeled_string ++ ("dl_hwcaps_subdirs", _dl_hwcaps_subdirs); ++ _dl_diagnostics_print_labeled_value ++ ("dl_hwcaps_subdirs_active", _dl_hwcaps_subdirs_active ()); ++ _dl_diagnostics_print_labeled_value ("dl_osversion", GLRO (dl_osversion)); ++ _dl_diagnostics_print_labeled_value ("dl_pagesize", GLRO (dl_pagesize)); ++ _dl_diagnostics_print_labeled_string ("dl_platform", GLRO (dl_platform)); ++ _dl_diagnostics_print_labeled_string ++ ("dl_profile_output", GLRO (dl_profile_output)); ++ _dl_diagnostics_print_labeled_value ++ ("dl_string_platform", _dl_string_platform ( GLRO (dl_platform))); ++ ++ _dl_diagnostics_print_labeled_string ("dso.ld", LD_SO); ++ _dl_diagnostics_print_labeled_string ("dso.libc", LIBC_SO); ++ ++ print_environ (environ); ++ print_paths (); ++ print_version (); ++ ++ _dl_diagnostics_kernel (); ++ _dl_diagnostics_cpu (); ++ ++ _exit (EXIT_SUCCESS); ++} +diff --git a/elf/dl-diagnostics.h b/elf/dl-diagnostics.h +new file mode 100644 +index 0000000000000000..27dcb12bca12e5b6 +--- /dev/null ++++ b/elf/dl-diagnostics.h +@@ -0,0 +1,46 @@ ++/* Interfaces for printing diagnostics in ld.so. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_DIAGNOSTICS_H ++#define _DL_DIAGNOSTICS_H ++ ++#include ++ ++/* Write the null-terminated string to standard output, surrounded in ++ quotation marks. */ ++void _dl_diagnostics_print_string (const char *s) attribute_hidden; ++ ++/* Like _dl_diagnostics_print_string, but add a LABEL= prefix, and a ++ newline character as a suffix. 
*/ ++void _dl_diagnostics_print_labeled_string (const char *label, const char *s) ++ attribute_hidden; ++ ++/* Print LABEL=VALUE to standard output, followed by a newline ++ character. */ ++void _dl_diagnostics_print_labeled_value (const char *label, uint64_t value) ++ attribute_hidden; ++ ++/* Print diagnostics data for the kernel. Called from ++ _dl_print_diagnostics. */ ++void _dl_diagnostics_kernel (void) attribute_hidden; ++ ++/* Print diagnostics data for the CPU(s). Called from ++ _dl_print_diagnostics. */ ++void _dl_diagnostics_cpu (void) attribute_hidden; ++ ++#endif /* _DL_DIAGNOSTICS_H */ +diff --git a/elf/dl-main.h b/elf/dl-main.h +index 9e7b51d8f010e904..9fbbdb0fac09adf3 100644 +--- a/elf/dl-main.h ++++ b/elf/dl-main.h +@@ -63,7 +63,7 @@ struct audit_list + enum rtld_mode + { + rtld_mode_normal, rtld_mode_list, rtld_mode_verify, rtld_mode_trace, +- rtld_mode_list_tunables, rtld_mode_help, ++ rtld_mode_list_tunables, rtld_mode_list_diagnostics, rtld_mode_help, + }; + + /* Aggregated state information extracted from environment variables +@@ -121,4 +121,7 @@ _Noreturn void _dl_version (void) attribute_hidden; + _Noreturn void _dl_help (const char *argv0, struct dl_main_state *state) + attribute_hidden; + ++/* Print a diagnostics dump. */ ++_Noreturn void _dl_print_diagnostics (char **environ) attribute_hidden; ++ + #endif /* _DL_MAIN */ +diff --git a/elf/dl-usage.c b/elf/dl-usage.c +index 908b4894b3014b2d..e19e1791d9169da2 100644 +--- a/elf/dl-usage.c ++++ b/elf/dl-usage.c +@@ -261,6 +261,7 @@ setting environment variables (which would be inherited by subprocesses).\n\ + --list-tunables list all tunables with minimum and maximum values\n" + #endif + "\ ++ --list-diagnostics list diagnostics information\n\ + --help display this help and exit\n\ + --version output version information and exit\n\ + \n\ +diff --git a/elf/rtld.c b/elf/rtld.c +index 54b621ec5ca014fa..d14c388f548d6d51 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -138,6 +138,7 @@ static void dl_main_state_init (struct dl_main_state *state); + /* Process all environments variables the dynamic linker must recognize. + Since all of them start with `LD_' we are a bit smarter while finding + all the entries. */ ++extern char **_environ attribute_hidden; + static void process_envvars (struct dl_main_state *state); + + #ifdef DL_ARGV_NOT_RELRO +@@ -1273,6 +1274,14 @@ dl_main (const ElfW(Phdr) *phdr, + ++_dl_argv; + } + #endif ++ else if (! strcmp (_dl_argv[1], "--list-diagnostics")) ++ { ++ state.mode = rtld_mode_list_diagnostics; ++ ++ ++_dl_skip_args; ++ --_dl_argc; ++ ++_dl_argv; ++ } + else if (strcmp (_dl_argv[1], "--help") == 0) + { + state.mode = rtld_mode_help; +@@ -1301,6 +1310,9 @@ dl_main (const ElfW(Phdr) *phdr, + } + #endif + ++ if (state.mode == rtld_mode_list_diagnostics) ++ _dl_print_diagnostics (_environ); ++ + /* If we have no further argument the program was called incorrectly. + Grant the user some education. */ + if (_dl_argc < 2) +@@ -2623,12 +2635,6 @@ a filename can be specified using the LD_DEBUG_OUTPUT environment variable.\n"); + } + } + +-/* Process all environments variables the dynamic linker must recognize. +- Since all of them start with `LD_' we are a bit smarter while finding +- all the entries. 
*/
+-extern char **_environ attribute_hidden;
+-
+-
+ static void
+ process_envvars (struct dl_main_state *state)
+ {
+diff --git a/sysdeps/unix/sysv/linux/dl-diagnostics-kernel.c b/sysdeps/unix/sysv/linux/dl-diagnostics-kernel.c
+new file mode 100644
+index 0000000000000000..59f6402c547ba590
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/dl-diagnostics-kernel.c
+@@ -0,0 +1,77 @@
++/* Print kernel diagnostics data in ld.so.  Linux version.
++   Copyright (C) 2021 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <dl-diagnostics.h>
++#include <ldsodefs.h>
++#include <sys/utsname.h>
++
++/* Dump the auxiliary vector to standard output.  */
++static void
++print_auxv (void)
++{
++  /* See _dl_show_auxv.  The code below follows the general output
++     format for diagnostic dumps.  */
++  unsigned int index = 0;
++  for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; ++av)
++    {
++      _dl_printf ("auxv[0x%x].a_type=0x%lx\n"
++                  "auxv[0x%x].a_val=",
++                  index, (unsigned long int) av->a_type, index);
++      if (av->a_type == AT_EXECFN
++          || av->a_type == AT_PLATFORM
++          || av->a_type == AT_BASE_PLATFORM)
++        /* The address of the strings is not useful at all, so print
++           the strings themselves.  */
++        _dl_diagnostics_print_string ((const char *) av->a_un.a_val);
++      else
++        _dl_printf ("0x%lx", (unsigned long int) av->a_un.a_val);
++      _dl_printf ("\n");
++      ++index;
++    }
++}
++
++/* Print one uname entry.  */
++static void
++print_utsname_entry (const char *field, const char *value)
++{
++  _dl_printf ("uname.");
++  _dl_diagnostics_print_labeled_string (field, value);
++}
++
++/* Print information from uname, including the kernel version.  */
++static void
++print_uname (void)
++{
++  struct utsname uts;
++  if (__uname (&uts) == 0)
++    {
++      print_utsname_entry ("sysname", uts.sysname);
++      print_utsname_entry ("nodename", uts.nodename);
++      print_utsname_entry ("release", uts.release);
++      print_utsname_entry ("version", uts.version);
++      print_utsname_entry ("machine", uts.machine);
++      print_utsname_entry ("domainname", uts.domainname);
++    }
++}
++
++void
++_dl_diagnostics_kernel (void)
++{
++  print_auxv ();
++  print_uname ();
++}
diff --git a/SOURCES/glibc-rh2023420-4.patch b/SOURCES/glibc-rh2023420-4.patch
new file mode 100644
index 0000000..bf1a407
--- /dev/null
+++ b/SOURCES/glibc-rh2023420-4.patch
@@ -0,0 +1,117 @@
+commit e4933c8a92ea08eecdf3ab45e7f76c95dc3d20ac
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Tue Mar 2 14:58:05 2021 +0100
+
+    x86: Automate generation of PREFERRED_FEATURE_INDEX_1 bitfield
+
+    Use a .def file to define the bitfield layout, so that it is possible
+    to iterate over field members using the preprocessor.
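+
+As a standalone illustration of the .def technique (a minimal sketch,
+not part of the upstream commit; FEATURE_LIST and the two BIT entries
+are hypothetical stand-ins for including the real .def file):
+
+#include <stdio.h>
+
+/* In glibc each expansion site does '#include "...def"'; the list is
+   inlined here so the sketch is self-contained.  */
+#define FEATURE_LIST \
+  BIT (Fast_Rep_String) \
+  BIT (Slow_BSF)
+
+/* First expansion: sequential bit index values.  */
+enum
+{
+#define BIT(x) _bitindex_arch_##x,
+  FEATURE_LIST
+#undef BIT
+};
+
+/* Second expansion: derive the mask constants from the indexes.  */
+enum
+{
+#define BIT(x) bit_arch_##x = 1u << _bitindex_arch_##x,
+  FEATURE_LIST
+#undef BIT
+};
+
+int
+main (void)
+{
+  /* Third expansion: iterate over every member, as
+     dl-diagnostics-cpu.c later does for the preferred bits.  */
+#define BIT(x) printf (#x "=0x%x\n", (unsigned int) bit_arch_##x);
+  FEATURE_LIST
+#undef BIT
+  return 0;
+}
+
+The benefit over the hand-maintained bit_arch_*/index_arch_* defines it
+replaces is that the member list exists in exactly one place.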
+ +Conflicts: + sysdeps/x86/include/cpu-features.h + (re-did the change from scratch) + sysdeps/x86/include/cpu-features-preferred_feature_index_1.def + (adjusted to the downstream bits) + +diff --git a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def +new file mode 100644 +index 0000000000000000..17a5cc428c1dabea +--- /dev/null ++++ b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def +@@ -0,0 +1,34 @@ ++/* Bits in the PREFERRED_FEATURE_INDEX_1 bitfield of . ++ Copyright (C) 2020-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++BIT (I586) ++BIT (I686) ++BIT (Fast_Rep_String) ++BIT (Fast_Copy_Backward) ++BIT (Fast_Unaligned_Load) ++BIT (Fast_Unaligned_Copy) ++BIT (Slow_BSF) ++BIT (Slow_SSE4_2) ++BIT (AVX_Fast_Unaligned_Load) ++BIT (Prefer_MAP_32BIT_EXEC) ++BIT (Prefer_PMINUB_for_stringop) ++BIT (Prefer_No_VZEROUPPER) ++BIT (Prefer_ERMS) ++BIT (Prefer_FSRM) ++BIT (Prefer_No_AVX512) ++BIT (MathVec_Prefer_No_AVX512) +diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h +index f62be0b9b3746675..f43e22f677b249a9 100644 +--- a/sysdeps/x86/include/cpu-features.h ++++ b/sysdeps/x86/include/cpu-features.h +@@ -80,40 +80,23 @@ enum + # define HAS_ARCH_FEATURE(name) \ + CPU_FEATURE_PREFERRED (name) + +-/* PREFERRED_FEATURE_INDEX_1. 
*/ +-# define bit_arch_I586 (1u << 0) +-# define bit_arch_I686 (1u << 1) +-# define bit_arch_Fast_Rep_String (1u << 2) +-# define bit_arch_Fast_Copy_Backward (1u << 3) +-# define bit_arch_Fast_Unaligned_Load (1u << 4) +-# define bit_arch_Fast_Unaligned_Copy (1u << 5) +-# define bit_arch_Slow_BSF (1u << 6) +-# define bit_arch_Slow_SSE4_2 (1u << 7) +-# define bit_arch_AVX_Fast_Unaligned_Load (1u << 8) +-# define bit_arch_Prefer_MAP_32BIT_EXEC (1u << 9) +-# define bit_arch_Prefer_PMINUB_for_stringop (1u << 10) +-# define bit_arch_Prefer_No_VZEROUPPER (1u << 11) +-# define bit_arch_Prefer_ERMS (1u << 12) +-# define bit_arch_Prefer_FSRM (1u << 13) +-# define bit_arch_Prefer_No_AVX512 (1u << 14) +-# define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15) +- +-# define index_arch_Fast_Rep_String PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Fast_Copy_Backward PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Slow_BSF PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Fast_Unaligned_Load PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Prefer_PMINUB_for_stringop PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Fast_Unaligned_Copy PREFERRED_FEATURE_INDEX_1 +-# define index_arch_I586 PREFERRED_FEATURE_INDEX_1 +-# define index_arch_I686 PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Slow_SSE4_2 PREFERRED_FEATURE_INDEX_1 +-# define index_arch_AVX_Fast_Unaligned_Load PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Prefer_MAP_32BIT_EXEC PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Prefer_No_VZEROUPPER PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Prefer_ERMS PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1 +-# define index_arch_MathVec_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1 +-# define index_arch_Prefer_FSRM PREFERRED_FEATURE_INDEX_1 ++/* PREFERRED_FEATURE_INDEX_1. First define the bitindex values ++ sequentially, then define the bit_arch* and index_arch_* lookup ++ constants. */ ++enum ++ { ++#define BIT(x) _bitindex_arch_##x , ++#include "cpu-features-preferred_feature_index_1.def" ++#undef BIT ++ }; ++enum ++ { ++#define BIT(x) \ ++ bit_arch_##x = 1u << _bitindex_arch_##x , \ ++ index_arch_##x = PREFERRED_FEATURE_INDEX_1, ++#include "cpu-features-preferred_feature_index_1.def" ++#undef BIT ++ }; + + /* XCR0 Feature flags. */ + # define bit_XMM_state (1u << 1) diff --git a/SOURCES/glibc-rh2023420-5.patch b/SOURCES/glibc-rh2023420-5.patch new file mode 100644 index 0000000..0d3ca97 --- /dev/null +++ b/SOURCES/glibc-rh2023420-5.patch @@ -0,0 +1,131 @@ +commit 01a5746b6c8a44dc29d33e056b63485075a6a3cc +Author: Florian Weimer +Date: Wed Feb 24 13:12:04 2021 +0100 + + x86: Add CPU-specific diagnostics to ld.so --list-diagnostics + +Conflicts: + sysdeps/x86/dl-diagnostics-cpu.c + (reworked due to struct differences, different knobs + downstream) + +diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c +new file mode 100644 +index 0000000000000000..0ba286a828b69937 +--- /dev/null ++++ b/sysdeps/x86/dl-diagnostics-cpu.c +@@ -0,0 +1,101 @@ ++/* Print CPU diagnostics data in ld.so. x86 version. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++static void ++print_cpu_features_value (const char *label, uint64_t value) ++{ ++ _dl_printf ("x86.cpu_features."); ++ _dl_diagnostics_print_labeled_value (label, value); ++} ++ ++static void ++print_cpu_feature_internal (unsigned int index, const char *kind, ++ unsigned int reg, uint32_t value) ++{ ++ _dl_printf ("x86.cpu_features.features[0x%x].%s[0x%x]=0x%x\n", ++ index, kind, reg, value); ++} ++ ++static void ++print_cpu_feature_preferred (const char *label, unsigned int flag) ++{ ++ _dl_printf("x86.cpu_features.preferred.%s=0x%x\n", label, flag); ++} ++ ++void ++_dl_diagnostics_cpu (void) ++{ ++ const struct cpu_features *cpu_features = __get_cpu_features (); ++ ++ print_cpu_features_value ("basic.kind", cpu_features->basic.kind); ++ print_cpu_features_value ("basic.max_cpuid", cpu_features->basic.max_cpuid); ++ print_cpu_features_value ("basic.family", cpu_features->basic.family); ++ print_cpu_features_value ("basic.model", cpu_features->basic.model); ++ print_cpu_features_value ("basic.stepping", cpu_features->basic.stepping); ++ ++ for (unsigned int index = 0; index < COMMON_CPUID_INDEX_MAX; ++index) ++ { ++ /* Downstream, these constants are not part of the ABI yet, so ++ analysis needs to take the precise glibc version into ++ account. */ ++ print_cpu_feature_internal ++ (index, "cpuid", 0, cpu_features->features[index].cpuid.eax); ++ print_cpu_feature_internal ++ (index, "cpuid", 1, cpu_features->features[index].cpuid.ebx); ++ print_cpu_feature_internal ++ (index, "cpuid", 2, cpu_features->features[index].cpuid.ecx); ++ print_cpu_feature_internal ++ (index, "cpuid", 3, cpu_features->features[index].cpuid.edx); ++ print_cpu_feature_internal ++ (index, "usable", 0, cpu_features->features[index].usable.eax); ++ print_cpu_feature_internal ++ (index, "usable", 1, cpu_features->features[index].usable.ebx); ++ print_cpu_feature_internal ++ (index, "usable", 2, cpu_features->features[index].usable.ecx); ++ print_cpu_feature_internal ++ (index, "usable", 3, cpu_features->features[index].usable.edx); ++ } ++ ++ /* The preferred indicators are not part of the ABI and need to be ++ translated. 
*/ ++#define BIT(x) \ ++ print_cpu_feature_preferred (#x, CPU_FEATURE_PREFERRED_P (cpu_features, x)); ++#include "cpu-features-preferred_feature_index_1.def" ++#undef BIT ++ ++ print_cpu_features_value ("xsave_state_size", ++ cpu_features->xsave_state_size); ++ print_cpu_features_value ("xsave_state_full_size", ++ cpu_features->xsave_state_full_size); ++ print_cpu_features_value ("data_cache_size", cpu_features->data_cache_size); ++ print_cpu_features_value ("shared_cache_size", ++ cpu_features->shared_cache_size); ++ print_cpu_features_value ("non_temporal_threshold", ++ cpu_features->non_temporal_threshold); ++ print_cpu_features_value ("rep_movsb_threshold", ++ cpu_features->rep_movsb_threshold); ++ print_cpu_features_value ("rep_stosb_threshold", ++ cpu_features->rep_stosb_threshold); ++ _Static_assert (offsetof (struct cpu_features, rep_stosb_threshold) ++ + sizeof (cpu_features->rep_stosb_threshold) ++ == sizeof (*cpu_features), ++ "last cpu_features field has been printed"); ++} +diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h +index f43e22f677b249a9..536643b209425198 100644 +--- a/sysdeps/x86/include/cpu-features.h ++++ b/sysdeps/x86/include/cpu-features.h +@@ -107,6 +107,8 @@ enum + # define bit_XTILECFG_state (1u << 17) + # define bit_XTILEDATA_state (1u << 18) + ++/* NB: When adding new fields, update sysdeps/x86/dl-diagnostics-cpu.c ++ to print them. */ + struct cpu_features + { + struct cpu_features_basic basic; diff --git a/SOURCES/glibc-rh2023420-6.patch b/SOURCES/glibc-rh2023420-6.patch new file mode 100644 index 0000000..a05a690 --- /dev/null +++ b/SOURCES/glibc-rh2023420-6.patch @@ -0,0 +1,255 @@ +commit c1cb2deeca1a85c6fc5bd41b90816d48a95bc434 +Author: Florian Weimer +Date: Sun Dec 5 11:28:34 2021 +0100 + + elf: execve statically linked programs instead of crashing [BZ #28648] + + Programs without dynamic dependencies and without a program + interpreter are now run via execve. + + Previously, the dynamic linker either crashed while attempting to + read a non-existing dynamic segment (looking for DT_AUDIT/DT_DEPAUDIT + data), or the self-relocated in the static PIE executable crashed + because the outer dynamic linker had already applied RELRO protection. + + is needed because execve is not available in the + dynamic loader on Hurd. + + Reviewed-by: H.J. 
Lu + +Conflicts: + elf/Makefile + (some missing backports) + elf/rtld.c + (missing rework of ld.so self-relocation downstream, + always print error as a number due to missing + sterrorname_np, also fix errcode/errno glitch) + sysdeps/unix/sysv/linux/dl-execve.h + (missing INTERNAL_SYSCALL_CALL refactoring to Linux-like + calling convention) + +diff --git a/elf/Makefile b/elf/Makefile +index d246f1c0d9e019fd..b3e8ab2792608de7 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -214,7 +214,8 @@ tests += restest1 preloadtest loadfail multiload origtest resolvfail \ + tst-tls-ie tst-tls-ie-dlmopen \ + argv0test \ + tst-glibc-hwcaps tst-glibc-hwcaps-prepend tst-glibc-hwcaps-mask \ +- tst-tls20 tst-tls21 ++ tst-tls20 tst-tls21 \ ++ tst-rtld-run-static \ + # reldep9 + tests-internal += loadtest unload unload2 circleload1 \ + neededtest neededtest2 neededtest3 neededtest4 \ +@@ -1917,3 +1918,5 @@ $(objpfx)tst-tls20.out: $(objpfx)tst-tls20mod-bad.so \ + $(objpfx)tst-tls21: $(libdl) $(shared-thread-library) + $(objpfx)tst-tls21.out: $(objpfx)tst-tls21mod.so + $(objpfx)tst-tls21mod.so: $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so) ++ ++$(objpfx)tst-rtld-run-static.out: $(objpfx)/ldconfig +diff --git a/elf/rtld.c b/elf/rtld.c +index d14c388f548d6d51..461d8c114a875a9b 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -48,6 +48,7 @@ + #include + #include + #include ++#include + + #include + +@@ -1114,6 +1115,40 @@ load_audit_modules (struct link_map *main_map, struct audit_list *audit_list) + } + } + ++/* Check if the executable is not actualy dynamically linked, and ++ invoke it directly in that case. */ ++static void ++rtld_chain_load (struct link_map *main_map, char *argv0) ++{ ++ /* The dynamic loader run against itself. */ ++ const char *rtld_soname ++ = ((const char *) D_PTR (&GL(dl_rtld_map), l_info[DT_STRTAB]) ++ + GL(dl_rtld_map).l_info[DT_SONAME]->d_un.d_val); ++ if (main_map->l_info[DT_SONAME] != NULL ++ && strcmp (rtld_soname, ++ ((const char *) D_PTR (main_map, l_info[DT_STRTAB]) ++ + main_map->l_info[DT_SONAME]->d_un.d_val)) == 0) ++ _dl_fatal_printf ("%s: loader cannot load itself\n", rtld_soname); ++ ++ /* With DT_NEEDED dependencies, the executable is dynamically ++ linked. */ ++ if (__glibc_unlikely (main_map->l_info[DT_NEEDED] != NULL)) ++ return; ++ ++ /* If the executable has program interpreter, it is dynamically ++ linked. */ ++ for (size_t i = 0; i < main_map->l_phnum; ++i) ++ if (main_map->l_phdr[i].p_type == PT_INTERP) ++ return; ++ ++ const char *pathname = _dl_argv[0]; ++ if (argv0 != NULL) ++ _dl_argv[0] = argv0; ++ int errcode = __rtld_execve (pathname, _dl_argv, _environ); ++ _dl_fatal_printf("%s: cannot execute %s: %d\n", ++ rtld_soname, pathname, errcode); ++} ++ + static void + dl_main (const ElfW(Phdr) *phdr, + ElfW(Word) phnum, +@@ -1384,14 +1419,8 @@ dl_main (const ElfW(Phdr) *phdr, + /* Now the map for the main executable is available. 
*/ + main_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded; + +- if (__glibc_likely (state.mode == rtld_mode_normal) +- && GL(dl_rtld_map).l_info[DT_SONAME] != NULL +- && main_map->l_info[DT_SONAME] != NULL +- && strcmp ((const char *) D_PTR (&GL(dl_rtld_map), l_info[DT_STRTAB]) +- + GL(dl_rtld_map).l_info[DT_SONAME]->d_un.d_val, +- (const char *) D_PTR (main_map, l_info[DT_STRTAB]) +- + main_map->l_info[DT_SONAME]->d_un.d_val) == 0) +- _dl_fatal_printf ("loader cannot load itself\n"); ++ if (__glibc_likely (state.mode == rtld_mode_normal)) ++ rtld_chain_load (main_map, argv0); + + phdr = main_map->l_phdr; + phnum = main_map->l_phnum; +diff --git a/elf/tst-rtld-run-static.c b/elf/tst-rtld-run-static.c +new file mode 100644 +index 0000000000000000..7281093504b675c4 +--- /dev/null ++++ b/elf/tst-rtld-run-static.c +@@ -0,0 +1,62 @@ ++/* Test running statically linked programs using ld.so. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ char *ldconfig_path = xasprintf ("%s/elf/ldconfig", support_objdir_root); ++ ++ { ++ char *argv[] = { (char *) "ld.so", ldconfig_path, (char *) "--help", NULL }; ++ struct support_capture_subprocess cap ++ = support_capture_subprogram (support_objdir_elf_ldso, argv); ++ support_capture_subprocess_check (&cap, "no --argv0", 0, sc_allow_stdout); ++ puts ("info: output without --argv0:"); ++ puts (cap.out.buffer); ++ TEST_VERIFY (strstr (cap.out.buffer, "Usage: ldconfig [OPTION...]\n") ++ == cap.out.buffer); ++ support_capture_subprocess_free (&cap); ++ } ++ ++ { ++ char *argv[] = ++ { ++ (char *) "ld.so", (char *) "--argv0", (char *) "ldconfig-argv0", ++ ldconfig_path, (char *) "--help", NULL ++ }; ++ struct support_capture_subprocess cap ++ = support_capture_subprogram (support_objdir_elf_ldso, argv); ++ support_capture_subprocess_check (&cap, "with --argv0", 0, sc_allow_stdout); ++ puts ("info: output with --argv0:"); ++ puts (cap.out.buffer); ++ TEST_VERIFY (strstr (cap.out.buffer, "Usage: ldconfig-argv0 [OPTION...]\n") ++ == cap.out.buffer); ++ support_capture_subprocess_free (&cap); ++ } ++ ++ free (ldconfig_path); ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/generic/dl-execve.h b/sysdeps/generic/dl-execve.h +new file mode 100644 +index 0000000000000000..5fd097df69e1770c +--- /dev/null ++++ b/sysdeps/generic/dl-execve.h +@@ -0,0 +1,25 @@ ++/* execve for the dynamic linker. Generic stub version. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++static int ++__rtld_execve (const char *path, char *const *argv, char *const *envp) ++{ ++ return ENOSYS; ++} +diff --git a/sysdeps/unix/sysv/linux/dl-execve.h b/sysdeps/unix/sysv/linux/dl-execve.h +new file mode 100644 +index 0000000000000000..9ec6539286bb0589 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/dl-execve.h +@@ -0,0 +1,30 @@ ++/* execve for the dynamic linker. Linux version. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++static inline int ++__rtld_execve (const char *path, char *const *argv, char *const *envp) ++{ ++ INTERNAL_SYSCALL_DECL (err); ++ long int r = INTERNAL_SYSCALL_CALL (execve, err, path, argv, envp); ++ if (INTERNAL_SYSCALL_ERROR_P (r, err)) ++ return INTERNAL_SYSCALL_ERRNO (r, err); ++ else ++ return 0; ++} diff --git a/SOURCES/glibc-rh2023420-7.patch b/SOURCES/glibc-rh2023420-7.patch new file mode 100644 index 0000000..c14031c --- /dev/null +++ b/SOURCES/glibc-rh2023420-7.patch @@ -0,0 +1,41 @@ +commit 2e75604f8337fa4332977f72a8f6726309679edf +Author: Florian Weimer +Date: Fri Dec 10 16:06:36 2021 +0100 + + elf: Install a symbolic link to ld.so as /usr/bin/ld.so + + This makes ld.so features such as --preload, --audit, + and --list-diagnostics more accessible to end users because they + do not need to know the ABI name of the dynamic loader. + + Reviewed-by: Carlos O'Donell + + Conflicts: + elf/Makefile + (versioned shared objects downstream) + +diff --git a/elf/Makefile b/elf/Makefile +index b3e8ab2792608de7..c552aff350c2faac 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -99,7 +99,7 @@ endif + ifeq (yes,$(build-shared)) + extra-objs = $(all-rtld-routines:%=%.os) soinit.os sofini.os interp.os + generated += librtld.os dl-allobjs.os ld.so ldd +-install-others = $(inst_rtlddir)/$(rtld-installed-name) ++install-others = $(inst_rtlddir)/$(rtld-installed-name) $(inst_bindir)/ld.so + install-bin-script = ldd + endif + +@@ -622,6 +622,11 @@ $(inst_rtlddir)/$(rtld-installed-name): \ + $(make-target-directory) + $(make-shlib-link) + ++# Creates the relative /usr/bin/ld.so symbolic link. ++$(inst_bindir)/ld.so: $(inst_rtlddir)/$(rtld-installed-name) ++ $(make-target-directory) ++ $(make-link) ++ + # Special target called by parent to install just the dynamic linker. 
+ .PHONY: ldso_install + ldso_install: $(inst_rtlddir)/$(rtld-installed-name) diff --git a/SPECS/glibc.spec b/SPECS/glibc.spec index c45e1e3..1a40a78 100644 --- a/SPECS/glibc.spec +++ b/SPECS/glibc.spec @@ -1,6 +1,6 @@ %define glibcsrcdir glibc-2.28 %define glibcversion 2.28 -%define glibcrelease 174%{?dist} +%define glibcrelease 180%{?dist} # Pre-release tarballs are pulled in from git using a command that is # effectively: # @@ -783,6 +783,43 @@ Patch605: glibc-rh1937515.patch Patch606: glibc-rh1934162-1.patch Patch607: glibc-rh1934162-2.patch Patch608: glibc-rh2000374.patch +Patch609: glibc-rh1991001-1.patch +Patch610: glibc-rh1991001-2.patch +Patch611: glibc-rh1991001-3.patch +Patch612: glibc-rh1991001-4.patch +Patch613: glibc-rh1991001-5.patch +Patch614: glibc-rh1991001-6.patch +Patch615: glibc-rh1991001-7.patch +Patch616: glibc-rh1991001-8.patch +Patch617: glibc-rh1991001-9.patch +Patch618: glibc-rh1991001-10.patch +Patch619: glibc-rh1991001-11.patch +Patch620: glibc-rh1991001-12.patch +Patch621: glibc-rh1991001-13.patch +Patch622: glibc-rh1991001-14.patch +Patch623: glibc-rh1991001-15.patch +Patch624: glibc-rh1991001-16.patch +Patch625: glibc-rh1991001-17.patch +Patch626: glibc-rh1991001-18.patch +Patch627: glibc-rh1991001-19.patch +Patch628: glibc-rh1991001-20.patch +Patch629: glibc-rh1991001-21.patch +Patch630: glibc-rh1991001-22.patch +Patch631: glibc-rh1929928-1.patch +Patch632: glibc-rh1929928-2.patch +Patch633: glibc-rh1929928-3.patch +Patch634: glibc-rh1929928-4.patch +Patch635: glibc-rh1929928-5.patch +Patch636: glibc-rh1984802-1.patch +Patch637: glibc-rh1984802-2.patch +Patch638: glibc-rh1984802-3.patch +Patch639: glibc-rh2023420-1.patch +Patch640: glibc-rh2023420-2.patch +Patch641: glibc-rh2023420-3.patch +Patch642: glibc-rh2023420-4.patch +Patch643: glibc-rh2023420-5.patch +Patch644: glibc-rh2023420-6.patch +Patch645: glibc-rh2023420-7.patch ############################################################################## # Continued list of core "glibc" package information: @@ -1851,6 +1888,7 @@ cp benchtests/scripts/benchout.schema.json %{glibc_sysroot}%{_prefix}/libexec/gl cp benchtests/scripts/compare_bench.py %{glibc_sysroot}%{_prefix}/libexec/glibc-benchtests/ cp benchtests/scripts/import_bench.py %{glibc_sysroot}%{_prefix}/libexec/glibc-benchtests/ cp benchtests/scripts/validate_benchout.py %{glibc_sysroot}%{_prefix}/libexec/glibc-benchtests/ +%endif %if 0%{?_enable_debug_packages} # The #line directives gperf generates do not give the proper @@ -2217,8 +2255,8 @@ cat > utils.filelist < nss-devel.filelist grep '/libnsl-[0-9.]*.so$' master.filelist > libnsl.filelist test $(wc -l < libnsl.filelist) -eq 1 +%if %{with benchtests} ############################################################################### # glibc-benchtests ############################################################################### @@ -2360,7 +2399,14 @@ exclude_common_dirs() for d in $(echo $exclude_dirs | sed 's/ /\n/g'); do sed -i "\|^%%dir $d/\?$|d" $1 done + + # Special kludge: /usr/bin/ld.so is a symbolic link, so debuggers + # do not need it to locate debugging information (they can use + # the real path instead). + sed -i '\,^/usr/lib/debug/usr/bin/ld\.so\.debug$,d' $1 } +# The file does not exist on all architectures. 
+rm -f %{glibc_sysroot}/usr/lib/debug/usr/bin/ld.so.debug
+
 %ifarch %{debuginfocommonarches}
 exclude_common_dirs debuginfocommon.filelist
@@ -2763,6 +2809,25 @@ fi
 %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared
 
 %changelog
+* Mon Dec 13 2021 Florian Weimer <fweimer@redhat.com> - 2.28-180
+- Do not install /usr/lib/debug/usr/bin/ld.so.debug (#2023420)
+
+* Fri Dec 10 2021 Florian Weimer <fweimer@redhat.com> - 2.28-179
+- Add /usr/bin/ld.so --list-diagnostics (#2023420)
+
+* Fri Dec 10 2021 Carlos O'Donell <carlos@redhat.com> - 2.28-178
+- Preliminary support for new IBM zSeries hardware (#1984802)
+
+* Fri Dec 10 2021 Carlos O'Donell <carlos@redhat.com> - 2.28-177
+- Fix --with and --without builds for benchtests and bootstrap (#2020989)
+
+* Wed Dec 1 2021 Florian Weimer <fweimer@redhat.com> - 2.28-176
+- A64FX memcpy/memmove/memset optimizations (#1929928)
+
+* Tue Nov 30 2021 Florian Weimer <fweimer@redhat.com> - 2.28-175
+- Fix dl-tls.c assert failure with pthread_create & dlopen (#1991001)
+- Fix x86_64 TLS lazy binding with auditors (#1950056)
+
 * Thu Nov 25 2021 Arjun Shankar <arjun@redhat.com> - 2.28-174
 - Introduce new glibc-doc.noarch subpackage (#2021671)
 - Move the reference manual info pages from glibc-devel to glibc-doc
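As a closing aside on the --list-diagnostics backport above: several of
the dl-diagnostics routines (unfiltered_envvar, print_paths) walk
NUL-packed string tables terminated by an empty string. A minimal
self-contained sketch of that layout (illustrative names only, not
glibc code):

#include <stdio.h>
#include <string.h>

/* Adjacent string literals concatenate to "PATH\0TZ\0", and the
   literal's own terminating NUL supplies the empty string that marks
   the end of the table -- the same layout dl-diagnostics.c scans.  */
static const char packed_list[] =
  "PATH\0"
  "TZ\0";

int
main (void)
{
  for (const char *p = packed_list; *p != '\0'; p += strlen (p) + 1)
    printf ("entry: %s\n", p);
  return 0;
}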