From 8ae0024e6976e6ce744c2f2432eaae14311162a9 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 01 2017 03:20:13 +0000 Subject: import glibc-2.17-196.el7 --- diff --git a/SOURCES/glibc-rh1144516.patch b/SOURCES/glibc-rh1144516.patch new file mode 100644 index 0000000..8c45e74 --- /dev/null +++ b/SOURCES/glibc-rh1144516.patch @@ -0,0 +1,47 @@ +commit cfa4df95003c963c16d2102aef9c806f8175f373 +Author: Marcus Shawcroft +Date: Tue Sep 24 12:59:06 2013 +0100 + + [AArch64] Adding sigcontextinfo.h + +diff --git a/ports/sysdeps/unix/sysv/linux/aarch64/sigcontextinfo.h b/ports/sysdeps/unix/sysv/linux/aarch64/sigcontextinfo.h +new file mode 100644 +index 0000000..42ff38e +--- /dev/null ++++ b/ports/sysdeps/unix/sysv/linux/aarch64/sigcontextinfo.h +@@ -0,0 +1,35 @@ ++/* AArch64 definitions for signal handling calling conventions. ++ Copyright (C) 1996-2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include "kernel-features.h" ++ ++#define SIGCONTEXT siginfo_t *_si, struct ucontext * ++#define GET_PC(ctx) ((void *) (ctx)->uc_mcontext.pc) ++ ++/* There is no reliable way to get the sigcontext unless we use a ++ three-argument signal handler. 
*/ ++#define __sigaction(sig, act, oact) ({ \ ++ (act)->sa_flags |= SA_SIGINFO; \ ++ (__sigaction) (sig, act, oact); \ ++}) ++ ++#define sigaction(sig, act, oact) ({ \ ++ (act)->sa_flags |= SA_SIGINFO; \ ++ (sigaction) (sig, act, oact); \ ++}) diff --git a/SOURCES/glibc-rh1207032.patch b/SOURCES/glibc-rh1207032.patch index ae6ae18..7ddfb41 100644 --- a/SOURCES/glibc-rh1207032.patch +++ b/SOURCES/glibc-rh1207032.patch @@ -391,7 +391,7 @@ Index: b/malloc/malloc.c assert(!p || chunk_is_mmapped(mem2chunk(p)) || ar_ptr == arena_for_chunk(mem2chunk(p))); -@@ -3184,43 +3200,48 @@ __libc_calloc(size_t n, size_t elem_size +@@ -3184,43 +3200,54 @@ __libc_calloc(size_t n, size_t elem_size sz = bytes; arena_get(av, sz); @@ -435,6 +435,12 @@ Index: b/malloc/malloc.c - oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop; } -#endif ++ else ++ { ++ /* No usable arenas. */ ++ oldtop = 0; ++ oldtopsize = 0; ++ } mem = _int_malloc(av, sz); @@ -464,7 +470,7 @@ Index: b/malloc/malloc.c p = mem2chunk(mem); /* Two optional cases in which clearing not necessary */ -@@ -3310,6 +3331,16 @@ _int_malloc(mstate av, size_t bytes) +@@ -3310,6 +3337,16 @@ _int_malloc(mstate av, size_t bytes) checked_request2size(bytes, nb); @@ -481,7 +487,7 @@ Index: b/malloc/malloc.c /* If the size qualifies as a fastbin, first check corresponding bin. 
This code is safe to execute even if av is not yet initialized, so we -@@ -3334,7 +3365,7 @@ _int_malloc(mstate av, size_t bytes) +@@ -3334,7 +3371,7 @@ _int_malloc(mstate av, size_t bytes) errstr = "malloc(): memory corruption (fast)"; errout: mutex_unlock(&av->mutex); @@ -490,7 +496,7 @@ Index: b/malloc/malloc.c mutex_lock(&av->mutex); return NULL; } -@@ -3421,9 +3452,9 @@ _int_malloc(mstate av, size_t bytes) +@@ -3421,9 +3458,9 @@ _int_malloc(mstate av, size_t bytes) if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0) || __builtin_expect (victim->size > av->system_mem, 0)) { @@ -502,7 +508,7 @@ Index: b/malloc/malloc.c mutex_lock(&av->mutex); } size = chunksize(victim); -@@ -3801,7 +3832,7 @@ _int_free(mstate av, mchunkptr p, int ha +@@ -3801,7 +3838,7 @@ _int_free(mstate av, mchunkptr p, int ha errout: if (have_lock || locked) (void)mutex_unlock(&av->mutex); @@ -511,7 +517,7 @@ Index: b/malloc/malloc.c if (have_lock) mutex_lock(&av->mutex); return; -@@ -4196,7 +4227,7 @@ _int_realloc(mstate av, mchunkptr oldp, +@@ -4196,7 +4233,7 @@ _int_realloc(mstate av, mchunkptr oldp, errstr = "realloc(): invalid old size"; errout: mutex_unlock(&av->mutex); @@ -520,7 +526,7 @@ Index: b/malloc/malloc.c mutex_lock(&av->mutex); return NULL; } -@@ -4436,7 +4467,7 @@ static void* +@@ -4436,7 +4473,7 @@ static void* _int_valloc(mstate av, size_t bytes) { /* Ensure initialization/consolidation */ @@ -529,7 +535,7 @@ Index: b/malloc/malloc.c return _int_memalign(av, GLRO(dl_pagesize), bytes); } -@@ -4451,7 +4482,7 @@ _int_pvalloc(mstate av, size_t bytes) +@@ -4451,7 +4488,7 @@ _int_pvalloc(mstate av, size_t bytes) size_t pagesz; /* Ensure initialization/consolidation */ @@ -538,7 +544,7 @@ Index: b/malloc/malloc.c pagesz = GLRO(dl_pagesize); return _int_memalign(av, pagesz, (bytes + pagesz - 1) & ~(pagesz - 1)); } -@@ -4463,6 +4494,10 @@ _int_pvalloc(mstate av, size_t bytes) +@@ -4463,6 +4500,10 @@ _int_pvalloc(mstate av, size_t bytes) static int mtrim(mstate av, size_t pad) { @@ 
-549,7 +555,7 @@ Index: b/malloc/malloc.c /* Ensure initialization/consolidation */ malloc_consolidate (av); -@@ -4956,8 +4991,14 @@ libc_hidden_def (__libc_mallopt) +@@ -4956,8 +4997,14 @@ libc_hidden_def (__libc_mallopt) extern char **__libc_argv attribute_hidden; static void diff --git a/SOURCES/glibc-rh1228114-1.patch b/SOURCES/glibc-rh1228114-1.patch new file mode 100644 index 0000000..65d539d --- /dev/null +++ b/SOURCES/glibc-rh1228114-1.patch @@ -0,0 +1,234 @@ +commit 89fb6835583088059b8d8987c86caac33e37e5ea +Author: Siddhesh Poyarekar +Date: Tue Jun 11 11:11:11 2013 +0530 + + Fix symbol definitions for __clock_* functions + + __clock_gettime and other __clock_* functions could result in an extra + PLT reference within libc.so if it actually gets used. None of the + code currently uses them, which is why this probably went unnoticed. + +Index: b/include/time.h +=================================================================== +--- a/include/time.h ++++ b/include/time.h +@@ -21,6 +21,7 @@ libc_hidden_proto (strptime) + + extern __typeof (clock_getres) __clock_getres; + extern __typeof (clock_gettime) __clock_gettime; ++libc_hidden_proto (__clock_gettime) + extern __typeof (clock_settime) __clock_settime; + extern __typeof (clock_nanosleep) __clock_nanosleep; + extern __typeof (clock_getcpuclockid) __clock_getcpuclockid; +Index: b/rt/clock_getcpuclockid.c +=================================================================== +--- a/rt/clock_getcpuclockid.c ++++ b/rt/clock_getcpuclockid.c +@@ -21,7 +21,7 @@ + #include + + int +-clock_getcpuclockid (pid_t pid, clockid_t *clock_id) ++__clock_getcpuclockid (pid_t pid, clockid_t *clock_id) + { + /* We don't allow any process ID but our own. 
*/ + if (pid != 0 && pid != getpid ()) +@@ -37,4 +37,4 @@ clock_getcpuclockid (pid_t pid, clockid_ + return ENOENT; + #endif + } +-strong_alias (clock_getcpuclockid, __clock_getcpuclockid) ++weak_alias (__clock_getcpuclockid, clock_getcpuclockid) +Index: b/rt/clock_getres.c +=================================================================== +--- a/rt/clock_getres.c ++++ b/rt/clock_getres.c +@@ -21,10 +21,10 @@ + + /* Get resolution of clock. */ + int +-clock_getres (clockid_t clock_id, struct timespec *res) ++__clock_getres (clockid_t clock_id, struct timespec *res) + { + __set_errno (ENOSYS); + return -1; + } +-strong_alias (clock_getres, __clock_getres) ++weak_alias (__clock_getres, clock_getres) + stub_warning (clock_getres) +Index: b/rt/clock_gettime.c +=================================================================== +--- a/rt/clock_gettime.c ++++ b/rt/clock_gettime.c +@@ -21,10 +21,11 @@ + + /* Get current value of CLOCK and store it in TP. */ + int +-clock_gettime (clockid_t clock_id, struct timespec *tp) ++__clock_gettime (clockid_t clock_id, struct timespec *tp) + { + __set_errno (ENOSYS); + return -1; + } +-strong_alias (clock_gettime, __clock_gettime) ++weak_alias (__clock_gettime, clock_gettime) ++libc_hidden_def (__clock_gettime) + stub_warning (clock_gettime) +Index: b/rt/clock_nanosleep.c +=================================================================== +--- a/rt/clock_nanosleep.c ++++ b/rt/clock_nanosleep.c +@@ -20,8 +20,8 @@ + #include + + int +-clock_nanosleep (clockid_t clock_id, int flags, const struct timespec *req, +- struct timespec *rem) ++__clock_nanosleep (clockid_t clock_id, int flags, const struct timespec *req, ++ struct timespec *rem) + { + if (__builtin_expect (req->tv_nsec, 0) < 0 + || __builtin_expect (req->tv_nsec, 0) >= 1000000000) +@@ -33,5 +33,5 @@ clock_nanosleep (clockid_t clock_id, int + /* Not implemented. 
*/ + return ENOSYS; + } +-strong_alias (clock_nanosleep, __clock_nanosleep) ++weak_alias (__clock_nanosleep, clock_nanosleep) + stub_warning (clock_nanosleep) +Index: b/rt/clock_settime.c +=================================================================== +--- a/rt/clock_settime.c ++++ b/rt/clock_settime.c +@@ -21,10 +21,10 @@ + + /* Set CLOCK to value TP. */ + int +-clock_settime (clockid_t clock_id, const struct timespec *tp) ++__clock_settime (clockid_t clock_id, const struct timespec *tp) + { + __set_errno (ENOSYS); + return -1; + } +-strong_alias (clock_settime, __clock_settime) ++weak_alias (__clock_settime, clock_settime) + stub_warning (clock_settime) +Index: b/sysdeps/posix/clock_getres.c +=================================================================== +--- a/sysdeps/posix/clock_getres.c ++++ b/sysdeps/posix/clock_getres.c +@@ -76,7 +76,7 @@ realtime_getres (struct timespec *res) + + /* Get resolution of clock. */ + int +-clock_getres (clockid_t clock_id, struct timespec *res) ++__clock_getres (clockid_t clock_id, struct timespec *res) + { + int retval = -1; + +@@ -115,4 +115,4 @@ clock_getres (clockid_t clock_id, struct + + return retval; + } +-strong_alias (clock_getres, __clock_getres) ++weak_alias (__clock_getres, clock_getres) +Index: b/sysdeps/unix/clock_gettime.c +=================================================================== +--- a/sysdeps/unix/clock_gettime.c ++++ b/sysdeps/unix/clock_gettime.c +@@ -89,7 +89,7 @@ realtime_gettime (struct timespec *tp) + + /* Get current value of CLOCK and store it in TP. 
*/ + int +-clock_gettime (clockid_t clock_id, struct timespec *tp) ++__clock_gettime (clockid_t clock_id, struct timespec *tp) + { + int retval = -1; + +@@ -132,4 +132,5 @@ clock_gettime (clockid_t clock_id, struc + + return retval; + } +-strong_alias (clock_gettime, __clock_gettime) ++weak_alias (__clock_gettime, clock_gettime) ++libc_hidden_def (__clock_gettime) +Index: b/sysdeps/unix/clock_nanosleep.c +=================================================================== +--- a/sysdeps/unix/clock_nanosleep.c ++++ b/sysdeps/unix/clock_nanosleep.c +@@ -39,8 +39,8 @@ + /* This implementation assumes that these is only a `nanosleep' system + call. So we have to remap all other activities. */ + int +-clock_nanosleep (clockid_t clock_id, int flags, const struct timespec *req, +- struct timespec *rem) ++__clock_nanosleep (clockid_t clock_id, int flags, const struct timespec *req, ++ struct timespec *rem) + { + struct timespec now; + +@@ -98,4 +98,4 @@ clock_nanosleep (clockid_t clock_id, int + + return __builtin_expect (nanosleep (req, rem), 0) ? errno : 0; + } +-strong_alias (clock_nanosleep, __clock_nanosleep) ++weak_alias (__clock_nanosleep, clock_nanosleep) +Index: b/sysdeps/unix/clock_settime.c +=================================================================== +--- a/sysdeps/unix/clock_settime.c ++++ b/sysdeps/unix/clock_settime.c +@@ -72,7 +72,7 @@ hp_timing_settime (clockid_t clock_id, c + + /* Set CLOCK to value TP. 
*/ + int +-clock_settime (clockid_t clock_id, const struct timespec *tp) ++__clock_settime (clockid_t clock_id, const struct timespec *tp) + { + int retval; + +@@ -124,4 +124,4 @@ clock_settime (clockid_t clock_id, const + + return retval; + } +-strong_alias (clock_settime, __clock_settime) ++weak_alias (__clock_settime, clock_settime) +Index: b/sysdeps/unix/sysv/linux/clock_getcpuclockid.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/clock_getcpuclockid.c ++++ b/sysdeps/unix/sysv/linux/clock_getcpuclockid.c +@@ -23,7 +23,7 @@ + #include "kernel-posix-cpu-timers.h" + + int +-clock_getcpuclockid (pid_t pid, clockid_t *clock_id) ++__clock_getcpuclockid (pid_t pid, clockid_t *clock_id) + { + /* The clockid_t value is a simple computation from the PID. + But we do a clock_getres call to validate it. */ +@@ -46,4 +46,4 @@ clock_getcpuclockid (pid_t pid, clockid_ + else + return INTERNAL_SYSCALL_ERRNO (r, err); + } +-strong_alias (clock_getcpuclockid, __clock_getcpuclockid) ++weak_alias (__clock_getcpuclockid, clock_getcpuclockid) +Index: b/sysdeps/unix/sysv/linux/clock_nanosleep.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/clock_nanosleep.c ++++ b/sysdeps/unix/sysv/linux/clock_nanosleep.c +@@ -26,8 +26,8 @@ + /* We can simply use the syscall. The CPU clocks are not supported + with this function. */ + int +-clock_nanosleep (clockid_t clock_id, int flags, const struct timespec *req, +- struct timespec *rem) ++__clock_nanosleep (clockid_t clock_id, int flags, const struct timespec *req, ++ struct timespec *rem) + { + INTERNAL_SYSCALL_DECL (err); + int r; +@@ -52,4 +52,4 @@ clock_nanosleep (clockid_t clock_id, int + return (INTERNAL_SYSCALL_ERROR_P (r, err) + ? 
INTERNAL_SYSCALL_ERRNO (r, err) : 0); + } +-strong_alias (clock_nanosleep, __clock_nanosleep) ++weak_alias (__clock_nanosleep, clock_nanosleep) diff --git a/SOURCES/glibc-rh1228114-2.patch b/SOURCES/glibc-rh1228114-2.patch new file mode 100644 index 0000000..f9d1876 --- /dev/null +++ b/SOURCES/glibc-rh1228114-2.patch @@ -0,0 +1,1552 @@ +commit cf0bd2f73bd65beab613865bba567d7787836888 +Author: Florian Weimer +Date: Tue Feb 28 15:28:45 2017 +0100 + + sunrpc: Improvements for UDP client timeout handling [BZ #20257] + + This commit fixes various aspects in the UDP client timeout handling. + Timeouts are now applied in a more consistent fashion. Discarded UDP + packets no longer prevent the timeout from happening at all. + +Index: b/inet/deadline.c +=================================================================== +--- /dev/null ++++ b/inet/deadline.c +@@ -0,0 +1,122 @@ ++/* Computing deadlines for timeouts. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct deadline_current_time internal_function ++__deadline_current_time (void) ++{ ++ struct deadline_current_time result; ++ if (__clock_gettime (CLOCK_MONOTONIC, &result.current) != 0) ++ { ++ struct timeval current_tv; ++ if (__gettimeofday (&current_tv, NULL) == 0) ++ __libc_fatal ("Fatal error: gettimeofday system call failed\n"); ++ result.current.tv_sec = current_tv.tv_sec; ++ result.current.tv_nsec = current_tv.tv_usec * 1000; ++ } ++ assert (result.current.tv_sec >= 0); ++ return result; ++} ++ ++/* A special deadline value for which __deadline_is_infinite is ++ true. */ ++static inline struct deadline ++infinite_deadline (void) ++{ ++ return (struct deadline) { { -1, -1 } }; ++} ++ ++struct deadline internal_function ++__deadline_from_timeval (struct deadline_current_time current, ++ struct timeval tv) ++{ ++ assert (__is_timeval_valid_timeout (tv)); ++ ++ /* Compute second-based deadline. Perform the addition in ++ uintmax_t, which is unsigned, to simplify overflow detection. */ ++ uintmax_t sec = current.current.tv_sec; ++ sec += tv.tv_sec; ++ if (sec < (uintmax_t) tv.tv_sec) ++ return infinite_deadline (); ++ ++ /* Compute nanosecond deadline. */ ++ int nsec = current.current.tv_nsec + tv.tv_usec * 1000; ++ if (nsec >= 1000 * 1000 * 1000) ++ { ++ /* Carry nanosecond overflow to seconds. */ ++ nsec -= 1000 * 1000 * 1000; ++ if (sec + 1 < sec) ++ return infinite_deadline (); ++ ++sec; ++ } ++ /* This uses a GCC extension, otherwise these casts for detecting ++ overflow would not be defined. 
*/ ++ if ((time_t) sec < 0 || sec != (uintmax_t) (time_t) sec) ++ return infinite_deadline (); ++ ++ return (struct deadline) { { sec, nsec } }; ++} ++ ++int internal_function ++__deadline_to_ms (struct deadline_current_time current, ++ struct deadline deadline) ++{ ++ if (__deadline_is_infinite (deadline)) ++ return INT_MAX; ++ ++ if (current.current.tv_sec > deadline.absolute.tv_sec ++ || (current.current.tv_sec == deadline.absolute.tv_sec ++ && current.current.tv_nsec >= deadline.absolute.tv_nsec)) ++ return 0; ++ time_t sec = deadline.absolute.tv_sec - current.current.tv_sec; ++ if (sec >= INT_MAX) ++ /* This value will overflow below. */ ++ return INT_MAX; ++ int nsec = deadline.absolute.tv_nsec - current.current.tv_nsec; ++ if (nsec < 0) ++ { ++ /* Borrow from the seconds field. */ ++ assert (sec > 0); ++ --sec; ++ nsec += 1000 * 1000 * 1000; ++ } ++ ++ /* Prepare for rounding up to milliseconds. */ ++ nsec += 999999; ++ if (nsec > 1000 * 1000 * 1000) ++ { ++ assert (sec < INT_MAX); ++ ++sec; ++ nsec -= 1000 * 1000 * 1000; ++ } ++ ++ unsigned int msec = nsec / (1000 * 1000); ++ if (sec > INT_MAX / 1000) ++ return INT_MAX; ++ msec += sec * 1000; ++ if (msec > INT_MAX) ++ return INT_MAX; ++ return msec; ++} +Index: b/inet/tst-deadline.c +=================================================================== +--- /dev/null ++++ b/inet/tst-deadline.c +@@ -0,0 +1,188 @@ ++/* Tests for computing deadlines for timeouts. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* Find the maximum value which can be represented in a time_t. */ ++static time_t ++time_t_max (void) ++{ ++ _Static_assert (0 > (time_t) -1, "time_t is signed"); ++ uintmax_t current = 1; ++ while (true) ++ { ++ uintmax_t next = current * 2; ++ /* This cannot happen because time_t is signed. */ ++ TEST_VERIFY_EXIT (next > current); ++ ++next; ++ if ((time_t) next < 0 || next != (uintmax_t) (time_t) next) ++ /* Value cannot be represented in time_t. Return the previous ++ value. */ ++ return current; ++ current = next; ++ } ++} ++ ++static int ++do_test (void) ++{ ++ { ++ struct deadline_current_time current_time = __deadline_current_time (); ++ TEST_VERIFY (current_time.current.tv_sec >= 0); ++ current_time = __deadline_current_time (); ++ /* Due to CLOCK_MONOTONIC, either seconds or nanoseconds are ++ greater than zero. This is also true for the gettimeofday ++ fallback. */ ++ TEST_VERIFY (current_time.current.tv_sec >= 0); ++ TEST_VERIFY (current_time.current.tv_sec > 0 ++ || current_time.current.tv_nsec > 0); ++ } ++ ++ /* Check basic computations of deadlines. 
*/ ++ struct deadline_current_time current_time = { { 1, 123456789 } }; ++ struct deadline deadline = __deadline_from_timeval ++ (current_time, (struct timeval) { 0, 1 }); ++ TEST_VERIFY (deadline.absolute.tv_sec == 1); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 123457789); ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 1); ++ ++ deadline = __deadline_from_timeval ++ (current_time, ((struct timeval) { 0, 2 })); ++ TEST_VERIFY (deadline.absolute.tv_sec == 1); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 123458789); ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 1); ++ ++ deadline = __deadline_from_timeval ++ (current_time, ((struct timeval) { 1, 0 })); ++ TEST_VERIFY (deadline.absolute.tv_sec == 2); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 123456789); ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 1000); ++ ++ /* Check if timeouts are correctly rounded up to the next ++ millisecond. */ ++ for (int i = 0; i < 999999; ++i) ++ { ++ ++current_time.current.tv_nsec; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 1000); ++ } ++ ++ /* A full millisecond has elapsed, so the time to the deadline is ++ now less than 1000. */ ++ ++current_time.current.tv_nsec; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 999); ++ ++ /* Check __deadline_to_ms carry-over. */ ++ current_time = (struct deadline_current_time) { { 9, 123456789 } }; ++ deadline = (struct deadline) { { 10, 122456789 } }; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 999); ++ deadline = (struct deadline) { { 10, 122456790 } }; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 1000); ++ deadline = (struct deadline) { { 10, 123456788 } }; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 1000); ++ deadline = (struct deadline) { { 10, 123456789 } }; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 1000); ++ ++ /* Check __deadline_to_ms overflow. 
*/ ++ deadline = (struct deadline) { { INT_MAX - 1, 1 } }; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == INT_MAX); ++ ++ /* Check __deadline_to_ms for elapsed deadlines. */ ++ current_time = (struct deadline_current_time) { { 9, 123456789 } }; ++ deadline.absolute = current_time.current; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 0); ++ current_time = (struct deadline_current_time) { { 9, 123456790 } }; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 0); ++ current_time = (struct deadline_current_time) { { 10, 0 } }; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 0); ++ current_time = (struct deadline_current_time) { { 10, 123456788 } }; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 0); ++ current_time = (struct deadline_current_time) { { 10, 123456789 } }; ++ TEST_VERIFY (__deadline_to_ms (current_time, deadline) == 0); ++ ++ /* Check carry-over in __deadline_from_timeval. */ ++ current_time = (struct deadline_current_time) { { 9, 998000001 } }; ++ for (int i = 0; i < 2000; ++i) ++ { ++ deadline = __deadline_from_timeval ++ (current_time, (struct timeval) { 1, i }); ++ TEST_VERIFY (deadline.absolute.tv_sec == 10); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 998000001 + i * 1000); ++ } ++ for (int i = 2000; i < 3000; ++i) ++ { ++ deadline = __deadline_from_timeval ++ (current_time, (struct timeval) { 2, i }); ++ TEST_VERIFY (deadline.absolute.tv_sec == 12); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 1 + (i - 2000) * 1000); ++ } ++ ++ /* Check infinite deadlines. 
*/ ++ deadline = __deadline_from_timeval ++ ((struct deadline_current_time) { { 0, 1000 * 1000 * 1000 - 1000 } }, ++ (struct timeval) { time_t_max (), 1 }); ++ TEST_VERIFY (__deadline_is_infinite (deadline)); ++ deadline = __deadline_from_timeval ++ ((struct deadline_current_time) { { 0, 1000 * 1000 * 1000 - 1001 } }, ++ (struct timeval) { time_t_max (), 1 }); ++ TEST_VERIFY (!__deadline_is_infinite (deadline)); ++ deadline = __deadline_from_timeval ++ ((struct deadline_current_time) ++ { { time_t_max (), 1000 * 1000 * 1000 - 1000 } }, ++ (struct timeval) { 0, 1 }); ++ TEST_VERIFY (__deadline_is_infinite (deadline)); ++ deadline = __deadline_from_timeval ++ ((struct deadline_current_time) ++ { { time_t_max () / 2 + 1, 0 } }, ++ (struct timeval) { time_t_max () / 2 + 1, 0 }); ++ TEST_VERIFY (__deadline_is_infinite (deadline)); ++ ++ /* Check __deadline_first behavior. */ ++ deadline = __deadline_first ++ ((struct deadline) { { 1, 2 } }, ++ (struct deadline) { { 1, 3 } }); ++ TEST_VERIFY (deadline.absolute.tv_sec == 1); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 2); ++ deadline = __deadline_first ++ ((struct deadline) { { 1, 3 } }, ++ (struct deadline) { { 1, 2 } }); ++ TEST_VERIFY (deadline.absolute.tv_sec == 1); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 2); ++ deadline = __deadline_first ++ ((struct deadline) { { 1, 2 } }, ++ (struct deadline) { { 2, 1 } }); ++ TEST_VERIFY (deadline.absolute.tv_sec == 1); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 2); ++ deadline = __deadline_first ++ ((struct deadline) { { 1, 2 } }, ++ (struct deadline) { { 2, 4 } }); ++ TEST_VERIFY (deadline.absolute.tv_sec == 1); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 2); ++ deadline = __deadline_first ++ ((struct deadline) { { 2, 4 } }, ++ (struct deadline) { { 1, 2 } }); ++ TEST_VERIFY (deadline.absolute.tv_sec == 1); ++ TEST_VERIFY (deadline.absolute.tv_nsec == 2); ++ ++ return 0; ++} ++ ++#include +Index: b/sunrpc/Makefile 
+=================================================================== +--- a/sunrpc/Makefile ++++ b/sunrpc/Makefile +@@ -96,11 +96,13 @@ others += rpcgen + + all: # Make this the default target; it will be defined in Rules. + +-tests = tst-xdrmem tst-xdrmem2 ++tests = tst-xdrmem tst-xdrmem2 tst-udp-timeout \ ++ tst-udp-nonblocking + xtests := tst-getmyaddr + + ifeq ($(have-thread-library),yes) + xtests += thrsvc ++tests += tst-udp-garbage + endif + + headers += $(rpcsvc:%.x=rpcsvc/%.h) +@@ -225,3 +227,8 @@ endif + endif + + $(objpfx)thrsvc: $(common-objpfx)linkobj/libc.so $(shared-thread-library) ++ ++$(objpfx)tst-udp-timeout: $(common-objpfx)linkobj/libc.so ++$(objpfx)tst-udp-nonblocking: $(common-objpfx)linkobj/libc.so ++$(objpfx)tst-udp-garbage: \ ++ $(common-objpfx)linkobj/libc.so $(shared-thread-library) +Index: b/sunrpc/clnt_udp.c +=================================================================== +--- a/sunrpc/clnt_udp.c ++++ b/sunrpc/clnt_udp.c +@@ -54,6 +54,7 @@ + #endif + + #include ++#include + + extern u_long _create_xid (void); + +@@ -79,7 +80,9 @@ static const struct clnt_ops udp_ops = + }; + + /* +- * Private data kept per client handle ++ * Private data kept per client handle. This private struct is ++ * unfortunately part of the ABI; ypbind contains a copy of it and ++ * accesses it through CLIENT::cl_private field. 
+ */ + struct cu_data + { +@@ -309,28 +312,38 @@ clntudp_call (cl, proc, xargs, argsp, xr + int inlen; + socklen_t fromlen; + struct pollfd fd; +- int milliseconds = (cu->cu_wait.tv_sec * 1000) + +- (cu->cu_wait.tv_usec / 1000); + struct sockaddr_in from; + struct rpc_msg reply_msg; + XDR reply_xdrs; +- struct timeval time_waited; + bool_t ok; + int nrefreshes = 2; /* number of times to refresh cred */ +- struct timeval timeout; + int anyup; /* any network interface up */ + +- if (cu->cu_total.tv_usec == -1) +- { +- timeout = utimeout; /* use supplied timeout */ +- } +- else +- { +- timeout = cu->cu_total; /* use default timeout */ +- } ++ struct deadline_current_time current_time = __deadline_current_time (); ++ struct deadline total_deadline; /* Determined once by overall timeout. */ ++ struct deadline response_deadline; /* Determined anew for each query. */ ++ ++ /* Choose the timeout value. For non-sending usage (xargs == NULL), ++ the total deadline does not matter, only cu->cu_wait is used ++ below. */ ++ if (xargs != NULL) ++ { ++ struct timeval tv; ++ if (cu->cu_total.tv_usec == -1) ++ /* Use supplied timeout. */ ++ tv = utimeout; ++ else ++ /* Use default timeout. */ ++ tv = cu->cu_total; ++ if (!__is_timeval_valid_timeout (tv)) ++ return (cu->cu_error.re_status = RPC_TIMEDOUT); ++ total_deadline = __deadline_from_timeval (current_time, tv); ++ } ++ ++ /* Guard against bad timeout specification. */ ++ if (!__is_timeval_valid_timeout (cu->cu_wait)) ++ return (cu->cu_error.re_status = RPC_TIMEDOUT); + +- time_waited.tv_sec = 0; +- time_waited.tv_usec = 0; + call_again: + xdrs = &(cu->cu_outxdrs); + if (xargs == NULL) +@@ -356,27 +369,46 @@ send_again: + return (cu->cu_error.re_status = RPC_CANTSEND); + } + +- /* +- * Hack to provide rpc-based message passing +- */ +- if (timeout.tv_sec == 0 && timeout.tv_usec == 0) +- { +- return (cu->cu_error.re_status = RPC_TIMEDOUT); +- } ++ /* sendto may have blocked, so recompute the current time. 
*/ ++ current_time = __deadline_current_time (); + get_reply: +- /* +- * sub-optimal code appears here because we have +- * some clock time to spare while the packets are in flight. +- * (We assume that this is actually only executed once.) +- */ ++ response_deadline = __deadline_from_timeval (current_time, cu->cu_wait); ++ + reply_msg.acpted_rply.ar_verf = _null_auth; + reply_msg.acpted_rply.ar_results.where = resultsp; + reply_msg.acpted_rply.ar_results.proc = xresults; + fd.fd = cu->cu_sock; + fd.events = POLLIN; + anyup = 0; ++ ++ /* Per-response retry loop. current_time must be up-to-date at the ++ top of the loop. */ + for (;;) + { ++ int milliseconds; ++ if (xargs != NULL) ++ { ++ if (__deadline_elapsed (current_time, total_deadline)) ++ /* Overall timeout expired. */ ++ return (cu->cu_error.re_status = RPC_TIMEDOUT); ++ milliseconds = __deadline_to_ms ++ (current_time, __deadline_first (total_deadline, ++ response_deadline)); ++ if (milliseconds == 0) ++ /* Per-query timeout expired. */ ++ goto send_again; ++ } ++ else ++ { ++ /* xargs == NULL. Collect a response without sending a ++ query. In this mode, we need to ignore the total ++ deadline. */ ++ milliseconds = __deadline_to_ms (current_time, response_deadline); ++ if (milliseconds == 0) ++ /* Cannot send again, so bail out. 
*/ ++ return (cu->cu_error.re_status = RPC_CANTSEND); ++ } ++ + switch (__poll (&fd, 1, milliseconds)) + { + +@@ -387,27 +419,10 @@ send_again: + if (!anyup) + return (cu->cu_error.re_status = RPC_CANTRECV); + } +- +- time_waited.tv_sec += cu->cu_wait.tv_sec; +- time_waited.tv_usec += cu->cu_wait.tv_usec; +- while (time_waited.tv_usec >= 1000000) +- { +- time_waited.tv_sec++; +- time_waited.tv_usec -= 1000000; +- } +- if ((time_waited.tv_sec < timeout.tv_sec) || +- ((time_waited.tv_sec == timeout.tv_sec) && +- (time_waited.tv_usec < timeout.tv_usec))) +- goto send_again; +- return (cu->cu_error.re_status = RPC_TIMEDOUT); +- +- /* +- * buggy in other cases because time_waited is not being +- * updated. +- */ ++ goto next_response; + case -1: + if (errno == EINTR) +- continue; ++ goto next_response; + cu->cu_error.re_errno = errno; + return (cu->cu_error.re_status = RPC_CANTRECV); + } +@@ -463,20 +478,22 @@ send_again: + if (inlen < 0) + { + if (errno == EWOULDBLOCK) +- continue; ++ goto next_response; + cu->cu_error.re_errno = errno; + return (cu->cu_error.re_status = RPC_CANTRECV); + } +- if (inlen < 4) +- continue; ++ /* Accept the response if the packet is sufficiently long and ++ the transaction ID matches the query (if available). */ ++ if (inlen >= 4 ++ && (xargs == NULL ++ || memcmp (cu->cu_inbuf, cu->cu_outbuf, ++ sizeof (u_int32_t)) == 0)) ++ break; + +- /* see if reply transaction id matches sent id. +- Don't do this if we only wait for a replay */ +- if (xargs != NULL +- && memcmp (cu->cu_inbuf, cu->cu_outbuf, sizeof (u_int32_t)) != 0) +- continue; +- /* we now assume we have the proper reply */ +- break; ++ next_response: ++ /* Update the current time because poll and recvmsg waited for ++ an unknown time. 
*/ ++ current_time = __deadline_current_time (); + } + + /* +Index: b/sunrpc/tst-udp-garbage.c +=================================================================== +--- /dev/null ++++ b/sunrpc/tst-udp-garbage.c +@@ -0,0 +1,104 @@ ++/* Test that garbage packets do not affect timeout handling. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Descriptor for the server UDP socket. */ ++static int server_fd; ++ ++static void * ++garbage_sender_thread (void *unused) ++{ ++ while (true) ++ { ++ struct sockaddr_storage sa; ++ socklen_t salen = sizeof (sa); ++ char buf[1]; ++ if (recvfrom (server_fd, buf, sizeof (buf), 0, ++ (struct sockaddr *) &sa, &salen) < 0) ++ FAIL_EXIT1 ("recvfrom: %m"); ++ ++ /* Send garbage packets indefinitely. */ ++ buf[0] = 0; ++ while (true) ++ { ++ /* sendto can fail if the client closed the socket. */ ++ if (sendto (server_fd, buf, sizeof (buf), 0, ++ (struct sockaddr *) &sa, salen) < 0) ++ break; ++ ++ /* Wait a bit, to avoid burning too many CPU cycles in a ++ tight loop. The wait period must be much shorter than ++ the client timeouts configured below. 
*/ ++ usleep (50 * 1000); ++ } ++ } ++} ++ ++static int ++do_test (void) ++{ ++ support_become_root (); ++ support_enter_network_namespace (); ++ ++ server_fd = xsocket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, IPPROTO_UDP); ++ struct sockaddr_in server_address = ++ { ++ .sin_family = AF_INET, ++ .sin_addr.s_addr = htonl (INADDR_LOOPBACK), ++ }; ++ xbind (server_fd, ++ (struct sockaddr *) &server_address, sizeof (server_address)); ++ { ++ socklen_t sinlen = sizeof (server_address); ++ xgetsockname (server_fd, (struct sockaddr *) &server_address, &sinlen); ++ TEST_VERIFY (sizeof (server_address) == sinlen); ++ } ++ ++ /* Garbage packet source. */ ++ xpthread_detach (xpthread_create (NULL, garbage_sender_thread, NULL)); ++ ++ /* Test client. Use an arbitrary timeout of one second, which is ++ much longer than the garbage packet interval, but still ++ reasonably short, so that the test completes quickly. */ ++ int client_fd = RPC_ANYSOCK; ++ CLIENT *clnt = clntudp_create (&server_address, ++ 1, 2, /* Arbitrary RPC endpoint numbers. */ ++ (struct timeval) { 1, 0 }, ++ &client_fd); ++ if (clnt == NULL) ++ FAIL_EXIT1 ("clntudp_create: %m"); ++ ++ TEST_VERIFY (clnt_call (clnt, 3, /* Arbitrary RPC procedure number. */ ++ (xdrproc_t) xdr_void, NULL, ++ (xdrproc_t) xdr_void, NULL, ++ ((struct timeval) { 1, 0 }))); ++ ++ return 0; ++} ++ ++#include +Index: b/sunrpc/tst-udp-nonblocking.c +=================================================================== +--- /dev/null ++++ b/sunrpc/tst-udp-nonblocking.c +@@ -0,0 +1,333 @@ ++/* Test non-blocking use of the UDP client. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Test data serialization and deserialization. */ ++ ++struct test_query ++{ ++ uint32_t a; ++ uint32_t b; ++ uint32_t timeout_ms; ++}; ++ ++static bool_t ++xdr_test_query (XDR *xdrs, void *data, ...) ++{ ++ struct test_query *p = data; ++ return xdr_uint32_t (xdrs, &p->a) ++ && xdr_uint32_t (xdrs, &p->b) ++ && xdr_uint32_t (xdrs, &p->timeout_ms); ++} ++ ++struct test_response ++{ ++ uint32_t server_id; ++ uint32_t seq; ++ uint32_t sum; ++}; ++ ++static bool_t ++xdr_test_response (XDR *xdrs, void *data, ...) ++{ ++ struct test_response *p = data; ++ return xdr_uint32_t (xdrs, &p->server_id) ++ && xdr_uint32_t (xdrs, &p->seq) ++ && xdr_uint32_t (xdrs, &p->sum); ++} ++ ++/* Implementation of the test server. */ ++ ++enum ++ { ++ /* Number of test servers to run. */ ++ SERVER_COUNT = 3, ++ ++ /* RPC parameters, chosen at random. */ ++ PROGNUM = 8242, ++ VERSNUM = 19654, ++ ++ /* Main RPC operation. */ ++ PROC_ADD = 1, ++ ++ /* Request process termination. */ ++ PROC_EXIT, ++ ++ /* Special exit status to mark successful processing. */ ++ EXIT_MARKER = 55, ++ }; ++ ++/* Set by the parent process to tell test servers apart. */ ++static int server_id; ++ ++/* Implementation of the test server. */ ++static void ++server_dispatch (struct svc_req *request, SVCXPRT *transport) ++{ ++ /* Query sequence number. 
*/ ++ static uint32_t seq = 0; ++ ++seq; ++ static bool proc_add_seen; ++ ++ if (test_verbose) ++ printf ("info: server_dispatch server_id=%d seq=%u rq_proc=%lu\n", ++ server_id, seq, request->rq_proc); ++ ++ switch (request->rq_proc) ++ { ++ case PROC_ADD: ++ { ++ struct test_query query; ++ memset (&query, 0xc0, sizeof (query)); ++ TEST_VERIFY_EXIT ++ (svc_getargs (transport, xdr_test_query, ++ (void *) &query)); ++ ++ if (test_verbose) ++ printf (" a=%u b=%u timeout_ms=%u\n", ++ query.a, query.b, query.timeout_ms); ++ ++ usleep (query.timeout_ms * 1000); ++ ++ struct test_response response = ++ { ++ .server_id = server_id, ++ .seq = seq, ++ .sum = query.a + query.b, ++ }; ++ TEST_VERIFY (svc_sendreply (transport, xdr_test_response, ++ (void *) &response)); ++ if (test_verbose) ++ printf (" server id %d response seq=%u sent\n", server_id, seq); ++ proc_add_seen = true; ++ } ++ break; ++ ++ case PROC_EXIT: ++ TEST_VERIFY (proc_add_seen); ++ TEST_VERIFY (svc_sendreply (transport, (xdrproc_t) xdr_void, NULL)); ++ _exit (EXIT_MARKER); ++ break; ++ ++ default: ++ FAIL_EXIT1 ("invalid rq_proc value: %lu", request->rq_proc); ++ break; ++ } ++} ++ ++/* Return the number of seconds since an arbitrary point in time. */ ++static double ++get_ticks (void) ++{ ++ { ++ struct timespec ts; ++ if (clock_gettime (CLOCK_MONOTONIC, &ts) == 0) ++ return ts.tv_sec + ts.tv_nsec * 1e-9; ++ } ++ { ++ struct timeval tv; ++ TEST_VERIFY_EXIT (gettimeofday (&tv, NULL) == 0); ++ return tv.tv_sec + tv.tv_usec * 1e-6; ++ } ++} ++ ++static int ++do_test (void) ++{ ++ support_become_root (); ++ support_enter_network_namespace (); ++ ++ /* Information about the test servers. */ ++ struct ++ { ++ SVCXPRT *transport; ++ struct sockaddr_in address; ++ pid_t pid; ++ uint32_t xid; ++ } servers[SERVER_COUNT]; ++ ++ /* Spawn the test servers.
*/ ++ for (int i = 0; i < SERVER_COUNT; ++i) ++ { ++ servers[i].transport = svcudp_create (RPC_ANYSOCK); ++ TEST_VERIFY_EXIT (servers[i].transport != NULL); ++ servers[i].address = (struct sockaddr_in) ++ { ++ .sin_family = AF_INET, ++ .sin_addr.s_addr = htonl (INADDR_LOOPBACK), ++ .sin_port = htons (servers[i].transport->xp_port), ++ }; ++ servers[i].xid = 0xabcd0101 + i; ++ if (test_verbose) ++ printf ("info: setting up server %d xid=%x on port %d\n", ++ i, servers[i].xid, servers[i].transport->xp_port); ++ ++ server_id = i; ++ servers[i].pid = xfork (); ++ if (servers[i].pid == 0) ++ { ++ TEST_VERIFY (svc_register (servers[i].transport, ++ PROGNUM, VERSNUM, server_dispatch, 0)); ++ svc_run (); ++ FAIL_EXIT1 ("supposed to be unreachable"); ++ } ++ /* We need to close the socket so that we do not accidentally ++ consume the request. */ ++ TEST_VERIFY (close (servers[i].transport->xp_sock) == 0); ++ } ++ ++ ++ /* The following code mirrors what ypbind does. */ ++ ++ /* Copied from clnt_udp.c (like ypbind). */ ++ struct cu_data ++ { ++ int cu_sock; ++ bool_t cu_closeit; ++ struct sockaddr_in cu_raddr; ++ int cu_rlen; ++ struct timeval cu_wait; ++ struct timeval cu_total; ++ struct rpc_err cu_error; ++ XDR cu_outxdrs; ++ u_int cu_xdrpos; ++ u_int cu_sendsz; ++ char *cu_outbuf; ++ u_int cu_recvsz; ++ char cu_inbuf[1]; ++ }; ++ ++ int client_socket = xsocket (AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); ++ CLIENT *clnt = clntudp_create (&servers[0].address, PROGNUM, VERSNUM, ++ /* 5 seconds per-response timeout. */ ++ ((struct timeval) { 5, 0 }), ++ &client_socket); ++ TEST_VERIFY (clnt != NULL); ++ clnt->cl_auth = authunix_create_default (); ++ { ++ struct timeval zero = { 0, 0 }; ++ TEST_VERIFY (clnt_control (clnt, CLSET_TIMEOUT, (void *) &zero)); ++ } ++ ++ /* Poke at internal data structures (like ypbind). */ ++ struct cu_data *cu = (struct cu_data *) clnt->cl_private; ++ ++ /* Send a ping to each server. 
*/ ++ double before_pings = get_ticks (); ++ for (int i = 0; i < SERVER_COUNT; ++i) ++ { ++ if (test_verbose) ++ printf ("info: sending server %d ping\n", i); ++ /* Reset the xid because it is changed by each invocation of ++ clnt_call. Subtract one to compensate for the xid update ++ during the call. */ ++ *((u_int32_t *) (cu->cu_outbuf)) = servers[i].xid - 1; ++ cu->cu_raddr = servers[i].address; ++ ++ struct test_query query = { .a = 100, .b = i + 1 }; ++ if (i == 1) ++ /* Shorter timeout to prefer this server. These timeouts must ++ be much shorter than the 5-second per-response timeout ++ configured with clntudp_create. */ ++ query.timeout_ms = 700; ++ else ++ query.timeout_ms = 1400; ++ struct test_response response = { 0 }; ++ /* NB: Do not check the return value. The server reply will ++ prove that the call worked. */ ++ double before_one_ping = get_ticks (); ++ clnt_call (clnt, PROC_ADD, ++ xdr_test_query, (void *) &query, ++ xdr_test_response, (void *) &response, ++ ((struct timeval) { 0, 0 })); ++ double after_one_ping = get_ticks (); ++ if (test_verbose) ++ printf ("info: non-blocking send took %f seconds\n", ++ after_one_ping - before_one_ping); ++ /* clnt_call should return immediately. Accept some delay in ++ case the process is descheduled. */ ++ TEST_VERIFY (after_one_ping - before_one_ping < 0.3); ++ } ++ ++ /* Collect the non-blocking response. */ ++ if (test_verbose) ++ printf ("info: collecting response\n"); ++ struct test_response response = { 0 }; ++ TEST_VERIFY ++ (clnt_call (clnt, PROC_ADD, NULL, NULL, ++ xdr_test_response, (void *) &response, ++ ((struct timeval) { 0, 0 })) == RPC_SUCCESS); ++ double after_pings = get_ticks (); ++ if (test_verbose) ++ printf ("info: send/receive took %f seconds\n", ++ after_pings - before_pings); ++ /* Expected timeout is 0.7 seconds. 
*/ ++ TEST_VERIFY (0.7 <= after_pings - before_pings); ++ TEST_VERIFY (after_pings - before_pings < 1.2); ++ ++ uint32_t xid; ++ memcpy (&xid, &cu->cu_inbuf, sizeof (xid)); ++ if (test_verbose) ++ printf ("info: non-blocking response: xid=%x server_id=%u seq=%u sum=%u\n", ++ xid, response.server_id, response.seq, response.sum); ++ /* Check that the reply from the preferred server was used. */ ++ TEST_VERIFY (servers[1].xid == xid); ++ TEST_VERIFY (response.server_id == 1); ++ TEST_VERIFY (response.seq == 1); ++ TEST_VERIFY (response.sum == 102); ++ ++ auth_destroy (clnt->cl_auth); ++ clnt_destroy (clnt); ++ ++ for (int i = 0; i < SERVER_COUNT; ++i) ++ { ++ if (test_verbose) ++ printf ("info: requesting server %d termination\n", i); ++ client_socket = RPC_ANYSOCK; ++ clnt = clntudp_create (&servers[i].address, PROGNUM, VERSNUM, ++ ((struct timeval) { 5, 0 }), ++ &client_socket); ++ TEST_VERIFY_EXIT (clnt != NULL); ++ TEST_VERIFY (clnt_call (clnt, PROC_EXIT, ++ (xdrproc_t) xdr_void, NULL, ++ (xdrproc_t) xdr_void, NULL, ++ ((struct timeval) { 3, 0 })) == RPC_SUCCESS); ++ clnt_destroy (clnt); ++ ++ int status; ++ xwaitpid (servers[i].pid, &status, 0); ++ TEST_VERIFY (WIFEXITED (status) && WEXITSTATUS (status) == EXIT_MARKER); ++ } ++ ++ return 0; ++} ++ ++#include +Index: b/sunrpc/tst-udp-timeout.c +=================================================================== +--- /dev/null ++++ b/sunrpc/tst-udp-timeout.c +@@ -0,0 +1,402 @@ ++/* Test timeout handling in the UDP client. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Test data serialization and deserialization. */ ++ ++struct test_query ++{ ++ uint32_t a; ++ uint32_t b; ++ uint32_t timeout_ms; ++ uint32_t wait_for_seq; ++ uint32_t garbage_packets; ++}; ++ ++static bool_t ++xdr_test_query (XDR *xdrs, void *data, ...) ++{ ++ struct test_query *p = data; ++ return xdr_uint32_t (xdrs, &p->a) ++ && xdr_uint32_t (xdrs, &p->b) ++ && xdr_uint32_t (xdrs, &p->timeout_ms) ++ && xdr_uint32_t (xdrs, &p->wait_for_seq) ++ && xdr_uint32_t (xdrs, &p->garbage_packets); ++} ++ ++struct test_response ++{ ++ uint32_t seq; ++ uint32_t sum; ++}; ++ ++static bool_t ++xdr_test_response (XDR *xdrs, void *data, ...) ++{ ++ struct test_response *p = data; ++ return xdr_uint32_t (xdrs, &p->seq) ++ && xdr_uint32_t (xdrs, &p->sum); ++} ++ ++/* Implementation of the test server. */ ++ ++enum ++ { ++ /* RPC parameters, chosen at random. */ ++ PROGNUM = 15717, ++ VERSNUM = 13689, ++ ++ /* Main RPC operation. */ ++ PROC_ADD = 1, ++ ++ /* Reset the sequence number. */ ++ PROC_RESET_SEQ, ++ ++ /* Request process termination. */ ++ PROC_EXIT, ++ ++ /* Special exit status to mark successful processing. */ ++ EXIT_MARKER = 55, ++ }; ++ ++static void ++server_dispatch (struct svc_req *request, SVCXPRT *transport) ++{ ++ /* Query sequence number. 
*/ ++ static uint32_t seq = 0; ++ ++seq; ++ ++ if (test_verbose) ++ printf ("info: server_dispatch seq=%u rq_proc=%lu\n", ++ seq, request->rq_proc); ++ ++ switch (request->rq_proc) ++ { ++ case PROC_ADD: ++ { ++ struct test_query query; ++ memset (&query, 0xc0, sizeof (query)); ++ TEST_VERIFY_EXIT ++ (svc_getargs (transport, xdr_test_query, ++ (void *) &query)); ++ ++ if (test_verbose) ++ printf (" a=%u b=%u timeout_ms=%u wait_for_seq=%u" ++ " garbage_packets=%u\n", ++ query.a, query.b, query.timeout_ms, query.wait_for_seq, ++ query.garbage_packets); ++ ++ if (seq < query.wait_for_seq) ++ { ++ /* No response at this point. */ ++ if (test_verbose) ++ printf (" skipped response\n"); ++ break; ++ } ++ ++ if (query.garbage_packets > 0) ++ { ++ int per_packet_timeout; ++ if (query.timeout_ms > 0) ++ per_packet_timeout ++ = query.timeout_ms * 1000 / query.garbage_packets; ++ else ++ per_packet_timeout = 0; ++ ++ char buf[20]; ++ memset (&buf, 0xc0, sizeof (buf)); ++ for (int i = 0; i < query.garbage_packets; ++i) ++ { ++ /* 13 is relatively prime to 21 = sizeof (buf) + 1, so ++ the len variable will cover the entire interval ++ [0, 20] if query.garbage_packets is sufficiently ++ large.
*/ ++ size_t len = (i * 13 + 1) % (sizeof (buf) + 1); ++ TEST_VERIFY (sendto (transport->xp_sock, ++ buf, len, MSG_NOSIGNAL, ++ (struct sockaddr *) &transport->xp_raddr, ++ transport->xp_addrlen) == len); ++ if (per_packet_timeout > 0) ++ usleep (per_packet_timeout); ++ } ++ } ++ else if (query.timeout_ms > 0) ++ usleep (query.timeout_ms * 1000); ++ ++ struct test_response response = ++ { ++ .seq = seq, ++ .sum = query.a + query.b, ++ }; ++ TEST_VERIFY (svc_sendreply (transport, xdr_test_response, ++ (void *) &response)); ++ } ++ break; ++ ++ case PROC_RESET_SEQ: ++ seq = 0; ++ TEST_VERIFY (svc_sendreply (transport, (xdrproc_t) xdr_void, NULL)); ++ break; ++ ++ case PROC_EXIT: ++ TEST_VERIFY (svc_sendreply (transport, (xdrproc_t) xdr_void, NULL)); ++ _exit (EXIT_MARKER); ++ break; ++ ++ default: ++ FAIL_EXIT1 ("invalid rq_proc value: %lu", request->rq_proc); ++ break; ++ } ++} ++ ++/* Implementation of the test client. */ ++ ++static struct test_response ++test_call (CLIENT *clnt, int proc, struct test_query query, ++ struct timeval timeout) ++{ ++ if (test_verbose) ++ printf ("info: test_call proc=%d timeout=%lu.%06lu\n", ++ proc, (unsigned long) timeout.tv_sec, ++ (unsigned long) timeout.tv_usec); ++ struct test_response response; ++ TEST_VERIFY_EXIT (clnt_call (clnt, proc, ++ xdr_test_query, (void *) &query, ++ xdr_test_response, (void *) &response, ++ timeout) ++ == RPC_SUCCESS); ++ return response; ++} ++ ++static void ++test_call_timeout (CLIENT *clnt, int proc, struct test_query query, ++ struct timeval timeout) ++{ ++ struct test_response response; ++ TEST_VERIFY (clnt_call (clnt, proc, ++ xdr_test_query, (void *) &query, ++ xdr_test_response, (void *) &response, ++ timeout) ++ == RPC_TIMEDOUT); ++} ++ ++/* Complete one regular RPC call to drain the server socket ++ buffer. Resets the sequence number. */ ++static void ++test_call_flush (CLIENT *clnt) ++{ ++ /* This needs a longer timeout to flush out all pending requests. 
++ The choice of 5 seconds is larger than the per-response timeouts ++ requested via the timeout_ms field. */ ++ if (test_verbose) ++ printf ("info: flushing pending queries\n"); ++ TEST_VERIFY_EXIT (clnt_call (clnt, PROC_RESET_SEQ, ++ (xdrproc_t) xdr_void, NULL, ++ (xdrproc_t) xdr_void, NULL, ++ ((struct timeval) { 5, 0 })) ++ == RPC_SUCCESS); ++} ++ ++/* Return the number of seconds since an arbitrary point in time. */ ++static double ++get_ticks (void) ++{ ++ { ++ struct timespec ts; ++ if (clock_gettime (CLOCK_MONOTONIC, &ts) == 0) ++ return ts.tv_sec + ts.tv_nsec * 1e-9; ++ } ++ { ++ struct timeval tv; ++ TEST_VERIFY_EXIT (gettimeofday (&tv, NULL) == 0); ++ return tv.tv_sec + tv.tv_usec * 1e-6; ++ } ++} ++ ++static void ++test_udp_server (int port) ++{ ++ struct sockaddr_in sin = ++ { ++ .sin_family = AF_INET, ++ .sin_addr.s_addr = htonl (INADDR_LOOPBACK), ++ .sin_port = htons (port) ++ }; ++ int sock = RPC_ANYSOCK; ++ ++ /* The client uses a 1.5 second timeout for retries. The timeouts ++ are arbitrary, but chosen so that there is a substantial gap ++ between them, but the total time spent waiting is not too ++ large. */ ++ CLIENT *clnt = clntudp_create (&sin, PROGNUM, VERSNUM, ++ (struct timeval) { 1, 500 * 1000 }, ++ &sock); ++ TEST_VERIFY_EXIT (clnt != NULL); ++ ++ /* Basic call/response test. */ ++ struct test_response response = test_call ++ (clnt, PROC_ADD, ++ (struct test_query) { .a = 17, .b = 4 }, ++ (struct timeval) { 3, 0 }); ++ TEST_VERIFY (response.sum == 21); ++ TEST_VERIFY (response.seq == 1); ++ ++ /* Check that garbage packets do not interfere with timeout ++ processing.
*/ ++ double before = get_ticks (); ++ response = test_call ++ (clnt, PROC_ADD, ++ (struct test_query) { ++ .a = 19, .b = 4, .timeout_ms = 500, .garbage_packets = 21, ++ }, ++ (struct timeval) { 3, 0 }); ++ TEST_VERIFY (response.sum == 23); ++ TEST_VERIFY (response.seq == 2); ++ double after = get_ticks (); ++ if (test_verbose) ++ printf ("info: 21 garbage packets took %f seconds\n", after - before); ++ /* Expected timeout is 0.5 seconds. Add some slack in case process ++ scheduling delays processing the query or response, but do not ++ accept a retry (which would happen at 1.5 seconds). */ ++ TEST_VERIFY (0.5 <= after - before); ++ TEST_VERIFY (after - before < 1.2); ++ test_call_flush (clnt); ++ ++ /* Check that missing a response introduces a 1.5 second timeout, as ++ requested when calling clntudp_create. */ ++ before = get_ticks (); ++ response = test_call ++ (clnt, PROC_ADD, ++ (struct test_query) { .a = 170, .b = 40, .wait_for_seq = 2 }, ++ (struct timeval) { 3, 0 }); ++ TEST_VERIFY (response.sum == 210); ++ TEST_VERIFY (response.seq == 2); ++ after = get_ticks (); ++ if (test_verbose) ++ printf ("info: skipping one response took %f seconds\n", ++ after - before); ++ /* Expected timeout is 1.5 seconds. Do not accept a second retry ++ (which would happen at 3 seconds). */ ++ TEST_VERIFY (1.5 <= after - before); ++ TEST_VERIFY (after - before < 2.9); ++ test_call_flush (clnt); ++ ++ /* Check that the overall timeout wins against the per-query ++ timeout. 
*/ ++ before = get_ticks (); ++ test_call_timeout ++ (clnt, PROC_ADD, ++ (struct test_query) { .a = 170, .b = 41, .wait_for_seq = 2 }, ++ (struct timeval) { 0, 750 * 1000 }); ++ after = get_ticks (); ++ if (test_verbose) ++ printf ("info: 0.75 second timeout took %f seconds\n", ++ after - before); ++ TEST_VERIFY (0.75 <= after - before); ++ TEST_VERIFY (after - before < 1.4); ++ test_call_flush (clnt); ++ ++ for (int with_garbage = 0; with_garbage < 2; ++with_garbage) ++ { ++ /* Check that no response at all causes the client to bail out. */ ++ before = get_ticks (); ++ test_call_timeout ++ (clnt, PROC_ADD, ++ (struct test_query) { ++ .a = 170, .b = 40, .timeout_ms = 1200, ++ .garbage_packets = with_garbage * 21 ++ }, ++ (struct timeval) { 0, 750 * 1000 }); ++ after = get_ticks (); ++ if (test_verbose) ++ printf ("info: test_udp_server: 0.75 second timeout took %f seconds" ++ " (garbage %d)\n", ++ after - before, with_garbage); ++ TEST_VERIFY (0.75 <= after - before); ++ TEST_VERIFY (after - before < 1.4); ++ test_call_flush (clnt); ++ ++ /* As above, but check the total timeout. 
*/ ++ before = get_ticks (); ++ test_call_timeout ++ (clnt, PROC_ADD, ++ (struct test_query) { ++ .a = 170, .b = 40, .timeout_ms = 3000, ++ .garbage_packets = with_garbage * 30 ++ }, ++ (struct timeval) { 2, 300 * 1000 }); ++ after = get_ticks (); ++ if (test_verbose) ++ printf ("info: test_udp_server: 2.3 second timeout took %f seconds" ++ " (garbage %d)\n", ++ after - before, with_garbage); ++ TEST_VERIFY (2.3 <= after - before); ++ TEST_VERIFY (after - before < 3.0); ++ test_call_flush (clnt); ++ } ++ ++ TEST_VERIFY_EXIT (clnt_call (clnt, PROC_EXIT, ++ (xdrproc_t) xdr_void, NULL, ++ (xdrproc_t) xdr_void, NULL, ++ ((struct timeval) { 5, 0 })) ++ == RPC_SUCCESS); ++ clnt_destroy (clnt); ++} ++ ++static int ++do_test (void) ++{ ++ support_become_root (); ++ support_enter_network_namespace (); ++ ++ SVCXPRT *transport = svcudp_create (RPC_ANYSOCK); ++ TEST_VERIFY_EXIT (transport != NULL); ++ TEST_VERIFY (svc_register (transport, PROGNUM, VERSNUM, server_dispatch, 0)); ++ ++ pid_t pid = xfork (); ++ if (pid == 0) ++ { ++ svc_run (); ++ FAIL_EXIT1 ("supposed to be unreachable"); ++ } ++ test_udp_server (transport->xp_port); ++ ++ int status; ++ xwaitpid (pid, &status, 0); ++ TEST_VERIFY (WIFEXITED (status) && WEXITSTATUS (status) == EXIT_MARKER); ++ ++ SVC_DESTROY (transport); ++ return 0; ++} ++ ++/* The minimum run time is around 17 seconds. */ ++#define TIMEOUT 25 ++#include +Index: b/inet/net-internal.h +=================================================================== +--- /dev/null ++++ b/inet/net-internal.h +@@ -0,0 +1,112 @@ ++/* Network-related functions for internal library use. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef _NET_INTERNAL_H ++#define _NET_INTERNAL_H 1 ++ ++#include ++#include ++#include ++ ++/* Deadline handling for enforcing timeouts. ++ ++ Code should call __deadline_current_time to obtain the current time ++ and cache it locally. The cache needs updating after every ++ long-running or potentially blocking operation. Deadlines relative ++ to the current time can be computed using __deadline_from_timeval. ++ The deadlines may have to be recomputed in response to certain ++ events (such as an incoming packet), but they are absolute (not ++ relative to the current time). A timeout suitable for use with the ++ poll function can be computed from such a deadline using ++ __deadline_to_ms. ++ ++ The fields in the structs defined below should only be used ++ within the implementation. */ ++ ++/* Cache of the current time. Used to compute deadlines from relative ++ timeouts and vice versa. */ ++struct deadline_current_time ++{ ++ struct timespec current; ++}; ++ ++/* Return the current time. Terminates the process if the current ++ time is not available. */ ++struct deadline_current_time __deadline_current_time (void) ++ internal_function attribute_hidden; ++ ++/* Computed absolute deadline. */ ++struct deadline ++{ ++ struct timespec absolute; ++}; ++ ++ ++/* For internal use only. */ ++static inline bool ++__deadline_is_infinite (struct deadline deadline) ++{ ++ return deadline.absolute.tv_nsec < 0; ++} ++ ++/* Return true if the current time is at the deadline or past it.
*/ ++static inline bool ++__deadline_elapsed (struct deadline_current_time current, ++ struct deadline deadline) ++{ ++ return !__deadline_is_infinite (deadline) ++ && (current.current.tv_sec > deadline.absolute.tv_sec ++ || (current.current.tv_sec == deadline.absolute.tv_sec ++ && current.current.tv_nsec >= deadline.absolute.tv_nsec)); ++} ++ ++/* Return the deadline which occurs first. */ ++static inline struct deadline ++__deadline_first (struct deadline left, struct deadline right) ++{ ++ if (__deadline_is_infinite (right) ++ || left.absolute.tv_sec < right.absolute.tv_sec ++ || (left.absolute.tv_sec == right.absolute.tv_sec ++ && left.absolute.tv_nsec < right.absolute.tv_nsec)) ++ return left; ++ else ++ return right; ++} ++ ++/* Add TV to the current time and return it. Returns a special ++ infinite absolute deadline on overflow. */ ++struct deadline __deadline_from_timeval (struct deadline_current_time, ++ struct timeval tv) ++ internal_function attribute_hidden; ++ ++/* Compute the number of milliseconds until the specified deadline, ++ from the current time in the argument. The result is mainly for ++ use with poll. If the deadline has already passed, return 0. If ++ the result would overflow an int, return INT_MAX. */ ++int __deadline_to_ms (struct deadline_current_time, struct deadline) ++ internal_function attribute_hidden; ++ ++/* Return true if TV.tv_sec is non-negative and TV.tv_usec is in the ++ interval [0, 999999]. 
*/ ++static inline bool ++__is_timeval_valid_timeout (struct timeval tv) ++{ ++ return tv.tv_sec >= 0 && tv.tv_usec >= 0 && tv.tv_usec < 1000 * 1000; ++} ++ ++#endif /* _NET_INTERNAL_H */ +Index: b/inet/Makefile +=================================================================== +--- a/inet/Makefile ++++ b/inet/Makefile +@@ -44,13 +44,18 @@ routines := htonl htons \ + getaliasent_r getaliasent getaliasname getaliasname_r \ + in6_addr getnameinfo if_index ifaddrs inet6_option \ + getipv4sourcefilter setipv4sourcefilter \ +- getsourcefilter setsourcefilter inet6_opt inet6_rth ++ getsourcefilter setsourcefilter inet6_opt inet6_rth \ ++ deadline + + aux := check_pf check_native ifreq + + tests := htontest test_ifindex tst-ntoa tst-ether_aton tst-network \ + tst-gethnm test-ifaddrs bug-if1 test-inet6_opt tst-ether_line \ +- tst-getni1 tst-getni2 tst-inet6_rth tst-checks ++ tst-getni1 tst-getni2 tst-inet6_rth tst-checks tst-deadline ++ ++# tst-deadline must be linked statically so that we can access ++# internal functions. ++tests-static += tst-deadline + + include ../Rules + diff --git a/SOURCES/glibc-rh1234449-2.patch b/SOURCES/glibc-rh1234449-2.patch index 67d8d4c..5d3dea5 100644 --- a/SOURCES/glibc-rh1234449-2.patch +++ b/SOURCES/glibc-rh1234449-2.patch @@ -4,6 +4,35 @@ Date: Fri Apr 24 17:34:47 2015 +0200 Make time zone file parser more robust [BZ #17715] +commit 6807b1db8233ed84671f061b5d825622233df303 +Author: Kevin Easton +Date: Tue Feb 24 23:57:07 2015 -0500 + + Reduce lock contention in __tz_convert() [BZ #16145] (partial fix) + +commit 9d46370ca338054cb6ea7ebeddcf06c7ac7ad1a9 +Author: Joseph Myers +Date: Fri Oct 16 20:21:49 2015 +0000 + + Convert 703 function definitions to prototype style. + (A subset of these changes (tzset.c) were applied as part of this patch.) + +commit 0748546f660d27a2ad29fa6174d456e2f6490758 +Author: Paul Eggert +Date: Wed Sep 18 13:15:12 2013 -0700 + + Support TZ transition times < 00:00:00. 
+ + This is needed for version-3 tz-format files; it supports time + stamps past 2037 for America/Godthab (the only entry in the tz + database for which this change is relevant). + * manual/time.texi (TZ Variable): Document transition times + from -167:59:59 through -00:00:01. + * time/tzset.c (tz_rule): Time of day is now signed. + (__tzset_parse_tz): Parse negative time of day. + (A subset of these changes were applied as part of this patch.) + + diff --git a/time/tzfile.c b/time/tzfile.c --- a/time/tzfile.c +++ b/time/tzfile.c diff --git a/SOURCES/glibc-rh1288613.patch b/SOURCES/glibc-rh1288613.patch index 1710690..754b422 100644 --- a/SOURCES/glibc-rh1288613.patch +++ b/SOURCES/glibc-rh1288613.patch @@ -25,10 +25,11 @@ Date: Thu Jun 18 21:40:46 2015 +0000 and $(shared-thread-library). (tst-res_hconf_reorder-ENV): New variable. -diff --git a/resolv/Makefile b/resolv/Makefile -index f62eea4..3509d98 100644 ---- a/resolv/Makefile -+++ b/resolv/Makefile +Index: glibc-2.17-c758a686/resolv/Makefile +=================================================================== +--- glibc-2.17-c758a686.orig/resolv/Makefile ++++ glibc-2.17-c758a686/resolv/Makefile +@@ -40,6 +40,7 @@ extra-libs := libresolv libnss_dns ifeq ($(have-thread-library),yes) extra-libs += libanl routines += gai_sigqueue @@ -36,7 +37,7 @@ index f62eea4..3509d98 100644 endif extra-libs-others = $(extra-libs) libresolv-routines := gethnamaddr res_comp res_debug \ -@@ -108,6 +109,9 @@ +@@ -108,6 +108,9 @@ $(objpfx)libanl.so: $(common-objpfx)libc $(objpfx)ga_test: $(objpfx)libanl.so $(shared-thread-library) @@ -46,12 +47,11 @@ index f62eea4..3509d98 100644 $(objpfx)tst-leaks: $(objpfx)libresolv.so tst-leaks-ENV = MALLOC_TRACE=$(objpfx)tst-leaks.mtrace $(objpfx)mtrace-tst-leaks: $(objpfx)tst-leaks.out - -diff --git a/resolv/res_hconf.c b/resolv/res_hconf.c -index c9642ce..0d4f3f4 100644 ---- a/resolv/res_hconf.c -+++ b/resolv/res_hconf.c -@@ -462,10 +462,10 @@ +Index: glibc-2.17-c758a686/resolv/res_hconf.c 
+=================================================================== +--- glibc-2.17-c758a686.orig/resolv/res_hconf.c ++++ glibc-2.17-c758a686/resolv/res_hconf.c +@@ -462,10 +462,10 @@ _res_hconf_reorder_addrs (struct hostent errno = save; num_ifs = new_num_ifs; @@ -64,11 +64,10 @@ index c9642ce..0d4f3f4 100644 __close (sd); } -diff --git a/resolv/tst-res_hconf_reorder.c b/resolv/tst-res_hconf_reorder.c -new file mode 100644 -index 0000000..1e7e0e2 +Index: glibc-2.17-c758a686/resolv/tst-res_hconf_reorder.c +=================================================================== --- /dev/null -+++ b/resolv/tst-res_hconf_reorder.c ++++ glibc-2.17-c758a686/resolv/tst-res_hconf_reorder.c @@ -0,0 +1,112 @@ +/* BZ #17977 _res_hconf_reorder_addrs test. + diff --git a/SOURCES/glibc-rh1293976-2.patch b/SOURCES/glibc-rh1293976-2.patch index e4c275a..c41dab3 100644 --- a/SOURCES/glibc-rh1293976-2.patch +++ b/SOURCES/glibc-rh1293976-2.patch @@ -1,9 +1,9 @@ Short description: malloc: Test various special cases related to allocation failures Author(s): Florian Weimer Origin: git://sourceware.org/git/glibc.git -Bug-RHEL: #1296453 (rhel-7.2.z), #1293976 (rhel-7.3), #1256285 (SRT) +Bug-RHEL: #1296453 (rhel-7.2.z), #1293976 (rhel-7.3), #1256285 (SRT), #1418978 Bug-Fedora: NA -Bug-Upstream: NA +Bug-Upstream: #19469 Upstream status: committed # # commit 1bd5483e104c8bde6e61dc5e3f8a848bc861872d @@ -23,13 +23,21 @@ Upstream status: committed # # (a) and (b) appear specific to a faulty downstream backport. # (c) was fixed as part of commit 10ad46bc6526edc5c7afcc57112da96917ff3629. -# -# The test for (a) was inspired by a reproducer supplied by Jeff Layton. +# +# commit f690b56979dea81340a397c1b5e44827a6fb06e7 +# Author: Florian Weimer +# Date: Tue Aug 2 17:01:02 2016 +0200 # -Index: glibc-2.17-c758a686/malloc/Makefile +# malloc: Run tests without calling mallopt [BZ #19469] +# +# The compiled tests no longer refer to the mallopt symbol +# from their main functions. 
(Some tests still call mallopt +# explicitly, which is fine.) + +Index: b/malloc/Makefile =================================================================== ---- glibc-2.17-c758a686.orig/malloc/Makefile -+++ glibc-2.17-c758a686/malloc/Makefile +--- a/malloc/Makefile ++++ b/malloc/Makefile @@ -27,7 +27,8 @@ headers := $(dist-headers) obstack.h mch tests := mallocbug tst-malloc tst-valloc tst-calloc tst-obstack \ tst-mallocstate tst-mcheck tst-mallocfork tst-trim1 \ @@ -49,11 +57,19 @@ Index: glibc-2.17-c758a686/malloc/Makefile $(objpfx)tst-malloc-thread-exit: $(common-objpfx)nptl/libpthread.so \ $(common-objpfx)nptl/libpthread_nonshared.a -Index: glibc-2.17-c758a686/malloc/tst-malloc-thread-fail.c +@@ -149,3 +152,7 @@ $(objpfx)libmemusage.so: $(common-objpfx + + # Extra dependencies + $(foreach o,$(all-object-suffixes),$(objpfx)malloc$(o)): arena.c hooks.c ++ ++# Compile the tests with a flag which suppresses the mallopt call in ++# the test skeleton. ++$(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT +Index: b/malloc/tst-malloc-thread-fail.c =================================================================== --- /dev/null -+++ glibc-2.17-c758a686/malloc/tst-malloc-thread-fail.c -@@ -0,0 +1,468 @@ ++++ b/malloc/tst-malloc-thread-fail.c +@@ -0,0 +1,464 @@ +/* Test allocation function behavior on allocation failure. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. @@ -516,25 +532,20 @@ Index: glibc-2.17-c758a686/malloc/tst-malloc-thread-fail.c +/* The repeated allocations take some time on slow machines. */ +#define TIMEOUT 20 + -+/* No malloc perturbation should be used. We are testing that -+ default 0 perturbation values work. 
*/ -+#define MALLOC_PERTURB 0 -+ +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" -Index: glibc-2.17-c758a686/test-skeleton.c +Index: b/test-skeleton.c =================================================================== ---- glibc-2.17-c758a686.orig/test-skeleton.c -+++ glibc-2.17-c758a686/test-skeleton.c -@@ -233,7 +233,10 @@ main (int argc, char *argv[]) +--- a/test-skeleton.c ++++ b/test-skeleton.c +@@ -247,8 +247,10 @@ main (int argc, char *argv[]) + unsigned int timeoutfactor = 1; pid_t termpid; ++#ifndef TEST_NO_MALLOPT /* Make uses of freed and uninitialized memory known. */ -- mallopt (M_PERTURB, 42); -+#ifndef MALLOC_PERTURB -+# define MALLOC_PERTURB 42 + mallopt (M_PERTURB, 42); +#endif -+ mallopt (M_PERTURB, MALLOC_PERTURB); #ifdef STDOUT_UNBUFFERED setbuf (stdout, NULL); diff --git a/SOURCES/glibc-rh1298975.patch b/SOURCES/glibc-rh1298975.patch new file mode 100644 index 0000000..f2495af --- /dev/null +++ b/SOURCES/glibc-rh1298975.patch @@ -0,0 +1,619 @@ +commit ced8f8933673f4efda1d666d26a1a949602035ed +Author: Stephen Gallagher +Date: Fri Apr 29 22:11:09 2016 -0400 + + NSS: Implement group merging support. 
+ +commit 2413e73c32fc36470885ae548631e081d66f4201 +Author: Zack Weinberg +Date: Mon Jul 18 09:33:21 2016 -0300 + + Don't install the internal header grp-merge.h + +--- glibc-2.17-c758a686/grp/Makefile ++++ glibc-2.17-c758a686/grp/Makefile +@@ -23,7 +23,8 @@ + + routines := fgetgrent initgroups setgroups \ + getgrent getgrgid getgrnam putgrent \ +- getgrent_r getgrgid_r getgrnam_r fgetgrent_r ++ getgrent_r getgrgid_r getgrnam_r fgetgrent_r \ ++ grp-merge + + include ../Makeconfig + +--- glibc-2.17-c758a686/grp/Versions ++++ glibc-2.17-c758a686/grp/Versions +@@ -28,4 +28,7 @@ + # g* + getgrouplist; + } ++ GLIBC_PRIVATE { ++ __merge_grp; __copy_grp; ++ } + } +--- glibc-2.17-c758a686/grp/getgrgid_r.c ++++ glibc-2.17-c758a686/grp/getgrgid_r.c +@@ -18,6 +18,7 @@ + + #include + ++#include + + #define LOOKUP_TYPE struct group + #define FUNCTION_NAME getgrgid +@@ -25,5 +26,7 @@ + #define ADD_PARAMS gid_t gid + #define ADD_VARIABLES gid + #define BUFLEN NSS_BUFLEN_GROUP ++#define DEEPCOPY_FN __copy_grp ++#define MERGE_FN __merge_grp + + #include +--- glibc-2.17-c758a686/grp/getgrnam_r.c ++++ glibc-2.17-c758a686/grp/getgrnam_r.c +@@ -18,6 +18,7 @@ + + #include + ++#include + + #define LOOKUP_TYPE struct group + #define FUNCTION_NAME getgrnam +@@ -25,4 +26,7 @@ + #define ADD_PARAMS const char *name + #define ADD_VARIABLES name + ++#define DEEPCOPY_FN __copy_grp ++#define MERGE_FN __merge_grp ++ + #include +--- glibc-2.17-c758a686/grp/grp-merge.c ++++ glibc-2.17-c758a686/grp/grp-merge.c +@@ -0,0 +1,186 @@ ++/* Group merging implementation. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define BUFCHECK(size) \ ++ ({ \ ++ do \ ++ { \ ++ if (c + (size) > buflen) \ ++ { \ ++ free (members); \ ++ return ERANGE; \ ++ } \ ++ } \ ++ while (0); \ ++ }) ++ ++int ++internal_function ++__copy_grp (const struct group srcgrp, const size_t buflen, ++ struct group *destgrp, char *destbuf, char **endptr) ++{ ++ size_t i; ++ size_t c = 0; ++ size_t len; ++ size_t memcount; ++ char **members = NULL; ++ ++ /* Copy the GID. */ ++ destgrp->gr_gid = srcgrp.gr_gid; ++ ++ /* Copy the name. */ ++ len = strlen (srcgrp.gr_name) + 1; ++ BUFCHECK (len); ++ memcpy (&destbuf[c], srcgrp.gr_name, len); ++ destgrp->gr_name = &destbuf[c]; ++ c += len; ++ ++ /* Copy the password. */ ++ len = strlen (srcgrp.gr_passwd) + 1; ++ BUFCHECK (len); ++ memcpy (&destbuf[c], srcgrp.gr_passwd, len); ++ destgrp->gr_passwd = &destbuf[c]; ++ c += len; ++ ++ /* Count all of the members. */ ++ for (memcount = 0; srcgrp.gr_mem[memcount]; memcount++) ++ ; ++ ++ /* Allocate a temporary holding area for the pointers to the member ++ contents, including space for a NULL-terminator. */ ++ members = malloc (sizeof (char *) * (memcount + 1)); ++ if (members == NULL) ++ return ENOMEM; ++ ++ /* Copy all of the group members to destbuf and add a pointer to each of ++ them into the 'members' array. 
*/ ++ for (i = 0; srcgrp.gr_mem[i]; i++) ++ { ++ len = strlen (srcgrp.gr_mem[i]) + 1; ++ BUFCHECK (len); ++ memcpy (&destbuf[c], srcgrp.gr_mem[i], len); ++ members[i] = &destbuf[c]; ++ c += len; ++ } ++ members[i] = NULL; ++ ++ /* Copy the pointers from the members array into the buffer and assign them ++ to the gr_mem member of destgrp. */ ++ destgrp->gr_mem = (char **) &destbuf[c]; ++ len = sizeof (char *) * (memcount + 1); ++ BUFCHECK (len); ++ memcpy (&destbuf[c], members, len); ++ c += len; ++ free (members); ++ members = NULL; ++ ++ /* Save the count of members at the end. */ ++ BUFCHECK (sizeof (size_t)); ++ memcpy (&destbuf[c], &memcount, sizeof (size_t)); ++ c += sizeof (size_t); ++ ++ if (endptr) ++ *endptr = destbuf + c; ++ return 0; ++} ++libc_hidden_def (__copy_grp) ++ ++/* Check that the name, GID and passwd fields match, then ++ copy in the gr_mem array. */ ++int ++internal_function ++__merge_grp (struct group *savedgrp, char *savedbuf, char *savedend, ++ size_t buflen, struct group *mergegrp, char *mergebuf) ++{ ++ size_t c, i, len; ++ size_t savedmemcount; ++ size_t memcount; ++ size_t membersize; ++ char **members = NULL; ++ ++ /* We only support merging members of groups with identical names and ++ GID values. If we hit this case, we need to overwrite the current ++ buffer with the saved one (which is functionally equivalent to ++ treating the new lookup as NSS_STATUS_NOTFOUND). */ ++ if (mergegrp->gr_gid != savedgrp->gr_gid ++ || strcmp (mergegrp->gr_name, savedgrp->gr_name)) ++ return __copy_grp (*savedgrp, buflen, mergegrp, mergebuf, NULL); ++ ++ /* Get the count of group members from the last sizeof (size_t) bytes in the ++ mergegrp buffer. */ ++ savedmemcount = (size_t) *(savedend - sizeof (size_t)); ++ ++ /* Get the count of new members to add. */ ++ for (memcount = 0; mergegrp->gr_mem[memcount]; memcount++) ++ ; ++ ++ /* Create a temporary array to hold the pointers to the member values from ++ both the saved and merge groups. 
*/ ++ membersize = savedmemcount + memcount + 1; ++ members = malloc (sizeof (char *) * membersize); ++ if (members == NULL) ++ return ENOMEM; ++ ++ /* Copy in the existing member pointers from the saved group ++ Note: this is not NULL-terminated yet. */ ++ memcpy (members, savedgrp->gr_mem, sizeof (char *) * savedmemcount); ++ ++ /* Back up into the savedbuf until we get back to the NULL-terminator of the ++ group member list. (This means walking back savedmemcount + 1 (char *) pointers ++ and the member count value. ++ The value of c is going to be the used length of the buffer backed up by ++ the member count and further backed up by the size of the pointers. */ ++ c = savedend - savedbuf ++ - sizeof (size_t) ++ - sizeof (char *) * (savedmemcount + 1); ++ ++ /* Add all the new group members, overwriting the old NULL-terminator while ++ adding the new pointers to the temporary array. */ ++ for (i = 0; mergegrp->gr_mem[i]; i++) ++ { ++ len = strlen (mergegrp->gr_mem[i]) + 1; ++ BUFCHECK (len); ++ memcpy (&savedbuf[c], mergegrp->gr_mem[i], len); ++ members[savedmemcount + i] = &savedbuf[c]; ++ c += len; ++ } ++ /* Add the NULL-terminator. */ ++ members[savedmemcount + memcount] = NULL; ++ ++ /* Copy the member array back into the buffer after the member list and free ++ the member array. */ ++ savedgrp->gr_mem = (char **) &savedbuf[c]; ++ len = sizeof (char *) * membersize; ++ BUFCHECK (len); ++ memcpy (&savedbuf[c], members, len); ++ c += len; ++ ++ free (members); ++ members = NULL; ++ ++ /* Finally, copy the results back into mergebuf, since that's the buffer ++ that we were provided by the caller. */ ++ return __copy_grp (*savedgrp, buflen, mergegrp, mergebuf, NULL); ++} ++libc_hidden_def (__merge_grp) +--- glibc-2.17-c758a686/grp/grp-merge.h ++++ glibc-2.17-c758a686/grp/grp-merge.h +@@ -0,0 +1,37 @@ ++/* Group merging implementation. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _GRP_MERGE_H ++#define _GRP_MERGE_H 1 ++ ++#include ++ ++/* Duplicate a grp struct (and its members). When no longer needed, the ++ calling function must free(newbuf). */ ++int ++__copy_grp (const struct group srcgrp, const size_t buflen, ++ struct group *destgrp, char *destbuf, char **endptr) ++ internal_function; ++ ++/* Merge the member lists of two grp structs together. */ ++int ++__merge_grp (struct group *savedgrp, char *savedbuf, char *savedend, ++ size_t buflen, struct group *mergegrp, char *mergebuf) ++ internal_function; ++ ++#endif /* _GRP_MERGE_H */ +--- glibc-2.17-c758a686/include/grp-merge.h ++++ glibc-2.17-c758a686/include/grp-merge.h +@@ -0,0 +1,7 @@ ++#ifndef _GRP_MERGE_H ++#include ++ ++libc_hidden_proto (__copy_grp) ++libc_hidden_proto (__merge_grp) ++ ++#endif /* _GRP_MERGE_H */ +--- glibc-2.17-c758a686/manual/nss.texi ++++ glibc-2.17-c758a686/manual/nss.texi +@@ -180,7 +180,7 @@ + + The case of the keywords is insignificant. The @var{status} + values are the results of a call to a lookup function of a specific +-service. They mean ++service. They mean: + + @ftable @samp + @item success +@@ -204,6 +204,50 @@ + @end ftable + + @noindent ++The @var{action} values mean: ++ ++@ftable @samp ++@item return ++ ++If the status matches, stop the lookup process at this service ++specification. 
If an entry is available, provide it to the application. ++If an error occurred, report it to the application. In case of a prior ++@samp{merge} action, the data is combined with previous lookup results, ++as explained below. ++ ++@item continue ++ ++If the status matches, proceed with the lookup process at the next ++entry, discarding the result of the current lookup (and any merged ++data). An exception is the @samp{initgroups} database and the ++@samp{success} status, where @samp{continue} acts like @code{merge} ++below. ++ ++@item merge ++ ++Proceed with the lookup process, retaining the current lookup result. ++This action is useful only with the @samp{success} status. If a ++subsequent service lookup succeeds and has a matching @samp{return} ++specification, the results are merged, the lookup process ends, and the ++merged results are returned to the application. If the following service ++has a matching @samp{merge} action, the lookup process continues, ++retaining the combined data from this and any previous lookups. ++ ++After a @code{merge} action, errors from subsequent lookups are ignored, ++and the data gathered so far will be returned. ++ ++The @samp{merge} only applies to the @samp{success} status. It is ++currently implemented for the @samp{group} database and its group ++members field, @samp{gr_mem}. If specified for other databases, it ++causes the lookup to fail (if the @var{status} matches). ++ ++When processing @samp{merge} for @samp{group} membership, the group GID ++and name must be identical for both entries. If only one or the other is ++a match, the behavior is undefined. 
++ ++@end ftable ++ ++@noindent + If we have a line like + + @smallexample +--- glibc-2.17-c758a686/nscd/getgrgid_r.c ++++ glibc-2.17-c758a686/nscd/getgrgid_r.c +@@ -17,6 +17,7 @@ + + #include + ++#include + + #define LOOKUP_TYPE struct group + #define FUNCTION_NAME getgrgid +@@ -25,6 +26,9 @@ + #define ADD_VARIABLES gid + #define BUFLEN NSS_BUFLEN_GROUP + ++#define DEEPCOPY_FN __copy_grp ++#define MERGE_FN __merge_grp ++ + /* We are nscd, so we don't want to be talking to ourselves. */ + #undef USE_NSCD + +--- glibc-2.17-c758a686/nscd/getgrnam_r.c ++++ glibc-2.17-c758a686/nscd/getgrnam_r.c +@@ -17,6 +17,7 @@ + + #include + ++#include + + #define LOOKUP_TYPE struct group + #define FUNCTION_NAME getgrnam +@@ -24,6 +25,9 @@ + #define ADD_PARAMS const char *name + #define ADD_VARIABLES name + ++#define DEEPCOPY_FN __copy_grp ++#define MERGE_FN __merge_grp ++ + /* We are nscd, so we don't want to be talking to ourselves. */ + #undef USE_NSCD + +--- glibc-2.17-c758a686/nss/getXXbyYY_r.c ++++ glibc-2.17-c758a686/nss/getXXbyYY_r.c +@@ -131,6 +131,52 @@ + # define AF_VAL AF_INET + #endif + ++ ++/* Set defaults for merge functions that haven't been defined. */ ++#ifndef DEEPCOPY_FN ++static inline int ++__copy_einval (LOOKUP_TYPE a, ++ const size_t b, ++ LOOKUP_TYPE *c, ++ char *d, ++ char **e) ++{ ++ return EINVAL; ++} ++# define DEEPCOPY_FN __copy_einval ++#endif ++ ++#ifndef MERGE_FN ++static inline int ++__merge_einval (LOOKUP_TYPE *a, ++ char *b, ++ char *c, ++ size_t d, ++ LOOKUP_TYPE *e, ++ char *f) ++{ ++ return EINVAL; ++} ++# define MERGE_FN __merge_einval ++#endif ++ ++#define CHECK_MERGE(err, status) \ ++ ({ \ ++ do \ ++ { \ ++ if (err) \ ++ { \ ++ __set_errno (err); \ ++ if (err == ERANGE) \ ++ status = NSS_STATUS_TRYAGAIN; \ ++ else \ ++ status = NSS_STATUS_UNAVAIL; \ ++ break; \ ++ } \ ++ } \ ++ while (0); \ ++ }) ++ + /* Type of the lookup function we need here. 
*/ + typedef enum nss_status (*lookup_function) (ADD_PARAMS, LOOKUP_TYPE *, char *, + size_t, int * H_ERRNO_PARM +@@ -152,13 +198,16 @@ + static service_user *startp; + static lookup_function start_fct; + service_user *nip; ++ int do_merge = 0; ++ LOOKUP_TYPE mergegrp; ++ char *mergebuf = NULL; ++ char *endptr = NULL; + union + { + lookup_function l; + void *ptr; + } fct; +- +- int no_more; ++ int no_more, err; + enum nss_status status = NSS_STATUS_UNAVAIL; + #ifdef USE_NSCD + int nscd_status; +@@ -278,9 +327,66 @@ + && errno == ERANGE) + break; + ++ if (do_merge) ++ { ++ ++ if (status == NSS_STATUS_SUCCESS) ++ { ++ /* The previous loop saved a buffer for merging. ++ Perform the merge now. */ ++ err = MERGE_FN (&mergegrp, mergebuf, endptr, buflen, resbuf, ++ buffer); ++ CHECK_MERGE (err,status); ++ do_merge = 0; ++ } ++ else ++ { ++ /* If the result wasn't SUCCESS, copy the saved buffer back ++ into the result buffer and set the status back to ++ NSS_STATUS_SUCCESS to match the previous pass through the ++ loop. ++ * If the next action is CONTINUE, it will overwrite the value ++ currently in the buffer and return the new value. ++ * If the next action is RETURN, we'll return the previously- ++ acquired values. ++ * If the next action is MERGE, then it will be added to the ++ buffer saved from the previous source. */ ++ err = DEEPCOPY_FN (mergegrp, buflen, resbuf, buffer, NULL); ++ CHECK_MERGE (err, status); ++ status = NSS_STATUS_SUCCESS; ++ } ++ } ++ ++ /* If we were are configured to merge this value with the next one, ++ save the current value of the group struct. */ ++ if (nss_next_action (nip, status) == NSS_ACTION_MERGE ++ && status == NSS_STATUS_SUCCESS) ++ { ++ /* Copy the current values into a buffer to be merged with the next ++ set of retrieved values. */ ++ if (mergebuf == NULL) ++ { ++ /* Only allocate once and reuse it for as many merges as we need ++ to perform. 
*/ ++ mergebuf = malloc (buflen); ++ if (mergebuf == NULL) ++ { ++ __set_errno (ENOMEM); ++ status = NSS_STATUS_UNAVAIL; ++ break; ++ } ++ } ++ ++ err = DEEPCOPY_FN (*resbuf, buflen, &mergegrp, mergebuf, &endptr); ++ CHECK_MERGE (err, status); ++ do_merge = 1; ++ } ++ + no_more = __nss_next2 (&nip, REENTRANT_NAME_STRING, + REENTRANT2_NAME_STRING, &fct.ptr, status, 0); + } ++ free (mergebuf); ++ mergebuf = NULL; + + #ifdef HANDLE_DIGITS_DOTS + done: +--- glibc-2.17-c758a686/nss/getnssent_r.c ++++ glibc-2.17-c758a686/nss/getnssent_r.c +@@ -79,7 +79,18 @@ + else + status = DL_CALL_FCT (fct.f, (0)); + +- no_more = __nss_next2 (nip, func_name, NULL, &fct.ptr, status, 0); ++ ++ /* This is a special-case. When [SUCCESS=merge] is in play, ++ _nss_next2() will skip to the next database. Due to the ++ implementation of that function, we can't know whether we're ++ in an enumeration or an individual lookup, which behaves ++ differently with regards to merging. We'll treat SUCCESS as ++ an indication to start the enumeration at this database. */ ++ if (nss_next_action (*nip, status) == NSS_ACTION_MERGE) ++ no_more = 1; ++ else ++ no_more = __nss_next2 (nip, func_name, NULL, &fct.ptr, status, 0); ++ + if (is_last_nip) + *last_nip = *nip; + } +@@ -175,8 +186,18 @@ + + do + { +- no_more = __nss_next2 (nip, getent_func_name, NULL, &fct.ptr, +- status, 0); ++ /* This is a special-case. When [SUCCESS=merge] is in play, ++ _nss_next2() will skip to the next database. Due to the ++ implementation of that function, we can't know whether we're ++ in an enumeration or an individual lookup, which behaves ++ differently with regards to merging. We'll treat SUCCESS as ++ an indication to return the results here. 
*/ ++ if (status == NSS_STATUS_SUCCESS ++ && nss_next_action (*nip, status) == NSS_ACTION_MERGE) ++ no_more = 1; ++ else ++ no_more = __nss_next2 (nip, getent_func_name, NULL, &fct.ptr, ++ status, 0); + + if (is_last_nip) + *last_nip = *nip; +--- glibc-2.17-c758a686/nss/nsswitch.c ++++ glibc-2.17-c758a686/nss/nsswitch.c +@@ -712,6 +712,9 @@ + else if (line - name == 8 + && __strncasecmp (name, "CONTINUE", 8) == 0) + action = NSS_ACTION_CONTINUE; ++ else if (line - name == 5 ++ && __strncasecmp (name, "MERGE", 5) == 0) ++ action = NSS_ACTION_MERGE; + else + goto finish; + +--- glibc-2.17-c758a686/nss/nsswitch.h ++++ glibc-2.17-c758a686/nss/nsswitch.h +@@ -32,7 +32,8 @@ + typedef enum + { + NSS_ACTION_CONTINUE, +- NSS_ACTION_RETURN ++ NSS_ACTION_RETURN, ++ NSS_ACTION_MERGE + } lookup_actions; + + diff --git a/SOURCES/glibc-rh1318877.patch b/SOURCES/glibc-rh1318877.patch new file mode 100644 index 0000000..4b435ae --- /dev/null +++ b/SOURCES/glibc-rh1318877.patch @@ -0,0 +1,62 @@ +commit 1ef74943ce2f114c78b215af57c2ccc72ccdb0b7 +Author: Paul Pluzhnikov +Date: Thu Apr 25 11:08:31 2013 -0700 + + Get rid of __STDC_FORMAT_MACROS, __STDC_LIMIT_MACROS and + __STDC_CONSTANT_MACROS. + +--- glibc-2.17-c758a686/sysdeps/generic/inttypes.h ++++ glibc-2.17-c758a686/sysdeps/generic/inttypes.h +@@ -41,10 +41,6 @@ + #endif + + +-/* The ISO C99 standard specifies that these macros must only be +- defined if explicitly requested. 
*/ +-#if !defined __cplusplus || defined __STDC_FORMAT_MACROS +- + # if __WORDSIZE == 64 + # define __PRI64_PREFIX "l" + # define __PRIPTR_PREFIX "l" +@@ -267,8 +263,6 @@ + # define SCNuPTR __PRIPTR_PREFIX "u" + # define SCNxPTR __PRIPTR_PREFIX "x" + +-#endif /* C++ && format macros */ +- + + __BEGIN_DECLS + +--- glibc-2.17-c758a686/sysdeps/generic/stdint.h ++++ glibc-2.17-c758a686/sysdeps/generic/stdint.h +@@ -141,10 +141,6 @@ + #endif + + +-/* The ISO C99 standard specifies that in C++ implementations these +- macros should only be defined if explicitly requested. */ +-#if !defined __cplusplus || defined __STDC_LIMIT_MACROS +- + # if __WORDSIZE == 64 + # define __INT64_C(c) c ## L + # define __UINT64_C(c) c ## UL +@@ -278,12 +274,6 @@ + # define WINT_MIN (0u) + # define WINT_MAX (4294967295u) + +-#endif /* C++ && limit macros */ +- +- +-/* The ISO C99 standard specifies that in C++ implementations these +- should only be defined if explicitly requested. */ +-#if !defined __cplusplus || defined __STDC_CONSTANT_MACROS + + /* Signed. */ + # define INT8_C(c) c +@@ -314,6 +304,4 @@ + # define UINTMAX_C(c) c ## ULL + # endif + +-#endif /* C++ && constant macros */ +- + #endif /* stdint.h */ diff --git a/SOURCES/glibc-rh1322544.patch b/SOURCES/glibc-rh1322544.patch new file mode 100644 index 0000000..2c85dda --- /dev/null +++ b/SOURCES/glibc-rh1322544.patch @@ -0,0 +1,21 @@ +commit 217a74a85cdd60df236c296ad88142b78d35eccf +Author: Andreas Schwab +Date: Mon Dec 8 15:13:38 2014 +0100 + + Don't touch user-controlled stdio locks in forked child (bug 12847) + + The stdio locks for streams with the _IO_USER_LOCK flag should not be + touched by internal code. 
+ +--- glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/fork.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/fork.c +@@ -45,7 +45,8 @@ + _IO_ITER i; + + for (i = _IO_iter_begin(); i != _IO_iter_end(); i = _IO_iter_next(i)) +- _IO_lock_init (*((_IO_lock_t *) _IO_iter_file(i)->_lock)); ++ if ((_IO_iter_file (i)->_flags & _IO_USER_LOCK) == 0) ++ _IO_lock_init (*((_IO_lock_t *) _IO_iter_file(i)->_lock)); + } + + diff --git a/SOURCES/glibc-rh1324568.patch b/SOURCES/glibc-rh1324568.patch new file mode 100644 index 0000000..7b46350 --- /dev/null +++ b/SOURCES/glibc-rh1324568.patch @@ -0,0 +1,54 @@ +Upstream commits: + +commit a071766ebfd853179ac39f9773f894029bf86d36 +Author: Andreas Schwab +Date: Thu Mar 20 15:05:25 2014 +0100 + + Fix use of half-initialized result in getaddrinfo when using nscd (bug 16743) + + This fixes a bug in the way the results from __nscd_getai are collected: + for every returned result a new entry is first added to the + gaih_addrtuple list, but if that result doesn't match the request this + entry remains uninitialized. So for this non-matching result an extra + result with uninitialized content is returned. + + To reproduce (with nscd running): + + $ getent ahostsv4 localhost + 127.0.0.1 STREAM localhost + 127.0.0.1 DGRAM + 127.0.0.1 RAW + (null) STREAM + (null) DGRAM + (null) RAW + +commit 8dc9751764eb1bedf06d19695524b31a16773413 +Author: Andreas Schwab +Date: Wed May 7 11:47:20 2014 +0200 + + Fix parsing of getai result from nscd for IPv6-only request + + +Index: b/sysdeps/posix/getaddrinfo.c +=================================================================== +--- a/sysdeps/posix/getaddrinfo.c ++++ b/sysdeps/posix/getaddrinfo.c +@@ -725,6 +725,18 @@ gaih_inet (const char *name, const struc + { + socklen_t size = (air->family[i] == AF_INET + ? 
INADDRSZ : IN6ADDRSZ); ++ ++ if (!((air->family[i] == AF_INET ++ && req->ai_family == AF_INET6 ++ && (req->ai_flags & AI_V4MAPPED) != 0) ++ || req->ai_family == AF_UNSPEC ++ || air->family[i] == req->ai_family)) ++ { ++ /* Skip over non-matching result. */ ++ addrs += size; ++ continue; ++ } ++ + if (*pat == NULL) + { + *pat = addrfree++; diff --git a/SOURCES/glibc-rh1325138.patch b/SOURCES/glibc-rh1325138.patch new file mode 100644 index 0000000..0d20c14 --- /dev/null +++ b/SOURCES/glibc-rh1325138.patch @@ -0,0 +1,19 @@ +commit 6a1cf708dd5681b517744d6d4fac02e4e4a0aa2e +Author: Aurelien Jarno +Date: Wed Mar 11 21:03:50 2015 -0400 + + Fix ldconfig segmentation fault with corrupted cache (Bug 18093). + +--- glibc-2.17-c758a686/elf/cache.c ++++ glibc-2.17-c758a686/elf/cache.c +@@ -688,7 +688,9 @@ + if (aux_cache == MAP_FAILED + || aux_cache_size < sizeof (struct aux_cache_file) + || memcmp (aux_cache->magic, AUX_CACHEMAGIC, sizeof AUX_CACHEMAGIC - 1) +- || aux_cache->nlibs >= aux_cache_size) ++ || aux_cache_size != (sizeof(struct aux_cache_file) + ++ aux_cache->nlibs * sizeof(struct aux_cache_file_entry) + ++ aux_cache->len_strings)) + { + close (fd); + init_aux_cache (); diff --git a/SOURCES/glibc-rh1326739.patch b/SOURCES/glibc-rh1326739.patch new file mode 100644 index 0000000..b99ed07 --- /dev/null +++ b/SOURCES/glibc-rh1326739.patch @@ -0,0 +1,28 @@ +commit 52ffbdf25a1100986f4ae27bb0febbe5a722ab25 +Author: Florian Weimer +Date: Wed Sep 10 20:29:15 2014 +0200 + + malloc: additional unlink hardening for non-small bins [BZ #17344] + + Turn two asserts into a conditional call to malloc_printerr. The + memory locations are accessed later anyway, so the performance + impact is minor. 
+ +Index: b/malloc/malloc.c +=================================================================== +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -1441,8 +1441,11 @@ typedef struct malloc_chunk* mbinptr; + BK->fd = FD; \ + if (!in_smallbin_range (P->size) \ + && __builtin_expect (P->fd_nextsize != NULL, 0)) { \ +- assert (P->fd_nextsize->bk_nextsize == P); \ +- assert (P->bk_nextsize->fd_nextsize == P); \ ++ if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0) \ ++ || __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0)) \ ++ malloc_printerr (check_action, \ ++ "corrupted double-linked list (not small)", P,\ ++ AV); \ + if (FD->fd_nextsize == NULL) { \ + if (P->fd_nextsize == P) \ + FD->fd_nextsize = FD->bk_nextsize = FD; \ diff --git a/SOURCES/glibc-rh1330705-1.patch b/SOURCES/glibc-rh1330705-1.patch new file mode 100644 index 0000000..987a58f --- /dev/null +++ b/SOURCES/glibc-rh1330705-1.patch @@ -0,0 +1,64 @@ +commit ffdd31816a67f48697ea4d6b852e58d2886d42ca +Author: Andreas Schwab +Date: Wed Sep 11 11:15:45 2013 +0200 + + Add O_TMPFILE to + +diff --git a/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h b/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h +index 2e31691..02c9a7f 100644 +--- a/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h ++++ b/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h +@@ -36,6 +36,7 @@ + #define __O_DIRECT 02000000 /* Direct disk access. */ + #define __O_NOATIME 04000000 /* Do not set atime. */ + #define __O_PATH 040000000 /* Resolve pathname but do not open file. */ ++#define __O_TMPFILE 0100100000 /* Atomically create nameless file. */ + + /* Not necessary, files are always with 64bit off_t. 
*/ + #define __O_LARGEFILE 0 +diff --git a/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h b/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h +index 744548a..76faa40 100644 +--- a/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h ++++ b/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h +@@ -37,6 +37,7 @@ + #define __O_CLOEXEC 010000000 /* Set close_on_exec. */ + #define __O_NOATIME 004000000 /* Do not set atime. */ + #define __O_PATH 020000000 ++#define __O_TMPFILE 040010000 /* Atomically create nameless file. */ + + #define __O_LARGEFILE 00004000 + +diff --git a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h +index b5929bd..9b0421e 100644 +--- a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h ++++ b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h +@@ -96,6 +96,9 @@ + #ifndef __O_DSYNC + # define __O_DSYNC 010000 + #endif ++#ifndef __O_TMPFILE ++# define __O_TMPFILE 020200000 ++#endif + + #ifndef F_GETLK + # ifndef __USE_FILE_OFFSET64 +@@ -128,6 +131,7 @@ + # define O_DIRECT __O_DIRECT /* Direct disk access. */ + # define O_NOATIME __O_NOATIME /* Do not set atime. */ + # define O_PATH __O_PATH /* Resolve pathname but do not open file. */ ++# define O_TMPFILE __O_TMPFILE /* Atomically create nameless file. */ + #endif + + /* For now, Linux has no separate synchronicitiy options for read +diff --git a/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h b/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h +index 01084bb..f384bc7 100644 +--- a/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h ++++ b/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h +@@ -39,6 +39,7 @@ + #define __O_DIRECT 0x100000 /* direct disk access hint */ + #define __O_NOATIME 0x200000 /* Do not set atime. */ + #define __O_PATH 0x1000000 /* Resolve pathname but do not open file. */ ++#define __O_TMPFILE 0x2010000 /* Atomically create nameless file. 
*/ + + #if __WORDSIZE == 64 + # define __O_LARGEFILE 0 diff --git a/SOURCES/glibc-rh1330705-2.patch b/SOURCES/glibc-rh1330705-2.patch new file mode 100644 index 0000000..cc1687c --- /dev/null +++ b/SOURCES/glibc-rh1330705-2.patch @@ -0,0 +1,19 @@ +commit 59b61c82fe18e612058302e4c726385c4eb301d8 +Author: Andreas Schwab +Date: Sun Feb 1 14:04:15 2015 +0100 + + Fix value of O_TMPFILE for architectures with non-default O_DIRECTORY (bug 17912) + +diff --git a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h +index 3d28c84..d2baeb3 100644 +--- a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h ++++ b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h +@@ -97,7 +97,7 @@ + # define __O_DSYNC 010000 + #endif + #ifndef __O_TMPFILE +-# define __O_TMPFILE 020200000 ++# define __O_TMPFILE (020000000 | __O_DIRECTORY) + #endif + + #ifndef F_GETLK diff --git a/SOURCES/glibc-rh1330705-3.patch b/SOURCES/glibc-rh1330705-3.patch new file mode 100644 index 0000000..7ec9055 --- /dev/null +++ b/SOURCES/glibc-rh1330705-3.patch @@ -0,0 +1,457 @@ +commit cc0e6ed81fa3ab0eeecfc576098b4522f0323c4b +Author: Roland McGrath +Date: Fri May 3 16:33:26 2013 -0700 + + Consolidate definitions of _FORTIFY_SOURCE wrappers for open{,64}{,at}. 
+ +Index: b/io/Makefile +=================================================================== +--- a/io/Makefile ++++ b/io/Makefile +@@ -36,10 +36,10 @@ routines := \ + statvfs fstatvfs statvfs64 fstatvfs64 \ + umask chmod fchmod lchmod fchmodat \ + mkdir mkdirat \ +- open open64 openat openat64 close \ ++ open open_2 open64 open64_2 openat openat_2 openat64 openat64_2 \ + read write lseek lseek64 access euidaccess faccessat \ + fcntl flock lockf lockf64 \ +- dup dup2 dup3 pipe pipe2 \ ++ close dup dup2 dup3 pipe pipe2 \ + creat creat64 \ + chdir fchdir \ + getcwd getwd getdirname \ +Index: b/io/open.c +=================================================================== +--- a/io/open.c ++++ b/io/open.c +@@ -22,7 +22,6 @@ + #include + #include + +-extern char **__libc_argv attribute_hidden; + + /* Open FILE with access OFLAG. If OFLAG includes O_CREAT, + a third argument is the file protection. */ +@@ -57,15 +56,6 @@ weak_alias (__libc_open, open) + + stub_warning (open) + +- +-int +-__open_2 (file, oflag) +- const char *file; +- int oflag; +-{ +- if (oflag & O_CREAT) +- __fortify_fail ("invalid open call: O_CREAT without mode"); +- +- return __open (file, oflag); +-} ++/* __open_2 is a generic wrapper that calls __open. ++ So give a stub warning for that symbol too. */ + stub_warning (__open_2) +Index: b/io/open64.c +=================================================================== +--- a/io/open64.c ++++ b/io/open64.c +@@ -54,15 +54,6 @@ weak_alias (__libc_open64, open64) + + stub_warning (open64) + +- +-int +-__open64_2 (file, oflag) +- const char *file; +- int oflag; +-{ +- if (oflag & O_CREAT) +- __fortify_fail ("invalid open64 call: O_CREAT without mode"); +- +- return __open64 (file, oflag); +-} ++/* __open64_2 is a generic wrapper that calls __open64. ++ So give a stub warning for that symbol too. 
*/ + stub_warning (__open64_2) +Index: b/io/open64_2.c +=================================================================== +--- /dev/null ++++ b/io/open64_2.c +@@ -0,0 +1,29 @@ ++/* _FORTIFY_SOURCE wrapper for open64. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++int ++__open64_2 (const char *file, int oflag) ++{ ++ if (oflag & O_CREAT) ++ __fortify_fail ("invalid open64 call: O_CREAT without mode"); ++ ++ return __open64 (file, oflag); ++} +Index: b/io/open_2.c +=================================================================== +--- /dev/null ++++ b/io/open_2.c +@@ -0,0 +1,29 @@ ++/* _FORTIFY_SOURCE wrapper for open. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++int ++__open_2 (const char *file, int oflag) ++{ ++ if (oflag & O_CREAT) ++ __fortify_fail ("invalid open call: O_CREAT without mode"); ++ ++ return __open (file, oflag); ++} +Index: b/io/openat.c +=================================================================== +--- a/io/openat.c ++++ b/io/openat.c +@@ -75,16 +75,6 @@ libc_hidden_def (__openat) + weak_alias (__openat, openat) + stub_warning (openat) + +- +-int +-__openat_2 (fd, file, oflag) +- int fd; +- const char *file; +- int oflag; +-{ +- if (oflag & O_CREAT) +- __fortify_fail ("invalid openat call: O_CREAT without mode"); +- +- return __openat (fd, file, oflag); +-} ++/* __openat_2 is a generic wrapper that calls __openat. ++ So give a stub warning for that symbol too. */ + stub_warning (__openat_2) +Index: b/io/openat64.c +=================================================================== +--- a/io/openat64.c ++++ b/io/openat64.c +@@ -68,16 +68,6 @@ libc_hidden_def (__openat64) + weak_alias (__openat64, openat64) + stub_warning (openat64) + +- +-int +-__openat64_2 (fd, file, oflag) +- int fd; +- const char *file; +- int oflag; +-{ +- if (oflag & O_CREAT) +- __fortify_fail ("invalid openat64 call: O_CREAT without mode"); +- +- return __openat64 (fd, file, oflag); +-} ++/* __openat64_2 is a generic wrapper that calls __openat64. ++ So give a stub warning for that symbol too. */ + stub_warning (__openat_2) +Index: b/io/openat64_2.c +=================================================================== +--- /dev/null ++++ b/io/openat64_2.c +@@ -0,0 +1,29 @@ ++/* _FORTIFY_SOURCE wrapper for openat64. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++int ++__openat64_2 (int fd, const char *file, int oflag) ++{ ++ if (oflag & O_CREAT) ++ __fortify_fail ("invalid openat64 call: O_CREAT without mode"); ++ ++ return __openat64 (fd, file, oflag); ++} +Index: b/io/openat_2.c +=================================================================== +--- /dev/null ++++ b/io/openat_2.c +@@ -0,0 +1,29 @@ ++/* _FORTIFY_SOURCE wrapper for openat. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++int ++__openat_2 (int fd, const char *file, int oflag) ++{ ++ if (oflag & O_CREAT) ++ __fortify_fail ("invalid openat call: O_CREAT without mode"); ++ ++ return __openat (fd, file, oflag); ++} +Index: b/sysdeps/mach/hurd/open.c +=================================================================== +--- a/sysdeps/mach/hurd/open.c ++++ b/sysdeps/mach/hurd/open.c +@@ -52,20 +52,9 @@ weak_alias (__libc_open, __open) + libc_hidden_weak (__open) + weak_alias (__libc_open, open) + +-int +-__open_2 (file, oflag) +- const char *file; +- int oflag; +-{ +- if (oflag & O_CREAT) +- __fortify_fail ("invalid open call: O_CREAT without mode"); +- +- return __open (file, oflag); +-} + + /* open64 is just the same as open for us. */ + weak_alias (__libc_open, __libc_open64) + weak_alias (__libc_open, __open64) + libc_hidden_weak (_open64) + weak_alias (__libc_open, open64) +-strong_alias (__open_2, __open64_2) +Index: b/sysdeps/mach/hurd/openat.c +=================================================================== +--- a/sysdeps/mach/hurd/openat.c ++++ b/sysdeps/mach/hurd/openat.c +@@ -56,20 +56,7 @@ __openat (fd, file, oflag) + libc_hidden_def (__openat) + weak_alias (__openat, openat) + +-int +-__openat_2 (fd, file, oflag) +- int fd; +- const char *file; +- int oflag; +-{ +- if (oflag & O_CREAT) +- __fortify_fail ("invalid openat call: O_CREAT without mode"); +- +- return __openat (fd, file, oflag); +-} +- + /* openat64 is just the same as openat for us. 
*/ + weak_alias (__openat, __openat64) + libc_hidden_weak (__openat64) + weak_alias (__openat, openat64) +-strong_alias (__openat_2, __openat64_2) +Index: b/sysdeps/unix/sysv/linux/Makefile +=================================================================== +--- a/sysdeps/unix/sysv/linux/Makefile ++++ b/sysdeps/unix/sysv/linux/Makefile +@@ -158,7 +158,7 @@ endif + + ifeq ($(subdir),io) + sysdep_routines += xstatconv internal_statvfs internal_statvfs64 \ +- sync_file_range open_2 open64_2 fallocate fallocate64 ++ sync_file_range fallocate fallocate64 + sysdep_headers += bits/fcntl-linux.h + endif + +Index: b/sysdeps/unix/sysv/linux/open64_2.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/open64_2.c ++++ /dev/null +@@ -1,31 +0,0 @@ +-/* Copyright (C) 2007 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . 
*/ +- +-#include +-#include +- +- +-int +-__open64_2 (file, oflag) +- const char *file; +- int oflag; +-{ +- if (oflag & O_CREAT) +- __fortify_fail ("invalid open64 call: O_CREAT without mode"); +- +- return __open64 (file, oflag); +-} +Index: b/sysdeps/unix/sysv/linux/open_2.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/open_2.c ++++ /dev/null +@@ -1,31 +0,0 @@ +-/* Copyright (C) 2007 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +- +- +-int +-__open_2 (file, oflag) +- const char *file; +- int oflag; +-{ +- if (oflag & O_CREAT) +- __fortify_fail ("invalid open call: O_CREAT without mode"); +- +- return __open (file, oflag); +-} +Index: b/sysdeps/unix/sysv/linux/openat.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/openat.c ++++ b/sysdeps/unix/sysv/linux/openat.c +@@ -29,7 +29,6 @@ + + #ifndef OPENAT + # define OPENAT openat +-# define __OPENAT_2 __openat_2 + + # ifndef __ASSUME_ATFCTS + /* Set errno after a failed call. 
If BUF is not null, +@@ -179,18 +178,3 @@ __OPENAT (fd, file, oflag) + } + libc_hidden_def (__OPENAT) + weak_alias (__OPENAT, OPENAT) +- +- +-int +-__OPENAT_2 (fd, file, oflag) +- int fd; +- const char *file; +- int oflag; +-{ +- if (oflag & O_CREAT) +-#define MSG(s) MSG2 (s) +-#define MSG2(s) "invalid " #s " call: O_CREAT without mode" +- __fortify_fail (MSG (OPENAT)); +- +- return __OPENAT (fd, file, oflag); +-} +Index: b/sysdeps/unix/sysv/linux/openat64.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/openat64.c ++++ b/sysdeps/unix/sysv/linux/openat64.c +@@ -1,5 +1,4 @@ + #define OPENAT openat64 +-#define __OPENAT_2 __openat64_2 + #define MORE_OFLAGS O_LARGEFILE + + #include "openat.c" +Index: b/sysdeps/unix/sysv/linux/wordsize-64/openat.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/wordsize-64/openat.c ++++ b/sysdeps/unix/sysv/linux/wordsize-64/openat.c +@@ -1,16 +1,14 @@ + #define __openat64 __rename___openat64 +-#define __openat64_2 __rename___openat64_2 + #define __openat64_nocancel __rename___openat64_nocancel + #define openat64 __rename_openat64 + + #include "../openat.c" + + #undef __openat64 +-#undef __openat64_2 + #undef __openat64_nocancel + #undef openat64 + +-weak_alias (__openat, __openat64) +-weak_alias (__openat_2, __openat64_2) +-weak_alias (__openat_nocancel, __openat64_nocancel) ++strong_alias (__openat, __openat64) ++hidden_ver (__openat, __openat64) ++strong_alias (__openat_nocancel, __openat64_nocancel) + weak_alias (openat, openat64) diff --git a/SOURCES/glibc-rh1330705-4.patch b/SOURCES/glibc-rh1330705-4.patch new file mode 100644 index 0000000..2471048 --- /dev/null +++ b/SOURCES/glibc-rh1330705-4.patch @@ -0,0 +1,462 @@ +Adjusted for the lack of the ports move. 
+ +commit 65f6f938cd562a614a68e15d0581a34b177ec29d +Author: Eric Rannaud +Date: Tue Feb 24 13:12:26 2015 +0530 + + linux: open and openat ignore 'mode' with O_TMPFILE in flags + + Both open and openat load their last argument 'mode' lazily, using + va_arg() only if O_CREAT is found in oflag. This is wrong, mode is also + necessary if O_TMPFILE is in oflag. + + By chance on x86_64, the problem wasn't evident when using O_TMPFILE + with open, as the 3rd argument of open, even when not loaded with + va_arg, is left untouched in RDX, where the syscall expects it. + + However, openat was not so lucky, and O_TMPFILE couldn't be used: mode + is the 4th argument, in RCX, but the syscall expects its 4th argument in + a different register than the glibc wrapper, in R10. + + Introduce a macro __OPEN_NEEDS_MODE (oflag) to test if either O_CREAT or + O_TMPFILE is set in oflag. + +Index: b/io/bits/fcntl2.h +=================================================================== +--- a/io/bits/fcntl2.h ++++ b/io/bits/fcntl2.h +@@ -20,7 +20,7 @@ + # error "Never include <bits/fcntl2.h> directly; use <fcntl.h> instead." + #endif + +-/* Check that calls to open and openat with O_CREAT set have an ++/* Check that calls to open and openat with O_CREAT or O_TMPFILE set have an + appropriate third/fourth parameter. */ + #ifndef __USE_FILE_OFFSET64 + extern int __open_2 (const char *__path, int __oflag) __nonnull ((1)); +@@ -35,7 +35,7 @@ extern int __REDIRECT (__open_alias, (co + __errordecl (__open_too_many_args, + "open can be called either with 2 or 3 arguments, not more"); + __errordecl (__open_missing_mode, +- "open with O_CREAT in second argument needs 3 arguments"); ++ "open with O_CREAT or O_TMPFILE in second argument needs 3 arguments"); + + __fortify_function int + open (const char *__path, int __oflag, ...) +@@ -45,7 +45,7 @@ open (const char *__path, int __oflag, .
+ + if (__builtin_constant_p (__oflag)) + { +- if ((__oflag & O_CREAT) != 0 && __va_arg_pack_len () < 1) ++ if (__OPEN_NEEDS_MODE (__oflag) && __va_arg_pack_len () < 1) + { + __open_missing_mode (); + return __open_2 (__path, __oflag); +@@ -67,7 +67,7 @@ extern int __REDIRECT (__open64_alias, ( + __errordecl (__open64_too_many_args, + "open64 can be called either with 2 or 3 arguments, not more"); + __errordecl (__open64_missing_mode, +- "open64 with O_CREAT in second argument needs 3 arguments"); ++ "open64 with O_CREAT or O_TMPFILE in second argument needs 3 arguments"); + + __fortify_function int + open64 (const char *__path, int __oflag, ...) +@@ -77,7 +77,7 @@ open64 (const char *__path, int __oflag, + + if (__builtin_constant_p (__oflag)) + { +- if ((__oflag & O_CREAT) != 0 && __va_arg_pack_len () < 1) ++ if (__OPEN_NEEDS_MODE (__oflag) && __va_arg_pack_len () < 1) + { + __open64_missing_mode (); + return __open64_2 (__path, __oflag); +@@ -111,7 +111,7 @@ extern int __REDIRECT (__openat_alias, ( + __errordecl (__openat_too_many_args, + "openat can be called either with 3 or 4 arguments, not more"); + __errordecl (__openat_missing_mode, +- "openat with O_CREAT in third argument needs 4 arguments"); ++ "openat with O_CREAT or O_TMPFILE in third argument needs 4 arguments"); + + __fortify_function int + openat (int __fd, const char *__path, int __oflag, ...) 
+@@ -121,7 +121,7 @@ openat (int __fd, const char *__path, in + + if (__builtin_constant_p (__oflag)) + { +- if ((__oflag & O_CREAT) != 0 && __va_arg_pack_len () < 1) ++ if (__OPEN_NEEDS_MODE (__oflag) && __va_arg_pack_len () < 1) + { + __openat_missing_mode (); + return __openat_2 (__fd, __path, __oflag); +@@ -145,7 +145,7 @@ extern int __REDIRECT (__openat64_alias, + __errordecl (__openat64_too_many_args, + "openat64 can be called either with 3 or 4 arguments, not more"); + __errordecl (__openat64_missing_mode, +- "openat64 with O_CREAT in third argument needs 4 arguments"); ++ "openat64 with O_CREAT or O_TMPFILE in third argument needs 4 arguments"); + + __fortify_function int + openat64 (int __fd, const char *__path, int __oflag, ...) +@@ -155,7 +155,7 @@ openat64 (int __fd, const char *__path, + + if (__builtin_constant_p (__oflag)) + { +- if ((__oflag & O_CREAT) != 0 && __va_arg_pack_len () < 1) ++ if (__OPEN_NEEDS_MODE (__oflag) && __va_arg_pack_len () < 1) + { + __openat64_missing_mode (); + return __openat64_2 (__fd, __path, __oflag); +Index: b/io/fcntl.h +=================================================================== +--- a/io/fcntl.h ++++ b/io/fcntl.h +@@ -34,6 +34,15 @@ __BEGIN_DECLS + numbers and flag bits for `open', `fcntl', et al. */ + #include + ++/* Detect if open needs mode as a third argument (or for openat as a fourth ++ argument). */ ++#ifdef __O_TMPFILE ++# define __OPEN_NEEDS_MODE(oflag) \ ++ (((oflag) & O_CREAT) != 0 || ((oflag) & __O_TMPFILE) == __O_TMPFILE) ++#else ++# define __OPEN_NEEDS_MODE(oflag) (((oflag) & O_CREAT) != 0) ++#endif ++ + /* POSIX.1-2001 specifies that these types are defined by . + Earlier POSIX standards permitted any type ending in `_t' to be defined + by any POSIX header, so we don't conditionalize the definitions here. */ +@@ -154,8 +163,9 @@ typedef __pid_t pid_t; + extern int fcntl (int __fd, int __cmd, ...); + + /* Open FILE and return a new file descriptor for it, or -1 on error. 
+- OFLAG determines the type of access used. If O_CREAT is on OFLAG, +- the third argument is taken as a `mode_t', the mode of the created file. ++ OFLAG determines the type of access used. If O_CREAT or O_TMPFILE is set ++ in OFLAG, the third argument is taken as a `mode_t', the mode of the ++ created file. + + This function is a cancellation point and therefore not marked with + __THROW. */ +Index: b/io/open.c +=================================================================== +--- a/io/open.c ++++ b/io/open.c +@@ -23,7 +23,7 @@ + #include + + +-/* Open FILE with access OFLAG. If OFLAG includes O_CREAT, ++/* Open FILE with access OFLAG. If O_CREAT or O_TMPFILE is in OFLAG, + a third argument is the file protection. */ + int + __libc_open (file, oflag) +@@ -38,7 +38,7 @@ __libc_open (file, oflag) + return -1; + } + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start(arg, oflag); +Index: b/io/open64.c +=================================================================== +--- a/io/open64.c ++++ b/io/open64.c +@@ -22,7 +22,7 @@ + #include + #include + +-/* Open FILE with access OFLAG. If OFLAG includes O_CREAT, ++/* Open FILE with access OFLAG. If O_CREAT or O_TMPFILE is in OFLAG, + a third argument is the file protection. 
*/ + int + __libc_open64 (file, oflag) +@@ -37,7 +37,7 @@ __libc_open64 (file, oflag) + return -1; + } + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +Index: b/io/open64_2.c +=================================================================== +--- a/io/open64_2.c ++++ b/io/open64_2.c +@@ -22,8 +22,8 @@ + int + __open64_2 (const char *file, int oflag) + { +- if (oflag & O_CREAT) +- __fortify_fail ("invalid open64 call: O_CREAT without mode"); ++ if (__OPEN_NEEDS_MODE (oflag)) ++ __fortify_fail ("invalid open64 call: O_CREAT or O_TMPFILE without mode"); + + return __open64 (file, oflag); + } +Index: b/io/open_2.c +=================================================================== +--- a/io/open_2.c ++++ b/io/open_2.c +@@ -22,8 +22,8 @@ + int + __open_2 (const char *file, int oflag) + { +- if (oflag & O_CREAT) +- __fortify_fail ("invalid open call: O_CREAT without mode"); ++ if (__OPEN_NEEDS_MODE (oflag)) ++ __fortify_fail ("invalid open call: O_CREAT or O_TMPFILE without mode"); + + return __open (file, oflag); + } +Index: b/io/openat.c +=================================================================== +--- a/io/openat.c ++++ b/io/openat.c +@@ -30,7 +30,7 @@ int __have_atfcts; + #endif + + /* Open FILE with access OFLAG. Interpret relative paths relative to +- the directory associated with FD. If OFLAG includes O_CREAT, a ++ the directory associated with FD. If O_CREAT or O_TMPFILE is in OFLAG, a + third argument is the file protection. */ + int + __openat (fd, file, oflag) +@@ -60,7 +60,7 @@ __openat (fd, file, oflag) + } + } + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +Index: b/io/openat64.c +=================================================================== +--- a/io/openat64.c ++++ b/io/openat64.c +@@ -23,7 +23,7 @@ + #include + + /* Open FILE with access OFLAG. Interpret relative paths relative to +- the directory associated with FD. 
If OFLAG includes O_CREAT, a ++ the directory associated with FD. If O_CREAT or O_TMPFILE is in OFLAG, a + third argument is the file protection. */ + int + __openat64 (fd, file, oflag) +@@ -53,7 +53,7 @@ __openat64 (fd, file, oflag) + } + } + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +Index: b/io/openat64_2.c +=================================================================== +--- a/io/openat64_2.c ++++ b/io/openat64_2.c +@@ -22,8 +22,8 @@ + int + __openat64_2 (int fd, const char *file, int oflag) + { +- if (oflag & O_CREAT) +- __fortify_fail ("invalid openat64 call: O_CREAT without mode"); ++ if (__OPEN_NEEDS_MODE (oflag)) ++ __fortify_fail ("invalid openat64 call: O_CREAT or O_TMPFILE without mode"); + + return __openat64 (fd, file, oflag); + } +Index: b/io/openat_2.c +=================================================================== +--- a/io/openat_2.c ++++ b/io/openat_2.c +@@ -22,8 +22,8 @@ + int + __openat_2 (int fd, const char *file, int oflag) + { +- if (oflag & O_CREAT) +- __fortify_fail ("invalid openat call: O_CREAT without mode"); ++ if (__OPEN_NEEDS_MODE (oflag)) ++ __fortify_fail ("invalid openat call: O_CREAT or O_TMPFILE without mode"); + + return __openat (fd, file, oflag); + } +Index: b/sysdeps/mach/hurd/open.c +=================================================================== +--- a/sysdeps/mach/hurd/open.c ++++ b/sysdeps/mach/hurd/open.c +@@ -22,7 +22,7 @@ + #include + #include + +-/* Open FILE with access OFLAG. If OFLAG includes O_CREAT, ++/* Open FILE with access OFLAG. If O_CREAT or O_TMPFILE is in OFLAG, + a third argument is the file protection. */ + int + __libc_open (const char *file, int oflag, ...) 
+@@ -30,7 +30,7 @@ __libc_open (const char *file, int oflag + mode_t mode; + io_t port; + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +Index: b/sysdeps/mach/hurd/openat.c +=================================================================== +--- a/sysdeps/mach/hurd/openat.c ++++ b/sysdeps/mach/hurd/openat.c +@@ -26,7 +26,7 @@ + #include + + /* Open FILE with access OFLAG. Interpret relative paths relative to +- the directory associated with FD. If OFLAG includes O_CREAT, a ++ the directory associated with FD. If O_CREAT or O_TMPFILE is in OFLAG, a + third argument is the file protection. */ + int + __openat (fd, file, oflag) +@@ -37,7 +37,7 @@ __openat (fd, file, oflag) + mode_t mode; + io_t port; + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +Index: b/sysdeps/posix/open64.c +=================================================================== +--- a/sysdeps/posix/open64.c ++++ b/sysdeps/posix/open64.c +@@ -20,14 +20,14 @@ + #include + #include + +-/* Open FILE with access OFLAG. If OFLAG includes O_CREAT, ++/* Open FILE with access OFLAG. If O_CREAT or O_TMPFILE is in OFLAG, + a third argument is the file protection. */ + int + __libc_open64 (const char *file, int oflag, ...) 
+ { + int mode = 0; + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +Index: b/sysdeps/unix/sysv/linux/dl-openat64.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/dl-openat64.c ++++ b/sysdeps/unix/sysv/linux/dl-openat64.c +@@ -28,7 +28,7 @@ openat64 (dfd, file, oflag) + const char *file; + int oflag; + { +- assert ((oflag & O_CREAT) == 0); ++ assert (!__OPEN_NEEDS_MODE (oflag)); + + #ifdef __NR_openat + return INLINE_SYSCALL (openat, 3, dfd, file, oflag | O_LARGEFILE); +Index: b/ports/sysdeps/unix/sysv/linux/generic/open.c +=================================================================== +--- a/ports/sysdeps/unix/sysv/linux/generic/open.c ++++ b/ports/sysdeps/unix/sysv/linux/generic/open.c +@@ -22,14 +22,14 @@ + #include + #include + +-/* Open FILE with access OFLAG. If OFLAG includes O_CREAT, ++/* Open FILE with access OFLAG. If O_CREAT or O_TMPFILE is in OFLAG, + a third argument is the file protection. */ + int + __libc_open (const char *file, int oflag, ...) + { + int mode = 0; + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +@@ -59,7 +59,7 @@ __open_nocancel (const char *file, int o + { + int mode = 0; + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +Index: b/ports/sysdeps/unix/sysv/linux/generic/open64.c +=================================================================== +--- a/ports/sysdeps/unix/sysv/linux/generic/open64.c ++++ b/ports/sysdeps/unix/sysv/linux/generic/open64.c +@@ -22,14 +22,14 @@ + #include + #include + +-/* Open FILE with access OFLAG. If OFLAG includes O_CREAT, ++/* Open FILE with access OFLAG. If O_CREAT or O_TMPFILE is in OFLAG, + a third argument is the file protection. */ + int + __libc_open64 (const char *file, int oflag, ...) 
+ { + int mode = 0; + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +Index: b/sysdeps/unix/sysv/linux/open64.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/open64.c ++++ b/sysdeps/unix/sysv/linux/open64.c +@@ -22,14 +22,14 @@ + #include + #include + +-/* Open FILE with access OFLAG. If OFLAG includes O_CREAT, ++/* Open FILE with access OFLAG. If O_CREAT or O_TMPFILE is in OFLAG, + a third argument is the file protection. */ + int + __libc_open64 (const char *file, int oflag, ...) + { + int mode = 0; + +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); +Index: b/sysdeps/unix/sysv/linux/openat.c +=================================================================== +--- a/sysdeps/unix/sysv/linux/openat.c ++++ b/sysdeps/unix/sysv/linux/openat.c +@@ -148,8 +148,8 @@ OPENAT_NOT_CANCEL (fd, file, oflag, mode + + + /* Open FILE with access OFLAG. Interpret relative paths relative to +- the directory associated with FD. If OFLAG includes O_CREAT, a +- third argument is the file protection. */ ++ the directory associated with FD. If OFLAG includes O_CREAT or ++ O_TMPFILE, a fourth argument is the file protection. */ + int + __OPENAT (fd, file, oflag) + int fd; +@@ -157,7 +157,7 @@ __OPENAT (fd, file, oflag) + int oflag; + { + mode_t mode = 0; +- if (oflag & O_CREAT) ++ if (__OPEN_NEEDS_MODE (oflag)) + { + va_list arg; + va_start (arg, oflag); diff --git a/SOURCES/glibc-rh1330705-5.patch b/SOURCES/glibc-rh1330705-5.patch new file mode 100644 index 0000000..f428286 --- /dev/null +++ b/SOURCES/glibc-rh1330705-5.patch @@ -0,0 +1,386 @@ +commit 85f7554cd97e7f03d8dc66278653045ef63a2221 +Author: Florian Weimer +Date: Wed Sep 21 15:24:01 2016 +0200 + + Add test case for O_TMPFILE handling in open, openat + + Also put xasprintf into test-skeleton.c (written in such a way that + including is not needed). 
+ +commit 51364ff23e9760777bfea4eb9ac89c29a794074b +Author: Florian Weimer +Date: Fri Sep 23 09:41:35 2016 +0200 + + test-skeleton.c: Remove unintended #include <stdarg.h>. + +Index: b/io/tst-open-tmpfile.c +=================================================================== +--- /dev/null ++++ b/io/tst-open-tmpfile.c +@@ -0,0 +1,319 @@ ++/* Test open and openat with O_TMPFILE. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* This test verifies that open and openat work as expected, i.e. they ++ create a deleted file with the requested file mode. */ ++ ++#include <errno.h> ++#include <fcntl.h> ++#include <stdbool.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <string.h> ++#include <sys/stat.h> ++#include <unistd.h> ++ ++static int do_test (void); ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" ++ ++#ifdef O_TMPFILE ++typedef int (*wrapper_func) (const char *, int, mode_t); ++ ++/* Error-checking wrapper for the open function, compatible with the ++ wrapper_func type. */ ++static int ++wrap_open (const char *path, int flags, mode_t mode) ++{ ++ int ret = open (path, flags, mode); ++ if (ret < 0) ++ { ++ printf ("error: open (\"%s\", 0x%x, 0%03o): %m\n", path, flags, mode); ++ exit (1); ++ } ++ return ret; ++} ++ ++/* Error-checking wrapper for the openat function, compatible with the ++ wrapper_func type.
*/ ++static int ++wrap_openat (const char *path, int flags, mode_t mode) ++{ ++ int ret = openat (AT_FDCWD, path, flags, mode); ++ if (ret < 0) ++ { ++ printf ("error: openat (\"%s\", 0x%x, 0%03o): %m\n", path, flags, mode); ++ exit (1); ++ } ++ return ret; ++} ++ ++/* Error-checking wrapper for the open64 function, compatible with the ++ wrapper_func type. */ ++static int ++wrap_open64 (const char *path, int flags, mode_t mode) ++{ ++ int ret = open64 (path, flags, mode); ++ if (ret < 0) ++ { ++ printf ("error: open64 (\"%s\", 0x%x, 0%03o): %m\n", path, flags, mode); ++ exit (1); ++ } ++ return ret; ++} ++ ++/* Error-checking wrapper for the openat64 function, compatible with the ++ wrapper_func type. */ ++static int ++wrap_openat64 (const char *path, int flags, mode_t mode) ++{ ++ int ret = openat64 (AT_FDCWD, path, flags, mode); ++ if (ret < 0) ++ { ++ printf ("error: openat64 (\"%s\", 0x%x, 0%03o): %m\n", path, flags, mode); ++ exit (1); ++ } ++ return ret; ++} ++ ++/* Return true if FD is flagged as deleted in /proc/self/fd, false if ++ not. */ ++static bool ++is_file_deteted (int fd) ++{ ++ char *proc_fd_path = xasprintf ("/proc/self/fd/%d", fd); ++ char file_path[4096]; ++ ssize_t file_path_length ++ = readlink (proc_fd_path, file_path, sizeof (file_path)); ++ if (file_path_length < 0) ++ { ++ printf ("error: readlink (\"%s\"): %m", proc_fd_path); ++ free (proc_fd_path); ++ exit (1); ++ } ++ free (proc_fd_path); ++ if (file_path_length == sizeof (file_path)) ++ { ++ printf ("error: path in /proc resolves to overlong file name: %.*s\n", ++ (int) file_path_length, file_path); ++ exit (1); ++ } ++ const char *deleted = " (deleted)"; ++ if (file_path_length < strlen (deleted)) ++ { ++ printf ("error: path in /proc is too short: %.*s\n", ++ (int) file_path_length, file_path); ++ exit (1); ++ } ++ return memcmp (file_path + file_path_length - strlen (deleted), ++ deleted, strlen (deleted)) == 0; ++} ++ ++/* Obtain a file name which is difficult to guess. 
*/ ++static char * ++get_random_name (void) ++{ ++ unsigned long long bytes[2]; ++ int random_device = open ("/dev/urandom", O_RDONLY); ++ if (random_device < 0) ++ { ++ printf ("error: open (\"/dev/urandom\"): %m\n"); ++ exit (1); ++ } ++ ssize_t ret = read (random_device, bytes, sizeof (bytes)); ++ if (ret < 0) ++ { ++ printf ("error: read (\"/dev/urandom\"): %m\n"); ++ exit (1); ++ } ++ if (ret != sizeof (bytes)) ++ { ++ printf ("error: short read from /dev/urandom: %zd\n", ret); ++ exit (1); ++ } ++ close (random_device); ++ return xasprintf ("tst-open-tmpfile-%08llx%08llx.tmp", bytes[0], bytes[1]); ++} ++ ++/* Check open/openat (as specified by OP and WRAPPER) with a specific ++ PATH/FLAGS/MODE combination. */ ++static void ++check_wrapper_flags_mode (const char *op, wrapper_func wrapper, ++ const char *path, int flags, mode_t mode) ++{ ++ int fd = wrapper (path, flags | O_TMPFILE, mode); ++ struct stat64 st; ++ if (fstat64 (fd, &st) != 0) ++ { ++ printf ("error: fstat64: %m\n"); ++ exit (1); ++ } ++ ++ /* Verify that the mode was correctly processed. */ ++ int actual_mode = st.st_mode & 0777; ++ if (actual_mode != mode) ++ { ++ printf ("error: unexpected mode; expected 0%03o, actual 0%03o\n", ++ mode, actual_mode); ++ exit (1); ++ } ++ ++ /* Check that the file is marked as deleted in /proc. */ ++ if (!is_file_deteted (fd)) ++ { ++ printf ("error: path in /proc is not marked as deleted\n"); ++ exit (1); ++ } ++ ++ /* Check that the file can be turned into a regular file with ++ linkat. Open a file descriptor for the directory at PATH. Use ++ AT_FDCWD if PATH is ".", to exercise that functionality as ++ well. */ ++ int path_fd; ++ if (strcmp (path, ".") == 0) ++ path_fd = AT_FDCWD; ++ else ++ { ++ path_fd = open (path, O_RDONLY | O_DIRECTORY); ++ if (path_fd < 0) ++ { ++ printf ("error: open (\"%s\"): %m\n", path); ++ exit (1); ++ } ++ } ++ ++ /* Use a hard-to-guess name for the new directory entry. 
*/ ++ char *new_name = get_random_name (); ++ ++ /* linkat does not require privileges if the path in /proc/self/fd ++ is used. */ ++ char *proc_fd_path = xasprintf ("/proc/self/fd/%d", fd); ++ if (linkat (AT_FDCWD, proc_fd_path, path_fd, new_name, ++ AT_SYMLINK_FOLLOW) == 0) ++ { ++ if (unlinkat (path_fd, new_name, 0) != 0 && errno != ENOENT) ++ { ++ printf ("error: unlinkat (\"%s/%s\"): %m\n", path, new_name); ++ exit (1); ++ } ++ } ++ else ++ { ++ /* linkat failed. This is expected if O_EXCL was specified. */ ++ if ((flags & O_EXCL) == 0) ++ { ++ printf ("error: linkat failed after %s (\"%s\", 0x%x, 0%03o): %m\n", ++ op, path, flags, mode); ++ exit (1); ++ } ++ } ++ ++ free (proc_fd_path); ++ free (new_name); ++ if (path_fd != AT_FDCWD) ++ close (path_fd); ++ close (fd); ++} ++ ++/* Check OP/WRAPPER with various flags at a specific PATH and ++ MODE. */ ++static void ++check_wrapper_mode (const char *op, wrapper_func wrapper, ++ const char *path, mode_t mode) ++{ ++ check_wrapper_flags_mode (op, wrapper, path, O_WRONLY, mode); ++ check_wrapper_flags_mode (op, wrapper, path, O_WRONLY | O_EXCL, mode); ++ check_wrapper_flags_mode (op, wrapper, path, O_RDWR, mode); ++ check_wrapper_flags_mode (op, wrapper, path, O_RDWR | O_EXCL, mode); ++} ++ ++/* Check open/openat with varying permissions. */ ++static void ++check_wrapper (const char *op, wrapper_func wrapper, ++ const char *path) ++{ ++ printf ("info: testing %s at: %s\n", op, path); ++ check_wrapper_mode (op, wrapper, path, 0); ++ check_wrapper_mode (op, wrapper, path, 0640); ++ check_wrapper_mode (op, wrapper, path, 0600); ++ check_wrapper_mode (op, wrapper, path, 0755); ++ check_wrapper_mode (op, wrapper, path, 0750); ++} ++ ++/* Verify that the directory at PATH supports O_TMPFILE. Exit with ++ status 77 (unsupported) if the kernel does not support O_TMPFILE. ++ Even with kernel support, not all file systems O_TMPFILE, so return ++ true if the directory supports O_TMPFILE, false if not. 
*/ ++static bool ++probe_path (const char *path) ++{ ++ int fd = openat (AT_FDCWD, path, O_TMPFILE | O_RDWR, 0); ++ if (fd < 0) ++ { ++ if (errno == EISDIR) ++ /* The system does not support O_TMPFILE. */ ++ { ++ printf ("info: kernel does not support O_TMPFILE\n"); ++ exit (77); ++ } ++ if (errno == EOPNOTSUPP) ++ { ++ printf ("info: path does not support O_TMPFILE: %s\n", path); ++ return false; ++ } ++ printf ("error: openat (\"%s\", O_TMPFILE | O_RDWR): %m\n", path); ++ exit (1); ++ } ++ close (fd); ++ return true; ++} ++ ++static int ++do_test (void) ++{ ++ umask (0); ++ const char *paths[] = { ".", "/dev/shm", "/tmp", ++ getenv ("TEST_TMPFILE_PATH"), ++ NULL }; ++ bool supported = false; ++ for (int i = 0; paths[i] != NULL; ++i) ++ if (probe_path (paths[i])) ++ { ++ supported = true; ++ check_wrapper ("open", wrap_open, paths[i]); ++ check_wrapper ("openat", wrap_openat, paths[i]); ++ check_wrapper ("open64", wrap_open64, paths[i]); ++ check_wrapper ("openat64", wrap_openat64, paths[i]); ++ } ++ ++ if (!supported) ++ return 77; ++ ++ return 0; ++} ++ ++#else /* !O_TMPFILE */ ++ ++static int ++do_test (void) ++{ ++ return 77; ++} ++ ++#endif /* O_TMPFILE */ +Index: b/test-skeleton.c +=================================================================== +--- a/test-skeleton.c ++++ b/test-skeleton.c +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + + /* The test function is normally called `do_test' and it is called + with argc and argv as the arguments. We nevertheless provide the +@@ -63,6 +64,20 @@ static pid_t pid; + /* Directory to place temporary files in. */ + static const char *test_dir; + ++/* Call asprintf with error checking. */ ++__attribute__ ((always_inline, format (printf, 1, 2))) ++static __inline__ char * ++xasprintf (const char *format, ...) 
++{ ++ char *result; ++ if (asprintf (&result, format, __builtin_va_arg_pack ()) < 0) ++ { ++ printf ("error: asprintf: %m\n"); ++ exit (1); ++ } ++ return result; ++} ++ + /* List of temporary files. */ + struct temp_name_list + { +Index: b/io/Makefile +=================================================================== +--- a/io/Makefile ++++ b/io/Makefile +@@ -69,7 +69,8 @@ tests := test-utime test-stat test-stat + tst-renameat tst-fchownat tst-fchmodat tst-faccessat \ + tst-symlinkat tst-linkat tst-readlinkat tst-mkdirat \ + tst-mknodat tst-mkfifoat tst-ttyname_r bug-ftw5 \ +- tst-posix_fallocate ++ tst-posix_fallocate \ ++ tst-open-tmpfile + + include ../Rules + diff --git a/SOURCES/glibc-rh1330705-6.patch b/SOURCES/glibc-rh1330705-6.patch new file mode 100644 index 0000000..85581a1 --- /dev/null +++ b/SOURCES/glibc-rh1330705-6.patch @@ -0,0 +1,94 @@ +Do not expose O_TMPFILE in a public header. + +Adjust tst-open-tmpfile so that it exits with 0 in case of missing +kernel support. + +Index: b/io/tst-open-tmpfile.c +=================================================================== +--- a/io/tst-open-tmpfile.c ++++ b/io/tst-open-tmpfile.c +@@ -33,7 +33,6 @@ static int do_test (void); + #define TEST_FUNCTION do_test () + #include "../test-skeleton.c" + +-#ifdef O_TMPFILE + typedef int (*wrapper_func) (const char *, int, mode_t); + + /* Error-checking wrapper for the open function, compatible with the +@@ -157,7 +156,7 @@ static void + check_wrapper_flags_mode (const char *op, wrapper_func wrapper, + const char *path, int flags, mode_t mode) + { +- int fd = wrapper (path, flags | O_TMPFILE, mode); ++ int fd = wrapper (path, flags | __O_TMPFILE, mode); + struct stat64 st; + if (fstat64 (fd, &st) != 0) + { +@@ -257,20 +256,20 @@ check_wrapper (const char *op, wrapper_f + } + + /* Verify that the directory at PATH supports O_TMPFILE. Exit with +- status 77 (unsupported) if the kernel does not support O_TMPFILE. 
+- Even with kernel support, not all file systems O_TMPFILE, so return +- true if the directory supports O_TMPFILE, false if not. */ ++ status 0 if the kernel does not support O_TMPFILE. Even with ++ kernel support, not all file systems O_TMPFILE, so return true if ++ the directory supports O_TMPFILE, false if not. */ + static bool + probe_path (const char *path) + { +- int fd = openat (AT_FDCWD, path, O_TMPFILE | O_RDWR, 0); ++ int fd = openat (AT_FDCWD, path, __O_TMPFILE | O_RDWR, 0); + if (fd < 0) + { + if (errno == EISDIR) + /* The system does not support O_TMPFILE. */ + { + printf ("info: kernel does not support O_TMPFILE\n"); +- exit (77); ++ exit (0); + } + if (errno == EOPNOTSUPP) + { +@@ -291,29 +290,14 @@ do_test (void) + const char *paths[] = { ".", "/dev/shm", "/tmp", + getenv ("TEST_TMPFILE_PATH"), + NULL }; +- bool supported = false; + for (int i = 0; paths[i] != NULL; ++i) + if (probe_path (paths[i])) + { +- supported = true; + check_wrapper ("open", wrap_open, paths[i]); + check_wrapper ("openat", wrap_openat, paths[i]); + check_wrapper ("open64", wrap_open64, paths[i]); + check_wrapper ("openat64", wrap_openat64, paths[i]); + } + +- if (!supported) +- return 77; +- + return 0; + } +- +-#else /* !O_TMPFILE */ +- +-static int +-do_test (void) +-{ +- return 77; +-} +- +-#endif /* O_TMPFILE */ +Index: b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h +=================================================================== +--- a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h ++++ b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h +@@ -131,7 +131,6 @@ + # define O_DIRECT __O_DIRECT /* Direct disk access. */ + # define O_NOATIME __O_NOATIME /* Do not set atime. */ + # define O_PATH __O_PATH /* Resolve pathname but do not open file. */ +-# define O_TMPFILE __O_TMPFILE /* Atomically create nameless file. 
*/ + #endif + + /* For now, Linux has no separate synchronicitiy options for read diff --git a/SOURCES/glibc-rh1337242.patch b/SOURCES/glibc-rh1337242.patch new file mode 100644 index 0000000..04194ed --- /dev/null +++ b/SOURCES/glibc-rh1337242.patch @@ -0,0 +1,62 @@ +commit b3a810d0d3d5c6ce7ddfb61321cd7971808ca703 +Author: Stefan Liebler +Date: Tue May 17 10:45:48 2016 +0200 + + Fix tst-cancel17/tst-cancelx17, which sometimes segfaults while exiting. + + The testcase tst-cancel[x]17 ends sometimes with a segmentation fault. + This happens in one of 10000 cases. Then the real testcase has already + exited with success and returned from do_test(). The segmentation fault + occurs after returning from main in _dl_fini(). + + In those cases, the aio_read(&a) was not canceled because the read + request was already in progress. In the meanwhile aio_write(ap) wrote + something to the pipe and the read request is able to read the + requested byte. + The read request hasn't finished before returning from do_test(). + After it finishes, it writes the return value and error code from the + read syscall to the struct aiocb a, which lies on the stack of do_test. + The stack of the subsequent function call of _dl_fini or _dl_sort_fini, + which is inlined in _dl_fini is corrupted. + + In case of S390, it reads a zero and decrements it by 1: + unsigned int k = nmaps - 1; + struct link_map **runp = maps[k]->l_initfini; + The load from unmapped memory leads to the segmentation fault. + The stack corruption also happens on other architectures. + I saw them e.g. on x86 and ppc, too. + + This patch adds an aio_suspend call to ensure, that the read request + is finished before returning from do_test(). + + ChangeLog: + + * nptl/tst-cancel17.c (do_test): Wait for finishing aio_read(&a). 
+ +diff --git a/nptl/tst-cancel17.c b/nptl/tst-cancel17.c +index fb89292..eedd28e 100644 +--- a/nptl/tst-cancel17.c ++++ b/nptl/tst-cancel17.c +@@ -333,6 +333,22 @@ do_test (void) + + puts ("early cancellation succeeded"); + ++ if (ap == &a2) ++ { ++ /* The aio_read(&a) was not canceled because the read request was ++ already in progress. In the meanwhile aio_write(ap) wrote something ++ to the pipe and the read request either has already been finished or ++ is able to read the requested byte. ++ Wait for the read request before returning from this function because ++ the return value and error code from the read syscall will be written ++ to the struct aiocb a, which lies on the stack of this function. ++ Otherwise the stack from subsequent function calls - e.g. _dl_fini - ++ will be corrupted, which can lead to undefined behaviour like a ++ segmentation fault. */ ++ const struct aiocb *l[1] = { &a }; ++ TEMP_FAILURE_RETRY (aio_suspend(l, 1, NULL)); ++ } ++ + return 0; + } + diff --git a/SOURCES/glibc-rh1338672.patch b/SOURCES/glibc-rh1338672.patch new file mode 100644 index 0000000..eb89666 --- /dev/null +++ b/SOURCES/glibc-rh1338672.patch @@ -0,0 +1,296 @@ +commit 3375cfafa7961c6ae0e509c31c3b3cef9ad1f03d +Author: Florian Weimer +Date: Mon May 23 19:43:09 2016 +0200 + + Make padding in struct sockaddr_storage explicit [BZ #20111] + + This avoids aliasing issues with GCC 6 in -fno-strict-aliasing + mode. (With implicit padding, not all data is copied.) + + This change makes it explicit that struct sockaddr_storage is + only 126 bytes large on m68k (unlike elsewhere, where we end up + with the requested 128 bytes). The new test case makes sure that + this does not happen on other architectures. 
+ +[modified by DJ Delorie for RHEL] + +diff -rupN a/bits/sockaddr.h b/bits/sockaddr.h +--- a/bits/sockaddr.h 2012-12-24 22:02:13.000000000 -0500 ++++ b/bits/sockaddr.h 2017-03-01 16:54:46.606261055 -0500 +@@ -1,4 +1,4 @@ +-/* Definition of `struct sockaddr_*' common members. Generic/4.2 BSD version. ++/* Definition of struct sockaddr_* common members and sizes, generic version. + Copyright (C) 1995,1996,1997,1998,2000,2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + +@@ -36,4 +36,7 @@ typedef unsigned short int sa_family_t; + + #define __SOCKADDR_COMMON_SIZE (sizeof (unsigned short int)) + ++/* Size of struct sockaddr_storage. */ ++#define _SS_SIZE 128 ++ + #endif /* bits/sockaddr.h */ +diff -rupN a/bits/socket.h b/bits/socket.h +--- a/bits/socket.h 2012-12-24 22:02:13.000000000 -0500 ++++ b/bits/socket.h 2017-03-01 16:38:24.861208175 -0500 +@@ -133,20 +133,20 @@ struct sockaddr + + + /* Structure large enough to hold any socket address (with the historical +- exception of AF_UNIX). We reserve 128 bytes. */ ++ exception of AF_UNIX). */ + #if ULONG_MAX > 0xffffffff + # define __ss_aligntype __uint64_t + #else + # define __ss_aligntype __uint32_t + #endif +-#define _SS_SIZE 128 +-#define _SS_PADSIZE (_SS_SIZE - (2 * sizeof (__ss_aligntype))) ++#define _SS_PADSIZE \ ++ (_SS_SIZE - __SOCKADDR_COMMON_SIZE - sizeof (__ss_aligntype)) + + struct sockaddr_storage + { + __SOCKADDR_COMMON (ss_); /* Address family, etc. */ +- __ss_aligntype __ss_align; /* Force desired alignment. */ + char __ss_padding[_SS_PADSIZE]; ++ __ss_aligntype __ss_align; /* Force desired alignment. 
*/ + }; + + +diff -rupN a/inet/Makefile b/inet/Makefile +--- a/inet/Makefile 2017-03-01 16:06:12.000000000 -0500 ++++ b/inet/Makefile 2017-03-01 16:55:21.919485376 -0500 +@@ -51,7 +51,7 @@ aux := check_pf check_native ifreq + + tests := htontest test_ifindex tst-ntoa tst-ether_aton tst-network \ + tst-gethnm test-ifaddrs bug-if1 test-inet6_opt tst-ether_line \ +- tst-getni1 tst-getni2 tst-inet6_rth tst-checks tst-deadline ++ tst-getni1 tst-getni2 tst-inet6_rth tst-checks tst-deadline tst-sockaddr + + # tst-deadline must be linked statically so that we can access + # internal functions. +@@ -89,6 +89,8 @@ CFLAGS-either_hton.c = -fexceptions + CFLAGS-getnetgrent.c = -fexceptions + CFLAGS-getnetgrent_r.c = -fexceptions + ++CFLAGS-tst-sockaddr.c = -fno-strict-aliasing ++ + endif + + ifeq ($(build-static-nss),yes) +diff -rupN a/inet/tst-sockaddr.c b/inet/tst-sockaddr.c +--- a/inet/tst-sockaddr.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/inet/tst-sockaddr.c 2017-03-01 16:38:24.869208278 -0500 +@@ -0,0 +1,125 @@ ++/* Tests for socket address type definitions. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* This is a copy of the previous definition of struct ++ sockaddr_storage. It is not equal to the old value of _SS_SIZE ++ (128) on all architectures. We must stay compatible with the old ++ definition. */ ++ ++#define OLD_REFERENCE_SIZE 128 ++#define OLD_PADSIZE (OLD_REFERENCE_SIZE - (2 * sizeof (__ss_aligntype))) ++struct sockaddr_storage_old ++ { ++ __SOCKADDR_COMMON (old_); ++ __ss_aligntype old_align; ++ char old_padding[OLD_PADSIZE]; ++ }; ++ ++static bool errors; ++ ++static void ++check (bool ok, const char *message) ++{ ++ if (!ok) ++ { ++ printf ("error: failed check: %s\n", message); ++ errors = true; ++ } ++} ++ ++static int ++do_test (void) ++{ ++ check (OLD_REFERENCE_SIZE >= _SS_SIZE, ++ "old target size is not smaller than actual size"); ++ check (sizeof (struct sockaddr_storage_old) ++ == sizeof (struct sockaddr_storage), ++ "old and new sizes match"); ++ check (__alignof (struct sockaddr_storage_old) ++ == __alignof (struct sockaddr_storage), ++ "old and new alignment matches"); ++ check (offsetof (struct sockaddr_storage_old, old_family) ++ == offsetof (struct sockaddr_storage, ss_family), ++ "old and new family offsets match"); ++ check (sizeof (struct sockaddr_storage) == _SS_SIZE, ++ "struct sockaddr_storage size"); ++ ++ /* Check for lack of holes in the struct definition. */ ++ check (offsetof (struct sockaddr_storage, __ss_padding) ++ == __SOCKADDR_COMMON_SIZE, ++ "implicit padding before explicit padding"); ++ check (offsetof (struct sockaddr_storage, __ss_align) ++ == __SOCKADDR_COMMON_SIZE ++ + sizeof (((struct sockaddr_storage) {}).__ss_padding), ++ "implicit padding before explicit padding"); ++ ++ /* Check for POSIX compatibility requirements between struct ++ sockaddr_storage and struct sockaddr_un. 
*/ ++ check (sizeof (struct sockaddr_storage) >= sizeof (struct sockaddr_un), ++ "sockaddr_storage is at least as large as sockaddr_un"); ++ check (__alignof (struct sockaddr_storage) ++ >= __alignof (struct sockaddr_un), ++ "sockaddr_storage is at least as aligned as sockaddr_un"); ++ check (offsetof (struct sockaddr_storage, ss_family) ++ == offsetof (struct sockaddr_un, sun_family), ++ "family offsets match"); ++ ++ /* Check that the compiler preserves bit patterns in aggregate ++ copies. Based on . */ ++ check (sizeof (struct sockaddr_storage) >= sizeof (struct sockaddr_in), ++ "sockaddr_storage is at least as large as sockaddr_in"); ++ { ++ struct sockaddr_storage addr; ++ memset (&addr, 0, sizeof (addr)); ++ { ++ struct sockaddr_in *sinp = (struct sockaddr_in *)&addr; ++ sinp->sin_family = AF_INET; ++ sinp->sin_addr.s_addr = htonl (INADDR_LOOPBACK); ++ sinp->sin_port = htons (80); ++ } ++ struct sockaddr_storage copy; ++ copy = addr; ++ ++ struct sockaddr_storage *p = malloc (sizeof (*p)); ++ if (p == NULL) ++ { ++ printf ("error: malloc: %m\n"); ++ return 1; ++ } ++ *p = copy; ++ const struct sockaddr_in *sinp = (const struct sockaddr_in *)p; ++ check (sinp->sin_family == AF_INET, "sin_family"); ++ check (sinp->sin_addr.s_addr == htonl (INADDR_LOOPBACK), "sin_addr"); ++ check (sinp->sin_port == htons (80), "sin_port"); ++ free (p); ++ } ++ ++ return errors; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" +diff -rupN a/sysdeps/mach/hurd/bits/socket.h b/sysdeps/mach/hurd/bits/socket.h +--- a/sysdeps/mach/hurd/bits/socket.h 2012-12-24 22:02:13.000000000 -0500 ++++ b/sysdeps/mach/hurd/bits/socket.h 2017-03-01 16:38:24.873208329 -0500 +@@ -156,20 +156,20 @@ struct sockaddr + + + /* Structure large enough to hold any socket address (with the historical +- exception of AF_UNIX). We reserve 128 bytes. */ ++ exception of AF_UNIX). 
*/ + #if ULONG_MAX > 0xffffffff + # define __ss_aligntype __uint64_t + #else + # define __ss_aligntype __uint32_t + #endif +-#define _SS_SIZE 128 +-#define _SS_PADSIZE (_SS_SIZE - (2 * sizeof (__ss_aligntype))) ++#define _SS_PADSIZE \ ++ (_SS_SIZE - __SOCKADDR_COMMON_SIZE - sizeof (__ss_aligntype)) + + struct sockaddr_storage + { + __SOCKADDR_COMMON (ss_); /* Address family, etc. */ +- __ss_aligntype __ss_align; /* Force desired alignment. */ + char __ss_padding[_SS_PADSIZE]; ++ __ss_aligntype __ss_align; /* Force desired alignment. */ + }; + + +diff -rupN a/sysdeps/unix/bsd/bsd4.4/bits/sockaddr.h b/sysdeps/unix/bsd/bsd4.4/bits/sockaddr.h +--- a/sysdeps/unix/bsd/bsd4.4/bits/sockaddr.h 2012-12-24 22:02:13.000000000 -0500 ++++ b/sysdeps/unix/bsd/bsd4.4/bits/sockaddr.h 2017-03-01 17:30:34.962261748 -0500 +@@ -37,6 +37,9 @@ typedef unsigned char sa_family_t; + + #define __SOCKADDR_COMMON_SIZE (2 * sizeof (unsigned char)) + ++/* Size of struct sockaddr_storage. */ ++#define _SS_SIZE 128 ++ + #define _HAVE_SA_LEN 1 /* We have the sa_len field. */ + + #endif /* bits/sockaddr.h */ +diff -rupN a/sysdeps/unix/bsd/bsd4.4/bits/socket.h b/sysdeps/unix/bsd/bsd4.4/bits/socket.h +--- a/sysdeps/unix/bsd/bsd4.4/bits/socket.h 2012-12-24 22:02:13.000000000 -0500 ++++ b/sysdeps/unix/bsd/bsd4.4/bits/socket.h 2017-03-01 17:31:23.790246360 -0500 +@@ -142,14 +142,13 @@ struct sockaddr + #else + # define __ss_aligntype __uint32_t + #endif +-#define _SS_SIZE 128 +-#define _SS_PADSIZE (_SS_SIZE - (2 * sizeof (__ss_aligntype))) ++#define _SS_PADSIZE (_SS_SIZE - __SOCKADDR_COMMON_SIZE - sizeof (__ss_aligntype)) + + struct sockaddr_storage + { + __SOCKADDR_COMMON (ss_); /* Address family, etc. */ +- __ss_aligntype __ss_align; /* Force desired alignment. */ + char __ss_padding[_SS_PADSIZE]; ++ __ss_aligntype __ss_align; /* Force desired alignment. 
*/ + }; + + +diff -rupN a/sysdeps/unix/sysv/linux/bits/socket.h b/sysdeps/unix/sysv/linux/bits/socket.h +--- a/sysdeps/unix/sysv/linux/bits/socket.h 2017-03-01 16:06:12.000000000 -0500 ++++ b/sysdeps/unix/sysv/linux/bits/socket.h 2017-03-01 16:38:26.993235460 -0500 +@@ -155,16 +155,16 @@ struct sockaddr + + + /* Structure large enough to hold any socket address (with the historical +- exception of AF_UNIX). We reserve 128 bytes. */ ++ exception of AF_UNIX). */ + #define __ss_aligntype unsigned long int +-#define _SS_SIZE 128 +-#define _SS_PADSIZE (_SS_SIZE - (2 * sizeof (__ss_aligntype))) ++#define _SS_PADSIZE \ ++ (_SS_SIZE - __SOCKADDR_COMMON_SIZE - sizeof (__ss_aligntype)) + + struct sockaddr_storage + { + __SOCKADDR_COMMON (ss_); /* Address family, etc. */ +- __ss_aligntype __ss_align; /* Force desired alignment. */ + char __ss_padding[_SS_PADSIZE]; ++ __ss_aligntype __ss_align; /* Force desired alignment. */ + }; + + diff --git a/SOURCES/glibc-rh1374652.patch b/SOURCES/glibc-rh1374652.patch new file mode 100644 index 0000000..e753e51 --- /dev/null +++ b/SOURCES/glibc-rh1374652.patch @@ -0,0 +1,1212 @@ +From 8f5e8b01a1da2a207228f2072c934fa5918554b8 Mon Sep 17 00:00:00 2001 +From: Joseph Myers +Date: Fri, 4 Dec 2015 20:36:28 +0000 +Subject: [PATCH] Fix nan functions handling of payload strings (bug 16961, bug + 16962). + +The nan, nanf and nanl functions handle payload strings by doing e.g.: + + if (tagp[0] != '\0') + { + char buf[6 + strlen (tagp)]; + sprintf (buf, "NAN(%s)", tagp); + return strtod (buf, NULL); + } + +This is an unbounded stack allocation based on the length of the +argument. Furthermore, if the argument starts with an n-char-sequence +followed by ')', that n-char-sequence is wrongly treated as +significant for determining the payload of the resulting NaN, when ISO +C says the call should be equivalent to strtod ("NAN", NULL), without +being affected by that initial n-char-sequence. 
This patch fixes both +those problems by using the __strtod_nan etc. functions recently +factored out of strtod etc. for that purpose, with those functions +being exported from libc at version GLIBC_PRIVATE. + +Tested for x86_64, x86, mips64 and powerpc. + + [BZ #16961] + [BZ #16962] + * math/s_nan.c (__nan): Use __strtod_nan instead of constructing a + string on the stack for strtod. + * math/s_nanf.c (__nanf): Use __strtof_nan instead of constructing + a string on the stack for strtof. + * math/s_nanl.c (__nanl): Use __strtold_nan instead of + constructing a string on the stack for strtold. + * stdlib/Versions (libc): Add __strtof_nan, __strtod_nan and + __strtold_nan to GLIBC_PRIVATE. + * math/test-nan-overflow.c: New file. + * math/test-nan-payload.c: Likewise. + * math/Makefile (tests): Add test-nan-overflow and + test-nan-payload. + +From e02cabecf0d025ec4f4ddee290bdf7aadb873bb3 Mon Sep 17 00:00:00 2001 +From: Joseph Myers +Date: Tue, 24 Nov 2015 22:24:52 +0000 +Subject: [PATCH] Refactor strtod parsing of NaN payloads. + +The nan* functions handle their string argument by constructing a +NAN(...) string on the stack as a VLA and passing it to strtod +functions. + +This approach has problems discussed in bug 16961 and bug 16962: the +stack usage is unbounded, and it gives incorrect results in certain +cases where the argument is not a valid n-char-sequence. + +The natural fix for both issues is to refactor the NaN payload parsing +out of strtod into a separate function that the nan* functions can +call directly, so that no temporary string needs constructing on the +stack at all. This patch does that refactoring in preparation for +fixing those bugs (but without actually using the new functions from +nan* - which will also require exporting them from libc at version +GLIBC_PRIVATE). This patch is not intended to change any user-visible +behavior, so no tests are added (fixes for the above bugs will of +course add tests for them). 
+ +This patch builds on my recent fixes for strtol and strtod issues in +Turkish locales. Given those fixes, the parsing of NaN payloads is +locale-independent; thus, the new functions do not need to take a +locale_t argument. + +Tested for x86_64, x86, mips64 and powerpc. + + * stdlib/strtod_nan.c: New file. + * stdlib/strtod_nan_double.h: Likewise. + * stdlib/strtod_nan_float.h: Likewise. + * stdlib/strtod_nan_main.c: Likewise. + * stdlib/strtod_nan_narrow.h: Likewise. + * stdlib/strtod_nan_wide.h: Likewise. + * stdlib/strtof_nan.c: Likewise. + * stdlib/strtold_nan.c: Likewise. + * sysdeps/ieee754/ldbl-128/strtod_nan_ldouble.h: Likewise. + * sysdeps/ieee754/ldbl-128ibm/strtod_nan_ldouble.h: Likewise. + * sysdeps/ieee754/ldbl-96/strtod_nan_ldouble.h: Likewise. + * wcsmbs/wcstod_nan.c: Likewise. + * wcsmbs/wcstof_nan.c: Likewise. + * wcsmbs/wcstold_nan.c: Likewise. + * stdlib/Makefile (routines): Add strtof_nan, strtod_nan and + strtold_nan. + * wcsmbs/Makefile (routines): Add wcstod_nan, wcstold_nan and + wcstof_nan. + * include/stdlib.h (__strtof_nan): Declare and use + libc_hidden_proto. + (__strtod_nan): Likewise. + (__strtold_nan): Likewise. + (__wcstof_nan): Likewise. + (__wcstod_nan): Likewise. + (__wcstold_nan): Likewise. + * include/wchar.h (____wcstoull_l_internal): Declare. + * stdlib/strtod_l.c: Do not include . + (____strtoull_l_internal): Remove declaration. + (STRTOF_NAN): Define macro. + (SET_MANTISSA): Remove macro. + (STRTOULL): Likewise. + (____STRTOF_INTERNAL): Use STRTOF_NAN to parse NaN payload. + * stdlib/strtof_l.c (____strtoull_l_internal): Remove declaration. + (STRTOF_NAN): Define macro. + (SET_MANTISSA): Remove macro. + * sysdeps/ieee754/ldbl-128/strtold_l.c (STRTOF_NAN): Define macro. + (SET_MANTISSA): Remove macro. + * sysdeps/ieee754/ldbl-128ibm/strtold_l.c (STRTOF_NAN): Define + macro. + (SET_MANTISSA): Remove macro. + * sysdeps/ieee754/ldbl-64-128/strtold_l.c (STRTOF_NAN): Define + macro. + (SET_MANTISSA): Remove macro. 
+ * sysdeps/ieee754/ldbl-96/strtold_l.c (STRTOF_NAN): Define macro. + (SET_MANTISSA): Remove macro. + * wcsmbs/wcstod_l.c (____wcstoull_l_internal): Remove declaration. + * wcsmbs/wcstof_l.c (____wcstoull_l_internal): Likewise. + * wcsmbs/wcstold_l.c (____wcstoull_l_internal): Likewise. + +diff -rupN a/include/stdlib.h b/include/stdlib.h +--- a/include/stdlib.h 2017-03-02 16:34:01.000000000 -0500 ++++ b/include/stdlib.h 2017-03-02 16:45:05.457639119 -0500 +@@ -193,6 +193,24 @@ libc_hidden_proto (strtoll) + libc_hidden_proto (strtoul) + libc_hidden_proto (strtoull) + ++extern float __strtof_nan (const char *, char **, char) internal_function; ++extern double __strtod_nan (const char *, char **, char) internal_function; ++extern long double __strtold_nan (const char *, char **, char) ++ internal_function; ++extern float __wcstof_nan (const wchar_t *, wchar_t **, wchar_t) ++ internal_function; ++extern double __wcstod_nan (const wchar_t *, wchar_t **, wchar_t) ++ internal_function; ++extern long double __wcstold_nan (const wchar_t *, wchar_t **, wchar_t) ++ internal_function; ++ ++libc_hidden_proto (__strtof_nan) ++libc_hidden_proto (__strtod_nan) ++libc_hidden_proto (__strtold_nan) ++libc_hidden_proto (__wcstof_nan) ++libc_hidden_proto (__wcstod_nan) ++libc_hidden_proto (__wcstold_nan) ++ + extern char *__ecvt (double __value, int __ndigit, int *__restrict __decpt, + int *__restrict __sign); + extern char *__fcvt (double __value, int __ndigit, int *__restrict __decpt, +diff -rupN a/include/wchar.h b/include/wchar.h +--- a/include/wchar.h 2012-12-24 22:02:13.000000000 -0500 ++++ b/include/wchar.h 2017-03-02 16:45:05.461639109 -0500 +@@ -52,6 +52,9 @@ extern unsigned long long int __wcstoull + __restrict __endptr, + int __base, + int __group) __THROW; ++extern unsigned long long int ____wcstoull_l_internal (const wchar_t *, ++ wchar_t **, int, int, ++ __locale_t); + libc_hidden_proto (__wcstof_internal) + libc_hidden_proto (__wcstod_internal) + libc_hidden_proto 
(__wcstold_internal) +diff -rupN a/math/Makefile b/math/Makefile +--- a/math/Makefile 2017-03-02 16:34:02.000000000 -0500 ++++ b/math/Makefile 2017-03-02 16:44:30.659725844 -0500 +@@ -88,7 +88,8 @@ long-c-yes = $(calls:=l) + tests = test-matherr test-fenv atest-exp atest-sincos atest-exp2 basic-test \ + test-misc test-fpucw tst-definitions test-tgmath test-tgmath-ret \ + bug-nextafter bug-nexttoward bug-tgmath1 test-tgmath-int \ +- test-tgmath2 test-powl tst-CMPLX tst-CMPLX2 ++ test-tgmath2 test-powl tst-CMPLX tst-CMPLX2 \ ++ test-nan-overflow test-nan-payload + # We do the `long double' tests only if this data type is available and + # distinct from `double'. + test-longdouble-yes = test-ldouble test-ildoubl +diff -rupN a/math/s_nan.c b/math/s_nan.c +--- a/math/s_nan.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/math/s_nan.c 2017-03-02 16:43:01.680999065 -0500 +@@ -28,14 +28,7 @@ + double + __nan (const char *tagp) + { +- if (tagp[0] != '\0') +- { +- char buf[6 + strlen (tagp)]; +- sprintf (buf, "NAN(%s)", tagp); +- return strtod (buf, NULL); +- } +- +- return NAN; ++ return __strtod_nan (tagp, NULL, 0); + } + weak_alias (__nan, nan) + #ifdef NO_LONG_DOUBLE +diff -rupN a/math/s_nanf.c b/math/s_nanf.c +--- a/math/s_nanf.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/math/s_nanf.c 2017-03-02 16:43:01.683999054 -0500 +@@ -28,13 +28,6 @@ + float + __nanf (const char *tagp) + { +- if (tagp[0] != '\0') +- { +- char buf[6 + strlen (tagp)]; +- sprintf (buf, "NAN(%s)", tagp); +- return strtof (buf, NULL); +- } +- +- return NAN; ++ return __strtof_nan (tagp, NULL, 0); + } + weak_alias (__nanf, nanf) +diff -rupN a/math/s_nanl.c b/math/s_nanl.c +--- a/math/s_nanl.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/math/s_nanl.c 2017-03-02 16:43:01.686999044 -0500 +@@ -28,13 +28,6 @@ + long double + __nanl (const char *tagp) + { +- if (tagp[0] != '\0') +- { +- char buf[6 + strlen (tagp)]; +- sprintf (buf, "NAN(%s)", tagp); +- return strtold (buf, NULL); +- } +- +- return NAN; ++ return 
__strtold_nan (tagp, NULL, 0); + } + weak_alias (__nanl, nanl) +diff -rupN a/math/test-nan-overflow.c b/math/test-nan-overflow.c +--- a/math/test-nan-overflow.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/math/test-nan-overflow.c 2017-03-02 16:43:01.689999033 -0500 +@@ -0,0 +1,66 @@ ++/* Test nan functions stack overflow (bug 16962). ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++ ++#define STACK_LIM 1048576 ++#define STRING_SIZE (2 * STACK_LIM) ++ ++static int ++do_test (void) ++{ ++ int result = 0; ++ struct rlimit lim; ++ getrlimit (RLIMIT_STACK, &lim); ++ lim.rlim_cur = STACK_LIM; ++ setrlimit (RLIMIT_STACK, &lim); ++ char *nanstr = malloc (STRING_SIZE); ++ if (nanstr == NULL) ++ { ++ puts ("malloc failed, cannot test"); ++ return 77; ++ } ++ memset (nanstr, '0', STRING_SIZE - 1); ++ nanstr[STRING_SIZE - 1] = 0; ++#define NAN_TEST(TYPE, FUNC) \ ++ do \ ++ { \ ++ char *volatile p = nanstr; \ ++ volatile TYPE v = FUNC (p); \ ++ if (isnan (v)) \ ++ puts ("PASS: " #FUNC); \ ++ else \ ++ { \ ++ puts ("FAIL: " #FUNC); \ ++ result = 1; \ ++ } \ ++ } \ ++ while (0) ++ NAN_TEST (float, nanf); ++ NAN_TEST (double, nan); ++#ifndef NO_LONG_DOUBLE ++ NAN_TEST (long double, nanl); ++#endif ++ return result; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" +diff -rupN a/math/test-nan-payload.c b/math/test-nan-payload.c +--- a/math/test-nan-payload.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/math/test-nan-payload.c 2017-03-02 16:43:01.693999019 -0500 +@@ -0,0 +1,122 @@ ++/* Test nan functions payload handling (bug 16961). ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* Avoid built-in functions. */ ++#define WRAP_NAN(FUNC, STR) \ ++ ({ const char *volatile wns = (STR); FUNC (wns); }) ++#define WRAP_STRTO(FUNC, STR) \ ++ ({ const char *volatile wss = (STR); FUNC (wss, NULL); }) ++ ++#define CHECK_IS_NAN(TYPE, A) \ ++ do \ ++ { \ ++ if (isnan (A)) \ ++ puts ("PASS: " #TYPE " " #A); \ ++ else \ ++ { \ ++ puts ("FAIL: " #TYPE " " #A); \ ++ result = 1; \ ++ } \ ++ } \ ++ while (0) ++ ++#define CHECK_SAME_NAN(TYPE, A, B) \ ++ do \ ++ { \ ++ if (memcmp (&(A), &(B), sizeof (A)) == 0) \ ++ puts ("PASS: " #TYPE " " #A " = " #B); \ ++ else \ ++ { \ ++ puts ("FAIL: " #TYPE " " #A " = " #B); \ ++ result = 1; \ ++ } \ ++ } \ ++ while (0) ++ ++#define CHECK_DIFF_NAN(TYPE, A, B) \ ++ do \ ++ { \ ++ if (memcmp (&(A), &(B), sizeof (A)) != 0) \ ++ puts ("PASS: " #TYPE " " #A " != " #B); \ ++ else \ ++ { \ ++ puts ("FAIL: " #TYPE " " #A " != " #B); \ ++ result = 1; \ ++ } \ ++ } \ ++ while (0) ++ ++/* Cannot test payloads by memcmp for formats where NaNs have padding ++ bits. 
*/ ++#define CAN_TEST_EQ(MANT_DIG) ((MANT_DIG) != 64 && (MANT_DIG) != 106) ++ ++#define RUN_TESTS(TYPE, SFUNC, FUNC, MANT_DIG) \ ++ do \ ++ { \ ++ TYPE n123 = WRAP_NAN (FUNC, "123"); \ ++ CHECK_IS_NAN (TYPE, n123); \ ++ TYPE s123 = WRAP_STRTO (SFUNC, "NAN(123)"); \ ++ CHECK_IS_NAN (TYPE, s123); \ ++ TYPE n456 = WRAP_NAN (FUNC, "456"); \ ++ CHECK_IS_NAN (TYPE, n456); \ ++ TYPE s456 = WRAP_STRTO (SFUNC, "NAN(456)"); \ ++ CHECK_IS_NAN (TYPE, s456); \ ++ TYPE n123x = WRAP_NAN (FUNC, "123)"); \ ++ CHECK_IS_NAN (TYPE, n123x); \ ++ TYPE nemp = WRAP_NAN (FUNC, ""); \ ++ CHECK_IS_NAN (TYPE, nemp); \ ++ TYPE semp = WRAP_STRTO (SFUNC, "NAN()"); \ ++ CHECK_IS_NAN (TYPE, semp); \ ++ TYPE sx = WRAP_STRTO (SFUNC, "NAN"); \ ++ CHECK_IS_NAN (TYPE, sx); \ ++ if (CAN_TEST_EQ (MANT_DIG)) \ ++ CHECK_SAME_NAN (TYPE, n123, s123); \ ++ if (CAN_TEST_EQ (MANT_DIG)) \ ++ CHECK_SAME_NAN (TYPE, n456, s456); \ ++ if (CAN_TEST_EQ (MANT_DIG)) \ ++ CHECK_SAME_NAN (TYPE, nemp, semp); \ ++ if (CAN_TEST_EQ (MANT_DIG)) \ ++ CHECK_SAME_NAN (TYPE, n123x, sx); \ ++ CHECK_DIFF_NAN (TYPE, n123, n456); \ ++ CHECK_DIFF_NAN (TYPE, n123, nemp); \ ++ CHECK_DIFF_NAN (TYPE, n123, n123x); \ ++ CHECK_DIFF_NAN (TYPE, n456, nemp); \ ++ CHECK_DIFF_NAN (TYPE, n456, n123x); \ ++ } \ ++ while (0) ++ ++static int ++do_test (void) ++{ ++ int result = 0; ++ RUN_TESTS (float, strtof, nanf, FLT_MANT_DIG); ++ RUN_TESTS (double, strtod, nan, DBL_MANT_DIG); ++#ifndef NO_LONG_DOUBLE ++ RUN_TESTS (long double, strtold, nanl, LDBL_MANT_DIG); ++#endif ++ return result; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" +diff -rupN a/stdlib/Makefile b/stdlib/Makefile +--- a/stdlib/Makefile 2017-03-02 16:34:02.000000000 -0500 ++++ b/stdlib/Makefile 2017-03-02 16:45:05.463639105 -0500 +@@ -47,6 +47,7 @@ routines := \ + strtol_l strtoul_l strtoll_l strtoull_l \ + strtof strtod strtold \ + strtof_l strtod_l strtold_l \ ++ strtof_nan strtod_nan strtold_nan \ + system canonicalize \ + a64l l64a \ + rpmatch strfmon 
strfmon_l getsubopt xpg_basename fmtmsg \ +diff -rupN a/stdlib/Versions b/stdlib/Versions +--- a/stdlib/Versions 2012-12-24 22:02:13.000000000 -0500 ++++ b/stdlib/Versions 2017-03-02 16:44:52.140671064 -0500 +@@ -114,5 +114,6 @@ libc { + __abort_msg; + # Used from other libraries + __libc_secure_getenv; ++ __strtof_nan; __strtod_nan; __strtold_nan; + } + } +diff -rupN a/stdlib/strtod_l.c b/stdlib/strtod_l.c +--- a/stdlib/strtod_l.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/stdlib/strtod_l.c 2017-03-02 16:59:50.224590342 -0500 +@@ -20,8 +20,6 @@ + #include + + extern double ____strtod_l_internal (const char *, char **, int, __locale_t); +-extern unsigned long long int ____strtoull_l_internal (const char *, char **, +- int, int, __locale_t); + + /* Configuration part. These macros are defined by `strtold.c', + `strtof.c', `wcstod.c', `wcstold.c', and `wcstof.c' to produce the +@@ -33,28 +31,20 @@ extern unsigned long long int ____strtou + # ifdef USE_WIDE_CHAR + # define STRTOF wcstod_l + # define __STRTOF __wcstod_l ++# define STRTOF_NAN __wcstod_nan + # else + # define STRTOF strtod_l + # define __STRTOF __strtod_l ++# define STRTOF_NAN __strtod_nan + # endif + # define MPN2FLOAT __mpn_construct_double + # define FLOAT_HUGE_VAL HUGE_VAL +-# define SET_MANTISSA(flt, mant) \ +- do { union ieee754_double u; \ +- u.d = (flt); \ +- if ((mant & 0xfffffffffffffULL) == 0) \ +- mant = 0x8000000000000ULL; \ +- u.ieee.mantissa0 = ((mant) >> 32) & 0xfffff; \ +- u.ieee.mantissa1 = (mant) & 0xffffffff; \ +- (flt) = u.d; \ +- } while (0) + #endif + /* End of configuration part. 
*/ + + #include + #include + #include +-#include + #include "../locale/localeinfo.h" + #include + #include +@@ -105,7 +95,6 @@ extern unsigned long long int ____strtou + # define TOLOWER_C(Ch) __towlower_l ((Ch), _nl_C_locobj_ptr) + # define STRNCASECMP(S1, S2, N) \ + __wcsncasecmp_l ((S1), (S2), (N), _nl_C_locobj_ptr) +-# define STRTOULL(S, E, B) ____wcstoull_l_internal ((S), (E), (B), 0, loc) + #else + # define STRING_TYPE char + # define CHAR_TYPE char +@@ -117,7 +106,6 @@ extern unsigned long long int ____strtou + # define TOLOWER_C(Ch) __tolower_l ((Ch), _nl_C_locobj_ptr) + # define STRNCASECMP(S1, S2, N) \ + __strncasecmp_l ((S1), (S2), (N), _nl_C_locobj_ptr) +-# define STRTOULL(S, E, B) ____strtoull_l_internal ((S), (E), (B), 0, loc) + #endif + + +@@ -649,33 +637,14 @@ ____STRTOF_INTERNAL (nptr, endptr, group + if (*cp == L_('(')) + { + const STRING_TYPE *startp = cp; +- do +- ++cp; +- while ((*cp >= L_('0') && *cp <= L_('9')) +- || ({ CHAR_TYPE lo = TOLOWER (*cp); +- lo >= L_('a') && lo <= L_('z'); }) +- || *cp == L_('_')); +- +- if (*cp != L_(')')) +- /* The closing brace is missing. Only match the NAN +- part. */ +- cp = startp; ++ STRING_TYPE *endp; ++ retval = STRTOF_NAN (cp + 1, &endp, L_(')')); ++ if (*endp == L_(')')) ++ /* Consume the closing parenthesis. */ ++ cp = endp + 1; + else +- { +- /* This is a system-dependent way to specify the +- bitmask used for the NaN. We expect it to be +- a number which is put in the mantissa of the +- number. */ +- STRING_TYPE *endp; +- unsigned long long int mant; +- +- mant = STRTOULL (startp + 1, &endp, 0); +- if (endp == cp) +- SET_MANTISSA (retval, mant); +- +- /* Consume the closing brace. */ +- ++cp; +- } ++ /* Only match the NAN part. 
*/ ++ cp = startp; + } + + if (endptr != NULL) +diff -rupN a/stdlib/strtod_nan.c b/stdlib/strtod_nan.c +--- a/stdlib/strtod_nan.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/stdlib/strtod_nan.c 2017-03-02 16:45:05.473639081 -0500 +@@ -0,0 +1,24 @@ ++/* Convert string for NaN payload to corresponding NaN. Narrow ++ strings, double. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#define STRTOD_NAN __strtod_nan ++#include +diff -rupN a/stdlib/strtod_nan_double.h b/stdlib/strtod_nan_double.h +--- a/stdlib/strtod_nan_double.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/stdlib/strtod_nan_double.h 2017-03-02 16:45:05.477639072 -0500 +@@ -0,0 +1,30 @@ ++/* Convert string for NaN payload to corresponding NaN. For double. ++ Copyright (C) 1997-2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define FLOAT double ++#define SET_MANTISSA(flt, mant) \ ++ do \ ++ { \ ++ union ieee754_double u; \ ++ u.d = (flt); \ ++ u.ieee_nan.mantissa0 = (mant) >> 32; \ ++ u.ieee_nan.mantissa1 = (mant); \ ++ if ((u.ieee.mantissa0 | u.ieee.mantissa1) != 0) \ ++ (flt) = u.d; \ ++ } \ ++ while (0) +diff -rupN a/stdlib/strtod_nan_float.h b/stdlib/strtod_nan_float.h +--- a/stdlib/strtod_nan_float.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/stdlib/strtod_nan_float.h 2017-03-02 16:45:05.480639065 -0500 +@@ -0,0 +1,29 @@ ++/* Convert string for NaN payload to corresponding NaN. For float. ++ Copyright (C) 1997-2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define FLOAT float ++#define SET_MANTISSA(flt, mant) \ ++ do \ ++ { \ ++ union ieee754_float u; \ ++ u.f = (flt); \ ++ u.ieee_nan.mantissa = (mant); \ ++ if (u.ieee.mantissa != 0) \ ++ (flt) = u.f; \ ++ } \ ++ while (0) +diff -rupN a/stdlib/strtod_nan_main.c b/stdlib/strtod_nan_main.c +--- a/stdlib/strtod_nan_main.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/stdlib/strtod_nan_main.c 2017-03-02 16:45:05.483639058 -0500 +@@ -0,0 +1,63 @@ ++/* Convert string for NaN payload to corresponding NaN. ++ Copyright (C) 1997-2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++/* If STR starts with an optional n-char-sequence as defined by ISO C ++ (a sequence of ASCII letters, digits and underscores), followed by ++ ENDC, return a NaN whose payload is set based on STR. Otherwise, ++ return a default NAN. If ENDPTR is not NULL, set *ENDPTR to point ++ to the character after the initial n-char-sequence. 
*/ ++ ++internal_function ++FLOAT ++STRTOD_NAN (const STRING_TYPE *str, STRING_TYPE **endptr, STRING_TYPE endc) ++{ ++ const STRING_TYPE *cp = str; ++ ++ while ((*cp >= L_('0') && *cp <= L_('9')) ++ || (*cp >= L_('A') && *cp <= L_('Z')) ++ || (*cp >= L_('a') && *cp <= L_('z')) ++ || *cp == L_('_')) ++ ++cp; ++ ++ FLOAT retval = NAN; ++ if (*cp != endc) ++ goto out; ++ ++ /* This is a system-dependent way to specify the bitmask used for ++ the NaN. We expect it to be a number which is put in the ++ mantissa of the number. */ ++ STRING_TYPE *endp; ++ unsigned long long int mant; ++ ++ mant = STRTOULL (str, &endp, 0); ++ if (endp == cp) ++ SET_MANTISSA (retval, mant); ++ ++ out: ++ if (endptr != NULL) ++ *endptr = (STRING_TYPE *) cp; ++ return retval; ++} ++libc_hidden_def (STRTOD_NAN) +diff -rupN a/stdlib/strtod_nan_narrow.h b/stdlib/strtod_nan_narrow.h +--- a/stdlib/strtod_nan_narrow.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/stdlib/strtod_nan_narrow.h 2017-03-02 16:45:05.486639051 -0500 +@@ -0,0 +1,22 @@ ++/* Convert string for NaN payload to corresponding NaN. Narrow strings. ++ Copyright (C) 1997-2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define STRING_TYPE char ++#define L_(Ch) Ch ++#define STRTOULL(S, E, B) ____strtoull_l_internal ((S), (E), (B), 0, \ ++ _nl_C_locobj_ptr) +diff -rupN a/stdlib/strtod_nan_wide.h b/stdlib/strtod_nan_wide.h +--- a/stdlib/strtod_nan_wide.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/stdlib/strtod_nan_wide.h 2017-03-02 16:45:05.489639044 -0500 +@@ -0,0 +1,22 @@ ++/* Convert string for NaN payload to corresponding NaN. Wide strings. ++ Copyright (C) 1997-2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define STRING_TYPE wchar_t ++#define L_(Ch) L##Ch ++#define STRTOULL(S, E, B) ____wcstoull_l_internal ((S), (E), (B), 0, \ ++ _nl_C_locobj_ptr) +diff -rupN a/stdlib/strtof_l.c b/stdlib/strtof_l.c +--- a/stdlib/strtof_l.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/stdlib/strtof_l.c 2017-03-02 17:06:34.349227993 -0500 +@@ -20,27 +20,19 @@ + #include + + extern float ____strtof_l_internal (const char *, char **, int, __locale_t); +-extern unsigned long long int ____strtoull_l_internal (const char *, char **, +- int, int, __locale_t); + + #define FLOAT float + #define FLT FLT + #ifdef USE_WIDE_CHAR + # define STRTOF wcstof_l + # define __STRTOF __wcstof_l ++# define STRTOF_NAN __wcstof_nan + #else + # define STRTOF strtof_l + # define __STRTOF __strtof_l ++# define STRTOF_NAN __strtof_nan + #endif + #define MPN2FLOAT __mpn_construct_float + #define FLOAT_HUGE_VAL HUGE_VALF +-#define SET_MANTISSA(flt, mant) \ +- do { union ieee754_float u; \ +- u.f = (flt); \ +- if ((mant & 0x7fffff) == 0) \ +- mant = 0x400000; \ +- u.ieee.mantissa = (mant) & 0x7fffff; \ +- (flt) = u.f; \ +- } while (0) + + #include "strtod_l.c" +diff -rupN a/stdlib/strtof_nan.c b/stdlib/strtof_nan.c +--- a/stdlib/strtof_nan.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/stdlib/strtof_nan.c 2017-03-02 16:45:05.498639023 -0500 +@@ -0,0 +1,24 @@ ++/* Convert string for NaN payload to corresponding NaN. Narrow ++ strings, float. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#define STRTOD_NAN __strtof_nan ++#include +diff -rupN a/stdlib/strtold_nan.c b/stdlib/strtold_nan.c +--- a/stdlib/strtold_nan.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/stdlib/strtold_nan.c 2017-03-02 16:45:05.501639016 -0500 +@@ -0,0 +1,30 @@ ++/* Convert string for NaN payload to corresponding NaN. Narrow ++ strings, long double. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++/* This function is unused if long double and double have the same ++ representation. */ ++#ifndef __NO_LONG_DOUBLE_MATH ++# include ++# include ++ ++# define STRTOD_NAN __strtold_nan ++# include ++#endif +diff -rupN a/sysdeps/ieee754/ldbl-128/strtod_nan_ldouble.h b/sysdeps/ieee754/ldbl-128/strtod_nan_ldouble.h +--- a/sysdeps/ieee754/ldbl-128/strtod_nan_ldouble.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/sysdeps/ieee754/ldbl-128/strtod_nan_ldouble.h 2017-03-02 16:45:05.502639014 -0500 +@@ -0,0 +1,33 @@ ++/* Convert string for NaN payload to corresponding NaN. For ldbl-128. ++ Copyright (C) 1997-2015 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define FLOAT long double ++#define SET_MANTISSA(flt, mant) \ ++ do \ ++ { \ ++ union ieee854_long_double u; \ ++ u.d = (flt); \ ++ u.ieee_nan.mantissa0 = 0; \ ++ u.ieee_nan.mantissa1 = 0; \ ++ u.ieee_nan.mantissa2 = (mant) >> 32; \ ++ u.ieee_nan.mantissa3 = (mant); \ ++ if ((u.ieee.mantissa0 | u.ieee.mantissa1 \ ++ | u.ieee.mantissa2 | u.ieee.mantissa3) != 0) \ ++ (flt) = u.d; \ ++ } \ ++ while (0) +diff -rupN a/sysdeps/ieee754/ldbl-128/strtold_l.c b/sysdeps/ieee754/ldbl-128/strtold_l.c +--- a/sysdeps/ieee754/ldbl-128/strtold_l.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/sysdeps/ieee754/ldbl-128/strtold_l.c 2017-03-02 17:07:43.540018882 -0500 +@@ -25,20 +25,13 @@ + #ifdef USE_WIDE_CHAR + # define STRTOF wcstold_l + # define __STRTOF __wcstold_l ++# define STRTOF_NAN __wcstold_nan + #else + # define STRTOF strtold_l + # define __STRTOF __strtold_l ++# define STRTOF_NAN __strtold_nan + #endif + #define MPN2FLOAT __mpn_construct_long_double + #define FLOAT_HUGE_VAL HUGE_VALL +-#define SET_MANTISSA(flt, mant) \ +- do { union ieee854_long_double u; \ +- u.d = (flt); \ +- u.ieee.mantissa0 = 0x8000; \ +- u.ieee.mantissa1 = 0; \ +- u.ieee.mantissa2 = ((mant) >> 32); \ +- u.ieee.mantissa3 = (mant) & 0xffffffff; \ +- (flt) = u.d; \ +- } while (0) + + #include +diff -rupN 
a/sysdeps/ieee754/ldbl-128ibm/strtod_nan_ldouble.h b/sysdeps/ieee754/ldbl-128ibm/strtod_nan_ldouble.h +--- a/sysdeps/ieee754/ldbl-128ibm/strtod_nan_ldouble.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/sysdeps/ieee754/ldbl-128ibm/strtod_nan_ldouble.h 2017-03-02 16:45:05.505639007 -0500 +@@ -0,0 +1,30 @@ ++/* Convert string for NaN payload to corresponding NaN. For ldbl-128ibm. ++ Copyright (C) 1997-2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define FLOAT long double ++#define SET_MANTISSA(flt, mant) \ ++ do \ ++ { \ ++ union ibm_extended_long_double u; \ ++ u.ld = (flt); \ ++ u.d[0].ieee_nan.mantissa0 = (mant) >> 32; \ ++ u.d[0].ieee_nan.mantissa1 = (mant); \ ++ if ((u.d[0].ieee.mantissa0 | u.d[0].ieee.mantissa1) != 0) \ ++ (flt) = u.ld; \ ++ } \ ++ while (0) +diff -rupN a/sysdeps/ieee754/ldbl-128ibm/strtold_l.c b/sysdeps/ieee754/ldbl-128ibm/strtold_l.c +--- a/sysdeps/ieee754/ldbl-128ibm/strtold_l.c 2017-03-02 16:33:54.000000000 -0500 ++++ b/sysdeps/ieee754/ldbl-128ibm/strtold_l.c 2017-03-02 17:10:22.516584043 -0500 +@@ -30,25 +30,19 @@ extern long double ____new_wcstold_l (co + # define STRTOF __new_wcstold_l + # define __STRTOF ____new_wcstold_l + # define ____STRTOF_INTERNAL ____wcstold_l_internal ++# define STRTOF_NAN __wcstold_nan + #else + extern long double ____new_strtold_l (const char *, char **, __locale_t); + # define STRTOF __new_strtold_l + # define __STRTOF ____new_strtold_l + # define ____STRTOF_INTERNAL ____strtold_l_internal ++# define STRTOF_NAN __strtold_nan + #endif + extern __typeof (__STRTOF) STRTOF; + libc_hidden_proto (__STRTOF) + libc_hidden_proto (STRTOF) + #define MPN2FLOAT __mpn_construct_long_double + #define FLOAT_HUGE_VAL HUGE_VALL +-# define SET_MANTISSA(flt, mant) \ +- do { union ibm_extended_long_double u; \ +- u.ld = (flt); \ +- u.d[0].ieee_nan.mantissa0 = (mant) >> 32; \ +- u.d[0].ieee_nan.mantissa1 = (mant); \ +- if ((u.d[0].ieee.mantissa0 | u.d[0].ieee.mantissa1) != 0) \ +- (flt) = u.ld; \ +- } while (0) + + #include + +diff -rupN a/sysdeps/ieee754/ldbl-64-128/strtold_l.c b/sysdeps/ieee754/ldbl-64-128/strtold_l.c +--- a/sysdeps/ieee754/ldbl-64-128/strtold_l.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/sysdeps/ieee754/ldbl-64-128/strtold_l.c 2017-03-02 17:11:06.062475088 -0500 +@@ -30,26 +30,19 @@ extern long double ____new_wcstold_l (co + # define STRTOF __new_wcstold_l + # define __STRTOF ____new_wcstold_l + # define ____STRTOF_INTERNAL 
____wcstold_l_internal ++# define STRTOF_NAN __wcstold_nan + #else + extern long double ____new_strtold_l (const char *, char **, __locale_t); + # define STRTOF __new_strtold_l + # define __STRTOF ____new_strtold_l + # define ____STRTOF_INTERNAL ____strtold_l_internal ++# define STRTOF_NAN __strtold_nan + #endif + extern __typeof (__STRTOF) STRTOF; + libc_hidden_proto (__STRTOF) + libc_hidden_proto (STRTOF) + #define MPN2FLOAT __mpn_construct_long_double + #define FLOAT_HUGE_VAL HUGE_VALL +-#define SET_MANTISSA(flt, mant) \ +- do { union ieee854_long_double u; \ +- u.d = (flt); \ +- u.ieee.mantissa0 = 0x8000; \ +- u.ieee.mantissa1 = 0; \ +- u.ieee.mantissa2 = ((mant) >> 32); \ +- u.ieee.mantissa3 = (mant) & 0xffffffff; \ +- (flt) = u.d; \ +- } while (0) + + #include + +diff -rupN a/sysdeps/ieee754/ldbl-96/strtod_nan_ldouble.h b/sysdeps/ieee754/ldbl-96/strtod_nan_ldouble.h +--- a/sysdeps/ieee754/ldbl-96/strtod_nan_ldouble.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/sysdeps/ieee754/ldbl-96/strtod_nan_ldouble.h 2017-03-02 16:45:05.521638969 -0500 +@@ -0,0 +1,30 @@ ++/* Convert string for NaN payload to corresponding NaN. For ldbl-96. ++ Copyright (C) 1997-2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define FLOAT long double ++#define SET_MANTISSA(flt, mant) \ ++ do \ ++ { \ ++ union ieee854_long_double u; \ ++ u.d = (flt); \ ++ u.ieee_nan.mantissa0 = (mant) >> 32; \ ++ u.ieee_nan.mantissa1 = (mant); \ ++ if ((u.ieee.mantissa0 | u.ieee.mantissa1) != 0) \ ++ (flt) = u.d; \ ++ } \ ++ while (0) +diff -rupN a/sysdeps/ieee754/ldbl-96/strtold_l.c b/sysdeps/ieee754/ldbl-96/strtold_l.c +--- a/sysdeps/ieee754/ldbl-96/strtold_l.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/sysdeps/ieee754/ldbl-96/strtold_l.c 2017-03-02 17:11:52.927362322 -0500 +@@ -25,20 +25,13 @@ + #ifdef USE_WIDE_CHAR + # define STRTOF wcstold_l + # define __STRTOF __wcstold_l ++# define STRTOF_NAN __wcstold_nan + #else + # define STRTOF strtold_l + # define __STRTOF __strtold_l ++# define STRTOF_NAN __strtold_nan + #endif + #define MPN2FLOAT __mpn_construct_long_double + #define FLOAT_HUGE_VAL HUGE_VALL +-#define SET_MANTISSA(flt, mant) \ +- do { union ieee854_long_double u; \ +- u.d = (flt); \ +- if ((mant & 0x7fffffffffffffffULL) == 0) \ +- mant = 0x4000000000000000ULL; \ +- u.ieee.mantissa0 = (((mant) >> 32) & 0x7fffffff) | 0x80000000; \ +- u.ieee.mantissa1 = (mant) & 0xffffffff; \ +- (flt) = u.d; \ +- } while (0) + + #include +diff -rupN a/wcsmbs/Makefile b/wcsmbs/Makefile +--- a/wcsmbs/Makefile 2017-03-02 16:33:59.000000000 -0500 ++++ b/wcsmbs/Makefile 2017-03-02 16:45:05.529638950 -0500 +@@ -32,6 +32,7 @@ routines := wcscat wcschr wcscmp wcscpy + wcstol wcstoul wcstoll wcstoull wcstod wcstold wcstof \ + wcstol_l wcstoul_l wcstoll_l wcstoull_l \ + wcstod_l wcstold_l wcstof_l \ ++ wcstod_nan wcstold_nan wcstof_nan \ + wcscoll wcsxfrm \ + wcwidth wcswidth \ + wcscoll_l wcsxfrm_l \ +diff -rupN a/wcsmbs/wcstod_l.c b/wcsmbs/wcstod_l.c +--- a/wcsmbs/wcstod_l.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/wcsmbs/wcstod_l.c 2017-03-02 16:45:05.532638943 -0500 +@@ -23,9 +23,6 @@ + + extern double ____wcstod_l_internal (const wchar_t *, wchar_t **, int, + __locale_t); +-extern unsigned long long int 
____wcstoull_l_internal (const wchar_t *, +- wchar_t **, int, int, +- __locale_t); + + #define USE_WIDE_CHAR 1 + +diff -rupN a/wcsmbs/wcstod_nan.c b/wcsmbs/wcstod_nan.c +--- a/wcsmbs/wcstod_nan.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/wcsmbs/wcstod_nan.c 2017-03-02 16:45:05.535638936 -0500 +@@ -0,0 +1,23 @@ ++/* Convert string for NaN payload to corresponding NaN. Wide strings, double. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "../stdlib/strtod_nan_wide.h" ++#include "../stdlib/strtod_nan_double.h" ++ ++#define STRTOD_NAN __wcstod_nan ++#include "../stdlib/strtod_nan_main.c" +diff -rupN a/wcsmbs/wcstof_l.c b/wcsmbs/wcstof_l.c +--- a/wcsmbs/wcstof_l.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/wcsmbs/wcstof_l.c 2017-03-02 16:45:05.538638929 -0500 +@@ -25,8 +25,5 @@ + + extern float ____wcstof_l_internal (const wchar_t *, wchar_t **, int, + __locale_t); +-extern unsigned long long int ____wcstoull_l_internal (const wchar_t *, +- wchar_t **, int, int, +- __locale_t); + + #include +diff -rupN a/wcsmbs/wcstof_nan.c b/wcsmbs/wcstof_nan.c +--- a/wcsmbs/wcstof_nan.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/wcsmbs/wcstof_nan.c 2017-03-02 16:45:05.541638922 -0500 +@@ -0,0 +1,23 @@ ++/* Convert string for NaN payload to corresponding NaN. 
Wide strings, float. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "../stdlib/strtod_nan_wide.h" ++#include "../stdlib/strtod_nan_float.h" ++ ++#define STRTOD_NAN __wcstof_nan ++#include "../stdlib/strtod_nan_main.c" +diff -rupN a/wcsmbs/wcstold_l.c b/wcsmbs/wcstold_l.c +--- a/wcsmbs/wcstold_l.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/wcsmbs/wcstold_l.c 2017-03-02 16:45:05.544638915 -0500 +@@ -24,8 +24,5 @@ + + extern long double ____wcstold_l_internal (const wchar_t *, wchar_t **, int, + __locale_t); +-extern unsigned long long int ____wcstoull_l_internal (const wchar_t *, +- wchar_t **, int, int, +- __locale_t); + + #include +diff -rupN a/wcsmbs/wcstold_nan.c b/wcsmbs/wcstold_nan.c +--- a/wcsmbs/wcstold_nan.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/wcsmbs/wcstold_nan.c 2017-03-02 16:45:05.547638908 -0500 +@@ -0,0 +1,30 @@ ++/* Convert string for NaN payload to corresponding NaN. Wide strings, ++ long double. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++/* This function is unused if long double and double have the same ++ representation. */ ++#ifndef __NO_LONG_DOUBLE_MATH ++# include "../stdlib/strtod_nan_wide.h" ++# include ++ ++# define STRTOD_NAN __wcstold_nan ++# include "../stdlib/strtod_nan_main.c" ++#endif diff --git a/SOURCES/glibc-rh1374654.patch b/SOURCES/glibc-rh1374654.patch new file mode 100644 index 0000000..6beffc7 --- /dev/null +++ b/SOURCES/glibc-rh1374654.patch @@ -0,0 +1,209 @@ +From 0f58539030e436449f79189b6edab17d7479796e Mon Sep 17 00:00:00 2001 +From: Paul Pluzhnikov +Date: Sat, 8 Aug 2015 15:53:03 -0700 +Subject: [PATCH] Fix BZ #17905 + +diff -rupN a/catgets/Makefile b/catgets/Makefile +--- a/catgets/Makefile 2017-03-03 17:54:39.000000000 -0500 ++++ b/catgets/Makefile 2017-03-03 18:05:02.506889588 -0500 +@@ -44,13 +44,15 @@ catgets-CPPFLAGS := -DNLSPATH='"$(msgcat + + generated = de.msg test1.cat test1.h test2.cat test2.h sample.SJIS.cat \ + test-gencat.h ++generated += tst-catgets.mtrace tst-catgets-mem.out ++ + generated-dirs = de + +-tst-catgets-ENV = NLSPATH="$(objpfx)%l/%N.cat" LANG=de ++tst-catgets-ENV = NLSPATH="$(objpfx)%l/%N.cat" LANG=de MALLOC_TRACE=$(objpfx)tst-catgets.mtrace + + ifeq ($(run-built-tests),yes) + tests: $(objpfx)de/libc.cat $(objpfx)test1.cat $(objpfx)test2.cat \ +- $(objpfx)test-gencat.out ++ 
$(objpfx)test-gencat.out $(objpfx)tst-catgets-mem.out + # This test just checks whether the program produces any error or not. + # The result is not tested. + $(objpfx)test1.cat: test1.msg $(objpfx)gencat +@@ -78,4 +80,8 @@ $(objpfx)test-gencat.out: test-gencat.sh + $(objpfx)sample.SJIS.cat: sample.SJIS $(objpfx)gencat + GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \ + $(built-program-cmd) -H $(objpfx)test-gencat.h < $(word 1,$^) > $@ ++ ++$(objpfx)tst-catgets-mem.out: $(objpfx)tst-catgets.out ++ $(common-objpfx)malloc/mtrace $(objpfx)tst-catgets.mtrace > $@; \ ++ $(evaluate-test) + endif +diff -rupN a/catgets/catgets.c b/catgets/catgets.c +--- a/catgets/catgets.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/catgets/catgets.c 2017-03-03 17:55:43.750147349 -0500 +@@ -16,7 +16,6 @@ + License along with the GNU C Library; if not, see + . */ + +-#include + #include + #include + #include +@@ -35,6 +34,7 @@ catopen (const char *cat_name, int flag) + __nl_catd result; + const char *env_var = NULL; + const char *nlspath = NULL; ++ char *tmp = NULL; + + if (strchr (cat_name, '/') == NULL) + { +@@ -54,7 +54,10 @@ catopen (const char *cat_name, int flag) + { + /* Append the system dependent directory. */ + size_t len = strlen (nlspath) + 1 + sizeof NLSPATH; +- char *tmp = alloca (len); ++ tmp = malloc (len); ++ ++ if (__glibc_unlikely (tmp == NULL)) ++ return (nl_catd) -1; + + __stpcpy (__stpcpy (__stpcpy (tmp, nlspath), ":"), NLSPATH); + nlspath = tmp; +@@ -65,16 +68,18 @@ catopen (const char *cat_name, int flag) + + result = (__nl_catd) malloc (sizeof (*result)); + if (result == NULL) +- /* We cannot get enough memory. */ +- return (nl_catd) -1; +- +- if (__open_catalog (cat_name, nlspath, env_var, result) != 0) ++ { ++ /* We cannot get enough memory. */ ++ result = (nl_catd) -1; ++ } ++ else if (__open_catalog (cat_name, nlspath, env_var, result) != 0) + { + /* Couldn't open the file. 
*/ + free ((void *) result); +- return (nl_catd) -1; ++ result = (nl_catd) -1; + } + ++ free (tmp); + return (nl_catd) result; + } + +diff -rupN a/catgets/open_catalog.c b/catgets/open_catalog.c +--- a/catgets/open_catalog.c 2012-12-24 22:02:13.000000000 -0500 ++++ b/catgets/open_catalog.c 2017-03-03 17:55:43.753147332 -0500 +@@ -47,6 +47,7 @@ __open_catalog (const char *cat_name, co + size_t tab_size; + const char *lastp; + int result = -1; ++ char *buf = NULL; + + if (strchr (cat_name, '/') != NULL || nlspath == NULL) + fd = open_not_cancel_2 (cat_name, O_RDONLY); +@@ -57,23 +58,23 @@ __open_catalog (const char *cat_name, co + if (__builtin_expect (bufact + (n) >= bufmax, 0)) \ + { \ + char *old_buf = buf; \ +- bufmax += 256 + (n); \ +- buf = (char *) alloca (bufmax); \ +- memcpy (buf, old_buf, bufact); \ ++ bufmax += (bufmax < 256 + (n)) ? 256 + (n) : bufmax; \ ++ buf = realloc (buf, bufmax); \ ++ if (__glibc_unlikely (buf == NULL)) \ ++ { \ ++ free (old_buf); \ ++ return -1; \ ++ } \ + } + + /* The RUN_NLSPATH variable contains a colon separated list of + descriptions where we expect to find catalogs. We have to + recognize certain % substitutions and stop when we found the + first existing file. */ +- char *buf; + size_t bufact; +- size_t bufmax; ++ size_t bufmax = 0; + size_t len; + +- buf = NULL; +- bufmax = 0; +- + fd = -1; + while (*run_nlspath != '\0') + { +@@ -188,7 +189,10 @@ __open_catalog (const char *cat_name, co + + /* Avoid dealing with directories and block devices */ + if (__builtin_expect (fd, 0) < 0) +- return -1; ++ { ++ free (buf); ++ return -1; ++ } + + if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) < 0) + goto close_unlock_return; +@@ -325,6 +329,7 @@ __open_catalog (const char *cat_name, co + /* Release the lock again. 
*/ + close_unlock_return: + close_not_cancel_no_status (fd); ++ free (buf); + + return result; + } +diff -rupN a/catgets/tst-catgets.c b/catgets/tst-catgets.c +--- a/catgets/tst-catgets.c 2017-03-03 17:54:38.000000000 -0500 ++++ b/catgets/tst-catgets.c 2017-03-03 17:55:43.755147321 -0500 +@@ -1,7 +1,10 @@ ++#include + #include + #include + #include ++#include + #include ++#include + + + static const char *msgs[] = +@@ -12,6 +15,33 @@ static const char *msgs[] = + }; + #define nmsgs (sizeof (msgs) / sizeof (msgs[0])) + ++ ++/* Test for unbounded alloca. */ ++static int ++do_bz17905 (void) ++{ ++ char *buf; ++ struct rlimit rl; ++ nl_catd result; ++ ++ const int sz = 1024 * 1024; ++ ++ getrlimit (RLIMIT_STACK, &rl); ++ rl.rlim_cur = sz; ++ setrlimit (RLIMIT_STACK, &rl); ++ ++ buf = malloc (sz + 1); ++ memset (buf, 'A', sz); ++ buf[sz] = '\0'; ++ setenv ("NLSPATH", buf, 1); ++ ++ result = catopen (buf, NL_CAT_LOCALE); ++ assert (result == (nl_catd) -1); ++ ++ free (buf); ++ return 0; ++} ++ + #define ROUNDS 5 + + static int +@@ -62,6 +92,7 @@ do_test (void) + } + } + ++ result += do_bz17905 (); + return result; + } + diff --git a/SOURCES/glibc-rh1374657.patch b/SOURCES/glibc-rh1374657.patch new file mode 100644 index 0000000..7fd892a --- /dev/null +++ b/SOURCES/glibc-rh1374657.patch @@ -0,0 +1,188 @@ +commit bae7c7c764413b23e61cb099ce33be4c4ee259bb +Author: Florian Weimer +Date: Thu Jan 28 13:59:11 2016 +0100 + + Improve check against integer wraparound in hcreate_r [BZ #18240] + +commit 2f5c1750558fe64bac361f52d6827ab1bcfe52bc +Author: OndÅ™ej Bílka +Date: Sat Jul 11 17:44:10 2015 +0200 + + Handle overflow in __hcreate_r + +--- glibc-2.17-c758a686/misc/hsearch_r.c ++++ glibc-2.17-c758a686/misc/hsearch_r.c +@@ -20,7 +20,7 @@ + #include + #include + #include +- ++#include + #include + + /* [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 +@@ -47,12 +47,10 @@ + isprime (unsigned int number) + { + /* no even number will be passed */ +- unsigned int 
div = 3; +- +- while (div * div < number && number % div != 0) +- div += 2; +- +- return number % div != 0; ++ for (unsigned int div = 3; div <= number / div; div += 2) ++ if (number % div == 0) ++ return 0; ++ return 1; + } + + +@@ -74,6 +72,12 @@ + return 0; + } + ++ if (nel >= SIZE_MAX / sizeof (_ENTRY)) ++ { ++ __set_errno (ENOMEM); ++ return 0; ++ } ++ + /* There is still another table active. Return with error. */ + if (htab->table != NULL) + return 0; +@@ -82,10 +86,19 @@ + use will not work. */ + if (nel < 3) + nel = 3; +- /* Change nel to the first prime number not smaller as nel. */ +- nel |= 1; /* make odd */ +- while (!isprime (nel)) +- nel += 2; ++ ++ /* Change nel to the first prime number in the range [nel, UINT_MAX - 2], ++ The '- 2' means 'nel += 2' cannot overflow. */ ++ for (nel |= 1; ; nel += 2) ++ { ++ if (UINT_MAX - 2 < nel) ++ { ++ __set_errno (ENOMEM); ++ return 0; ++ } ++ if (isprime (nel)) ++ break; ++ } + + htab->size = nel; + htab->filled = 0; +--- /dev/null ++++ glibc-2.17-c758a686/misc/bug18240.c +@@ -0,0 +1,97 @@ ++/* Test integer wraparound in hcreate. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static void ++test_size (size_t size) ++{ ++ int res = hcreate (size); ++ if (res == 0) ++ { ++ if (errno == ENOMEM) ++ return; ++ printf ("error: hcreate (%zu): %m\n", size); ++ exit (1); ++ } ++ char *keys[100]; ++ for (int i = 0; i < 100; ++i) ++ { ++ if (asprintf (keys + i, "%d", i) < 0) ++ { ++ printf ("error: asprintf: %m\n"); ++ exit (1); ++ } ++ ENTRY e = { keys[i], (char *) "value" }; ++ if (hsearch (e, ENTER) == NULL) ++ { ++ printf ("error: hsearch (\"%s\"): %m\n", keys[i]); ++ exit (1); ++ } ++ } ++ hdestroy (); ++ ++ for (int i = 0; i < 100; ++i) ++ free (keys[i]); ++} ++ ++static int ++do_test (void) ++{ ++ /* Limit the size of the process, so that memory allocation will ++ fail without impacting the entire system. */ ++ { ++ struct rlimit limit; ++ if (getrlimit (RLIMIT_AS, &limit) != 0) ++ { ++ printf ("getrlimit (RLIMIT_AS) failed: %m\n"); ++ return 1; ++ } ++ long target = 100 * 1024 * 1024; ++ if (limit.rlim_cur == RLIM_INFINITY || limit.rlim_cur > target) ++ { ++ limit.rlim_cur = target; ++ if (setrlimit (RLIMIT_AS, &limit) != 0) ++ { ++ printf ("setrlimit (RLIMIT_AS) failed: %m\n"); ++ return 1; ++ } ++ } ++ } ++ ++ test_size (500); ++ test_size (-1); ++ test_size (-3); ++ test_size (INT_MAX - 2); ++ test_size (INT_MAX - 1); ++ test_size (INT_MAX); ++ test_size (((unsigned) INT_MAX) + 1); ++ test_size (UINT_MAX - 2); ++ test_size (UINT_MAX - 1); ++ test_size (UINT_MAX); ++ return 0; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" +--- glibc-2.17-c758a686/misc/Makefile ++++ glibc-2.17-c758a686/misc/Makefile +@@ -76,7 +76,7 @@ + gpl2lgpl := error.c error.h + + tests := tst-dirname tst-tsearch tst-fdset tst-efgcvt tst-mntent tst-hsearch \ +- tst-error1 tst-pselect tst-insremque tst-mntent2 bug-hsearch1 ++ tst-error1 tst-pselect tst-insremque tst-mntent2 bug-hsearch1 bug18240 + ifeq ($(run-built-tests),yes) + tests: 
$(objpfx)tst-error1-mem + endif diff --git a/SOURCES/glibc-rh1374658.patch b/SOURCES/glibc-rh1374658.patch new file mode 100644 index 0000000..8dd0a45 --- /dev/null +++ b/SOURCES/glibc-rh1374658.patch @@ -0,0 +1,124 @@ +commit d36c75fc0d44deec29635dd239b0fbd206ca49b7 +Author: Paul Pluzhnikov +Date: Sat Sep 26 13:27:48 2015 -0700 + + Fix BZ #18985 -- out of range data to strftime() causes a segfault + +diff --git a/time/strftime_l.c b/time/strftime_l.c +index b48ef34..4eb647c 100644 +--- a/time/strftime_l.c ++++ b/time/strftime_l.c +@@ -510,13 +510,17 @@ __strftime_internal (s, maxsize, format, tp, tzset_called ut_argument + only a few elements. Dereference the pointers only if the format + requires this. Then it is ok to fail if the pointers are invalid. */ + # define a_wkday \ +- ((const CHAR_T *) _NL_CURRENT (LC_TIME, NLW(ABDAY_1) + tp->tm_wday)) ++ ((const CHAR_T *) (tp->tm_wday < 0 || tp->tm_wday > 6 \ ++ ? "?" : _NL_CURRENT (LC_TIME, NLW(ABDAY_1) + tp->tm_wday))) + # define f_wkday \ +- ((const CHAR_T *) _NL_CURRENT (LC_TIME, NLW(DAY_1) + tp->tm_wday)) ++ ((const CHAR_T *) (tp->tm_wday < 0 || tp->tm_wday > 6 \ ++ ? "?" : _NL_CURRENT (LC_TIME, NLW(DAY_1) + tp->tm_wday))) + # define a_month \ +- ((const CHAR_T *) _NL_CURRENT (LC_TIME, NLW(ABMON_1) + tp->tm_mon)) ++ ((const CHAR_T *) (tp->tm_mon < 0 || tp->tm_mon > 11 \ ++ ? "?" : _NL_CURRENT (LC_TIME, NLW(ABMON_1) + tp->tm_mon))) + # define f_month \ +- ((const CHAR_T *) _NL_CURRENT (LC_TIME, NLW(MON_1) + tp->tm_mon)) ++ ((const CHAR_T *) (tp->tm_mon < 0 || tp->tm_mon > 11 \ ++ ? "?" : _NL_CURRENT (LC_TIME, NLW(MON_1) + tp->tm_mon))) + # define ampm \ + ((const CHAR_T *) _NL_CURRENT (LC_TIME, tp->tm_hour > 11 \ + ? 
NLW(PM_STR) : NLW(AM_STR))) +@@ -526,8 +530,10 @@ __strftime_internal (s, maxsize, format, tp, tzset_called ut_argument + # define ap_len STRLEN (ampm) + #else + # if !HAVE_STRFTIME +-# define f_wkday (weekday_name[tp->tm_wday]) +-# define f_month (month_name[tp->tm_mon]) ++# define f_wkday (tp->tm_wday < 0 || tp->tm_wday > 6 \ ++ ? "?" : weekday_name[tp->tm_wday]) ++# define f_month (tp->tm_mon < 0 || tp->tm_mon > 11 \ ++ ? "?" : month_name[tp->tm_mon]) + # define a_wkday f_wkday + # define a_month f_month + # define ampm (L_("AMPM") + 2 * (tp->tm_hour > 11)) +@@ -1321,7 +1327,7 @@ __strftime_internal (s, maxsize, format, tp, tzset_called ut_argument + *tzset_called = true; + } + # endif +- zone = tzname[tp->tm_isdst]; ++ zone = tp->tm_isdst <= 1 ? tzname[tp->tm_isdst] : "?"; + } + #endif + if (! zone) +diff --git a/time/tst-strftime.c b/time/tst-strftime.c +index 374fba4..af3ff72 100644 +--- a/time/tst-strftime.c ++++ b/time/tst-strftime.c +@@ -4,6 +4,56 @@ + #include + + ++static int ++do_bz18985 (void) ++{ ++ char buf[1000]; ++ struct tm ttm; ++ int rc, ret = 0; ++ ++ memset (&ttm, 1, sizeof (ttm)); ++ ttm.tm_zone = NULL; /* Dereferenced directly if non-NULL. */ ++ rc = strftime (buf, sizeof (buf), "%a %A %b %B %c %z %Z", &ttm); ++ ++ if (rc == 66) ++ { ++ const char expected[] ++ = "? ? ? ? ? ? 16843009 16843009:16843009:16843009 16844909 +467836 ?"; ++ if (0 != strcmp (buf, expected)) ++ { ++ printf ("expected:\n %s\ngot:\n %s\n", expected, buf); ++ ret += 1; ++ } ++ } ++ else ++ { ++ printf ("expected 66, got %d\n", rc); ++ ret += 1; ++ } ++ ++ /* Check negative values as well. */ ++ memset (&ttm, 0xFF, sizeof (ttm)); ++ ttm.tm_zone = NULL; /* Dereferenced directly if non-NULL. */ ++ rc = strftime (buf, sizeof (buf), "%a %A %b %B %c %z %Z", &ttm); ++ ++ if (rc == 30) ++ { ++ const char expected[] = "? ? ? ? ? ? 
-1 -1:-1:-1 1899 "; ++ if (0 != strcmp (buf, expected)) ++ { ++ printf ("expected:\n %s\ngot:\n %s\n", expected, buf); ++ ret += 1; ++ } ++ } ++ else ++ { ++ printf ("expected 30, got %d\n", rc); ++ ret += 1; ++ } ++ ++ return ret; ++} ++ + static struct + { + const char *fmt; +@@ -104,7 +154,7 @@ do_test (void) + } + } + +- return result; ++ return result + do_bz18985 (); + } + + #define TEST_FUNCTION do_test () diff --git a/SOURCES/glibc-rh1380680-1.patch b/SOURCES/glibc-rh1380680-1.patch new file mode 100644 index 0000000..470edbd --- /dev/null +++ b/SOURCES/glibc-rh1380680-1.patch @@ -0,0 +1,190 @@ +From af67490e3e2ad2a32c1bcfac8923c025ac247518 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 15:05:28 +0100 +Subject: [PATCH 01/17] S390: Get rid of make warning: overriding recipe for + target gconv-modules. + +Upstream commit c70e9913d2fc2d0bf6a3ca98a4dece759d40a4ec + +This patch introduces a way to provide an architecture dependent gconv-modules +file. Before this patch, the gconv-modules file was normally installed from +src-dir/iconvdata/gconv-modules. The S390 Makefile had overridden the +installation recipe (with a make warning) in order to install the +gconv-module-s390 file from build-dir. +The iconvdata/Makefile provides another recipe, which copies the gconv-modules +file from src to build dir, which are used by the testcases. +Thus the testcases does not use the currently build s390-modules. + +This patch uses build-dir/iconvdata/gconv-modules for installation, which +is generated by concatenating src-dir/iconvdata/gconv-modules and the +architecture specific one. The latter one can be specified by setting the variable +sysdeps-gconv-modules in sysdeps/.../Makefile. + +The architecture specific gconv-modules file is emitted before the common one +because these modules aren't used in all possible conversions. E.g. the converting +from INTERNAL to UTF-16 used the common UTF-16.so module instead of UTF16_UTF32_Z9.so. 
+ +This way, the s390-Makefile does not need to override the recipe for gconv-modules +and no warning is emitted anymore. +Since we no longer support empty objpfx the conditional test in iconvdata/Makefile +is removed. + +ChangeLog: + + * iconvdata/Makefile ($(inst_gconvdir)/gconv-modules): + Install file from $(objpfx)gconv-modules. + ($(objpfx)gconv-modules): Concatenate architecture specific file + in variable sysdeps-gconv-modules and gconv-modules in src dir. + * sysdeps/s390/gconv-modules: New file. + * sysdeps/s390/s390-64/Makefile: ($(inst_gconvdir)/gconv-modules): + Deleted. + ($(objpfx)gconv-modules-s390): Deleted. + (sysdeps-gconv-modules): New variable. +--- + iconvdata/Makefile | 6 ++--- + sysdeps/s390/gconv-modules | 50 ++++++++++++++++++++++++++++++++++++++++++ + sysdeps/s390/s390-64/Makefile | 51 +------------------------------------------ + 3 files changed, 53 insertions(+), 54 deletions(-) + create mode 100644 sysdeps/s390/gconv-modules + +diff --git a/iconvdata/Makefile b/iconvdata/Makefile +index a99539e..e2624de 100644 +--- a/iconvdata/Makefile ++++ b/iconvdata/Makefile +@@ -244,7 +244,7 @@ headers: $(addprefix $(objpfx), $(generated-modules:=.h)) + $(addprefix $(inst_gconvdir)/, $(modules.so)): \ + $(inst_gconvdir)/%: $(objpfx)% $(+force) + $(do-install-program) +-$(inst_gconvdir)/gconv-modules: gconv-modules $(+force) ++$(inst_gconvdir)/gconv-modules: $(objpfx)gconv-modules $(+force) + $(do-install) + ifeq (no,$(cross-compiling)) + # Update the $(prefix)/lib/gconv/gconv-modules.cache file. 
This is necessary +@@ -319,7 +319,5 @@ do-tests-clean common-mostlyclean: tst-tables-clean + tst-tables-clean: + -rm -f $(objpfx)tst-*.table $(objpfx)tst-EUC-TW.irreversible + +-ifdef objpfx + $(objpfx)gconv-modules: gconv-modules +- cp $^ $@ +-endif ++ cat $(sysdeps-gconv-modules) $^ > $@ +diff --git a/sysdeps/s390/gconv-modules b/sysdeps/s390/gconv-modules +new file mode 100644 +index 0000000..7021105 +--- /dev/null ++++ b/sysdeps/s390/gconv-modules +@@ -0,0 +1,50 @@ ++# GNU libc iconv configuration. ++# Copyright (C) 1997-2016 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++# All lines contain the following information: ++ ++# If the lines start with `module' ++# fromset: either a name triple or a regular expression triple. ++# toset: a name triple or an expression with \N to get regular ++# expression matching results. ++# filename: filename of the module implementing the transformation. ++# If it is not absolute the path is made absolute by prepending ++# the directory the configuration file is found in. ++# cost: optional cost of the transformation. Default is 1. ++ ++# If the lines start with `alias' ++# alias: alias name which is not really recognized. 
++# name: the real name of the character set ++ ++# S/390 hardware accelerated modules ++# from to module cost ++module ISO-8859-1// IBM037// ISO-8859-1_CP037_Z900 1 ++module IBM037// ISO-8859-1// ISO-8859-1_CP037_Z900 1 ++module ISO-10646/UTF8/ UTF-32// UTF8_UTF32_Z9 1 ++module UTF-32BE// ISO-10646/UTF8/ UTF8_UTF32_Z9 1 ++module ISO-10646/UTF8/ UTF-32BE// UTF8_UTF32_Z9 1 ++module UTF-16BE// UTF-32// UTF16_UTF32_Z9 1 ++module UTF-32BE// UTF-16// UTF16_UTF32_Z9 1 ++module INTERNAL UTF-16// UTF16_UTF32_Z9 1 ++module UTF-32BE// UTF-16BE// UTF16_UTF32_Z9 1 ++module INTERNAL UTF-16BE// UTF16_UTF32_Z9 1 ++module UTF-16BE// UTF-32BE// UTF16_UTF32_Z9 1 ++module UTF-16BE// INTERNAL UTF16_UTF32_Z9 1 ++module UTF-16BE// ISO-10646/UTF8/ UTF8_UTF16_Z9 1 ++module ISO-10646/UTF8/ UTF-16// UTF8_UTF16_Z9 1 ++module ISO-10646/UTF8/ UTF-16BE// UTF8_UTF16_Z9 1 +diff --git a/sysdeps/s390/s390-64/Makefile b/sysdeps/s390/s390-64/Makefile +index 939e947..bb958bd 100644 +--- a/sysdeps/s390/s390-64/Makefile ++++ b/sysdeps/s390/s390-64/Makefile +@@ -39,54 +39,5 @@ $(patsubst %, $(inst_gconvdir)/%.so, $(s390x-iconv-modules)) : \ + $(inst_gconvdir)/%.so: $(objpfx)%.so $(+force) + $(do-install-program) + +-$(objpfx)gconv-modules-s390: gconv-modules $(+force) +- cp $< $@ +- echo >> $@ +- echo "# S/390 hardware accelerated modules" >> $@ +- echo -n "module ISO-8859-1// IBM037// " >> $@ +- echo " ISO-8859-1_CP037_Z900 1" >> $@ +- echo -n "module IBM037// ISO-8859-1// " >> $@ +- echo " ISO-8859-1_CP037_Z900 1" >> $@ +- echo -n "module ISO-10646/UTF8/ UTF-32// " >> $@ +- echo " UTF8_UTF32_Z9 1" >> $@ +- echo -n "module UTF-32BE// ISO-10646/UTF8/ " >> $@ +- echo " UTF8_UTF32_Z9 1" >> $@ +- echo -n "module ISO-10646/UTF8/ UTF-32BE// " >> $@ +- echo " UTF8_UTF32_Z9 1" >> $@ +- echo -n "module UTF-16BE// UTF-32// " >> $@ +- echo " UTF16_UTF32_Z9 1" >> $@ +- echo -n "module UTF-32BE// UTF-16// " >> $@ +- echo " UTF16_UTF32_Z9 1" >> $@ +- echo -n "module INTERNAL UTF-16// " >> $@ +- echo " UTF16_UTF32_Z9 
1" >> $@ +- echo -n "module UTF-32BE// UTF-16BE// " >> $@ +- echo " UTF16_UTF32_Z9 1" >> $@ +- echo -n "module INTERNAL UTF-16BE// " >> $@ +- echo " UTF16_UTF32_Z9 1" >> $@ +- echo -n "module UTF-16BE// UTF-32BE// " >> $@ +- echo " UTF16_UTF32_Z9 1" >> $@ +- echo -n "module UTF-16BE// INTERNAL " >> $@ +- echo " UTF16_UTF32_Z9 1" >> $@ +- echo -n "module UTF-16BE// ISO-10646/UTF8/ " >> $@ +- echo " UTF8_UTF16_Z9 1" >> $@ +- echo -n "module ISO-10646/UTF8/ UTF-16// " >> $@ +- echo " UTF8_UTF16_Z9 1" >> $@ +- echo -n "module ISO-10646/UTF8/ UTF-16BE// " >> $@ +- echo " UTF8_UTF16_Z9 1" >> $@ +- +-$(inst_gconvdir)/gconv-modules: $(objpfx)gconv-modules-s390 $(+force) +- $(do-install) +-ifeq (no,$(cross-compiling)) +-# Update the $(prefix)/lib/gconv/gconv-modules.cache file. This is necessary +-# if this libc has more gconv modules than the previously installed one. +- if test -f "$(inst_gconvdir)/gconv-modules.cache"; then \ +- LC_ALL=C LANGUAGE=C \ +- $(common-objpfx)elf/ld.so --library-path $(rpath-link) \ +- $(common-objpfx)iconv/iconvconfig \ +- $(addprefix --prefix=,$(install_root)); \ +- fi +-else +- @echo '*@*@*@ You should recreate $(inst_gconvdir)/gconv-modules.cache' +-endif +- ++sysdeps-gconv-modules = ../sysdeps/s390/gconv-modules + endif +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-10.patch b/SOURCES/glibc-rh1380680-10.patch new file mode 100644 index 0000000..9955010 --- /dev/null +++ b/SOURCES/glibc-rh1380680-10.patch @@ -0,0 +1,49 @@ +From 8b42b08a1bce2899c0f66ce66ef5fc75745c8f38 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 16:19:30 +0100 +Subject: [PATCH 10/17] S390: Fix build error with gcc6 in utf8_utf16-z9.c. 
+ +upstream commit 808d70228891ab4d4795ab3dd1e015bf63ba18d6 + +This patch fixes the build error with gcc6: +array subscript is above array bounds [-Werror=array-bounds] + +While including loop.c to construct the SINGLE(LOOPFCT) method +for converting from UTF-16 to UTF-8, the bytebuf array with length +MAX_NEEDED_INPUT is used as inptr. MAX_NEEDED_INPUT defaults to +MIN_NEEDED_INPUT if not defined before including loop.c. +Thus bytebuf has a length of 2. +This patch defines MAX_NEEDED_INPUT to MAX_NEEDED_TO, which is 4. + +ChangeLog: + + * sysdeps/s390/s390-64/utf8-utf16-z9.c + (MAX_NEEDED_INPUT): New define. + (MAX_NEEDED_OUTPUT): New define. +--- + sysdeps/s390/s390-64/utf8-utf16-z9.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/sysdeps/s390/s390-64/utf8-utf16-z9.c b/sysdeps/s390/s390-64/utf8-utf16-z9.c +index 6dad1c2..590a149 100644 +--- a/sysdeps/s390/s390-64/utf8-utf16-z9.c ++++ b/sysdeps/s390/s390-64/utf8-utf16-z9.c +@@ -183,6 +183,7 @@ gconv_end (struct __gconv_step *data) + #define MIN_NEEDED_INPUT MIN_NEEDED_FROM + #define MAX_NEEDED_INPUT MAX_NEEDED_FROM + #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO + #define LOOPFCT FROM_LOOP + /* The software implementation is based on the code in gconv_simple.c. */ + #define BODY \ +@@ -340,6 +341,7 @@ gconv_end (struct __gconv_step *data) + /* Conversion from UTF-16 to UTF-8. */ + + #define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MAX_NEEDED_INPUT MAX_NEEDED_TO + #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM + #define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM + #define LOOPFCT TO_LOOP +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-11.patch b/SOURCES/glibc-rh1380680-11.patch new file mode 100644 index 0000000..d3fe103 --- /dev/null +++ b/SOURCES/glibc-rh1380680-11.patch @@ -0,0 +1,763 @@ +From ec79c95f95869a30d3f13c6229ebef1ad4931281 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 16:20:45 +0100 +Subject: [PATCH 11/17] S390: Optimize utf8-utf16 module. 
+ +Upstream commit 5bd11b19099b3f22d821515f9c93f1ecc1a7e15e + +This patch reworks the s390 specific module to convert between utf8 and utf16. +Now ifunc is used to choose either the c or etf3eh (with convert utf instruction) +variants at runtime. Furthermore a new vector variant for z13 is introduced +which will be built and chosen if vector support is available at build / runtime. + +In case of converting utf8 to utf16, the vector variant optimizes input of +1byte utf8 characters. The convert utf instruction is used if a multibyte utf8 +character is found. + +For the other direction utf16 to utf8, the cu21 instruction can't be re-enabled, +because it does not report an error, if the input-stream consists of a single +low surrogate utf16 char (e.g. 0xdc00). This applies to the newest z13, too. +Thus there is only the c or the new vector variant, which can handle 1..4 byte +utf8 characters. + +The c variant from utf16 to utf8 has been fixed. If a high surrogate was at the +end of the input-buffer, then errno was set to EINVAL and the input-pointer +pointed just after the high surrogate. Now it points to the beginning of the +high surrogate. + +This patch also fixes some whitespace errors. The c variant from utf8 to utf16 +is now checking that tail-bytes start with 0b10... and the value is not in +range of a utf16 surrogate. + +Furthermore, the etf3eh variants are handling the "UTF-xx//IGNORE" case now. +Before they ignored the ignore-case and always stopped at an error. + +ChangeLog: + + * sysdeps/s390/s390-64/utf8-utf16-z9.c: Use ifunc to select c, + etf3eh or new vector loop-variant. 
+--- + sysdeps/s390/s390-64/utf8-utf16-z9.c | 547 ++++++++++++++++++++++++++++------- + 1 file changed, 441 insertions(+), 106 deletions(-) + +diff --git a/sysdeps/s390/s390-64/utf8-utf16-z9.c b/sysdeps/s390/s390-64/utf8-utf16-z9.c +index 590a149..b36ee9e 100644 +--- a/sysdeps/s390/s390-64/utf8-utf16-z9.c ++++ b/sysdeps/s390/s390-64/utf8-utf16-z9.c +@@ -30,33 +30,27 @@ + #include + #include + +-/* UTF-16 big endian byte order mark. */ +-#define BOM_UTF16 0xfeff ++#if defined HAVE_S390_VX_GCC_SUPPORT ++# define ASM_CLOBBER_VR(NR) , NR ++#else ++# define ASM_CLOBBER_VR(NR) ++#endif + ++/* Defines for skeleton.c. */ + #define DEFINE_INIT 0 + #define DEFINE_FINI 0 + #define MIN_NEEDED_FROM 1 + #define MAX_NEEDED_FROM 4 + #define MIN_NEEDED_TO 2 + #define MAX_NEEDED_TO 4 +-#define FROM_LOOP from_utf8_loop +-#define TO_LOOP to_utf8_loop ++#define FROM_LOOP __from_utf8_loop ++#define TO_LOOP __to_utf8_loop + #define FROM_DIRECTION (dir == from_utf8) + #define ONE_DIRECTION 0 +-#define PREPARE_LOOP \ +- enum direction dir = ((struct utf8_data *) step->__data)->dir; \ +- int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ +- \ +- if (emit_bom && !data->__internal_use \ +- && data->__invocation_counter == 0) \ +- { \ +- /* Emit the UTF-16 Byte Order Mark. */ \ +- if (__glibc_unlikely (outbuf + 2 > outend)) \ +- return __GCONV_FULL_OUTPUT; \ +- \ +- put16u (outbuf, BOM_UTF16); \ +- outbuf += 2; \ +- } ++ ++ ++/* UTF-16 big endian byte order mark. */ ++#define BOM_UTF16 0xfeff + + /* Direction of the transformation. 
*/ + enum direction +@@ -151,16 +145,16 @@ gconv_end (struct __gconv_step *data) + register unsigned long long outlen __asm__("11") = outend - outptr; \ + uint64_t cc = 0; \ + \ +- __asm__ volatile (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ ++ __asm__ __volatile__ (".machine push \n\t" \ ++ ".machine \"z9-109\" \n\t" \ ++ "0: " INSTRUCTION " \n\t" \ ++ ".machine pop \n\t" \ ++ " jo 0b \n\t" \ ++ " ipm %2 \n" \ ++ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ ++ "+d" (outlen), "+d" (inlen) \ ++ : \ ++ : "cc", "memory"); \ + \ + inptr = pInput; \ + outptr = pOutput; \ +@@ -169,50 +163,135 @@ gconv_end (struct __gconv_step *data) + if (cc == 1) \ + { \ + result = __GCONV_FULL_OUTPUT; \ +- break; \ + } \ + else if (cc == 2) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ +- break; \ + } \ + } + ++#define PREPARE_LOOP \ ++ enum direction dir = ((struct utf8_data *) step->__data)->dir; \ ++ int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ ++ \ ++ if (emit_bom && !data->__internal_use \ ++ && data->__invocation_counter == 0) \ ++ { \ ++ /* Emit the UTF-16 Byte Order Mark. */ \ ++ if (__glibc_unlikely (outbuf + 2 > outend)) \ ++ return __GCONV_FULL_OUTPUT; \ ++ \ ++ put16u (outbuf, BOM_UTF16); \ ++ outbuf += 2; \ ++ } ++ + /* Conversion function from UTF-8 to UTF-16. 
*/ ++#define BODY_FROM_HW(ASM) \ ++ { \ ++ ASM; \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ ++ \ ++ int i; \ ++ for (i = 1; inptr + i < inend && i < 5; ++i) \ ++ if ((inptr[i] & 0xc0) != 0x80) \ ++ break; \ ++ \ ++ if (__glibc_likely (inptr + i == inend \ ++ && result == __GCONV_EMPTY_INPUT)) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (i); \ ++ } ++ ++#define BODY_FROM_ETF3EH BODY_FROM_HW (HARDWARE_CONVERT ("cu12 %0, %1, 1")) ++ ++#define HW_FROM_VX \ ++ { \ ++ register const unsigned char* pInput asm ("8") = inptr; \ ++ register size_t inlen asm ("9") = inend - inptr; \ ++ register unsigned char* pOutput asm ("10") = outptr; \ ++ register size_t outlen asm("11") = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \ ++ " vrepib %%v31,0x20\n\t" \ ++ /* Loop which handles UTF-8 chars <=0x7f. */ \ ++ "0: clgijl %[R_INLEN],16,20f\n\t" \ ++ " clgijl %[R_OUTLEN],32,20f\n\t" \ ++ "1: vl %%v16,0(%[R_IN])\n\t" \ ++ " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" /* Jump away if not all bytes are 1byte \ ++ UTF8 chars. */ \ ++ /* Enlarge to UTF-16. */ \ ++ " vuplhb %%v18,%%v16\n\t" \ ++ " la %[R_IN],16(%[R_IN])\n\t" \ ++ " vupllb %%v19,%%v16\n\t" \ ++ " aghi %[R_INLEN],-16\n\t" \ ++ /* Store 32 bytes to buf_out. */ \ ++ " vstm %%v18,%%v19,0(%[R_OUT])\n\t" \ ++ " aghi %[R_OUTLEN],-32\n\t" \ ++ " la %[R_OUT],32(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],16,20f\n\t" \ ++ " clgijl %[R_OUTLEN],32,20f\n\t" \ ++ " j 1b\n\t" \ ++ "10:\n\t" \ ++ /* At least one byte is > 0x7f. \ ++ Store the preceding 1-byte chars. */ \ ++ " vlgvb %[R_TMP],%%v17,7\n\t" \ ++ " sllk %[R_TMP2],%[R_TMP],1\n\t" /* Compute highest \ ++ index to store. 
*/ \ ++ " llgfr %[R_TMP3],%[R_TMP2]\n\t" \ ++ " ahi %[R_TMP2],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " vuplhb %%v18,%%v16\n\t" \ ++ " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllb %%v19,%%v16\n\t" \ ++ " vstl %%v19,%[R_TMP2],16(%[R_OUT])\n\t" \ ++ "11: \n\t" /* Update pointers. */ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ /* Handle multibyte utf8-char with convert instruction. */ \ ++ "20: cu12 %[R_OUT],%[R_IN],1\n\t" \ ++ " jo 0b\n\t" /* Try vector implemenation again. */ \ ++ " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ ++ " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (pInput) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ } ++#define BODY_FROM_VX BODY_FROM_HW (HW_FROM_VX) ++ + +-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO +-#define LOOPFCT FROM_LOOP + /* The software implementation is based on the code in gconv_simple.c. 
*/ +-#define BODY \ ++#define BODY_FROM_C \ + { \ +- if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \ +- { \ +- HARDWARE_CONVERT ("cu12 %0, %1, 1"); \ +- \ +- if (inptr != inend) \ +- { \ +- int i; \ +- for (i = 1; inptr + i < inend; ++i) \ +- if ((inptr[i] & 0xc0) != 0x80) \ +- break; \ +- \ +- if (__glibc_likely (inptr + i == inend)) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- STANDARD_FROM_LOOP_ERR_HANDLER (i); \ +- } \ +- continue; \ +- } \ +- \ + /* Next input byte. */ \ + uint16_t ch = *inptr; \ + \ +- if (__glibc_likely (ch < 0x80)) \ ++ if (__glibc_likely (ch < 0x80)) \ + { \ + /* One byte sequence. */ \ + ++inptr; \ +@@ -230,13 +309,13 @@ gconv_end (struct __gconv_step *data) + cnt = 2; \ + ch &= 0x1f; \ + } \ +- else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ ++ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ +- else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ ++ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ +@@ -257,7 +336,7 @@ gconv_end (struct __gconv_step *data) + STANDARD_FROM_LOOP_ERR_HANDLER (i); \ + } \ + \ +- if (__glibc_unlikely (inptr + cnt > inend)) \ ++ if (__glibc_unlikely (inptr + cnt > inend)) \ + { \ + /* We don't have enough input. But before we report \ + that check that all the bytes are correct. */ \ +@@ -265,7 +344,7 @@ gconv_end (struct __gconv_step *data) + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + \ +- if (__glibc_likely (inptr + i == inend)) \ ++ if (__glibc_likely (inptr + i == inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ +@@ -280,23 +359,31 @@ gconv_end (struct __gconv_step *data) + low) are needed. */ \ + uint16_t zabcd, high, low; \ + \ +- if (__glibc_unlikely (outptr + 4 > outend)) \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ + { \ + /* Overflow in the output buffer. 
*/ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ ++ /* Check if tail-bytes >= 0x80, < 0xc0. */ \ ++ for (i = 1; i < cnt; ++i) \ ++ { \ ++ if ((inptr[i] & 0xc0) != 0x80) \ ++ /* This is an illegal encoding. */ \ ++ goto errout; \ ++ } \ ++ \ + /* See Principles of Operations cu12. */ \ + zabcd = (((inptr[0] & 0x7) << 2) | \ +- ((inptr[1] & 0x30) >> 4)) - 1; \ ++ ((inptr[1] & 0x30) >> 4)) - 1; \ + \ + /* z-bit must be zero after subtracting 1. */ \ + if (zabcd & 0x10) \ + STANDARD_FROM_LOOP_ERR_HANDLER (4) \ + \ + high = (uint16_t)(0xd8 << 8); /* high surrogate id */ \ +- high |= zabcd << 6; /* abcd bits */ \ ++ high |= zabcd << 6; /* abcd bits */ \ + high |= (inptr[1] & 0xf) << 2; /* efgh bits */ \ + high |= (inptr[2] & 0x30) >> 4; /* ij bits */ \ + \ +@@ -326,8 +413,19 @@ gconv_end (struct __gconv_step *data) + ch <<= 6; \ + ch |= byte & 0x3f; \ + } \ +- inptr += cnt; \ + \ ++ /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ ++ If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ ++ have been represented with fewer than cnt bytes. */ \ ++ if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ ++ /* Do not accept UTF-16 surrogates. */ \ ++ || (ch >= 0xd800 && ch <= 0xdfff)) \ ++ { \ ++ /* This is an illegal encoding. */ \ ++ goto errout; \ ++ } \ ++ \ ++ inptr += cnt; \ + } \ + } \ + /* Now adjust the pointers and store the result. */ \ +@@ -335,43 +433,70 @@ gconv_end (struct __gconv_step *data) + outptr += sizeof (uint16_t); \ + } + ++/* Generate loop-function with software implementation. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO ++#define LOOPFCT __from_utf8_loop_c ++#define LOOP_NEED_FLAGS ++#define BODY BODY_FROM_C ++#include ++ ++/* Generate loop-function with hardware utf-convert instruction. 
*/ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO ++#define LOOPFCT __from_utf8_loop_etf3eh + #define LOOP_NEED_FLAGS ++#define BODY BODY_FROM_ETF3EH + #include + ++#if defined HAVE_S390_VX_ASM_SUPPORT ++/* Generate loop-function with hardware vector and utf-convert instructions. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define MAX_NEEDED_OUTPUT MAX_NEEDED_TO ++# define LOOPFCT __from_utf8_loop_vx ++# define LOOP_NEED_FLAGS ++# define BODY BODY_FROM_VX ++# include ++#endif ++ ++ ++/* Generate ifunc'ed loop function. */ ++__typeof(__from_utf8_loop_c) ++__attribute__ ((ifunc ("__from_utf8_loop_resolver"))) ++__from_utf8_loop; ++ ++static void * ++__from_utf8_loop_resolver (unsigned long int dl_hwcap) ++{ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __from_utf8_loop_vx; ++ else ++#endif ++ if (dl_hwcap & HWCAP_S390_ETF3EH) ++ return __from_utf8_loop_etf3eh; ++ else ++ return __from_utf8_loop_c; ++} ++ ++strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) ++ + /* Conversion from UTF-16 to UTF-8. */ + +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MAX_NEEDED_INPUT MAX_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#define LOOPFCT TO_LOOP + /* The software routine is based on the functionality of the S/390 + hardware instruction (cu21) as described in the Principles of + Operation. */ +-#define BODY \ ++#define BODY_TO_C \ + { \ +- /* The hardware instruction currently fails to report an error for \ +- isolated low surrogates so we have to disable the instruction \ +- until this gets resolved. 
*/ \ +- if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \ +- { \ +- HARDWARE_CONVERT ("cu21 %0, %1, 1"); \ +- if (inptr != inend) \ +- { \ +- /* Check if the third byte is \ +- a valid start of a UTF-16 surrogate. */ \ +- if (inend - inptr == 3 && (inptr[3] & 0xfc) != 0xdc) \ +- STANDARD_TO_LOOP_ERR_HANDLER (3); \ +- \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- continue; \ +- } \ +- \ + uint16_t c = get16 (inptr); \ + \ +- if (__glibc_likely (c <= 0x007f)) \ ++ if (__glibc_likely (c <= 0x007f)) \ + { \ + /* Single byte UTF-8 char. */ \ + *outptr = c & 0xff; \ +@@ -379,20 +504,20 @@ gconv_end (struct __gconv_step *data) + } \ + else if (c >= 0x0080 && c <= 0x07ff) \ + { \ +- /* Two byte UTF-8 char. */ \ ++ /* Two byte UTF-8 char. */ \ + \ +- if (__glibc_unlikely (outptr + 2 > outend)) \ ++ if (__glibc_unlikely (outptr + 2 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ +- outptr[0] = 0xc0; \ +- outptr[0] |= c >> 6; \ ++ outptr[0] = 0xc0; \ ++ outptr[0] |= c >> 6; \ + \ +- outptr[1] = 0x80; \ +- outptr[1] |= c & 0x3f; \ ++ outptr[1] = 0x80; \ ++ outptr[1] |= c & 0x3f; \ + \ + outptr += 2; \ + } \ +@@ -400,7 +525,7 @@ gconv_end (struct __gconv_step *data) + { \ + /* Three byte UTF-8 char. */ \ + \ +- if (__glibc_unlikely (outptr + 3 > outend)) \ ++ if (__glibc_unlikely (outptr + 3 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ +@@ -419,22 +544,22 @@ gconv_end (struct __gconv_step *data) + } \ + else if (c >= 0xd800 && c <= 0xdbff) \ + { \ +- /* Four byte UTF-8 char. */ \ ++ /* Four byte UTF-8 char. */ \ + uint16_t low, uvwxy; \ + \ +- if (__glibc_unlikely (outptr + 4 > outend)) \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ + { \ + /* Overflow in the output buffer. 
*/ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ +- inptr += 2; \ +- if (__glibc_unlikely (inptr + 2 > inend)) \ ++ if (__glibc_unlikely (inptr + 4 > inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ ++ inptr += 2; \ + low = get16 (inptr); \ + \ + if ((low & 0xfc00) != 0xdc00) \ +@@ -461,11 +586,221 @@ gconv_end (struct __gconv_step *data) + } \ + else \ + { \ +- STANDARD_TO_LOOP_ERR_HANDLER (2); \ ++ STANDARD_TO_LOOP_ERR_HANDLER (2); \ + } \ + inptr += 2; \ + } +-#define LOOP_NEED_FLAGS +-#include ++ ++#define BODY_TO_VX \ ++ { \ ++ size_t inlen = inend - inptr; \ ++ size_t outlen = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ /* Setup to check for values <= 0x7f. */ \ ++ " larl %[R_TMP],9f\n\t" \ ++ " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \ ++ /* Loop which handles UTF-16 chars <=0x7f. */ \ ++ "0: clgijl %[R_INLEN],32,2f\n\t" \ ++ " clgijl %[R_OUTLEN],16,2f\n\t" \ ++ "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \ ++ " lghi %[R_TMP2],0\n\t" \ ++ /* Check for > 1byte UTF-8 chars. */ \ ++ " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" /* Jump away if not all bytes are 1byte \ ++ UTF8 chars. */ \ ++ " vstrchs %%v19,%%v17,%%v30,%%v31\n\t" \ ++ " jno 11f\n\t" /* Jump away if not all bytes are 1byte \ ++ UTF8 chars. */ \ ++ /* Shorten to UTF-8. */ \ ++ " vpkh %%v18,%%v16,%%v17\n\t" \ ++ " la %[R_IN],32(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-32\n\t" \ ++ /* Store 16 bytes to buf_out. */ \ ++ " vst %%v18,0(%[R_OUT])\n\t" \ ++ " aghi %[R_OUTLEN],-16\n\t" \ ++ " la %[R_OUT],16(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],32,2f\n\t" \ ++ " clgijl %[R_OUTLEN],16,2f\n\t" \ ++ " j 1b\n\t" \ ++ /* Setup to check for ch > 0x7f. (v30, v31) */ \ ++ "9: .short 0x7f,0x7f,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ " .short 0x2000,0x2000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ /* At least one byte is > 0x7f. \ ++ Store the preceding 1-byte chars. 
*/ \ ++ "11: lghi %[R_TMP2],16\n\t" /* match was found in v17. */ \ ++ "10:\n\t" \ ++ " vlgvb %[R_TMP],%%v19,7\n\t" \ ++ /* Shorten to UTF-8. */ \ ++ " vpkh %%v18,%%v16,%%v17\n\t" \ ++ " ar %[R_TMP],%[R_TMP2]\n\t" /* Number of in bytes. */ \ ++ " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ ++ " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ ++ " jl 13f\n\t" \ ++ " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ /* Update pointers. */ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ "13: \n\t" \ ++ /* Calculate remaining uint16_t values in loaded vrs. */ \ ++ " lghi %[R_TMP2],16\n\t" \ ++ " slgr %[R_TMP2],%[R_TMP3]\n\t" \ ++ " llh %[R_TMP],0(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-2\n\t" \ ++ " j 22f\n\t" \ ++ /* Handle remaining bytes. */ \ ++ "2: \n\t" \ ++ /* Zero, one or more bytes available? */ \ ++ " clgfi %[R_INLEN],1\n\t" \ ++ " locghie %[R_RES],%[RES_IN_FULL]\n\t" /* Only one byte. */ \ ++ " jle 99f\n\t" /* End if less than two bytes. */ \ ++ /* Calculate remaining uint16_t values in inptr. */ \ ++ " srlg %[R_TMP2],%[R_INLEN],1\n\t" \ ++ /* Handle multibyte utf8-char. */ \ ++ "20: llh %[R_TMP],0(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-2\n\t" \ ++ /* Test if ch is 1-byte UTF-8 char. */ \ ++ "21: clijh %[R_TMP],0x7f,22f\n\t" \ ++ /* Handle 1-byte UTF-8 char. */ \ ++ "31: slgfi %[R_OUTLEN],1\n\t" \ ++ " jl 90f \n\t" \ ++ " stc %[R_TMP],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],2(%[R_IN])\n\t" \ ++ " la %[R_OUT],1(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 2-byte UTF-8 char. */ \ ++ "22: clfi %[R_TMP],0x7ff\n\t" \ ++ " jh 23f\n\t" \ ++ /* Handle 2-byte UTF-8 char. */ \ ++ "32: slgfi %[R_OUTLEN],2\n\t" \ ++ " jl 90f \n\t" \ ++ " llill %[R_TMP3],0xc080\n\t" \ ++ " la %[R_IN],2(%[R_IN])\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],51,55,2\n\t" /* 1. byte. 
*/ \ ++ " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 2. byte. */ \ ++ " sth %[R_TMP3],0(%[R_OUT])\n\t" \ ++ " la %[R_OUT],2(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 3-byte UTF-8 char. */ \ ++ "23: clfi %[R_TMP],0xd7ff\n\t" \ ++ " jh 24f\n\t" \ ++ /* Handle 3-byte UTF-8 char. */ \ ++ "33: slgfi %[R_OUTLEN],3\n\t" \ ++ " jl 90f \n\t" \ ++ " llilf %[R_TMP3],0xe08080\n\t" \ ++ " la %[R_IN],2(%[R_IN])\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],44,47,4\n\t" /* 1. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],50,55,2\n\t" /* 2. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 3. byte. */ \ ++ " stcm %[R_TMP3],7,0(%[R_OUT])\n\t" \ ++ " la %[R_OUT],3(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 4-byte UTF-8 char. */ \ ++ "24: clfi %[R_TMP],0xdfff\n\t" \ ++ " jh 33b\n\t" /* Handle this 3-byte UTF-8 char. */ \ ++ " clfi %[R_TMP],0xdbff\n\t" \ ++ " locghih %[R_RES],%[RES_IN_ILL]\n\t" \ ++ " jh 99f\n\t" /* Jump away if this is a low surrogate \ ++ without a preceding high surrogate. */ \ ++ /* Handle 4-byte UTF-8 char. */ \ ++ "34: slgfi %[R_OUTLEN],4\n\t" \ ++ " jl 90f \n\t" \ ++ " slgfi %[R_INLEN],2\n\t" \ ++ " locghil %[R_RES],%[RES_IN_FULL]\n\t" \ ++ " jl 99f\n\t" /* Jump away if low surrogate is missing. */ \ ++ " llilf %[R_TMP3],0xf0808080\n\t" \ ++ " aghi %[R_TMP],0x40\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],37,39,16\n\t" /* 1. byte: uvw */ \ ++ " risbgn %[R_TMP3],%[R_TMP],42,43,14\n\t" /* 2. byte: xy */ \ ++ " risbgn %[R_TMP3],%[R_TMP],44,47,14\n\t" /* 2. byte: efgh */ \ ++ " risbgn %[R_TMP3],%[R_TMP],50,51,12\n\t" /* 3. byte: ij */ \ ++ " llh %[R_TMP],2(%[R_IN])\n\t" /* Load low surrogate. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],52,55,2\n\t" /* 3. byte: klmn */ \ ++ " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 4. byte: opqrst */ \ ++ " nilf %[R_TMP],0xfc00\n\t" \ ++ " clfi %[R_TMP],0xdc00\n\t" /* Check if it starts with 0xdc00. 
*/ \ ++ " locghine %[R_RES],%[RES_IN_ILL]\n\t" \ ++ " jne 99f\n\t" /* Jump away if low surrogate is invalid. */ \ ++ " st %[R_TMP3],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " la %[R_OUT],4(%[R_OUT])\n\t" \ ++ " aghi %[R_TMP2],-2\n\t" \ ++ " jh 20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Exit with __GCONV_FULL_OUTPUT. */ \ ++ "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t" \ ++ "99: \n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ if (__glibc_likely (inptr == inend) \ ++ || result != __GCONV_ILLEGAL_INPUT) \ ++ break; \ ++ \ ++ STANDARD_TO_LOOP_ERR_HANDLER (2); \ ++ } ++ ++/* Generate loop-function with software implementation. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MAX_NEEDED_INPUT MAX_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#if defined HAVE_S390_VX_ASM_SUPPORT ++# define LOOPFCT __to_utf8_loop_c ++# define BODY BODY_TO_C ++# define LOOP_NEED_FLAGS ++# include ++ ++/* Generate loop-function with software implementation. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_TO ++# define MAX_NEEDED_INPUT MAX_NEEDED_TO ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++# define LOOPFCT __to_utf8_loop_vx ++# define BODY BODY_TO_VX ++# define LOOP_NEED_FLAGS ++# include ++ ++/* Generate ifunc'ed loop function. 
*/ ++__typeof(__to_utf8_loop_c) ++__attribute__ ((ifunc ("__to_utf8_loop_resolver"))) ++__to_utf8_loop; ++ ++static void * ++__to_utf8_loop_resolver (unsigned long int dl_hwcap) ++{ ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __to_utf8_loop_vx; ++ else ++ return __to_utf8_loop_c; ++} ++ ++strong_alias (__to_utf8_loop_c_single, __to_utf8_loop_single) ++ ++#else ++# define LOOPFCT TO_LOOP ++# define BODY BODY_TO_C ++# define LOOP_NEED_FLAGS ++# include ++#endif /* !HAVE_S390_VX_ASM_SUPPORT */ + + #include +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-12.patch b/SOURCES/glibc-rh1380680-12.patch new file mode 100644 index 0000000..1fbc333 --- /dev/null +++ b/SOURCES/glibc-rh1380680-12.patch @@ -0,0 +1,609 @@ +From c806cab89b52a644b5c563b8f1c8ae59abfc2c13 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 16:22:17 +0100 +Subject: [PATCH 12/17] S390: Optimize utf16-utf32 module. + +Upstream commit 6896776c3c9c32fd22324e6de6737dd69ae73213 + +This patch reworks the s390 specific module to convert between utf16 and utf32. +Now ifunc is used to choose either the c or etf3eh (with convert utf +instruction) variants at runtime. +Furthermore a new vector variant for z13 is introduced which will be build +and chosen if vector support is available at build / runtime. + +In case of converting utf 32 to utf16, the vector variant optimizes input of +2byte utf16 characters. The convert utf instruction is used if an utf16 +surrogate is found. + +For the other direction utf16 to utf32, the cu24 instruction can't be re- +enabled, because it does not report an error, if the input-stream consists of +a single low surrogate utf16 char (e.g. 0xdc00). This applies to the newest z13, +too. Thus there is only the c or the new vector variant, which can handle utf16 +surrogate characters. + +This patch also fixes some whitespace errors. Furthermore, the etf3eh variant is +handling the "UTF-xx//IGNORE" case now. 
Before they ignored the ignore-case and +always stopped at an error. + +ChangeLog: + + * sysdeps/s390/s390-64/utf16-utf32-z9.c: Use ifunc to select c, + etf3eh or new vector loop-variant. +--- + sysdeps/s390/s390-64/utf16-utf32-z9.c | 471 +++++++++++++++++++++++++++------- + 1 file changed, 379 insertions(+), 92 deletions(-) + +diff --git a/sysdeps/s390/s390-64/utf16-utf32-z9.c b/sysdeps/s390/s390-64/utf16-utf32-z9.c +index e6a033d..33594f1 100644 +--- a/sysdeps/s390/s390-64/utf16-utf32-z9.c ++++ b/sysdeps/s390/s390-64/utf16-utf32-z9.c +@@ -30,47 +30,27 @@ + #include + #include + ++#if defined HAVE_S390_VX_GCC_SUPPORT ++# define ASM_CLOBBER_VR(NR) , NR ++#else ++# define ASM_CLOBBER_VR(NR) ++#endif ++ + /* UTF-32 big endian byte order mark. */ + #define BOM_UTF32 0x0000feffu + + /* UTF-16 big endian byte order mark. */ +-#define BOM_UTF16 0xfeff ++#define BOM_UTF16 0xfeff + + #define DEFINE_INIT 0 + #define DEFINE_FINI 0 + #define MIN_NEEDED_FROM 2 + #define MAX_NEEDED_FROM 4 + #define MIN_NEEDED_TO 4 +-#define FROM_LOOP from_utf16_loop +-#define TO_LOOP to_utf16_loop ++#define FROM_LOOP __from_utf16_loop ++#define TO_LOOP __to_utf16_loop + #define FROM_DIRECTION (dir == from_utf16) + #define ONE_DIRECTION 0 +-#define PREPARE_LOOP \ +- enum direction dir = ((struct utf16_data *) step->__data)->dir; \ +- int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \ +- \ +- if (emit_bom && !data->__internal_use \ +- && data->__invocation_counter == 0) \ +- { \ +- if (dir == to_utf16) \ +- { \ +- /* Emit the UTF-16 Byte Order Mark. */ \ +- if (__glibc_unlikely (outbuf + 2 > outend)) \ +- return __GCONV_FULL_OUTPUT; \ +- \ +- put16u (outbuf, BOM_UTF16); \ +- outbuf += 2; \ +- } \ +- else \ +- { \ +- /* Emit the UTF-32 Byte Order Mark. */ \ +- if (__glibc_unlikely (outbuf + 4 > outend)) \ +- return __GCONV_FULL_OUTPUT; \ +- \ +- put32u (outbuf, BOM_UTF32); \ +- outbuf += 4; \ +- } \ +- } + + /* Direction of the transformation. 
*/ + enum direction +@@ -169,16 +149,16 @@ gconv_end (struct __gconv_step *data) + register unsigned long long outlen __asm__("11") = outend - outptr; \ + uint64_t cc = 0; \ + \ +- __asm__ volatile (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ ++ __asm__ __volatile__ (".machine push \n\t" \ ++ ".machine \"z9-109\" \n\t" \ ++ "0: " INSTRUCTION " \n\t" \ ++ ".machine pop \n\t" \ ++ " jo 0b \n\t" \ ++ " ipm %2 \n" \ ++ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ ++ "+d" (outlen), "+d" (inlen) \ ++ : \ ++ : "cc", "memory"); \ + \ + inptr = pInput; \ + outptr = pOutput; \ +@@ -187,44 +167,46 @@ gconv_end (struct __gconv_step *data) + if (cc == 1) \ + { \ + result = __GCONV_FULL_OUTPUT; \ +- break; \ + } \ + else if (cc == 2) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ +- break; \ + } \ + } + ++#define PREPARE_LOOP \ ++ enum direction dir = ((struct utf16_data *) step->__data)->dir; \ ++ int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \ ++ \ ++ if (emit_bom && !data->__internal_use \ ++ && data->__invocation_counter == 0) \ ++ { \ ++ if (dir == to_utf16) \ ++ { \ ++ /* Emit the UTF-16 Byte Order Mark. */ \ ++ if (__glibc_unlikely (outbuf + 2 > outend)) \ ++ return __GCONV_FULL_OUTPUT; \ ++ \ ++ put16u (outbuf, BOM_UTF16); \ ++ outbuf += 2; \ ++ } \ ++ else \ ++ { \ ++ /* Emit the UTF-32 Byte Order Mark. */ \ ++ if (__glibc_unlikely (outbuf + 4 > outend)) \ ++ return __GCONV_FULL_OUTPUT; \ ++ \ ++ put32u (outbuf, BOM_UTF32); \ ++ outbuf += 4; \ ++ } \ ++ } ++ + /* Conversion function from UTF-16 to UTF-32 internal/BE. 
*/ + +-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-#define LOOPFCT FROM_LOOP + /* The software routine is copied from utf-16.c (minus bytes + swapping). */ +-#define BODY \ ++#define BODY_FROM_C \ + { \ +- /* The hardware instruction currently fails to report an error for \ +- isolated low surrogates so we have to disable the instruction \ +- until this gets resolved. */ \ +- if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \ +- { \ +- HARDWARE_CONVERT ("cu24 %0, %1, 1"); \ +- if (inptr != inend) \ +- { \ +- /* Check if the third byte is \ +- a valid start of a UTF-16 surrogate. */ \ +- if (inend - inptr == 3 && (inptr[3] & 0xfc) != 0xdc) \ +- STANDARD_FROM_LOOP_ERR_HANDLER (3); \ +- \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- continue; \ +- } \ +- \ + uint16_t u1 = get16 (inptr); \ + \ + if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \ +@@ -235,15 +217,15 @@ gconv_end (struct __gconv_step *data) + } \ + else \ + { \ +- /* An isolated low-surrogate was found. This has to be \ ++ /* An isolated low-surrogate was found. This has to be \ + considered ill-formed. */ \ +- if (__glibc_unlikely (u1 >= 0xdc00)) \ ++ if (__glibc_unlikely (u1 >= 0xdc00)) \ + { \ + STANDARD_FROM_LOOP_ERR_HANDLER (2); \ + } \ + /* It's a surrogate character. At least the first word says \ + it is. */ \ +- if (__glibc_unlikely (inptr + 4 > inend)) \ ++ if (__glibc_unlikely (inptr + 4 > inend)) \ + { \ + /* We don't have enough input for another complete input \ + character. 
*/ \ +@@ -266,48 +248,200 @@ gconv_end (struct __gconv_step *data) + } \ + outptr += 4; \ + } +-#define LOOP_NEED_FLAGS +-#include ++ ++#define BODY_FROM_VX \ ++ { \ ++ size_t inlen = inend - inptr; \ ++ size_t outlen = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ /* Setup to check for surrogates. */ \ ++ " larl %[R_TMP],9f\n\t" \ ++ " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \ ++ /* Loop which handles UTF-16 chars <0xd800, >0xdfff. */ \ ++ "0: clgijl %[R_INLEN],16,2f\n\t" \ ++ " clgijl %[R_OUTLEN],32,2f\n\t" \ ++ "1: vl %%v16,0(%[R_IN])\n\t" \ ++ /* Check for surrogate chars. */ \ ++ " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" \ ++ /* Enlarge to UTF-32. */ \ ++ " vuplhh %%v17,%%v16\n\t" \ ++ " la %[R_IN],16(%[R_IN])\n\t" \ ++ " vupllh %%v18,%%v16\n\t" \ ++ " aghi %[R_INLEN],-16\n\t" \ ++ /* Store 32 bytes to buf_out. */ \ ++ " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \ ++ " aghi %[R_OUTLEN],-32\n\t" \ ++ " la %[R_OUT],32(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],16,2f\n\t" \ ++ " clgijl %[R_OUTLEN],32,2f\n\t" \ ++ " j 1b\n\t" \ ++ /* Setup to check for ch >= 0xd800 && ch <= 0xdfff. (v30, v31) */ \ ++ "9: .short 0xd800,0xdfff,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ " .short 0xa000,0xc000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ /* At least on uint16_t is in range of surrogates. \ ++ Store the preceding chars. */ \ ++ "10: vlgvb %[R_TMP],%%v19,7\n\t" \ ++ " vuplhh %%v17,%%v16\n\t" \ ++ " sllg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ ++ " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ ++ " jl 12f\n\t" \ ++ " vstl %%v17,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ " vupllh %%v18,%%v16\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vstl %%v18,%[R_TMP2],16(%[R_OUT])\n\t" \ ++ "11: \n\t" /* Update pointers. 
*/ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ /* Calculate remaining uint16_t values in loaded vrs. */ \ ++ "12: lghi %[R_TMP2],16\n\t" \ ++ " sgr %[R_TMP2],%[R_TMP]\n\t" \ ++ " srl %[R_TMP2],1\n\t" \ ++ " llh %[R_TMP],0(%[R_IN])\n\t" \ ++ " aghi %[R_OUTLEN],-4\n\t" \ ++ " j 16f\n\t" \ ++ /* Handle remaining bytes. */ \ ++ "2: \n\t" \ ++ /* Zero, one or more bytes available? */ \ ++ " clgfi %[R_INLEN],1\n\t" \ ++ " je 97f\n\t" /* Only one byte available. */ \ ++ " jl 99f\n\t" /* End if no bytes available. */ \ ++ /* Calculate remaining uint16_t values in inptr. */ \ ++ " srlg %[R_TMP2],%[R_INLEN],1\n\t" \ ++ /* Handle remaining uint16_t values. */ \ ++ "13: llh %[R_TMP],0(%[R_IN])\n\t" \ ++ " slgfi %[R_OUTLEN],4\n\t" \ ++ " jl 96f \n\t" \ ++ " clfi %[R_TMP],0xd800\n\t" \ ++ " jhe 15f\n\t" \ ++ "14: st %[R_TMP],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],2(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-2\n\t" \ ++ " la %[R_OUT],4(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],13b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Handle UTF-16 surrogate pair. */ \ ++ "15: clfi %[R_TMP],0xdfff\n\t" \ ++ " jh 14b\n\t" /* Jump away if ch > 0xdfff. */ \ ++ "16: clfi %[R_TMP],0xdc00\n\t" \ ++ " jhe 98f\n\t" /* Jump away in case of low-surrogate. */ \ ++ " slgfi %[R_INLEN],4\n\t" \ ++ " jl 97f\n\t" /* Big enough input? */ \ ++ " llh %[R_TMP3],2(%[R_IN])\n\t" /* Load low surrogate. */ \ ++ " slfi %[R_TMP],0xd7c0\n\t" \ ++ " sll %[R_TMP],10\n\t" \ ++ " risbgn %[R_TMP],%[R_TMP3],54,63,0\n\t" /* Insert klmnopqrst. */ \ ++ " nilf %[R_TMP3],0xfc00\n\t" \ ++ " clfi %[R_TMP3],0xdc00\n\t" /* Check if it starts with 0xdc00. */ \ ++ " jne 98f\n\t" \ ++ " st %[R_TMP],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " la %[R_OUT],4(%[R_OUT])\n\t" \ ++ " aghi %[R_TMP2],-2\n\t" \ ++ " jh 13b\n\t" /* Handle remaining uint16_t values. */ \ ++ " j 0b\n\t" /* Switch to vx-loop. 
*/ \ ++ "96: \n\t" /* Return full output. */ \ ++ " lghi %[R_RES],%[RES_OUT_FULL]\n\t" \ ++ " j 99f\n\t" \ ++ "97: \n\t" /* Return incomplete input. */ \ ++ " lghi %[R_RES],%[RES_IN_FULL]\n\t" \ ++ " j 99f\n\t" \ ++ "98:\n\t" /* Return Illegal character. */ \ ++ " lghi %[R_RES],%[RES_IN_ILL]\n\t" \ ++ "99:\n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ if (__glibc_likely (inptr == inend) \ ++ || result != __GCONV_ILLEGAL_INPUT) \ ++ break; \ ++ \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (2); \ ++ } ++ ++ ++/* Generate loop-function with software routing. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#if defined HAVE_S390_VX_ASM_SUPPORT ++# define LOOPFCT __from_utf16_loop_c ++# define LOOP_NEED_FLAGS ++# define BODY BODY_FROM_C ++# include ++ ++/* Generate loop-function with hardware vector instructions. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define LOOPFCT __from_utf16_loop_vx ++# define LOOP_NEED_FLAGS ++# define BODY BODY_FROM_VX ++# include ++ ++/* Generate ifunc'ed loop function. 
*/ ++__typeof(__from_utf16_loop_c) ++__attribute__ ((ifunc ("__from_utf16_loop_resolver"))) ++__from_utf16_loop; ++ ++static void * ++__from_utf16_loop_resolver (unsigned long int dl_hwcap) ++{ ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __from_utf16_loop_vx; ++ else ++ return __from_utf16_loop_c; ++} ++ ++strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) ++#else ++# define LOOPFCT FROM_LOOP ++# define LOOP_NEED_FLAGS ++# define BODY BODY_FROM_C ++# include ++#endif + + /* Conversion from UTF-32 internal/BE to UTF-16. */ + +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#define LOOPFCT TO_LOOP + /* The software routine is copied from utf-16.c (minus bytes + swapping). */ +-#define BODY \ ++#define BODY_TO_C \ + { \ +- if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \ +- { \ +- HARDWARE_CONVERT ("cu42 %0, %1"); \ +- \ +- if (inptr != inend) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- continue; \ +- } \ +- \ + uint32_t c = get32 (inptr); \ + \ + if (__builtin_expect (c <= 0xd7ff, 1) \ + || (c >=0xdc00 && c <= 0xffff)) \ + { \ +- /* Two UTF-16 chars. */ \ +- put16 (outptr, c); \ ++ /* Two UTF-16 chars. */ \ ++ put16 (outptr, c); \ + } \ + else if (__builtin_expect (c >= 0x10000, 1) \ + && __builtin_expect (c <= 0x10ffff, 1)) \ + { \ + /* Four UTF-16 chars. */ \ +- uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \ ++ uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \ + uint16_t out; \ + \ + /* Generate a surrogate character. */ \ +- if (__glibc_unlikely (outptr + 4 > outend)) \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ + { \ + /* Overflow in the output buffer. 
*/ \ + result = __GCONV_FULL_OUTPUT; \ +@@ -326,12 +460,165 @@ gconv_end (struct __gconv_step *data) + } \ + else \ + { \ +- STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + outptr += 2; \ + inptr += 4; \ + } ++ ++#define BODY_TO_ETF3EH \ ++ { \ ++ HARDWARE_CONVERT ("cu42 %0, %1"); \ ++ \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ ++ \ ++ if (inptr + 4 > inend) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } ++ ++#define BODY_TO_VX \ ++ { \ ++ register const unsigned char* pInput asm ("8") = inptr; \ ++ register size_t inlen asm ("9") = inend - inptr; \ ++ register unsigned char* pOutput asm ("10") = outptr; \ ++ register size_t outlen asm("11") = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ /* Setup to check for surrogates. */ \ ++ " larl %[R_TMP],9f\n\t" \ ++ " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \ ++ /* Loop which handles UTF-16 chars \ ++ ch < 0xd800 || (ch > 0xdfff && ch < 0x10000). */ \ ++ "0: clgijl %[R_INLEN],32,20f\n\t" \ ++ " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \ ++ " lghi %[R_TMP2],0\n\t" \ ++ /* Shorten to UTF-16. */ \ ++ " vpkf %%v18,%%v16,%%v17\n\t" \ ++ /* Check for surrogate chars. */ \ ++ " vstrcfs %%v19,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" \ ++ " vstrcfs %%v19,%%v17,%%v30,%%v31\n\t" \ ++ " jno 11f\n\t" \ ++ /* Store 16 bytes to buf_out. */ \ ++ " vst %%v18,0(%[R_OUT])\n\t" \ ++ " la %[R_IN],32(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-32\n\t" \ ++ " aghi %[R_OUTLEN],-16\n\t" \ ++ " la %[R_OUT],16(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],32,20f\n\t" \ ++ " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ " j 1b\n\t" \ ++ /* Setup to check for ch >= 0xd800 && ch <= 0xdfff \ ++ and check for ch >= 0x10000. 
(v30, v31) */ \ ++ "9: .long 0xd800,0xdfff,0x10000,0x10000\n\t" \ ++ " .long 0xa0000000,0xc0000000, 0xa0000000,0xa0000000\n\t" \ ++ /* At least on UTF32 char is in range of surrogates. \ ++ Store the preceding characters. */ \ ++ "11: ahi %[R_TMP2],16\n\t" \ ++ "10: vlgvb %[R_TMP],%%v19,7\n\t" \ ++ " agr %[R_TMP],%[R_TMP2]\n\t" \ ++ " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ ++ " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ ++ " jl 20f\n\t" \ ++ " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ /* Update pointers. */ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ /* Handles UTF16 surrogates with convert instruction. */ \ ++ "20: cu42 %[R_OUT],%[R_IN]\n\t" \ ++ " jo 0b\n\t" /* Try vector implemenation again. */ \ ++ " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ ++ " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (pInput) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ ++ if (inptr + 4 > inend) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } ++ ++/* Generate loop-function with software routing. 
*/ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#define LOOPFCT __to_utf16_loop_c ++#define LOOP_NEED_FLAGS ++#define BODY BODY_TO_C ++#include ++ ++/* Generate loop-function with hardware utf-convert instruction. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#define LOOPFCT __to_utf16_loop_etf3eh + #define LOOP_NEED_FLAGS ++#define BODY BODY_TO_ETF3EH + #include + ++#if defined HAVE_S390_VX_ASM_SUPPORT ++/* Generate loop-function with hardware vector instructions. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_TO ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++# define LOOPFCT __to_utf16_loop_vx ++# define LOOP_NEED_FLAGS ++# define BODY BODY_TO_VX ++# include ++#endif ++ ++/* Generate ifunc'ed loop function. */ ++__typeof(__to_utf16_loop_c) ++__attribute__ ((ifunc ("__to_utf16_loop_resolver"))) ++__to_utf16_loop; ++ ++static void * ++__to_utf16_loop_resolver (unsigned long int dl_hwcap) ++{ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __to_utf16_loop_vx; ++ else ++#endif ++ if (dl_hwcap & HWCAP_S390_ETF3EH) ++ return __to_utf16_loop_etf3eh; ++ else ++ return __to_utf16_loop_c; ++} ++ ++strong_alias (__to_utf16_loop_c_single, __to_utf16_loop_single) ++ ++ + #include +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-13.patch b/SOURCES/glibc-rh1380680-13.patch new file mode 100644 index 0000000..fa43d8b --- /dev/null +++ b/SOURCES/glibc-rh1380680-13.patch @@ -0,0 +1,5227 @@ +From 3d1533c907648137e92cbbd9ae1427995aff4584 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 16:28:26 +0100 +Subject: [PATCH 13/17] S390: Use s390-64 specific iconv-modules on s390-32, + too. 
+ +Upstream commit ee518b7070b1bcb41382b6db10f513e071b2c20e + +This patch reworks the existing s390 64bit specific iconv modules in order +to use them on s390 31bit, too. + +Thus the parts for subdirectory iconvdata in sysdeps/s390/s390-64/Makefile +were moved to sysdeps/s390/Makefile so that they apply on 31bit, too. +All those modules are moved from sysdeps/s390/s390-64 directory to sysdeps/s390. + +The iso-8859-1 to/from cp037 module was adjusted to use brct (branch relative +on count) instruction on 31bit s390 instead of brctg, because the brctg is a +zarch instruction and is not available on a 31bit kernel. + +The utf modules are using zarch instructions, thus the directive machinemode +zarch_nohighgprs was added to the inline assemblies to omit the high-gprs flag +in the shared libraries. Otherwise they can't be loaded on a 31bit kernel. +The ifunc resolvers were adjusted in order to call the etf3eh or vector variants +only if zarch instructions are available (64bit kernel in 31bit compat-mode). +Furthermore some variable types were changed. E.g. unsigned long long would be +a register pair on s390 31bit, but we want only one single register. +For variables of type size_t the register contents have to be enlarged from a +32bit to a 64bit value on 31bit, because the inline assemblies use 64bit values +in such cases. + +ChangeLog: + + * sysdeps/s390/s390-64/Makefile (iconvdata-subdirectory): + Move to ... + * sysdeps/s390/Makefile: ... here. + * sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c: Move to ... + * sysdeps/s390/iso-8859-1_cp037_z900.c: ... here. + (BRANCH_ON_COUNT): New define. + (TR_LOOP): Use BRANCH_ON_COUNT instead of brctg. + * sysdeps/s390/s390-64/utf16-utf32-z9.c: Move to ... + * sysdeps/s390/utf16-utf32-z9.c: ... here and adjust to + run on s390-32, too. + * sysdeps/s390/s390-64/utf8-utf16-z9.c: Move to ... + * sysdeps/s390/utf8-utf16-z9.c: ... here and adjust to + run on s390-32, too. + * sysdeps/s390/s390-64/utf8-utf32-z9.c: Move to ... 
+ * sysdeps/s390/utf8-utf32-z9.c: ... here and adjust to + run on s390-32, too. +--- + sysdeps/s390/Makefile | 31 + + sysdeps/s390/iso-8859-1_cp037_z900.c | 262 +++++++++ + sysdeps/s390/s390-64/Makefile | 32 -- + sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c | 256 --------- + sysdeps/s390/s390-64/utf16-utf32-z9.c | 624 -------------------- + sysdeps/s390/s390-64/utf8-utf16-z9.c | 806 -------------------------- + sysdeps/s390/s390-64/utf8-utf32-z9.c | 807 -------------------------- + sysdeps/s390/utf16-utf32-z9.c | 636 +++++++++++++++++++++ + sysdeps/s390/utf8-utf16-z9.c | 818 ++++++++++++++++++++++++++ + sysdeps/s390/utf8-utf32-z9.c | 820 +++++++++++++++++++++++++++ + 10 files changed, 2567 insertions(+), 2525 deletions(-) + create mode 100644 sysdeps/s390/Makefile + create mode 100644 sysdeps/s390/iso-8859-1_cp037_z900.c + delete mode 100644 sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c + delete mode 100644 sysdeps/s390/s390-64/utf16-utf32-z9.c + delete mode 100644 sysdeps/s390/s390-64/utf8-utf16-z9.c + delete mode 100644 sysdeps/s390/s390-64/utf8-utf32-z9.c + create mode 100644 sysdeps/s390/utf16-utf32-z9.c + create mode 100644 sysdeps/s390/utf8-utf16-z9.c + create mode 100644 sysdeps/s390/utf8-utf32-z9.c + +diff --git a/sysdeps/s390/Makefile b/sysdeps/s390/Makefile +new file mode 100644 +index 0000000..d508365 +--- /dev/null ++++ b/sysdeps/s390/Makefile +@@ -0,0 +1,31 @@ ++ifeq ($(subdir),iconvdata) ++ISO-8859-1_CP037_Z900-routines := iso-8859-1_cp037_z900 ++ISO-8859-1_CP037_Z900-map := gconv.map ++ ++UTF8_UTF32_Z9-routines := utf8-utf32-z9 ++UTF8_UTF32_Z9-map := gconv.map ++ ++UTF16_UTF32_Z9-routines := utf16-utf32-z9 ++UTF16_UTF32_Z9-map := gconv.map ++ ++UTF8_UTF16_Z9-routines := utf8-utf16-z9 ++UTF8_UTF16_Z9-map := gconv.map ++ ++s390x-iconv-modules = ISO-8859-1_CP037_Z900 UTF8_UTF16_Z9 UTF16_UTF32_Z9 UTF8_UTF32_Z9 ++ ++extra-modules-left += $(s390x-iconv-modules) ++include extra-module.mk ++ ++cpp-srcs-left := $(foreach 
mod,$(s390x-iconv-modules),$($(mod)-routines)) ++lib := iconvdata ++include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left)) ++ ++extra-objs += $(addsuffix .so, $(s390x-iconv-modules)) ++install-others += $(patsubst %, $(inst_gconvdir)/%.so, $(s390x-iconv-modules)) ++ ++$(patsubst %, $(inst_gconvdir)/%.so, $(s390x-iconv-modules)) : \ ++$(inst_gconvdir)/%.so: $(objpfx)%.so $(+force) ++ $(do-install-program) ++ ++sysdeps-gconv-modules = ../sysdeps/s390/gconv-modules ++endif +diff --git a/sysdeps/s390/iso-8859-1_cp037_z900.c b/sysdeps/s390/iso-8859-1_cp037_z900.c +new file mode 100644 +index 0000000..fc25dff +--- /dev/null ++++ b/sysdeps/s390/iso-8859-1_cp037_z900.c +@@ -0,0 +1,262 @@ ++/* Conversion between ISO 8859-1 and IBM037. ++ ++ This module uses the translate instruction. ++ Copyright (C) 1997-2016 Free Software Foundation, Inc. ++ ++ Author: Andreas Krebbel ++ Based on the work by Ulrich Drepper , 1997. ++ ++ Thanks to Daniel Appich who covered the relevant performance work ++ in his diploma thesis. ++ ++ This is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ This is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++// conversion table from ISO-8859-1 to IBM037 ++static const unsigned char table_iso8859_1_to_cp037[256] ++__attribute__ ((aligned (8))) = ++{ ++ [0x00] = 0x00, [0x01] = 0x01, [0x02] = 0x02, [0x03] = 0x03, ++ [0x04] = 0x37, [0x05] = 0x2D, [0x06] = 0x2E, [0x07] = 0x2F, ++ [0x08] = 0x16, [0x09] = 0x05, [0x0A] = 0x25, [0x0B] = 0x0B, ++ [0x0C] = 0x0C, [0x0D] = 0x0D, [0x0E] = 0x0E, [0x0F] = 0x0F, ++ [0x10] = 0x10, [0x11] = 0x11, [0x12] = 0x12, [0x13] = 0x13, ++ [0x14] = 0x3C, [0x15] = 0x3D, [0x16] = 0x32, [0x17] = 0x26, ++ [0x18] = 0x18, [0x19] = 0x19, [0x1A] = 0x3F, [0x1B] = 0x27, ++ [0x1C] = 0x1C, [0x1D] = 0x1D, [0x1E] = 0x1E, [0x1F] = 0x1F, ++ [0x20] = 0x40, [0x21] = 0x5A, [0x22] = 0x7F, [0x23] = 0x7B, ++ [0x24] = 0x5B, [0x25] = 0x6C, [0x26] = 0x50, [0x27] = 0x7D, ++ [0x28] = 0x4D, [0x29] = 0x5D, [0x2A] = 0x5C, [0x2B] = 0x4E, ++ [0x2C] = 0x6B, [0x2D] = 0x60, [0x2E] = 0x4B, [0x2F] = 0x61, ++ [0x30] = 0xF0, [0x31] = 0xF1, [0x32] = 0xF2, [0x33] = 0xF3, ++ [0x34] = 0xF4, [0x35] = 0xF5, [0x36] = 0xF6, [0x37] = 0xF7, ++ [0x38] = 0xF8, [0x39] = 0xF9, [0x3A] = 0x7A, [0x3B] = 0x5E, ++ [0x3C] = 0x4C, [0x3D] = 0x7E, [0x3E] = 0x6E, [0x3F] = 0x6F, ++ [0x40] = 0x7C, [0x41] = 0xC1, [0x42] = 0xC2, [0x43] = 0xC3, ++ [0x44] = 0xC4, [0x45] = 0xC5, [0x46] = 0xC6, [0x47] = 0xC7, ++ [0x48] = 0xC8, [0x49] = 0xC9, [0x4A] = 0xD1, [0x4B] = 0xD2, ++ [0x4C] = 0xD3, [0x4D] = 0xD4, [0x4E] = 0xD5, [0x4F] = 0xD6, ++ [0x50] = 0xD7, [0x51] = 0xD8, [0x52] = 0xD9, [0x53] = 0xE2, ++ [0x54] = 0xE3, [0x55] = 0xE4, [0x56] = 0xE5, [0x57] = 0xE6, ++ [0x58] = 0xE7, [0x59] = 0xE8, [0x5A] = 0xE9, [0x5B] = 0xBA, ++ [0x5C] = 0xE0, [0x5D] = 0xBB, [0x5E] = 0xB0, [0x5F] = 0x6D, ++ [0x60] = 0x79, [0x61] = 0x81, [0x62] = 0x82, [0x63] = 0x83, ++ [0x64] = 0x84, [0x65] = 0x85, [0x66] = 0x86, [0x67] = 0x87, ++ [0x68] = 0x88, [0x69] = 0x89, [0x6A] = 0x91, [0x6B] = 0x92, ++ [0x6C] = 0x93, [0x6D] = 0x94, [0x6E] = 0x95, [0x6F] = 0x96, ++ [0x70] = 0x97, [0x71] = 0x98, [0x72] = 0x99, [0x73] = 
0xA2, ++ [0x74] = 0xA3, [0x75] = 0xA4, [0x76] = 0xA5, [0x77] = 0xA6, ++ [0x78] = 0xA7, [0x79] = 0xA8, [0x7A] = 0xA9, [0x7B] = 0xC0, ++ [0x7C] = 0x4F, [0x7D] = 0xD0, [0x7E] = 0xA1, [0x7F] = 0x07, ++ [0x80] = 0x20, [0x81] = 0x21, [0x82] = 0x22, [0x83] = 0x23, ++ [0x84] = 0x24, [0x85] = 0x15, [0x86] = 0x06, [0x87] = 0x17, ++ [0x88] = 0x28, [0x89] = 0x29, [0x8A] = 0x2A, [0x8B] = 0x2B, ++ [0x8C] = 0x2C, [0x8D] = 0x09, [0x8E] = 0x0A, [0x8F] = 0x1B, ++ [0x90] = 0x30, [0x91] = 0x31, [0x92] = 0x1A, [0x93] = 0x33, ++ [0x94] = 0x34, [0x95] = 0x35, [0x96] = 0x36, [0x97] = 0x08, ++ [0x98] = 0x38, [0x99] = 0x39, [0x9A] = 0x3A, [0x9B] = 0x3B, ++ [0x9C] = 0x04, [0x9D] = 0x14, [0x9E] = 0x3E, [0x9F] = 0xFF, ++ [0xA0] = 0x41, [0xA1] = 0xAA, [0xA2] = 0x4A, [0xA3] = 0xB1, ++ [0xA4] = 0x9F, [0xA5] = 0xB2, [0xA6] = 0x6A, [0xA7] = 0xB5, ++ [0xA8] = 0xBD, [0xA9] = 0xB4, [0xAA] = 0x9A, [0xAB] = 0x8A, ++ [0xAC] = 0x5F, [0xAD] = 0xCA, [0xAE] = 0xAF, [0xAF] = 0xBC, ++ [0xB0] = 0x90, [0xB1] = 0x8F, [0xB2] = 0xEA, [0xB3] = 0xFA, ++ [0xB4] = 0xBE, [0xB5] = 0xA0, [0xB6] = 0xB6, [0xB7] = 0xB3, ++ [0xB8] = 0x9D, [0xB9] = 0xDA, [0xBA] = 0x9B, [0xBB] = 0x8B, ++ [0xBC] = 0xB7, [0xBD] = 0xB8, [0xBE] = 0xB9, [0xBF] = 0xAB, ++ [0xC0] = 0x64, [0xC1] = 0x65, [0xC2] = 0x62, [0xC3] = 0x66, ++ [0xC4] = 0x63, [0xC5] = 0x67, [0xC6] = 0x9E, [0xC7] = 0x68, ++ [0xC8] = 0x74, [0xC9] = 0x71, [0xCA] = 0x72, [0xCB] = 0x73, ++ [0xCC] = 0x78, [0xCD] = 0x75, [0xCE] = 0x76, [0xCF] = 0x77, ++ [0xD0] = 0xAC, [0xD1] = 0x69, [0xD2] = 0xED, [0xD3] = 0xEE, ++ [0xD4] = 0xEB, [0xD5] = 0xEF, [0xD6] = 0xEC, [0xD7] = 0xBF, ++ [0xD8] = 0x80, [0xD9] = 0xFD, [0xDA] = 0xFE, [0xDB] = 0xFB, ++ [0xDC] = 0xFC, [0xDD] = 0xAD, [0xDE] = 0xAE, [0xDF] = 0x59, ++ [0xE0] = 0x44, [0xE1] = 0x45, [0xE2] = 0x42, [0xE3] = 0x46, ++ [0xE4] = 0x43, [0xE5] = 0x47, [0xE6] = 0x9C, [0xE7] = 0x48, ++ [0xE8] = 0x54, [0xE9] = 0x51, [0xEA] = 0x52, [0xEB] = 0x53, ++ [0xEC] = 0x58, [0xED] = 0x55, [0xEE] = 0x56, [0xEF] = 0x57, ++ [0xF0] = 0x8C, [0xF1] = 0x49, [0xF2] 
= 0xCD, [0xF3] = 0xCE, ++ [0xF4] = 0xCB, [0xF5] = 0xCF, [0xF6] = 0xCC, [0xF7] = 0xE1, ++ [0xF8] = 0x70, [0xF9] = 0xDD, [0xFA] = 0xDE, [0xFB] = 0xDB, ++ [0xFC] = 0xDC, [0xFD] = 0x8D, [0xFE] = 0x8E, [0xFF] = 0xDF ++}; ++ ++// conversion table from IBM037 to ISO-8859-1 ++static const unsigned char table_cp037_iso8859_1[256] ++__attribute__ ((aligned (8))) = ++{ ++ [0x00] = 0x00, [0x01] = 0x01, [0x02] = 0x02, [0x03] = 0x03, ++ [0x04] = 0x9C, [0x05] = 0x09, [0x06] = 0x86, [0x07] = 0x7F, ++ [0x08] = 0x97, [0x09] = 0x8D, [0x0A] = 0x8E, [0x0B] = 0x0B, ++ [0x0C] = 0x0C, [0x0D] = 0x0D, [0x0E] = 0x0E, [0x0F] = 0x0F, ++ [0x10] = 0x10, [0x11] = 0x11, [0x12] = 0x12, [0x13] = 0x13, ++ [0x14] = 0x9D, [0x15] = 0x85, [0x16] = 0x08, [0x17] = 0x87, ++ [0x18] = 0x18, [0x19] = 0x19, [0x1A] = 0x92, [0x1B] = 0x8F, ++ [0x1C] = 0x1C, [0x1D] = 0x1D, [0x1E] = 0x1E, [0x1F] = 0x1F, ++ [0x20] = 0x80, [0x21] = 0x81, [0x22] = 0x82, [0x23] = 0x83, ++ [0x24] = 0x84, [0x25] = 0x0A, [0x26] = 0x17, [0x27] = 0x1B, ++ [0x28] = 0x88, [0x29] = 0x89, [0x2A] = 0x8A, [0x2B] = 0x8B, ++ [0x2C] = 0x8C, [0x2D] = 0x05, [0x2E] = 0x06, [0x2F] = 0x07, ++ [0x30] = 0x90, [0x31] = 0x91, [0x32] = 0x16, [0x33] = 0x93, ++ [0x34] = 0x94, [0x35] = 0x95, [0x36] = 0x96, [0x37] = 0x04, ++ [0x38] = 0x98, [0x39] = 0x99, [0x3A] = 0x9A, [0x3B] = 0x9B, ++ [0x3C] = 0x14, [0x3D] = 0x15, [0x3E] = 0x9E, [0x3F] = 0x1A, ++ [0x40] = 0x20, [0x41] = 0xA0, [0x42] = 0xE2, [0x43] = 0xE4, ++ [0x44] = 0xE0, [0x45] = 0xE1, [0x46] = 0xE3, [0x47] = 0xE5, ++ [0x48] = 0xE7, [0x49] = 0xF1, [0x4A] = 0xA2, [0x4B] = 0x2E, ++ [0x4C] = 0x3C, [0x4D] = 0x28, [0x4E] = 0x2B, [0x4F] = 0x7C, ++ [0x50] = 0x26, [0x51] = 0xE9, [0x52] = 0xEA, [0x53] = 0xEB, ++ [0x54] = 0xE8, [0x55] = 0xED, [0x56] = 0xEE, [0x57] = 0xEF, ++ [0x58] = 0xEC, [0x59] = 0xDF, [0x5A] = 0x21, [0x5B] = 0x24, ++ [0x5C] = 0x2A, [0x5D] = 0x29, [0x5E] = 0x3B, [0x5F] = 0xAC, ++ [0x60] = 0x2D, [0x61] = 0x2F, [0x62] = 0xC2, [0x63] = 0xC4, ++ [0x64] = 0xC0, [0x65] = 0xC1, [0x66] = 0xC3, [0x67] = 0xC5, 
++ [0x68] = 0xC7, [0x69] = 0xD1, [0x6A] = 0xA6, [0x6B] = 0x2C, ++ [0x6C] = 0x25, [0x6D] = 0x5F, [0x6E] = 0x3E, [0x6F] = 0x3F, ++ [0x70] = 0xF8, [0x71] = 0xC9, [0x72] = 0xCA, [0x73] = 0xCB, ++ [0x74] = 0xC8, [0x75] = 0xCD, [0x76] = 0xCE, [0x77] = 0xCF, ++ [0x78] = 0xCC, [0x79] = 0x60, [0x7A] = 0x3A, [0x7B] = 0x23, ++ [0x7C] = 0x40, [0x7D] = 0x27, [0x7E] = 0x3D, [0x7F] = 0x22, ++ [0x80] = 0xD8, [0x81] = 0x61, [0x82] = 0x62, [0x83] = 0x63, ++ [0x84] = 0x64, [0x85] = 0x65, [0x86] = 0x66, [0x87] = 0x67, ++ [0x88] = 0x68, [0x89] = 0x69, [0x8A] = 0xAB, [0x8B] = 0xBB, ++ [0x8C] = 0xF0, [0x8D] = 0xFD, [0x8E] = 0xFE, [0x8F] = 0xB1, ++ [0x90] = 0xB0, [0x91] = 0x6A, [0x92] = 0x6B, [0x93] = 0x6C, ++ [0x94] = 0x6D, [0x95] = 0x6E, [0x96] = 0x6F, [0x97] = 0x70, ++ [0x98] = 0x71, [0x99] = 0x72, [0x9A] = 0xAA, [0x9B] = 0xBA, ++ [0x9C] = 0xE6, [0x9D] = 0xB8, [0x9E] = 0xC6, [0x9F] = 0xA4, ++ [0xA0] = 0xB5, [0xA1] = 0x7E, [0xA2] = 0x73, [0xA3] = 0x74, ++ [0xA4] = 0x75, [0xA5] = 0x76, [0xA6] = 0x77, [0xA7] = 0x78, ++ [0xA8] = 0x79, [0xA9] = 0x7A, [0xAA] = 0xA1, [0xAB] = 0xBF, ++ [0xAC] = 0xD0, [0xAD] = 0xDD, [0xAE] = 0xDE, [0xAF] = 0xAE, ++ [0xB0] = 0x5E, [0xB1] = 0xA3, [0xB2] = 0xA5, [0xB3] = 0xB7, ++ [0xB4] = 0xA9, [0xB5] = 0xA7, [0xB6] = 0xB6, [0xB7] = 0xBC, ++ [0xB8] = 0xBD, [0xB9] = 0xBE, [0xBA] = 0x5B, [0xBB] = 0x5D, ++ [0xBC] = 0xAF, [0xBD] = 0xA8, [0xBE] = 0xB4, [0xBF] = 0xD7, ++ [0xC0] = 0x7B, [0xC1] = 0x41, [0xC2] = 0x42, [0xC3] = 0x43, ++ [0xC4] = 0x44, [0xC5] = 0x45, [0xC6] = 0x46, [0xC7] = 0x47, ++ [0xC8] = 0x48, [0xC9] = 0x49, [0xCA] = 0xAD, [0xCB] = 0xF4, ++ [0xCC] = 0xF6, [0xCD] = 0xF2, [0xCE] = 0xF3, [0xCF] = 0xF5, ++ [0xD0] = 0x7D, [0xD1] = 0x4A, [0xD2] = 0x4B, [0xD3] = 0x4C, ++ [0xD4] = 0x4D, [0xD5] = 0x4E, [0xD6] = 0x4F, [0xD7] = 0x50, ++ [0xD8] = 0x51, [0xD9] = 0x52, [0xDA] = 0xB9, [0xDB] = 0xFB, ++ [0xDC] = 0xFC, [0xDD] = 0xF9, [0xDE] = 0xFA, [0xDF] = 0xFF, ++ [0xE0] = 0x5C, [0xE1] = 0xF7, [0xE2] = 0x53, [0xE3] = 0x54, ++ [0xE4] = 0x55, [0xE5] = 0x56, [0xE6] = 
0x57, [0xE7] = 0x58, ++ [0xE8] = 0x59, [0xE9] = 0x5A, [0xEA] = 0xB2, [0xEB] = 0xD4, ++ [0xEC] = 0xD6, [0xED] = 0xD2, [0xEE] = 0xD3, [0xEF] = 0xD5, ++ [0xF0] = 0x30, [0xF1] = 0x31, [0xF2] = 0x32, [0xF3] = 0x33, ++ [0xF4] = 0x34, [0xF5] = 0x35, [0xF6] = 0x36, [0xF7] = 0x37, ++ [0xF8] = 0x38, [0xF9] = 0x39, [0xFA] = 0xB3, [0xFB] = 0xDB, ++ [0xFC] = 0xDC, [0xFD] = 0xD9, [0xFE] = 0xDA, [0xFF] = 0x9F ++}; ++ ++/* Definitions used in the body of the `gconv' function. */ ++#define CHARSET_NAME "ISO-8859-1//" ++#define FROM_LOOP iso8859_1_to_cp037_z900 ++#define TO_LOOP cp037_to_iso8859_1_z900 ++#define DEFINE_INIT 1 ++#define DEFINE_FINI 1 ++#define MIN_NEEDED_FROM 1 ++#define MIN_NEEDED_TO 1 ++ ++# if defined __s390x__ ++# define BRANCH_ON_COUNT(REG,LBL) "brctg %" #REG "," #LBL "\n\t" ++# else ++# define BRANCH_ON_COUNT(REG,LBL) "brct %" #REG "," #LBL "\n\t" ++# endif ++ ++#define TR_LOOP(TABLE) \ ++ { \ ++ size_t length = (inend - inptr < outend - outptr \ ++ ? inend - inptr : outend - outptr); \ ++ \ ++ /* Process in 256 byte blocks. */ \ ++ if (__builtin_expect (length >= 256, 0)) \ ++ { \ ++ size_t blocks = length / 256; \ ++ __asm__ __volatile__("0: mvc 0(256,%[R_OUT]),0(%[R_IN])\n\t" \ ++ " tr 0(256,%[R_OUT]),0(%[R_TBL])\n\t" \ ++ " la %[R_IN],256(%[R_IN])\n\t" \ ++ " la %[R_OUT],256(%[R_OUT])\n\t" \ ++ BRANCH_ON_COUNT ([R_LI], 0b) \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_OUT] "+a" (outptr), [R_LI] "+d" (blocks) \ ++ : /* inputs */ [R_TBL] "a" (TABLE) \ ++ : /* clobber list */ "memory" \ ++ ); \ ++ length = length % 256; \ ++ } \ ++ \ ++ /* Process remaining 0...248 bytes in 8byte blocks. 
*/ \ ++ if (length >= 8) \ ++ { \ ++ size_t blocks = length / 8; \ ++ for (int i = 0; i < blocks; i++) \ ++ { \ ++ outptr[0] = TABLE[inptr[0]]; \ ++ outptr[1] = TABLE[inptr[1]]; \ ++ outptr[2] = TABLE[inptr[2]]; \ ++ outptr[3] = TABLE[inptr[3]]; \ ++ outptr[4] = TABLE[inptr[4]]; \ ++ outptr[5] = TABLE[inptr[5]]; \ ++ outptr[6] = TABLE[inptr[6]]; \ ++ outptr[7] = TABLE[inptr[7]]; \ ++ inptr += 8; \ ++ outptr += 8; \ ++ } \ ++ length = length % 8; \ ++ } \ ++ \ ++ /* Process remaining 0...7 bytes. */ \ ++ switch (length) \ ++ { \ ++ case 7: outptr[6] = TABLE[inptr[6]]; \ ++ case 6: outptr[5] = TABLE[inptr[5]]; \ ++ case 5: outptr[4] = TABLE[inptr[4]]; \ ++ case 4: outptr[3] = TABLE[inptr[3]]; \ ++ case 3: outptr[2] = TABLE[inptr[2]]; \ ++ case 2: outptr[1] = TABLE[inptr[1]]; \ ++ case 1: outptr[0] = TABLE[inptr[0]]; \ ++ case 0: break; \ ++ } \ ++ inptr += length; \ ++ outptr += length; \ ++ } ++ ++ ++/* First define the conversion function from ISO 8859-1 to CP037. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define LOOPFCT FROM_LOOP ++#define BODY TR_LOOP (table_iso8859_1_to_cp037) ++ ++#include ++ ++ ++/* Next, define the conversion function from CP037 to ISO 8859-1. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define LOOPFCT TO_LOOP ++#define BODY TR_LOOP (table_cp037_iso8859_1); ++ ++#include ++ ++ ++/* Now define the toplevel functions. 
*/ ++#include +diff --git a/sysdeps/s390/s390-64/Makefile b/sysdeps/s390/s390-64/Makefile +index bb958bd..0a50514 100644 +--- a/sysdeps/s390/s390-64/Makefile ++++ b/sysdeps/s390/s390-64/Makefile +@@ -9,35 +9,3 @@ CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused + CFLAGS-dl-load.c += -Wno-unused + CFLAGS-dl-reloc.c += -Wno-unused + endif +- +-ifeq ($(subdir),iconvdata) +-ISO-8859-1_CP037_Z900-routines := iso-8859-1_cp037_z900 +-ISO-8859-1_CP037_Z900-map := gconv.map +- +-UTF8_UTF32_Z9-routines := utf8-utf32-z9 +-UTF8_UTF32_Z9-map := gconv.map +- +-UTF16_UTF32_Z9-routines := utf16-utf32-z9 +-UTF16_UTF32_Z9-map := gconv.map +- +-UTF8_UTF16_Z9-routines := utf8-utf16-z9 +-UTF8_UTF16_Z9-map := gconv.map +- +-s390x-iconv-modules = ISO-8859-1_CP037_Z900 UTF8_UTF16_Z9 UTF16_UTF32_Z9 UTF8_UTF32_Z9 +- +-extra-modules-left += $(s390x-iconv-modules) +-include extra-module.mk +- +-cpp-srcs-left := $(foreach mod,$(s390x-iconv-modules),$($(mod)-routines)) +-lib := iconvdata +-include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left)) +- +-extra-objs += $(addsuffix .so, $(s390x-iconv-modules)) +-install-others += $(patsubst %, $(inst_gconvdir)/%.so, $(s390x-iconv-modules)) +- +-$(patsubst %, $(inst_gconvdir)/%.so, $(s390x-iconv-modules)) : \ +-$(inst_gconvdir)/%.so: $(objpfx)%.so $(+force) +- $(do-install-program) +- +-sysdeps-gconv-modules = ../sysdeps/s390/gconv-modules +-endif +diff --git a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c +deleted file mode 100644 +index 3b63e6a..0000000 +--- a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c ++++ /dev/null +@@ -1,256 +0,0 @@ +-/* Conversion between ISO 8859-1 and IBM037. +- +- This module uses the translate instruction. +- Copyright (C) 1997-2016 Free Software Foundation, Inc. +- +- Author: Andreas Krebbel +- Based on the work by Ulrich Drepper , 1997. +- +- Thanks to Daniel Appich who covered the relevant performance work +- in his diploma thesis. 
+- +- This is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- This is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +- +-// conversion table from ISO-8859-1 to IBM037 +-static const unsigned char table_iso8859_1_to_cp037[256] +-__attribute__ ((aligned (8))) = +-{ +- [0x00] = 0x00, [0x01] = 0x01, [0x02] = 0x02, [0x03] = 0x03, +- [0x04] = 0x37, [0x05] = 0x2D, [0x06] = 0x2E, [0x07] = 0x2F, +- [0x08] = 0x16, [0x09] = 0x05, [0x0A] = 0x25, [0x0B] = 0x0B, +- [0x0C] = 0x0C, [0x0D] = 0x0D, [0x0E] = 0x0E, [0x0F] = 0x0F, +- [0x10] = 0x10, [0x11] = 0x11, [0x12] = 0x12, [0x13] = 0x13, +- [0x14] = 0x3C, [0x15] = 0x3D, [0x16] = 0x32, [0x17] = 0x26, +- [0x18] = 0x18, [0x19] = 0x19, [0x1A] = 0x3F, [0x1B] = 0x27, +- [0x1C] = 0x1C, [0x1D] = 0x1D, [0x1E] = 0x1E, [0x1F] = 0x1F, +- [0x20] = 0x40, [0x21] = 0x5A, [0x22] = 0x7F, [0x23] = 0x7B, +- [0x24] = 0x5B, [0x25] = 0x6C, [0x26] = 0x50, [0x27] = 0x7D, +- [0x28] = 0x4D, [0x29] = 0x5D, [0x2A] = 0x5C, [0x2B] = 0x4E, +- [0x2C] = 0x6B, [0x2D] = 0x60, [0x2E] = 0x4B, [0x2F] = 0x61, +- [0x30] = 0xF0, [0x31] = 0xF1, [0x32] = 0xF2, [0x33] = 0xF3, +- [0x34] = 0xF4, [0x35] = 0xF5, [0x36] = 0xF6, [0x37] = 0xF7, +- [0x38] = 0xF8, [0x39] = 0xF9, [0x3A] = 0x7A, [0x3B] = 0x5E, +- [0x3C] = 0x4C, [0x3D] = 0x7E, [0x3E] = 0x6E, [0x3F] = 0x6F, +- [0x40] = 0x7C, [0x41] = 0xC1, [0x42] = 0xC2, [0x43] = 0xC3, +- [0x44] = 0xC4, [0x45] = 0xC5, [0x46] = 0xC6, [0x47] = 0xC7, +- [0x48] = 0xC8, [0x49] = 0xC9, [0x4A] = 0xD1, [0x4B] = 
0xD2, +- [0x4C] = 0xD3, [0x4D] = 0xD4, [0x4E] = 0xD5, [0x4F] = 0xD6, +- [0x50] = 0xD7, [0x51] = 0xD8, [0x52] = 0xD9, [0x53] = 0xE2, +- [0x54] = 0xE3, [0x55] = 0xE4, [0x56] = 0xE5, [0x57] = 0xE6, +- [0x58] = 0xE7, [0x59] = 0xE8, [0x5A] = 0xE9, [0x5B] = 0xBA, +- [0x5C] = 0xE0, [0x5D] = 0xBB, [0x5E] = 0xB0, [0x5F] = 0x6D, +- [0x60] = 0x79, [0x61] = 0x81, [0x62] = 0x82, [0x63] = 0x83, +- [0x64] = 0x84, [0x65] = 0x85, [0x66] = 0x86, [0x67] = 0x87, +- [0x68] = 0x88, [0x69] = 0x89, [0x6A] = 0x91, [0x6B] = 0x92, +- [0x6C] = 0x93, [0x6D] = 0x94, [0x6E] = 0x95, [0x6F] = 0x96, +- [0x70] = 0x97, [0x71] = 0x98, [0x72] = 0x99, [0x73] = 0xA2, +- [0x74] = 0xA3, [0x75] = 0xA4, [0x76] = 0xA5, [0x77] = 0xA6, +- [0x78] = 0xA7, [0x79] = 0xA8, [0x7A] = 0xA9, [0x7B] = 0xC0, +- [0x7C] = 0x4F, [0x7D] = 0xD0, [0x7E] = 0xA1, [0x7F] = 0x07, +- [0x80] = 0x20, [0x81] = 0x21, [0x82] = 0x22, [0x83] = 0x23, +- [0x84] = 0x24, [0x85] = 0x15, [0x86] = 0x06, [0x87] = 0x17, +- [0x88] = 0x28, [0x89] = 0x29, [0x8A] = 0x2A, [0x8B] = 0x2B, +- [0x8C] = 0x2C, [0x8D] = 0x09, [0x8E] = 0x0A, [0x8F] = 0x1B, +- [0x90] = 0x30, [0x91] = 0x31, [0x92] = 0x1A, [0x93] = 0x33, +- [0x94] = 0x34, [0x95] = 0x35, [0x96] = 0x36, [0x97] = 0x08, +- [0x98] = 0x38, [0x99] = 0x39, [0x9A] = 0x3A, [0x9B] = 0x3B, +- [0x9C] = 0x04, [0x9D] = 0x14, [0x9E] = 0x3E, [0x9F] = 0xFF, +- [0xA0] = 0x41, [0xA1] = 0xAA, [0xA2] = 0x4A, [0xA3] = 0xB1, +- [0xA4] = 0x9F, [0xA5] = 0xB2, [0xA6] = 0x6A, [0xA7] = 0xB5, +- [0xA8] = 0xBD, [0xA9] = 0xB4, [0xAA] = 0x9A, [0xAB] = 0x8A, +- [0xAC] = 0x5F, [0xAD] = 0xCA, [0xAE] = 0xAF, [0xAF] = 0xBC, +- [0xB0] = 0x90, [0xB1] = 0x8F, [0xB2] = 0xEA, [0xB3] = 0xFA, +- [0xB4] = 0xBE, [0xB5] = 0xA0, [0xB6] = 0xB6, [0xB7] = 0xB3, +- [0xB8] = 0x9D, [0xB9] = 0xDA, [0xBA] = 0x9B, [0xBB] = 0x8B, +- [0xBC] = 0xB7, [0xBD] = 0xB8, [0xBE] = 0xB9, [0xBF] = 0xAB, +- [0xC0] = 0x64, [0xC1] = 0x65, [0xC2] = 0x62, [0xC3] = 0x66, +- [0xC4] = 0x63, [0xC5] = 0x67, [0xC6] = 0x9E, [0xC7] = 0x68, +- [0xC8] = 0x74, [0xC9] = 0x71, [0xCA] 
= 0x72, [0xCB] = 0x73, +- [0xCC] = 0x78, [0xCD] = 0x75, [0xCE] = 0x76, [0xCF] = 0x77, +- [0xD0] = 0xAC, [0xD1] = 0x69, [0xD2] = 0xED, [0xD3] = 0xEE, +- [0xD4] = 0xEB, [0xD5] = 0xEF, [0xD6] = 0xEC, [0xD7] = 0xBF, +- [0xD8] = 0x80, [0xD9] = 0xFD, [0xDA] = 0xFE, [0xDB] = 0xFB, +- [0xDC] = 0xFC, [0xDD] = 0xAD, [0xDE] = 0xAE, [0xDF] = 0x59, +- [0xE0] = 0x44, [0xE1] = 0x45, [0xE2] = 0x42, [0xE3] = 0x46, +- [0xE4] = 0x43, [0xE5] = 0x47, [0xE6] = 0x9C, [0xE7] = 0x48, +- [0xE8] = 0x54, [0xE9] = 0x51, [0xEA] = 0x52, [0xEB] = 0x53, +- [0xEC] = 0x58, [0xED] = 0x55, [0xEE] = 0x56, [0xEF] = 0x57, +- [0xF0] = 0x8C, [0xF1] = 0x49, [0xF2] = 0xCD, [0xF3] = 0xCE, +- [0xF4] = 0xCB, [0xF5] = 0xCF, [0xF6] = 0xCC, [0xF7] = 0xE1, +- [0xF8] = 0x70, [0xF9] = 0xDD, [0xFA] = 0xDE, [0xFB] = 0xDB, +- [0xFC] = 0xDC, [0xFD] = 0x8D, [0xFE] = 0x8E, [0xFF] = 0xDF +-}; +- +-// conversion table from IBM037 to ISO-8859-1 +-static const unsigned char table_cp037_iso8859_1[256] +-__attribute__ ((aligned (8))) = +-{ +- [0x00] = 0x00, [0x01] = 0x01, [0x02] = 0x02, [0x03] = 0x03, +- [0x04] = 0x9C, [0x05] = 0x09, [0x06] = 0x86, [0x07] = 0x7F, +- [0x08] = 0x97, [0x09] = 0x8D, [0x0A] = 0x8E, [0x0B] = 0x0B, +- [0x0C] = 0x0C, [0x0D] = 0x0D, [0x0E] = 0x0E, [0x0F] = 0x0F, +- [0x10] = 0x10, [0x11] = 0x11, [0x12] = 0x12, [0x13] = 0x13, +- [0x14] = 0x9D, [0x15] = 0x85, [0x16] = 0x08, [0x17] = 0x87, +- [0x18] = 0x18, [0x19] = 0x19, [0x1A] = 0x92, [0x1B] = 0x8F, +- [0x1C] = 0x1C, [0x1D] = 0x1D, [0x1E] = 0x1E, [0x1F] = 0x1F, +- [0x20] = 0x80, [0x21] = 0x81, [0x22] = 0x82, [0x23] = 0x83, +- [0x24] = 0x84, [0x25] = 0x0A, [0x26] = 0x17, [0x27] = 0x1B, +- [0x28] = 0x88, [0x29] = 0x89, [0x2A] = 0x8A, [0x2B] = 0x8B, +- [0x2C] = 0x8C, [0x2D] = 0x05, [0x2E] = 0x06, [0x2F] = 0x07, +- [0x30] = 0x90, [0x31] = 0x91, [0x32] = 0x16, [0x33] = 0x93, +- [0x34] = 0x94, [0x35] = 0x95, [0x36] = 0x96, [0x37] = 0x04, +- [0x38] = 0x98, [0x39] = 0x99, [0x3A] = 0x9A, [0x3B] = 0x9B, +- [0x3C] = 0x14, [0x3D] = 0x15, [0x3E] = 0x9E, [0x3F] = 0x1A, 
+- [0x40] = 0x20, [0x41] = 0xA0, [0x42] = 0xE2, [0x43] = 0xE4, +- [0x44] = 0xE0, [0x45] = 0xE1, [0x46] = 0xE3, [0x47] = 0xE5, +- [0x48] = 0xE7, [0x49] = 0xF1, [0x4A] = 0xA2, [0x4B] = 0x2E, +- [0x4C] = 0x3C, [0x4D] = 0x28, [0x4E] = 0x2B, [0x4F] = 0x7C, +- [0x50] = 0x26, [0x51] = 0xE9, [0x52] = 0xEA, [0x53] = 0xEB, +- [0x54] = 0xE8, [0x55] = 0xED, [0x56] = 0xEE, [0x57] = 0xEF, +- [0x58] = 0xEC, [0x59] = 0xDF, [0x5A] = 0x21, [0x5B] = 0x24, +- [0x5C] = 0x2A, [0x5D] = 0x29, [0x5E] = 0x3B, [0x5F] = 0xAC, +- [0x60] = 0x2D, [0x61] = 0x2F, [0x62] = 0xC2, [0x63] = 0xC4, +- [0x64] = 0xC0, [0x65] = 0xC1, [0x66] = 0xC3, [0x67] = 0xC5, +- [0x68] = 0xC7, [0x69] = 0xD1, [0x6A] = 0xA6, [0x6B] = 0x2C, +- [0x6C] = 0x25, [0x6D] = 0x5F, [0x6E] = 0x3E, [0x6F] = 0x3F, +- [0x70] = 0xF8, [0x71] = 0xC9, [0x72] = 0xCA, [0x73] = 0xCB, +- [0x74] = 0xC8, [0x75] = 0xCD, [0x76] = 0xCE, [0x77] = 0xCF, +- [0x78] = 0xCC, [0x79] = 0x60, [0x7A] = 0x3A, [0x7B] = 0x23, +- [0x7C] = 0x40, [0x7D] = 0x27, [0x7E] = 0x3D, [0x7F] = 0x22, +- [0x80] = 0xD8, [0x81] = 0x61, [0x82] = 0x62, [0x83] = 0x63, +- [0x84] = 0x64, [0x85] = 0x65, [0x86] = 0x66, [0x87] = 0x67, +- [0x88] = 0x68, [0x89] = 0x69, [0x8A] = 0xAB, [0x8B] = 0xBB, +- [0x8C] = 0xF0, [0x8D] = 0xFD, [0x8E] = 0xFE, [0x8F] = 0xB1, +- [0x90] = 0xB0, [0x91] = 0x6A, [0x92] = 0x6B, [0x93] = 0x6C, +- [0x94] = 0x6D, [0x95] = 0x6E, [0x96] = 0x6F, [0x97] = 0x70, +- [0x98] = 0x71, [0x99] = 0x72, [0x9A] = 0xAA, [0x9B] = 0xBA, +- [0x9C] = 0xE6, [0x9D] = 0xB8, [0x9E] = 0xC6, [0x9F] = 0xA4, +- [0xA0] = 0xB5, [0xA1] = 0x7E, [0xA2] = 0x73, [0xA3] = 0x74, +- [0xA4] = 0x75, [0xA5] = 0x76, [0xA6] = 0x77, [0xA7] = 0x78, +- [0xA8] = 0x79, [0xA9] = 0x7A, [0xAA] = 0xA1, [0xAB] = 0xBF, +- [0xAC] = 0xD0, [0xAD] = 0xDD, [0xAE] = 0xDE, [0xAF] = 0xAE, +- [0xB0] = 0x5E, [0xB1] = 0xA3, [0xB2] = 0xA5, [0xB3] = 0xB7, +- [0xB4] = 0xA9, [0xB5] = 0xA7, [0xB6] = 0xB6, [0xB7] = 0xBC, +- [0xB8] = 0xBD, [0xB9] = 0xBE, [0xBA] = 0x5B, [0xBB] = 0x5D, +- [0xBC] = 0xAF, [0xBD] = 0xA8, [0xBE] = 
0xB4, [0xBF] = 0xD7, +- [0xC0] = 0x7B, [0xC1] = 0x41, [0xC2] = 0x42, [0xC3] = 0x43, +- [0xC4] = 0x44, [0xC5] = 0x45, [0xC6] = 0x46, [0xC7] = 0x47, +- [0xC8] = 0x48, [0xC9] = 0x49, [0xCA] = 0xAD, [0xCB] = 0xF4, +- [0xCC] = 0xF6, [0xCD] = 0xF2, [0xCE] = 0xF3, [0xCF] = 0xF5, +- [0xD0] = 0x7D, [0xD1] = 0x4A, [0xD2] = 0x4B, [0xD3] = 0x4C, +- [0xD4] = 0x4D, [0xD5] = 0x4E, [0xD6] = 0x4F, [0xD7] = 0x50, +- [0xD8] = 0x51, [0xD9] = 0x52, [0xDA] = 0xB9, [0xDB] = 0xFB, +- [0xDC] = 0xFC, [0xDD] = 0xF9, [0xDE] = 0xFA, [0xDF] = 0xFF, +- [0xE0] = 0x5C, [0xE1] = 0xF7, [0xE2] = 0x53, [0xE3] = 0x54, +- [0xE4] = 0x55, [0xE5] = 0x56, [0xE6] = 0x57, [0xE7] = 0x58, +- [0xE8] = 0x59, [0xE9] = 0x5A, [0xEA] = 0xB2, [0xEB] = 0xD4, +- [0xEC] = 0xD6, [0xED] = 0xD2, [0xEE] = 0xD3, [0xEF] = 0xD5, +- [0xF0] = 0x30, [0xF1] = 0x31, [0xF2] = 0x32, [0xF3] = 0x33, +- [0xF4] = 0x34, [0xF5] = 0x35, [0xF6] = 0x36, [0xF7] = 0x37, +- [0xF8] = 0x38, [0xF9] = 0x39, [0xFA] = 0xB3, [0xFB] = 0xDB, +- [0xFC] = 0xDC, [0xFD] = 0xD9, [0xFE] = 0xDA, [0xFF] = 0x9F +-}; +- +-/* Definitions used in the body of the `gconv' function. */ +-#define CHARSET_NAME "ISO-8859-1//" +-#define FROM_LOOP iso8859_1_to_cp037_z900 +-#define TO_LOOP cp037_to_iso8859_1_z900 +-#define DEFINE_INIT 1 +-#define DEFINE_FINI 1 +-#define MIN_NEEDED_FROM 1 +-#define MIN_NEEDED_TO 1 +- +-#define TR_LOOP(TABLE) \ +- { \ +- size_t length = (inend - inptr < outend - outptr \ +- ? inend - inptr : outend - outptr); \ +- \ +- /* Process in 256 byte blocks. 
*/ \ +- if (__builtin_expect (length >= 256, 0)) \ +- { \ +- size_t blocks = length / 256; \ +- __asm__ __volatile__("0: mvc 0(256,%[R_OUT]),0(%[R_IN])\n\t" \ +- " tr 0(256,%[R_OUT]),0(%[R_TBL])\n\t" \ +- " la %[R_IN],256(%[R_IN])\n\t" \ +- " la %[R_OUT],256(%[R_OUT])\n\t" \ +- " brctg %[R_LI],0b\n\t" \ +- : /* outputs */ [R_IN] "+a" (inptr) \ +- , [R_OUT] "+a" (outptr), [R_LI] "+d" (blocks) \ +- : /* inputs */ [R_TBL] "a" (TABLE) \ +- : /* clobber list */ "memory" \ +- ); \ +- length = length % 256; \ +- } \ +- \ +- /* Process remaining 0...248 bytes in 8byte blocks. */ \ +- if (length >= 8) \ +- { \ +- size_t blocks = length / 8; \ +- for (int i = 0; i < blocks; i++) \ +- { \ +- outptr[0] = TABLE[inptr[0]]; \ +- outptr[1] = TABLE[inptr[1]]; \ +- outptr[2] = TABLE[inptr[2]]; \ +- outptr[3] = TABLE[inptr[3]]; \ +- outptr[4] = TABLE[inptr[4]]; \ +- outptr[5] = TABLE[inptr[5]]; \ +- outptr[6] = TABLE[inptr[6]]; \ +- outptr[7] = TABLE[inptr[7]]; \ +- inptr += 8; \ +- outptr += 8; \ +- } \ +- length = length % 8; \ +- } \ +- \ +- /* Process remaining 0...7 bytes. */ \ +- switch (length) \ +- { \ +- case 7: outptr[6] = TABLE[inptr[6]]; \ +- case 6: outptr[5] = TABLE[inptr[5]]; \ +- case 5: outptr[4] = TABLE[inptr[4]]; \ +- case 4: outptr[3] = TABLE[inptr[3]]; \ +- case 3: outptr[2] = TABLE[inptr[2]]; \ +- case 2: outptr[1] = TABLE[inptr[1]]; \ +- case 1: outptr[0] = TABLE[inptr[0]]; \ +- case 0: break; \ +- } \ +- inptr += length; \ +- outptr += length; \ +- } +- +- +-/* First define the conversion function from ISO 8859-1 to CP037. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-#define LOOPFCT FROM_LOOP +-#define BODY TR_LOOP (table_iso8859_1_to_cp037) +- +-#include +- +- +-/* Next, define the conversion function from CP037 to ISO 8859-1. 
*/ +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define LOOPFCT TO_LOOP +-#define BODY TR_LOOP (table_cp037_iso8859_1); +- +-#include +- +- +-/* Now define the toplevel functions. */ +-#include +diff --git a/sysdeps/s390/s390-64/utf16-utf32-z9.c b/sysdeps/s390/s390-64/utf16-utf32-z9.c +deleted file mode 100644 +index 33594f1..0000000 +--- a/sysdeps/s390/s390-64/utf16-utf32-z9.c ++++ /dev/null +@@ -1,624 +0,0 @@ +-/* Conversion between UTF-16 and UTF-32 BE/internal. +- +- This module uses the Z9-109 variants of the Convert Unicode +- instructions. +- Copyright (C) 1997-2009 Free Software Foundation, Inc. +- +- Author: Andreas Krebbel +- Based on the work by Ulrich Drepper , 1997. +- +- Thanks to Daniel Appich who covered the relevant performance work +- in his diploma thesis. +- +- This is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- This is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +-#include +-#include +- +-#if defined HAVE_S390_VX_GCC_SUPPORT +-# define ASM_CLOBBER_VR(NR) , NR +-#else +-# define ASM_CLOBBER_VR(NR) +-#endif +- +-/* UTF-32 big endian byte order mark. */ +-#define BOM_UTF32 0x0000feffu +- +-/* UTF-16 big endian byte order mark. 
*/ +-#define BOM_UTF16 0xfeff +- +-#define DEFINE_INIT 0 +-#define DEFINE_FINI 0 +-#define MIN_NEEDED_FROM 2 +-#define MAX_NEEDED_FROM 4 +-#define MIN_NEEDED_TO 4 +-#define FROM_LOOP __from_utf16_loop +-#define TO_LOOP __to_utf16_loop +-#define FROM_DIRECTION (dir == from_utf16) +-#define ONE_DIRECTION 0 +- +-/* Direction of the transformation. */ +-enum direction +-{ +- illegal_dir, +- to_utf16, +- from_utf16 +-}; +- +-struct utf16_data +-{ +- enum direction dir; +- int emit_bom; +-}; +- +- +-extern int gconv_init (struct __gconv_step *step); +-int +-gconv_init (struct __gconv_step *step) +-{ +- /* Determine which direction. */ +- struct utf16_data *new_data; +- enum direction dir = illegal_dir; +- int emit_bom; +- int result; +- +- emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0 +- || __strcasecmp (step->__to_name, "UTF-16//") == 0); +- +- if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0 +- && (__strcasecmp (step->__to_name, "UTF-32//") == 0 +- || __strcasecmp (step->__to_name, "UTF-32BE//") == 0 +- || __strcasecmp (step->__to_name, "INTERNAL") == 0)) +- { +- dir = from_utf16; +- } +- else if ((__strcasecmp (step->__to_name, "UTF-16//") == 0 +- || __strcasecmp (step->__to_name, "UTF-16BE//") == 0) +- && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0 +- || __strcasecmp (step->__from_name, "INTERNAL") == 0)) +- { +- dir = to_utf16; +- } +- +- result = __GCONV_NOCONV; +- if (dir != illegal_dir) +- { +- new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data)); +- +- result = __GCONV_NOMEM; +- if (new_data != NULL) +- { +- new_data->dir = dir; +- new_data->emit_bom = emit_bom; +- step->__data = new_data; +- +- if (dir == from_utf16) +- { +- step->__min_needed_from = MIN_NEEDED_FROM; +- step->__max_needed_from = MIN_NEEDED_FROM; +- step->__min_needed_to = MIN_NEEDED_TO; +- step->__max_needed_to = MIN_NEEDED_TO; +- } +- else +- { +- step->__min_needed_from = MIN_NEEDED_TO; +- step->__max_needed_from = MIN_NEEDED_TO; +- 
step->__min_needed_to = MIN_NEEDED_FROM; +- step->__max_needed_to = MIN_NEEDED_FROM; +- } +- +- step->__stateful = 0; +- +- result = __GCONV_OK; +- } +- } +- +- return result; +-} +- +- +-extern void gconv_end (struct __gconv_step *data); +-void +-gconv_end (struct __gconv_step *data) +-{ +- free (data->__data); +-} +- +-/* The macro for the hardware loop. This is used for both +- directions. */ +-#define HARDWARE_CONVERT(INSTRUCTION) \ +- { \ +- register const unsigned char* pInput __asm__ ("8") = inptr; \ +- register unsigned long long inlen __asm__ ("9") = inend - inptr; \ +- register unsigned char* pOutput __asm__ ("10") = outptr; \ +- register unsigned long long outlen __asm__("11") = outend - outptr; \ +- uint64_t cc = 0; \ +- \ +- __asm__ __volatile__ (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ +- \ +- inptr = pInput; \ +- outptr = pOutput; \ +- cc >>= 28; \ +- \ +- if (cc == 1) \ +- { \ +- result = __GCONV_FULL_OUTPUT; \ +- } \ +- else if (cc == 2) \ +- { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- } \ +- } +- +-#define PREPARE_LOOP \ +- enum direction dir = ((struct utf16_data *) step->__data)->dir; \ +- int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \ +- \ +- if (emit_bom && !data->__internal_use \ +- && data->__invocation_counter == 0) \ +- { \ +- if (dir == to_utf16) \ +- { \ +- /* Emit the UTF-16 Byte Order Mark. */ \ +- if (__glibc_unlikely (outbuf + 2 > outend)) \ +- return __GCONV_FULL_OUTPUT; \ +- \ +- put16u (outbuf, BOM_UTF16); \ +- outbuf += 2; \ +- } \ +- else \ +- { \ +- /* Emit the UTF-32 Byte Order Mark. 
*/ \ +- if (__glibc_unlikely (outbuf + 4 > outend)) \ +- return __GCONV_FULL_OUTPUT; \ +- \ +- put32u (outbuf, BOM_UTF32); \ +- outbuf += 4; \ +- } \ +- } +- +-/* Conversion function from UTF-16 to UTF-32 internal/BE. */ +- +-/* The software routine is copied from utf-16.c (minus bytes +- swapping). */ +-#define BODY_FROM_C \ +- { \ +- uint16_t u1 = get16 (inptr); \ +- \ +- if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \ +- { \ +- /* No surrogate. */ \ +- put32 (outptr, u1); \ +- inptr += 2; \ +- } \ +- else \ +- { \ +- /* An isolated low-surrogate was found. This has to be \ +- considered ill-formed. */ \ +- if (__glibc_unlikely (u1 >= 0xdc00)) \ +- { \ +- STANDARD_FROM_LOOP_ERR_HANDLER (2); \ +- } \ +- /* It's a surrogate character. At least the first word says \ +- it is. */ \ +- if (__glibc_unlikely (inptr + 4 > inend)) \ +- { \ +- /* We don't have enough input for another complete input \ +- character. */ \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- \ +- inptr += 2; \ +- uint16_t u2 = get16 (inptr); \ +- if (__builtin_expect (u2 < 0xdc00, 0) \ +- || __builtin_expect (u2 > 0xdfff, 0)) \ +- { \ +- /* This is no valid second word for a surrogate. */ \ +- inptr -= 2; \ +- STANDARD_FROM_LOOP_ERR_HANDLER (2); \ +- } \ +- \ +- put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \ +- inptr += 2; \ +- } \ +- outptr += 4; \ +- } +- +-#define BODY_FROM_VX \ +- { \ +- size_t inlen = inend - inptr; \ +- size_t outlen = outend - outptr; \ +- unsigned long tmp, tmp2, tmp3; \ +- asm volatile (".machine push\n\t" \ +- ".machine \"z13\"\n\t" \ +- ".machinemode \"zarch_nohighgprs\"\n\t" \ +- /* Setup to check for surrogates. */ \ +- " larl %[R_TMP],9f\n\t" \ +- " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \ +- /* Loop which handles UTF-16 chars <0xd800, >0xdfff. */ \ +- "0: clgijl %[R_INLEN],16,2f\n\t" \ +- " clgijl %[R_OUTLEN],32,2f\n\t" \ +- "1: vl %%v16,0(%[R_IN])\n\t" \ +- /* Check for surrogate chars. 
*/ \ +- " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \ +- " jno 10f\n\t" \ +- /* Enlarge to UTF-32. */ \ +- " vuplhh %%v17,%%v16\n\t" \ +- " la %[R_IN],16(%[R_IN])\n\t" \ +- " vupllh %%v18,%%v16\n\t" \ +- " aghi %[R_INLEN],-16\n\t" \ +- /* Store 32 bytes to buf_out. */ \ +- " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \ +- " aghi %[R_OUTLEN],-32\n\t" \ +- " la %[R_OUT],32(%[R_OUT])\n\t" \ +- " clgijl %[R_INLEN],16,2f\n\t" \ +- " clgijl %[R_OUTLEN],32,2f\n\t" \ +- " j 1b\n\t" \ +- /* Setup to check for ch >= 0xd800 && ch <= 0xdfff. (v30, v31) */ \ +- "9: .short 0xd800,0xdfff,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ +- " .short 0xa000,0xc000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ +- /* At least on uint16_t is in range of surrogates. \ +- Store the preceding chars. */ \ +- "10: vlgvb %[R_TMP],%%v19,7\n\t" \ +- " vuplhh %%v17,%%v16\n\t" \ +- " sllg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ +- " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ +- " jl 12f\n\t" \ +- " vstl %%v17,%[R_TMP2],0(%[R_OUT])\n\t" \ +- " vupllh %%v18,%%v16\n\t" \ +- " ahi %[R_TMP2],-16\n\t" \ +- " jl 11f\n\t" \ +- " vstl %%v18,%[R_TMP2],16(%[R_OUT])\n\t" \ +- "11: \n\t" /* Update pointers. */ \ +- " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ +- " slgr %[R_INLEN],%[R_TMP]\n\t" \ +- " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ +- " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ +- /* Calculate remaining uint16_t values in loaded vrs. */ \ +- "12: lghi %[R_TMP2],16\n\t" \ +- " sgr %[R_TMP2],%[R_TMP]\n\t" \ +- " srl %[R_TMP2],1\n\t" \ +- " llh %[R_TMP],0(%[R_IN])\n\t" \ +- " aghi %[R_OUTLEN],-4\n\t" \ +- " j 16f\n\t" \ +- /* Handle remaining bytes. */ \ +- "2: \n\t" \ +- /* Zero, one or more bytes available? */ \ +- " clgfi %[R_INLEN],1\n\t" \ +- " je 97f\n\t" /* Only one byte available. */ \ +- " jl 99f\n\t" /* End if no bytes available. */ \ +- /* Calculate remaining uint16_t values in inptr. */ \ +- " srlg %[R_TMP2],%[R_INLEN],1\n\t" \ +- /* Handle remaining uint16_t values. 
*/ \ +- "13: llh %[R_TMP],0(%[R_IN])\n\t" \ +- " slgfi %[R_OUTLEN],4\n\t" \ +- " jl 96f \n\t" \ +- " clfi %[R_TMP],0xd800\n\t" \ +- " jhe 15f\n\t" \ +- "14: st %[R_TMP],0(%[R_OUT])\n\t" \ +- " la %[R_IN],2(%[R_IN])\n\t" \ +- " aghi %[R_INLEN],-2\n\t" \ +- " la %[R_OUT],4(%[R_OUT])\n\t" \ +- " brctg %[R_TMP2],13b\n\t" \ +- " j 0b\n\t" /* Switch to vx-loop. */ \ +- /* Handle UTF-16 surrogate pair. */ \ +- "15: clfi %[R_TMP],0xdfff\n\t" \ +- " jh 14b\n\t" /* Jump away if ch > 0xdfff. */ \ +- "16: clfi %[R_TMP],0xdc00\n\t" \ +- " jhe 98f\n\t" /* Jump away in case of low-surrogate. */ \ +- " slgfi %[R_INLEN],4\n\t" \ +- " jl 97f\n\t" /* Big enough input? */ \ +- " llh %[R_TMP3],2(%[R_IN])\n\t" /* Load low surrogate. */ \ +- " slfi %[R_TMP],0xd7c0\n\t" \ +- " sll %[R_TMP],10\n\t" \ +- " risbgn %[R_TMP],%[R_TMP3],54,63,0\n\t" /* Insert klmnopqrst. */ \ +- " nilf %[R_TMP3],0xfc00\n\t" \ +- " clfi %[R_TMP3],0xdc00\n\t" /* Check if it starts with 0xdc00. */ \ +- " jne 98f\n\t" \ +- " st %[R_TMP],0(%[R_OUT])\n\t" \ +- " la %[R_IN],4(%[R_IN])\n\t" \ +- " la %[R_OUT],4(%[R_OUT])\n\t" \ +- " aghi %[R_TMP2],-2\n\t" \ +- " jh 13b\n\t" /* Handle remaining uint16_t values. */ \ +- " j 0b\n\t" /* Switch to vx-loop. */ \ +- "96: \n\t" /* Return full output. */ \ +- " lghi %[R_RES],%[RES_OUT_FULL]\n\t" \ +- " j 99f\n\t" \ +- "97: \n\t" /* Return incomplete input. */ \ +- " lghi %[R_RES],%[RES_IN_FULL]\n\t" \ +- " j 99f\n\t" \ +- "98:\n\t" /* Return Illegal character. 
*/ \ +- " lghi %[R_RES],%[RES_IN_ILL]\n\t" \ +- "99:\n\t" \ +- ".machine pop" \ +- : /* outputs */ [R_IN] "+a" (inptr) \ +- , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \ +- , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ +- , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ +- , [R_RES] "+d" (result) \ +- : /* inputs */ \ +- [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ +- , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ +- , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ +- : /* clobber list */ "memory", "cc" \ +- ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ +- ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ +- ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ +- ); \ +- if (__glibc_likely (inptr == inend) \ +- || result != __GCONV_ILLEGAL_INPUT) \ +- break; \ +- \ +- STANDARD_FROM_LOOP_ERR_HANDLER (2); \ +- } +- +- +-/* Generate loop-function with software routing. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-#if defined HAVE_S390_VX_ASM_SUPPORT +-# define LOOPFCT __from_utf16_loop_c +-# define LOOP_NEED_FLAGS +-# define BODY BODY_FROM_C +-# include +- +-/* Generate loop-function with hardware vector instructions. */ +-# define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-# define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-# define LOOPFCT __from_utf16_loop_vx +-# define LOOP_NEED_FLAGS +-# define BODY BODY_FROM_VX +-# include +- +-/* Generate ifunc'ed loop function. 
*/ +-__typeof(__from_utf16_loop_c) +-__attribute__ ((ifunc ("__from_utf16_loop_resolver"))) +-__from_utf16_loop; +- +-static void * +-__from_utf16_loop_resolver (unsigned long int dl_hwcap) +-{ +- if (dl_hwcap & HWCAP_S390_VX) +- return __from_utf16_loop_vx; +- else +- return __from_utf16_loop_c; +-} +- +-strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) +-#else +-# define LOOPFCT FROM_LOOP +-# define LOOP_NEED_FLAGS +-# define BODY BODY_FROM_C +-# include +-#endif +- +-/* Conversion from UTF-32 internal/BE to UTF-16. */ +- +-/* The software routine is copied from utf-16.c (minus bytes +- swapping). */ +-#define BODY_TO_C \ +- { \ +- uint32_t c = get32 (inptr); \ +- \ +- if (__builtin_expect (c <= 0xd7ff, 1) \ +- || (c >=0xdc00 && c <= 0xffff)) \ +- { \ +- /* Two UTF-16 chars. */ \ +- put16 (outptr, c); \ +- } \ +- else if (__builtin_expect (c >= 0x10000, 1) \ +- && __builtin_expect (c <= 0x10ffff, 1)) \ +- { \ +- /* Four UTF-16 chars. */ \ +- uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \ +- uint16_t out; \ +- \ +- /* Generate a surrogate character. */ \ +- if (__glibc_unlikely (outptr + 4 > outend)) \ +- { \ +- /* Overflow in the output buffer. 
*/ \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ +- \ +- out = 0xd800; \ +- out |= (zabcd & 0xff) << 6; \ +- out |= (c >> 10) & 0x3f; \ +- put16 (outptr, out); \ +- outptr += 2; \ +- \ +- out = 0xdc00; \ +- out |= c & 0x3ff; \ +- put16 (outptr, out); \ +- } \ +- else \ +- { \ +- STANDARD_TO_LOOP_ERR_HANDLER (4); \ +- } \ +- outptr += 2; \ +- inptr += 4; \ +- } +- +-#define BODY_TO_ETF3EH \ +- { \ +- HARDWARE_CONVERT ("cu42 %0, %1"); \ +- \ +- if (__glibc_likely (inptr == inend) \ +- || result == __GCONV_FULL_OUTPUT) \ +- break; \ +- \ +- if (inptr + 4 > inend) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- \ +- STANDARD_TO_LOOP_ERR_HANDLER (4); \ +- } +- +-#define BODY_TO_VX \ +- { \ +- register const unsigned char* pInput asm ("8") = inptr; \ +- register size_t inlen asm ("9") = inend - inptr; \ +- register unsigned char* pOutput asm ("10") = outptr; \ +- register size_t outlen asm("11") = outend - outptr; \ +- unsigned long tmp, tmp2, tmp3; \ +- asm volatile (".machine push\n\t" \ +- ".machine \"z13\"\n\t" \ +- ".machinemode \"zarch_nohighgprs\"\n\t" \ +- /* Setup to check for surrogates. */ \ +- " larl %[R_TMP],9f\n\t" \ +- " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \ +- /* Loop which handles UTF-16 chars \ +- ch < 0xd800 || (ch > 0xdfff && ch < 0x10000). */ \ +- "0: clgijl %[R_INLEN],32,20f\n\t" \ +- " clgijl %[R_OUTLEN],16,20f\n\t" \ +- "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \ +- " lghi %[R_TMP2],0\n\t" \ +- /* Shorten to UTF-16. */ \ +- " vpkf %%v18,%%v16,%%v17\n\t" \ +- /* Check for surrogate chars. */ \ +- " vstrcfs %%v19,%%v16,%%v30,%%v31\n\t" \ +- " jno 10f\n\t" \ +- " vstrcfs %%v19,%%v17,%%v30,%%v31\n\t" \ +- " jno 11f\n\t" \ +- /* Store 16 bytes to buf_out. 
*/ \ +- " vst %%v18,0(%[R_OUT])\n\t" \ +- " la %[R_IN],32(%[R_IN])\n\t" \ +- " aghi %[R_INLEN],-32\n\t" \ +- " aghi %[R_OUTLEN],-16\n\t" \ +- " la %[R_OUT],16(%[R_OUT])\n\t" \ +- " clgijl %[R_INLEN],32,20f\n\t" \ +- " clgijl %[R_OUTLEN],16,20f\n\t" \ +- " j 1b\n\t" \ +- /* Setup to check for ch >= 0xd800 && ch <= 0xdfff \ +- and check for ch >= 0x10000. (v30, v31) */ \ +- "9: .long 0xd800,0xdfff,0x10000,0x10000\n\t" \ +- " .long 0xa0000000,0xc0000000, 0xa0000000,0xa0000000\n\t" \ +- /* At least on UTF32 char is in range of surrogates. \ +- Store the preceding characters. */ \ +- "11: ahi %[R_TMP2],16\n\t" \ +- "10: vlgvb %[R_TMP],%%v19,7\n\t" \ +- " agr %[R_TMP],%[R_TMP2]\n\t" \ +- " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ +- " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ +- " jl 20f\n\t" \ +- " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ +- /* Update pointers. */ \ +- " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ +- " slgr %[R_INLEN],%[R_TMP]\n\t" \ +- " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ +- " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ +- /* Handles UTF16 surrogates with convert instruction. */ \ +- "20: cu42 %[R_OUT],%[R_IN]\n\t" \ +- " jo 0b\n\t" /* Try vector implemenation again. */ \ +- " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ +- " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. 
*/ \ +- ".machine pop" \ +- : /* outputs */ [R_IN] "+a" (pInput) \ +- , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ +- , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ +- , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ +- , [R_RES] "+d" (result) \ +- : /* inputs */ \ +- [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ +- , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ +- , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ +- : /* clobber list */ "memory", "cc" \ +- ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ +- ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ +- ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ +- ); \ +- inptr = pInput; \ +- outptr = pOutput; \ +- \ +- if (__glibc_likely (inptr == inend) \ +- || result == __GCONV_FULL_OUTPUT) \ +- break; \ +- if (inptr + 4 > inend) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- STANDARD_TO_LOOP_ERR_HANDLER (4); \ +- } +- +-/* Generate loop-function with software routing. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#define LOOPFCT __to_utf16_loop_c +-#define LOOP_NEED_FLAGS +-#define BODY BODY_TO_C +-#include +- +-/* Generate loop-function with hardware utf-convert instruction. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#define LOOPFCT __to_utf16_loop_etf3eh +-#define LOOP_NEED_FLAGS +-#define BODY BODY_TO_ETF3EH +-#include +- +-#if defined HAVE_S390_VX_ASM_SUPPORT +-/* Generate loop-function with hardware vector instructions. */ +-# define MIN_NEEDED_INPUT MIN_NEEDED_TO +-# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-# define LOOPFCT __to_utf16_loop_vx +-# define LOOP_NEED_FLAGS +-# define BODY BODY_TO_VX +-# include +-#endif +- +-/* Generate ifunc'ed loop function. 
*/ +-__typeof(__to_utf16_loop_c) +-__attribute__ ((ifunc ("__to_utf16_loop_resolver"))) +-__to_utf16_loop; +- +-static void * +-__to_utf16_loop_resolver (unsigned long int dl_hwcap) +-{ +-#if defined HAVE_S390_VX_ASM_SUPPORT +- if (dl_hwcap & HWCAP_S390_VX) +- return __to_utf16_loop_vx; +- else +-#endif +- if (dl_hwcap & HWCAP_S390_ETF3EH) +- return __to_utf16_loop_etf3eh; +- else +- return __to_utf16_loop_c; +-} +- +-strong_alias (__to_utf16_loop_c_single, __to_utf16_loop_single) +- +- +-#include +diff --git a/sysdeps/s390/s390-64/utf8-utf16-z9.c b/sysdeps/s390/s390-64/utf8-utf16-z9.c +deleted file mode 100644 +index b36ee9e..0000000 +--- a/sysdeps/s390/s390-64/utf8-utf16-z9.c ++++ /dev/null +@@ -1,806 +0,0 @@ +-/* Conversion between UTF-16 and UTF-32 BE/internal. +- +- This module uses the Z9-109 variants of the Convert Unicode +- instructions. +- Copyright (C) 1997-2009 Free Software Foundation, Inc. +- +- Author: Andreas Krebbel +- Based on the work by Ulrich Drepper , 1997. +- +- Thanks to Daniel Appich who covered the relevant performance work +- in his diploma thesis. +- +- This is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- This is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +-#include +-#include +- +-#if defined HAVE_S390_VX_GCC_SUPPORT +-# define ASM_CLOBBER_VR(NR) , NR +-#else +-# define ASM_CLOBBER_VR(NR) +-#endif +- +-/* Defines for skeleton.c. 
*/ +-#define DEFINE_INIT 0 +-#define DEFINE_FINI 0 +-#define MIN_NEEDED_FROM 1 +-#define MAX_NEEDED_FROM 4 +-#define MIN_NEEDED_TO 2 +-#define MAX_NEEDED_TO 4 +-#define FROM_LOOP __from_utf8_loop +-#define TO_LOOP __to_utf8_loop +-#define FROM_DIRECTION (dir == from_utf8) +-#define ONE_DIRECTION 0 +- +- +-/* UTF-16 big endian byte order mark. */ +-#define BOM_UTF16 0xfeff +- +-/* Direction of the transformation. */ +-enum direction +-{ +- illegal_dir, +- to_utf8, +- from_utf8 +-}; +- +-struct utf8_data +-{ +- enum direction dir; +- int emit_bom; +-}; +- +- +-extern int gconv_init (struct __gconv_step *step); +-int +-gconv_init (struct __gconv_step *step) +-{ +- /* Determine which direction. */ +- struct utf8_data *new_data; +- enum direction dir = illegal_dir; +- int emit_bom; +- int result; +- +- emit_bom = (__strcasecmp (step->__to_name, "UTF-16//") == 0); +- +- if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0 +- && (__strcasecmp (step->__to_name, "UTF-16//") == 0 +- || __strcasecmp (step->__to_name, "UTF-16BE//") == 0)) +- { +- dir = from_utf8; +- } +- else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0 +- && __strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0) +- { +- dir = to_utf8; +- } +- +- result = __GCONV_NOCONV; +- if (dir != illegal_dir) +- { +- new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data)); +- +- result = __GCONV_NOMEM; +- if (new_data != NULL) +- { +- new_data->dir = dir; +- new_data->emit_bom = emit_bom; +- step->__data = new_data; +- +- if (dir == from_utf8) +- { +- step->__min_needed_from = MIN_NEEDED_FROM; +- step->__max_needed_from = MIN_NEEDED_FROM; +- step->__min_needed_to = MIN_NEEDED_TO; +- step->__max_needed_to = MIN_NEEDED_TO; +- } +- else +- { +- step->__min_needed_from = MIN_NEEDED_TO; +- step->__max_needed_from = MIN_NEEDED_TO; +- step->__min_needed_to = MIN_NEEDED_FROM; +- step->__max_needed_to = MIN_NEEDED_FROM; +- } +- +- step->__stateful = 0; +- +- result = __GCONV_OK; +- } +- } +- +- 
return result; +-} +- +- +-extern void gconv_end (struct __gconv_step *data); +-void +-gconv_end (struct __gconv_step *data) +-{ +- free (data->__data); +-} +- +-/* The macro for the hardware loop. This is used for both +- directions. */ +-#define HARDWARE_CONVERT(INSTRUCTION) \ +- { \ +- register const unsigned char* pInput __asm__ ("8") = inptr; \ +- register unsigned long long inlen __asm__ ("9") = inend - inptr; \ +- register unsigned char* pOutput __asm__ ("10") = outptr; \ +- register unsigned long long outlen __asm__("11") = outend - outptr; \ +- uint64_t cc = 0; \ +- \ +- __asm__ __volatile__ (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ +- \ +- inptr = pInput; \ +- outptr = pOutput; \ +- cc >>= 28; \ +- \ +- if (cc == 1) \ +- { \ +- result = __GCONV_FULL_OUTPUT; \ +- } \ +- else if (cc == 2) \ +- { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- } \ +- } +- +-#define PREPARE_LOOP \ +- enum direction dir = ((struct utf8_data *) step->__data)->dir; \ +- int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ +- \ +- if (emit_bom && !data->__internal_use \ +- && data->__invocation_counter == 0) \ +- { \ +- /* Emit the UTF-16 Byte Order Mark. */ \ +- if (__glibc_unlikely (outbuf + 2 > outend)) \ +- return __GCONV_FULL_OUTPUT; \ +- \ +- put16u (outbuf, BOM_UTF16); \ +- outbuf += 2; \ +- } +- +-/* Conversion function from UTF-8 to UTF-16. 
*/ +-#define BODY_FROM_HW(ASM) \ +- { \ +- ASM; \ +- if (__glibc_likely (inptr == inend) \ +- || result == __GCONV_FULL_OUTPUT) \ +- break; \ +- \ +- int i; \ +- for (i = 1; inptr + i < inend && i < 5; ++i) \ +- if ((inptr[i] & 0xc0) != 0x80) \ +- break; \ +- \ +- if (__glibc_likely (inptr + i == inend \ +- && result == __GCONV_EMPTY_INPUT)) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- STANDARD_FROM_LOOP_ERR_HANDLER (i); \ +- } +- +-#define BODY_FROM_ETF3EH BODY_FROM_HW (HARDWARE_CONVERT ("cu12 %0, %1, 1")) +- +-#define HW_FROM_VX \ +- { \ +- register const unsigned char* pInput asm ("8") = inptr; \ +- register size_t inlen asm ("9") = inend - inptr; \ +- register unsigned char* pOutput asm ("10") = outptr; \ +- register size_t outlen asm("11") = outend - outptr; \ +- unsigned long tmp, tmp2, tmp3; \ +- asm volatile (".machine push\n\t" \ +- ".machine \"z13\"\n\t" \ +- ".machinemode \"zarch_nohighgprs\"\n\t" \ +- " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \ +- " vrepib %%v31,0x20\n\t" \ +- /* Loop which handles UTF-8 chars <=0x7f. */ \ +- "0: clgijl %[R_INLEN],16,20f\n\t" \ +- " clgijl %[R_OUTLEN],32,20f\n\t" \ +- "1: vl %%v16,0(%[R_IN])\n\t" \ +- " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ +- " jno 10f\n\t" /* Jump away if not all bytes are 1byte \ +- UTF8 chars. */ \ +- /* Enlarge to UTF-16. */ \ +- " vuplhb %%v18,%%v16\n\t" \ +- " la %[R_IN],16(%[R_IN])\n\t" \ +- " vupllb %%v19,%%v16\n\t" \ +- " aghi %[R_INLEN],-16\n\t" \ +- /* Store 32 bytes to buf_out. */ \ +- " vstm %%v18,%%v19,0(%[R_OUT])\n\t" \ +- " aghi %[R_OUTLEN],-32\n\t" \ +- " la %[R_OUT],32(%[R_OUT])\n\t" \ +- " clgijl %[R_INLEN],16,20f\n\t" \ +- " clgijl %[R_OUTLEN],32,20f\n\t" \ +- " j 1b\n\t" \ +- "10:\n\t" \ +- /* At least one byte is > 0x7f. \ +- Store the preceding 1-byte chars. */ \ +- " vlgvb %[R_TMP],%%v17,7\n\t" \ +- " sllk %[R_TMP2],%[R_TMP],1\n\t" /* Compute highest \ +- index to store. 
*/ \ +- " llgfr %[R_TMP3],%[R_TMP2]\n\t" \ +- " ahi %[R_TMP2],-1\n\t" \ +- " jl 20f\n\t" \ +- " vuplhb %%v18,%%v16\n\t" \ +- " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ +- " ahi %[R_TMP2],-16\n\t" \ +- " jl 11f\n\t" \ +- " vupllb %%v19,%%v16\n\t" \ +- " vstl %%v19,%[R_TMP2],16(%[R_OUT])\n\t" \ +- "11: \n\t" /* Update pointers. */ \ +- " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ +- " slgr %[R_INLEN],%[R_TMP]\n\t" \ +- " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ +- " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ +- /* Handle multibyte utf8-char with convert instruction. */ \ +- "20: cu12 %[R_OUT],%[R_IN],1\n\t" \ +- " jo 0b\n\t" /* Try vector implemenation again. */ \ +- " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ +- " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \ +- ".machine pop" \ +- : /* outputs */ [R_IN] "+a" (pInput) \ +- , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ +- , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ +- , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ +- , [R_RES] "+d" (result) \ +- : /* inputs */ \ +- [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ +- , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ +- : /* clobber list */ "memory", "cc" \ +- ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ +- ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ +- ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ +- ); \ +- inptr = pInput; \ +- outptr = pOutput; \ +- } +-#define BODY_FROM_VX BODY_FROM_HW (HW_FROM_VX) +- +- +-/* The software implementation is based on the code in gconv_simple.c. */ +-#define BODY_FROM_C \ +- { \ +- /* Next input byte. */ \ +- uint16_t ch = *inptr; \ +- \ +- if (__glibc_likely (ch < 0x80)) \ +- { \ +- /* One byte sequence. */ \ +- ++inptr; \ +- } \ +- else \ +- { \ +- uint_fast32_t cnt; \ +- uint_fast32_t i; \ +- \ +- if (ch >= 0xc2 && ch < 0xe0) \ +- { \ +- /* We expect two bytes. The first byte cannot be 0xc0 \ +- or 0xc1, otherwise the wide character could have been \ +- represented using a single byte. 
*/ \ +- cnt = 2; \ +- ch &= 0x1f; \ +- } \ +- else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ +- { \ +- /* We expect three bytes. */ \ +- cnt = 3; \ +- ch &= 0x0f; \ +- } \ +- else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ +- { \ +- /* We expect four bytes. */ \ +- cnt = 4; \ +- ch &= 0x07; \ +- } \ +- else \ +- { \ +- /* Search the end of this ill-formed UTF-8 character. This \ +- is the next byte with (x & 0xc0) != 0x80. */ \ +- i = 0; \ +- do \ +- ++i; \ +- while (inptr + i < inend \ +- && (*(inptr + i) & 0xc0) == 0x80 \ +- && i < 5); \ +- \ +- errout: \ +- STANDARD_FROM_LOOP_ERR_HANDLER (i); \ +- } \ +- \ +- if (__glibc_unlikely (inptr + cnt > inend)) \ +- { \ +- /* We don't have enough input. But before we report \ +- that check that all the bytes are correct. */ \ +- for (i = 1; inptr + i < inend; ++i) \ +- if ((inptr[i] & 0xc0) != 0x80) \ +- break; \ +- \ +- if (__glibc_likely (inptr + i == inend)) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- \ +- goto errout; \ +- } \ +- \ +- if (cnt == 4) \ +- { \ +- /* For 4 byte UTF-8 chars two UTF-16 chars (high and \ +- low) are needed. */ \ +- uint16_t zabcd, high, low; \ +- \ +- if (__glibc_unlikely (outptr + 4 > outend)) \ +- { \ +- /* Overflow in the output buffer. */ \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ +- \ +- /* Check if tail-bytes >= 0x80, < 0xc0. */ \ +- for (i = 1; i < cnt; ++i) \ +- { \ +- if ((inptr[i] & 0xc0) != 0x80) \ +- /* This is an illegal encoding. */ \ +- goto errout; \ +- } \ +- \ +- /* See Principles of Operations cu12. */ \ +- zabcd = (((inptr[0] & 0x7) << 2) | \ +- ((inptr[1] & 0x30) >> 4)) - 1; \ +- \ +- /* z-bit must be zero after subtracting 1. 
*/ \ +- if (zabcd & 0x10) \ +- STANDARD_FROM_LOOP_ERR_HANDLER (4) \ +- \ +- high = (uint16_t)(0xd8 << 8); /* high surrogate id */ \ +- high |= zabcd << 6; /* abcd bits */ \ +- high |= (inptr[1] & 0xf) << 2; /* efgh bits */ \ +- high |= (inptr[2] & 0x30) >> 4; /* ij bits */ \ +- \ +- low = (uint16_t)(0xdc << 8); /* low surrogate id */ \ +- low |= ((uint16_t)inptr[2] & 0xc) << 6; /* kl bits */ \ +- low |= (inptr[2] & 0x3) << 6; /* mn bits */ \ +- low |= inptr[3] & 0x3f; /* opqrst bits */ \ +- \ +- put16 (outptr, high); \ +- outptr += 2; \ +- put16 (outptr, low); \ +- outptr += 2; \ +- inptr += 4; \ +- continue; \ +- } \ +- else \ +- { \ +- /* Read the possible remaining bytes. */ \ +- for (i = 1; i < cnt; ++i) \ +- { \ +- uint16_t byte = inptr[i]; \ +- \ +- if ((byte & 0xc0) != 0x80) \ +- /* This is an illegal encoding. */ \ +- break; \ +- \ +- ch <<= 6; \ +- ch |= byte & 0x3f; \ +- } \ +- \ +- /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ +- If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ +- have been represented with fewer than cnt bytes. */ \ +- if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ +- /* Do not accept UTF-16 surrogates. */ \ +- || (ch >= 0xd800 && ch <= 0xdfff)) \ +- { \ +- /* This is an illegal encoding. */ \ +- goto errout; \ +- } \ +- \ +- inptr += cnt; \ +- } \ +- } \ +- /* Now adjust the pointers and store the result. */ \ +- *((uint16_t *) outptr) = ch; \ +- outptr += sizeof (uint16_t); \ +- } +- +-/* Generate loop-function with software implementation. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO +-#define LOOPFCT __from_utf8_loop_c +-#define LOOP_NEED_FLAGS +-#define BODY BODY_FROM_C +-#include +- +-/* Generate loop-function with hardware utf-convert instruction. 
*/ +-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO +-#define LOOPFCT __from_utf8_loop_etf3eh +-#define LOOP_NEED_FLAGS +-#define BODY BODY_FROM_ETF3EH +-#include +- +-#if defined HAVE_S390_VX_ASM_SUPPORT +-/* Generate loop-function with hardware vector and utf-convert instructions. */ +-# define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-# define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-# define MAX_NEEDED_OUTPUT MAX_NEEDED_TO +-# define LOOPFCT __from_utf8_loop_vx +-# define LOOP_NEED_FLAGS +-# define BODY BODY_FROM_VX +-# include +-#endif +- +- +-/* Generate ifunc'ed loop function. */ +-__typeof(__from_utf8_loop_c) +-__attribute__ ((ifunc ("__from_utf8_loop_resolver"))) +-__from_utf8_loop; +- +-static void * +-__from_utf8_loop_resolver (unsigned long int dl_hwcap) +-{ +-#if defined HAVE_S390_VX_ASM_SUPPORT +- if (dl_hwcap & HWCAP_S390_VX) +- return __from_utf8_loop_vx; +- else +-#endif +- if (dl_hwcap & HWCAP_S390_ETF3EH) +- return __from_utf8_loop_etf3eh; +- else +- return __from_utf8_loop_c; +-} +- +-strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) +- +-/* Conversion from UTF-16 to UTF-8. */ +- +-/* The software routine is based on the functionality of the S/390 +- hardware instruction (cu21) as described in the Principles of +- Operation. */ +-#define BODY_TO_C \ +- { \ +- uint16_t c = get16 (inptr); \ +- \ +- if (__glibc_likely (c <= 0x007f)) \ +- { \ +- /* Single byte UTF-8 char. */ \ +- *outptr = c & 0xff; \ +- outptr++; \ +- } \ +- else if (c >= 0x0080 && c <= 0x07ff) \ +- { \ +- /* Two byte UTF-8 char. */ \ +- \ +- if (__glibc_unlikely (outptr + 2 > outend)) \ +- { \ +- /* Overflow in the output buffer. 
*/ \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ +- \ +- outptr[0] = 0xc0; \ +- outptr[0] |= c >> 6; \ +- \ +- outptr[1] = 0x80; \ +- outptr[1] |= c & 0x3f; \ +- \ +- outptr += 2; \ +- } \ +- else if ((c >= 0x0800 && c <= 0xd7ff) || c > 0xdfff) \ +- { \ +- /* Three byte UTF-8 char. */ \ +- \ +- if (__glibc_unlikely (outptr + 3 > outend)) \ +- { \ +- /* Overflow in the output buffer. */ \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ +- outptr[0] = 0xe0; \ +- outptr[0] |= c >> 12; \ +- \ +- outptr[1] = 0x80; \ +- outptr[1] |= (c >> 6) & 0x3f; \ +- \ +- outptr[2] = 0x80; \ +- outptr[2] |= c & 0x3f; \ +- \ +- outptr += 3; \ +- } \ +- else if (c >= 0xd800 && c <= 0xdbff) \ +- { \ +- /* Four byte UTF-8 char. */ \ +- uint16_t low, uvwxy; \ +- \ +- if (__glibc_unlikely (outptr + 4 > outend)) \ +- { \ +- /* Overflow in the output buffer. */ \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ +- if (__glibc_unlikely (inptr + 4 > inend)) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- \ +- inptr += 2; \ +- low = get16 (inptr); \ +- \ +- if ((low & 0xfc00) != 0xdc00) \ +- { \ +- inptr -= 2; \ +- STANDARD_TO_LOOP_ERR_HANDLER (2); \ +- } \ +- uvwxy = ((c >> 6) & 0xf) + 1; \ +- outptr[0] = 0xf0; \ +- outptr[0] |= uvwxy >> 2; \ +- \ +- outptr[1] = 0x80; \ +- outptr[1] |= (uvwxy << 4) & 0x30; \ +- outptr[1] |= (c >> 2) & 0x0f; \ +- \ +- outptr[2] = 0x80; \ +- outptr[2] |= (c & 0x03) << 4; \ +- outptr[2] |= (low >> 6) & 0x0f; \ +- \ +- outptr[3] = 0x80; \ +- outptr[3] |= low & 0x3f; \ +- \ +- outptr += 4; \ +- } \ +- else \ +- { \ +- STANDARD_TO_LOOP_ERR_HANDLER (2); \ +- } \ +- inptr += 2; \ +- } +- +-#define BODY_TO_VX \ +- { \ +- size_t inlen = inend - inptr; \ +- size_t outlen = outend - outptr; \ +- unsigned long tmp, tmp2, tmp3; \ +- asm volatile (".machine push\n\t" \ +- ".machine \"z13\"\n\t" \ +- ".machinemode \"zarch_nohighgprs\"\n\t" \ +- /* Setup to check for values <= 0x7f. 
*/ \ +- " larl %[R_TMP],9f\n\t" \ +- " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \ +- /* Loop which handles UTF-16 chars <=0x7f. */ \ +- "0: clgijl %[R_INLEN],32,2f\n\t" \ +- " clgijl %[R_OUTLEN],16,2f\n\t" \ +- "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \ +- " lghi %[R_TMP2],0\n\t" \ +- /* Check for > 1byte UTF-8 chars. */ \ +- " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \ +- " jno 10f\n\t" /* Jump away if not all bytes are 1byte \ +- UTF8 chars. */ \ +- " vstrchs %%v19,%%v17,%%v30,%%v31\n\t" \ +- " jno 11f\n\t" /* Jump away if not all bytes are 1byte \ +- UTF8 chars. */ \ +- /* Shorten to UTF-8. */ \ +- " vpkh %%v18,%%v16,%%v17\n\t" \ +- " la %[R_IN],32(%[R_IN])\n\t" \ +- " aghi %[R_INLEN],-32\n\t" \ +- /* Store 16 bytes to buf_out. */ \ +- " vst %%v18,0(%[R_OUT])\n\t" \ +- " aghi %[R_OUTLEN],-16\n\t" \ +- " la %[R_OUT],16(%[R_OUT])\n\t" \ +- " clgijl %[R_INLEN],32,2f\n\t" \ +- " clgijl %[R_OUTLEN],16,2f\n\t" \ +- " j 1b\n\t" \ +- /* Setup to check for ch > 0x7f. (v30, v31) */ \ +- "9: .short 0x7f,0x7f,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ +- " .short 0x2000,0x2000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ +- /* At least one byte is > 0x7f. \ +- Store the preceding 1-byte chars. */ \ +- "11: lghi %[R_TMP2],16\n\t" /* match was found in v17. */ \ +- "10:\n\t" \ +- " vlgvb %[R_TMP],%%v19,7\n\t" \ +- /* Shorten to UTF-8. */ \ +- " vpkh %%v18,%%v16,%%v17\n\t" \ +- " ar %[R_TMP],%[R_TMP2]\n\t" /* Number of in bytes. */ \ +- " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ +- " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ +- " jl 13f\n\t" \ +- " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ +- /* Update pointers. */ \ +- " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ +- " slgr %[R_INLEN],%[R_TMP]\n\t" \ +- " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ +- " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ +- "13: \n\t" \ +- /* Calculate remaining uint16_t values in loaded vrs. 
*/ \ +- " lghi %[R_TMP2],16\n\t" \ +- " slgr %[R_TMP2],%[R_TMP3]\n\t" \ +- " llh %[R_TMP],0(%[R_IN])\n\t" \ +- " aghi %[R_INLEN],-2\n\t" \ +- " j 22f\n\t" \ +- /* Handle remaining bytes. */ \ +- "2: \n\t" \ +- /* Zero, one or more bytes available? */ \ +- " clgfi %[R_INLEN],1\n\t" \ +- " locghie %[R_RES],%[RES_IN_FULL]\n\t" /* Only one byte. */ \ +- " jle 99f\n\t" /* End if less than two bytes. */ \ +- /* Calculate remaining uint16_t values in inptr. */ \ +- " srlg %[R_TMP2],%[R_INLEN],1\n\t" \ +- /* Handle multibyte utf8-char. */ \ +- "20: llh %[R_TMP],0(%[R_IN])\n\t" \ +- " aghi %[R_INLEN],-2\n\t" \ +- /* Test if ch is 1-byte UTF-8 char. */ \ +- "21: clijh %[R_TMP],0x7f,22f\n\t" \ +- /* Handle 1-byte UTF-8 char. */ \ +- "31: slgfi %[R_OUTLEN],1\n\t" \ +- " jl 90f \n\t" \ +- " stc %[R_TMP],0(%[R_OUT])\n\t" \ +- " la %[R_IN],2(%[R_IN])\n\t" \ +- " la %[R_OUT],1(%[R_OUT])\n\t" \ +- " brctg %[R_TMP2],20b\n\t" \ +- " j 0b\n\t" /* Switch to vx-loop. */ \ +- /* Test if ch is 2-byte UTF-8 char. */ \ +- "22: clfi %[R_TMP],0x7ff\n\t" \ +- " jh 23f\n\t" \ +- /* Handle 2-byte UTF-8 char. */ \ +- "32: slgfi %[R_OUTLEN],2\n\t" \ +- " jl 90f \n\t" \ +- " llill %[R_TMP3],0xc080\n\t" \ +- " la %[R_IN],2(%[R_IN])\n\t" \ +- " risbgn %[R_TMP3],%[R_TMP],51,55,2\n\t" /* 1. byte. */ \ +- " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 2. byte. */ \ +- " sth %[R_TMP3],0(%[R_OUT])\n\t" \ +- " la %[R_OUT],2(%[R_OUT])\n\t" \ +- " brctg %[R_TMP2],20b\n\t" \ +- " j 0b\n\t" /* Switch to vx-loop. */ \ +- /* Test if ch is 3-byte UTF-8 char. */ \ +- "23: clfi %[R_TMP],0xd7ff\n\t" \ +- " jh 24f\n\t" \ +- /* Handle 3-byte UTF-8 char. */ \ +- "33: slgfi %[R_OUTLEN],3\n\t" \ +- " jl 90f \n\t" \ +- " llilf %[R_TMP3],0xe08080\n\t" \ +- " la %[R_IN],2(%[R_IN])\n\t" \ +- " risbgn %[R_TMP3],%[R_TMP],44,47,4\n\t" /* 1. byte. */ \ +- " risbgn %[R_TMP3],%[R_TMP],50,55,2\n\t" /* 2. byte. */ \ +- " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 3. byte. 
*/ \ +- " stcm %[R_TMP3],7,0(%[R_OUT])\n\t" \ +- " la %[R_OUT],3(%[R_OUT])\n\t" \ +- " brctg %[R_TMP2],20b\n\t" \ +- " j 0b\n\t" /* Switch to vx-loop. */ \ +- /* Test if ch is 4-byte UTF-8 char. */ \ +- "24: clfi %[R_TMP],0xdfff\n\t" \ +- " jh 33b\n\t" /* Handle this 3-byte UTF-8 char. */ \ +- " clfi %[R_TMP],0xdbff\n\t" \ +- " locghih %[R_RES],%[RES_IN_ILL]\n\t" \ +- " jh 99f\n\t" /* Jump away if this is a low surrogate \ +- without a preceding high surrogate. */ \ +- /* Handle 4-byte UTF-8 char. */ \ +- "34: slgfi %[R_OUTLEN],4\n\t" \ +- " jl 90f \n\t" \ +- " slgfi %[R_INLEN],2\n\t" \ +- " locghil %[R_RES],%[RES_IN_FULL]\n\t" \ +- " jl 99f\n\t" /* Jump away if low surrogate is missing. */ \ +- " llilf %[R_TMP3],0xf0808080\n\t" \ +- " aghi %[R_TMP],0x40\n\t" \ +- " risbgn %[R_TMP3],%[R_TMP],37,39,16\n\t" /* 1. byte: uvw */ \ +- " risbgn %[R_TMP3],%[R_TMP],42,43,14\n\t" /* 2. byte: xy */ \ +- " risbgn %[R_TMP3],%[R_TMP],44,47,14\n\t" /* 2. byte: efgh */ \ +- " risbgn %[R_TMP3],%[R_TMP],50,51,12\n\t" /* 3. byte: ij */ \ +- " llh %[R_TMP],2(%[R_IN])\n\t" /* Load low surrogate. */ \ +- " risbgn %[R_TMP3],%[R_TMP],52,55,2\n\t" /* 3. byte: klmn */ \ +- " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 4. byte: opqrst */ \ +- " nilf %[R_TMP],0xfc00\n\t" \ +- " clfi %[R_TMP],0xdc00\n\t" /* Check if it starts with 0xdc00. */ \ +- " locghine %[R_RES],%[RES_IN_ILL]\n\t" \ +- " jne 99f\n\t" /* Jump away if low surrogate is invalid. */ \ +- " st %[R_TMP3],0(%[R_OUT])\n\t" \ +- " la %[R_IN],4(%[R_IN])\n\t" \ +- " la %[R_OUT],4(%[R_OUT])\n\t" \ +- " aghi %[R_TMP2],-2\n\t" \ +- " jh 20b\n\t" \ +- " j 0b\n\t" /* Switch to vx-loop. */ \ +- /* Exit with __GCONV_FULL_OUTPUT. 
*/ \ +- "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t" \ +- "99: \n\t" \ +- ".machine pop" \ +- : /* outputs */ [R_IN] "+a" (inptr) \ +- , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \ +- , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ +- , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ +- , [R_RES] "+d" (result) \ +- : /* inputs */ \ +- [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ +- , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ +- , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ +- : /* clobber list */ "memory", "cc" \ +- ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ +- ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ +- ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ +- ); \ +- if (__glibc_likely (inptr == inend) \ +- || result != __GCONV_ILLEGAL_INPUT) \ +- break; \ +- \ +- STANDARD_TO_LOOP_ERR_HANDLER (2); \ +- } +- +-/* Generate loop-function with software implementation. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MAX_NEEDED_INPUT MAX_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#if defined HAVE_S390_VX_ASM_SUPPORT +-# define LOOPFCT __to_utf8_loop_c +-# define BODY BODY_TO_C +-# define LOOP_NEED_FLAGS +-# include +- +-/* Generate loop-function with software implementation. */ +-# define MIN_NEEDED_INPUT MIN_NEEDED_TO +-# define MAX_NEEDED_INPUT MAX_NEEDED_TO +-# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-# define LOOPFCT __to_utf8_loop_vx +-# define BODY BODY_TO_VX +-# define LOOP_NEED_FLAGS +-# include +- +-/* Generate ifunc'ed loop function. 
*/ +-__typeof(__to_utf8_loop_c) +-__attribute__ ((ifunc ("__to_utf8_loop_resolver"))) +-__to_utf8_loop; +- +-static void * +-__to_utf8_loop_resolver (unsigned long int dl_hwcap) +-{ +- if (dl_hwcap & HWCAP_S390_VX) +- return __to_utf8_loop_vx; +- else +- return __to_utf8_loop_c; +-} +- +-strong_alias (__to_utf8_loop_c_single, __to_utf8_loop_single) +- +-#else +-# define LOOPFCT TO_LOOP +-# define BODY BODY_TO_C +-# define LOOP_NEED_FLAGS +-# include +-#endif /* !HAVE_S390_VX_ASM_SUPPORT */ +- +-#include +diff --git a/sysdeps/s390/s390-64/utf8-utf32-z9.c b/sysdeps/s390/s390-64/utf8-utf32-z9.c +deleted file mode 100644 +index 1ce5ac5..0000000 +--- a/sysdeps/s390/s390-64/utf8-utf32-z9.c ++++ /dev/null +@@ -1,807 +0,0 @@ +-/* Conversion between UTF-8 and UTF-32 BE/internal. +- +- This module uses the Z9-109 variants of the Convert Unicode +- instructions. +- Copyright (C) 1997-2009 Free Software Foundation, Inc. +- +- Author: Andreas Krebbel +- Based on the work by Ulrich Drepper , 1997. +- +- Thanks to Daniel Appich who covered the relevant performance work +- in his diploma thesis. +- +- This is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- This is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +-#include +-#include +- +-#if defined HAVE_S390_VX_GCC_SUPPORT +-# define ASM_CLOBBER_VR(NR) , NR +-#else +-# define ASM_CLOBBER_VR(NR) +-#endif +- +-/* Defines for skeleton.c. 
*/ +-#define DEFINE_INIT 0 +-#define DEFINE_FINI 0 +-#define MIN_NEEDED_FROM 1 +-#define MAX_NEEDED_FROM 6 +-#define MIN_NEEDED_TO 4 +-#define FROM_LOOP __from_utf8_loop +-#define TO_LOOP __to_utf8_loop +-#define FROM_DIRECTION (dir == from_utf8) +-#define ONE_DIRECTION 0 +- +-/* UTF-32 big endian byte order mark. */ +-#define BOM 0x0000feffu +- +-/* Direction of the transformation. */ +-enum direction +-{ +- illegal_dir, +- to_utf8, +- from_utf8 +-}; +- +-struct utf8_data +-{ +- enum direction dir; +- int emit_bom; +-}; +- +- +-extern int gconv_init (struct __gconv_step *step); +-int +-gconv_init (struct __gconv_step *step) +-{ +- /* Determine which direction. */ +- struct utf8_data *new_data; +- enum direction dir = illegal_dir; +- int emit_bom; +- int result; +- +- emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0); +- +- if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0 +- && (__strcasecmp (step->__to_name, "UTF-32//") == 0 +- || __strcasecmp (step->__to_name, "UTF-32BE//") == 0 +- || __strcasecmp (step->__to_name, "INTERNAL") == 0)) +- { +- dir = from_utf8; +- } +- else if (__strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0 +- && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0 +- || __strcasecmp (step->__from_name, "INTERNAL") == 0)) +- { +- dir = to_utf8; +- } +- +- result = __GCONV_NOCONV; +- if (dir != illegal_dir) +- { +- new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data)); +- +- result = __GCONV_NOMEM; +- if (new_data != NULL) +- { +- new_data->dir = dir; +- new_data->emit_bom = emit_bom; +- step->__data = new_data; +- +- if (dir == from_utf8) +- { +- step->__min_needed_from = MIN_NEEDED_FROM; +- step->__max_needed_from = MIN_NEEDED_FROM; +- step->__min_needed_to = MIN_NEEDED_TO; +- step->__max_needed_to = MIN_NEEDED_TO; +- } +- else +- { +- step->__min_needed_from = MIN_NEEDED_TO; +- step->__max_needed_from = MIN_NEEDED_TO; +- step->__min_needed_to = MIN_NEEDED_FROM; +- step->__max_needed_to = MIN_NEEDED_FROM; 
+- } +- +- step->__stateful = 0; +- +- result = __GCONV_OK; +- } +- } +- +- return result; +-} +- +- +-extern void gconv_end (struct __gconv_step *data); +-void +-gconv_end (struct __gconv_step *data) +-{ +- free (data->__data); +-} +- +-/* The macro for the hardware loop. This is used for both +- directions. */ +-#define HARDWARE_CONVERT(INSTRUCTION) \ +- { \ +- register const unsigned char* pInput __asm__ ("8") = inptr; \ +- register unsigned long long inlen __asm__ ("9") = inend - inptr; \ +- register unsigned char* pOutput __asm__ ("10") = outptr; \ +- register unsigned long long outlen __asm__("11") = outend - outptr; \ +- uint64_t cc = 0; \ +- \ +- __asm__ __volatile__ (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ +- \ +- inptr = pInput; \ +- outptr = pOutput; \ +- cc >>= 28; \ +- \ +- if (cc == 1) \ +- { \ +- result = __GCONV_FULL_OUTPUT; \ +- } \ +- else if (cc == 2) \ +- { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- } \ +- } +- +-#define PREPARE_LOOP \ +- enum direction dir = ((struct utf8_data *) step->__data)->dir; \ +- int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ +- \ +- if (emit_bom && !data->__internal_use \ +- && data->__invocation_counter == 0) \ +- { \ +- /* Emit the Byte Order Mark. */ \ +- if (__glibc_unlikely (outbuf + 4 > outend)) \ +- return __GCONV_FULL_OUTPUT; \ +- \ +- put32u (outbuf, BOM); \ +- outbuf += 4; \ +- } +- +-/* Conversion function from UTF-8 to UTF-32 internal/BE. */ +- +-#define STORE_REST_COMMON \ +- { \ +- /* We store the remaining bytes while converting them into the UCS4 \ +- format. We can assume that the first byte in the buffer is \ +- correct and that it requires a larger number of bytes than there \ +- are in the input buffer. 
*/ \ +- wint_t ch = **inptrp; \ +- size_t cnt, r; \ +- \ +- state->__count = inend - *inptrp; \ +- \ +- assert (ch != 0xc0 && ch != 0xc1); \ +- if (ch >= 0xc2 && ch < 0xe0) \ +- { \ +- /* We expect two bytes. The first byte cannot be 0xc0 or \ +- 0xc1, otherwise the wide character could have been \ +- represented using a single byte. */ \ +- cnt = 2; \ +- ch &= 0x1f; \ +- } \ +- else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ +- { \ +- /* We expect three bytes. */ \ +- cnt = 3; \ +- ch &= 0x0f; \ +- } \ +- else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ +- { \ +- /* We expect four bytes. */ \ +- cnt = 4; \ +- ch &= 0x07; \ +- } \ +- else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ +- { \ +- /* We expect five bytes. */ \ +- cnt = 5; \ +- ch &= 0x03; \ +- } \ +- else \ +- { \ +- /* We expect six bytes. */ \ +- cnt = 6; \ +- ch &= 0x01; \ +- } \ +- \ +- /* The first byte is already consumed. */ \ +- r = cnt - 1; \ +- while (++(*inptrp) < inend) \ +- { \ +- ch <<= 6; \ +- ch |= **inptrp & 0x3f; \ +- --r; \ +- } \ +- \ +- /* Shift for the so far missing bytes. */ \ +- ch <<= r * 6; \ +- \ +- /* Store the number of bytes expected for the entire sequence. */ \ +- state->__count |= cnt << 8; \ +- \ +- /* Store the value. 
*/ \ +- state->__value.__wch = ch; \ +- } +- +-#define UNPACK_BYTES_COMMON \ +- { \ +- static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \ +- wint_t wch = state->__value.__wch; \ +- size_t ntotal = state->__count >> 8; \ +- \ +- inlen = state->__count & 255; \ +- \ +- bytebuf[0] = inmask[ntotal - 2]; \ +- \ +- do \ +- { \ +- if (--ntotal < inlen) \ +- bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ +- wch >>= 6; \ +- } \ +- while (ntotal > 1); \ +- \ +- bytebuf[0] |= wch; \ +- } +- +-#define CLEAR_STATE_COMMON \ +- state->__count = 0 +- +-#define BODY_FROM_HW(ASM) \ +- { \ +- ASM; \ +- if (__glibc_likely (inptr == inend) \ +- || result == __GCONV_FULL_OUTPUT) \ +- break; \ +- \ +- int i; \ +- for (i = 1; inptr + i < inend && i < 5; ++i) \ +- if ((inptr[i] & 0xc0) != 0x80) \ +- break; \ +- \ +- if (__glibc_likely (inptr + i == inend \ +- && result == __GCONV_EMPTY_INPUT)) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- STANDARD_FROM_LOOP_ERR_HANDLER (i); \ +- } +- +-/* This hardware routine uses the Convert UTF8 to UTF32 (cu14) instruction. */ +-#define BODY_FROM_ETF3EH BODY_FROM_HW (HARDWARE_CONVERT ("cu14 %0, %1, 1")) +- +- +-/* The software routine is copied from gconv_simple.c. */ +-#define BODY_FROM_C \ +- { \ +- /* Next input byte. */ \ +- uint32_t ch = *inptr; \ +- \ +- if (__glibc_likely (ch < 0x80)) \ +- { \ +- /* One byte sequence. */ \ +- ++inptr; \ +- } \ +- else \ +- { \ +- uint_fast32_t cnt; \ +- uint_fast32_t i; \ +- \ +- if (ch >= 0xc2 && ch < 0xe0) \ +- { \ +- /* We expect two bytes. The first byte cannot be 0xc0 or \ +- 0xc1, otherwise the wide character could have been \ +- represented using a single byte. */ \ +- cnt = 2; \ +- ch &= 0x1f; \ +- } \ +- else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ +- { \ +- /* We expect three bytes. */ \ +- cnt = 3; \ +- ch &= 0x0f; \ +- } \ +- else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ +- { \ +- /* We expect four bytes. 
*/ \ +- cnt = 4; \ +- ch &= 0x07; \ +- } \ +- else \ +- { \ +- /* Search the end of this ill-formed UTF-8 character. This \ +- is the next byte with (x & 0xc0) != 0x80. */ \ +- i = 0; \ +- do \ +- ++i; \ +- while (inptr + i < inend \ +- && (*(inptr + i) & 0xc0) == 0x80 \ +- && i < 5); \ +- \ +- errout: \ +- STANDARD_FROM_LOOP_ERR_HANDLER (i); \ +- } \ +- \ +- if (__glibc_unlikely (inptr + cnt > inend)) \ +- { \ +- /* We don't have enough input. But before we report \ +- that check that all the bytes are correct. */ \ +- for (i = 1; inptr + i < inend; ++i) \ +- if ((inptr[i] & 0xc0) != 0x80) \ +- break; \ +- \ +- if (__glibc_likely (inptr + i == inend)) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- \ +- goto errout; \ +- } \ +- \ +- /* Read the possible remaining bytes. */ \ +- for (i = 1; i < cnt; ++i) \ +- { \ +- uint32_t byte = inptr[i]; \ +- \ +- if ((byte & 0xc0) != 0x80) \ +- /* This is an illegal encoding. */ \ +- break; \ +- \ +- ch <<= 6; \ +- ch |= byte & 0x3f; \ +- } \ +- \ +- /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ +- If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ +- have been represented with fewer than cnt bytes. */ \ +- if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ +- /* Do not accept UTF-16 surrogates. */ \ +- || (ch >= 0xd800 && ch <= 0xdfff) \ +- || (ch > 0x10ffff)) \ +- { \ +- /* This is an illegal encoding. */ \ +- goto errout; \ +- } \ +- \ +- inptr += cnt; \ +- } \ +- \ +- /* Now adjust the pointers and store the result. 
*/ \ +- *((uint32_t *) outptr) = ch; \ +- outptr += sizeof (uint32_t); \ +- } +- +-#define HW_FROM_VX \ +- { \ +- register const unsigned char* pInput asm ("8") = inptr; \ +- register size_t inlen asm ("9") = inend - inptr; \ +- register unsigned char* pOutput asm ("10") = outptr; \ +- register size_t outlen asm("11") = outend - outptr; \ +- unsigned long tmp, tmp2, tmp3; \ +- asm volatile (".machine push\n\t" \ +- ".machine \"z13\"\n\t" \ +- ".machinemode \"zarch_nohighgprs\"\n\t" \ +- " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \ +- " vrepib %%v31,0x20\n\t" \ +- /* Loop which handles UTF-8 chars <=0x7f. */ \ +- "0: clgijl %[R_INLEN],16,20f\n\t" \ +- " clgijl %[R_OUTLEN],64,20f\n\t" \ +- "1: vl %%v16,0(%[R_IN])\n\t" \ +- " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ +- " jno 10f\n\t" /* Jump away if not all bytes are 1byte \ +- UTF8 chars. */ \ +- /* Enlarge to UCS4. */ \ +- " vuplhb %%v18,%%v16\n\t" \ +- " vupllb %%v19,%%v16\n\t" \ +- " la %[R_IN],16(%[R_IN])\n\t" \ +- " vuplhh %%v20,%%v18\n\t" \ +- " aghi %[R_INLEN],-16\n\t" \ +- " vupllh %%v21,%%v18\n\t" \ +- " aghi %[R_OUTLEN],-64\n\t" \ +- " vuplhh %%v22,%%v19\n\t" \ +- " vupllh %%v23,%%v19\n\t" \ +- /* Store 64 bytes to buf_out. */ \ +- " vstm %%v20,%%v23,0(%[R_OUT])\n\t" \ +- " la %[R_OUT],64(%[R_OUT])\n\t" \ +- " clgijl %[R_INLEN],16,20f\n\t" \ +- " clgijl %[R_OUTLEN],64,20f\n\t" \ +- " j 1b\n\t" \ +- "10: \n\t" \ +- /* At least one byte is > 0x7f. \ +- Store the preceding 1-byte chars. */ \ +- " vlgvb %[R_TMP],%%v17,7\n\t" \ +- " sllk %[R_TMP2],%[R_TMP],2\n\t" /* Compute highest \ +- index to store. 
*/ \ +- " llgfr %[R_TMP3],%[R_TMP2]\n\t" \ +- " ahi %[R_TMP2],-1\n\t" \ +- " jl 20f\n\t" \ +- " vuplhb %%v18,%%v16\n\t" \ +- " vuplhh %%v20,%%v18\n\t" \ +- " vstl %%v20,%[R_TMP2],0(%[R_OUT])\n\t" \ +- " ahi %[R_TMP2],-16\n\t" \ +- " jl 11f\n\t" \ +- " vupllh %%v21,%%v18\n\t" \ +- " vstl %%v21,%[R_TMP2],16(%[R_OUT])\n\t" \ +- " ahi %[R_TMP2],-16\n\t" \ +- " jl 11f\n\t" \ +- " vupllb %%v19,%%v16\n\t" \ +- " vuplhh %%v22,%%v19\n\t" \ +- " vstl %%v22,%[R_TMP2],32(%[R_OUT])\n\t" \ +- " ahi %[R_TMP2],-16\n\t" \ +- " jl 11f\n\t" \ +- " vupllh %%v23,%%v19\n\t" \ +- " vstl %%v23,%[R_TMP2],48(%[R_OUT])\n\t" \ +- "11: \n\t" \ +- /* Update pointers. */ \ +- " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ +- " slgr %[R_INLEN],%[R_TMP]\n\t" \ +- " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ +- " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ +- /* Handle multibyte utf8-char with convert instruction. */ \ +- "20: cu14 %[R_OUT],%[R_IN],1\n\t" \ +- " jo 0b\n\t" /* Try vector implemenation again. */ \ +- " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ +- " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \ +- ".machine pop" \ +- : /* outputs */ [R_IN] "+a" (pInput) \ +- , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ +- , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ +- , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ +- , [R_RES] "+d" (result) \ +- : /* inputs */ \ +- [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ +- , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ +- : /* clobber list */ "memory", "cc" \ +- ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ +- ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ +- ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ +- ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30") \ +- ASM_CLOBBER_VR ("v31") \ +- ); \ +- inptr = pInput; \ +- outptr = pOutput; \ +- } +-#define BODY_FROM_VX BODY_FROM_HW (HW_FROM_VX) +- +-/* These definitions apply to the UTF-8 to UTF-32 direction. 
The +- software implementation for UTF-8 still supports multibyte +- characters up to 6 bytes whereas the hardware variant does not. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-#define LOOPFCT __from_utf8_loop_c +- +-#define LOOP_NEED_FLAGS +- +-#define STORE_REST STORE_REST_COMMON +-#define UNPACK_BYTES UNPACK_BYTES_COMMON +-#define CLEAR_STATE CLEAR_STATE_COMMON +-#define BODY BODY_FROM_C +-#include +- +- +-/* Generate loop-function with hardware utf-convert instruction. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-#define LOOPFCT __from_utf8_loop_etf3eh +- +-#define LOOP_NEED_FLAGS +- +-#define STORE_REST STORE_REST_COMMON +-#define UNPACK_BYTES UNPACK_BYTES_COMMON +-#define CLEAR_STATE CLEAR_STATE_COMMON +-#define BODY BODY_FROM_ETF3EH +-#include +- +-#if defined HAVE_S390_VX_ASM_SUPPORT +-/* Generate loop-function with hardware vector instructions. */ +-# define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-# define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-# define LOOPFCT __from_utf8_loop_vx +- +-# define LOOP_NEED_FLAGS +- +-# define STORE_REST STORE_REST_COMMON +-# define UNPACK_BYTES UNPACK_BYTES_COMMON +-# define CLEAR_STATE CLEAR_STATE_COMMON +-# define BODY BODY_FROM_VX +-# include +-#endif +- +- +-/* Generate ifunc'ed loop function. 
*/ +-__typeof(__from_utf8_loop_c) +-__attribute__ ((ifunc ("__from_utf8_loop_resolver"))) +-__from_utf8_loop; +- +-static void * +-__from_utf8_loop_resolver (unsigned long int dl_hwcap) +-{ +-#if defined HAVE_S390_VX_ASM_SUPPORT +- if (dl_hwcap & HWCAP_S390_VX) +- return __from_utf8_loop_vx; +- else +-#endif +- if (dl_hwcap & HWCAP_S390_ETF3EH) +- return __from_utf8_loop_etf3eh; +- else +- return __from_utf8_loop_c; +-} +- +-strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) +- +- +-/* Conversion from UTF-32 internal/BE to UTF-8. */ +-#define BODY_TO_HW(ASM) \ +- { \ +- ASM; \ +- if (__glibc_likely (inptr == inend) \ +- || result == __GCONV_FULL_OUTPUT) \ +- break; \ +- if (inptr + 4 > inend) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- STANDARD_TO_LOOP_ERR_HANDLER (4); \ +- } +- +-/* The hardware routine uses the S/390 cu41 instruction. */ +-#define BODY_TO_ETF3EH BODY_TO_HW (HARDWARE_CONVERT ("cu41 %0, %1")) +- +-/* The hardware routine uses the S/390 vector and cu41 instructions. */ +-#define BODY_TO_VX BODY_TO_HW (HW_TO_VX) +- +-/* The software routine mimics the S/390 cu41 instruction. */ +-#define BODY_TO_C \ +- { \ +- uint32_t wc = *((const uint32_t *) inptr); \ +- \ +- if (__glibc_likely (wc <= 0x7f)) \ +- { \ +- /* Single UTF-8 char. */ \ +- *outptr = (uint8_t)wc; \ +- outptr++; \ +- } \ +- else if (wc <= 0x7ff) \ +- { \ +- /* Two UTF-8 chars. */ \ +- if (__glibc_unlikely (outptr + 2 > outend)) \ +- { \ +- /* Overflow in the output buffer. */ \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ +- \ +- outptr[0] = 0xc0; \ +- outptr[0] |= wc >> 6; \ +- \ +- outptr[1] = 0x80; \ +- outptr[1] |= wc & 0x3f; \ +- \ +- outptr += 2; \ +- } \ +- else if (wc <= 0xffff) \ +- { \ +- /* Three UTF-8 chars. */ \ +- if (__glibc_unlikely (outptr + 3 > outend)) \ +- { \ +- /* Overflow in the output buffer. 
*/ \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ +- if (wc >= 0xd800 && wc < 0xdc00) \ +- { \ +- /* Do not accept UTF-16 surrogates. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ +- STANDARD_TO_LOOP_ERR_HANDLER (4); \ +- } \ +- outptr[0] = 0xe0; \ +- outptr[0] |= wc >> 12; \ +- \ +- outptr[1] = 0x80; \ +- outptr[1] |= (wc >> 6) & 0x3f; \ +- \ +- outptr[2] = 0x80; \ +- outptr[2] |= wc & 0x3f; \ +- \ +- outptr += 3; \ +- } \ +- else if (wc <= 0x10ffff) \ +- { \ +- /* Four UTF-8 chars. */ \ +- if (__glibc_unlikely (outptr + 4 > outend)) \ +- { \ +- /* Overflow in the output buffer. */ \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ +- outptr[0] = 0xf0; \ +- outptr[0] |= wc >> 18; \ +- \ +- outptr[1] = 0x80; \ +- outptr[1] |= (wc >> 12) & 0x3f; \ +- \ +- outptr[2] = 0x80; \ +- outptr[2] |= (wc >> 6) & 0x3f; \ +- \ +- outptr[3] = 0x80; \ +- outptr[3] |= wc & 0x3f; \ +- \ +- outptr += 4; \ +- } \ +- else \ +- { \ +- STANDARD_TO_LOOP_ERR_HANDLER (4); \ +- } \ +- inptr += 4; \ +- } +- +-#define HW_TO_VX \ +- { \ +- register const unsigned char* pInput asm ("8") = inptr; \ +- register size_t inlen asm ("9") = inend - inptr; \ +- register unsigned char* pOutput asm ("10") = outptr; \ +- register size_t outlen asm("11") = outend - outptr; \ +- unsigned long tmp, tmp2; \ +- asm volatile (".machine push\n\t" \ +- ".machine \"z13\"\n\t" \ +- ".machinemode \"zarch_nohighgprs\"\n\t" \ +- " vleif %%v20,127,0\n\t" /* element 0: 127 */ \ +- " vzero %%v21\n\t" \ +- " vleih %%v21,8192,0\n\t" /* element 0: > */ \ +- " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \ +- /* Loop which handles UTF-32 chars <=0x7f. */ \ +- "0: clgijl %[R_INLEN],64,20f\n\t" \ +- " clgijl %[R_OUTLEN],16,20f\n\t" \ +- "1: vlm %%v16,%%v19,0(%[R_IN])\n\t" \ +- " lghi %[R_TMP],0\n\t" \ +- /* Shorten to byte values. */ \ +- " vpkf %%v23,%%v16,%%v17\n\t" \ +- " vpkf %%v24,%%v18,%%v19\n\t" \ +- " vpkh %%v23,%%v23,%%v24\n\t" \ +- /* Checking for values > 0x7f. 
*/ \ +- " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \ +- " jno 10f\n\t" \ +- " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \ +- " jno 11f\n\t" \ +- " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \ +- " jno 12f\n\t" \ +- " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \ +- " jno 13f\n\t" \ +- /* Store 16bytes to outptr. */ \ +- " vst %%v23,0(%[R_OUT])\n\t" \ +- " aghi %[R_INLEN],-64\n\t" \ +- " aghi %[R_OUTLEN],-16\n\t" \ +- " la %[R_IN],64(%[R_IN])\n\t" \ +- " la %[R_OUT],16(%[R_OUT])\n\t" \ +- " clgijl %[R_INLEN],64,20f\n\t" \ +- " clgijl %[R_OUTLEN],16,20f\n\t" \ +- " j 1b\n\t" \ +- /* Found a value > 0x7f. */ \ +- "13: ahi %[R_TMP],4\n\t" \ +- "12: ahi %[R_TMP],4\n\t" \ +- "11: ahi %[R_TMP],4\n\t" \ +- "10: vlgvb %[R_I],%%v22,7\n\t" \ +- " srlg %[R_I],%[R_I],2\n\t" \ +- " agr %[R_I],%[R_TMP]\n\t" \ +- " je 20f\n\t" \ +- /* Store characters before invalid one... */ \ +- " slgr %[R_OUTLEN],%[R_I]\n\t" \ +- "15: aghi %[R_I],-1\n\t" \ +- " vstl %%v23,%[R_I],0(%[R_OUT])\n\t" \ +- /* ... and update pointers. */ \ +- " aghi %[R_I],1\n\t" \ +- " la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ +- " sllg %[R_I],%[R_I],2\n\t" \ +- " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ +- " slgr %[R_INLEN],%[R_I]\n\t" \ +- /* Handle multibyte utf8-char with convert instruction. */ \ +- "20: cu41 %[R_OUT],%[R_IN]\n\t" \ +- " jo 0b\n\t" /* Try vector implemenation again. */ \ +- " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ +- " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. 
*/ \ +- ".machine pop" \ +- : /* outputs */ [R_IN] "+a" (pInput) \ +- , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ +- , [R_OUTLEN] "+d" (outlen), [R_TMP] "=d" (tmp) \ +- , [R_I] "=a" (tmp2) \ +- , [R_RES] "+d" (result) \ +- : /* inputs */ \ +- [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ +- , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ +- : /* clobber list */ "memory", "cc" \ +- ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ +- ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ +- ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ +- ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ +- ASM_CLOBBER_VR ("v24") \ +- ); \ +- inptr = pInput; \ +- outptr = pOutput; \ +- } +- +-/* Generate loop-function with software routing. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#define LOOPFCT __to_utf8_loop_c +-#define BODY BODY_TO_C +-#define LOOP_NEED_FLAGS +-#include +- +-/* Generate loop-function with hardware utf-convert instruction. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#define LOOPFCT __to_utf8_loop_etf3eh +-#define LOOP_NEED_FLAGS +-#define BODY BODY_TO_ETF3EH +-#include +- +-#if defined HAVE_S390_VX_ASM_SUPPORT +-/* Generate loop-function with hardware vector and utf-convert instructions. */ +-# define MIN_NEEDED_INPUT MIN_NEEDED_TO +-# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-# define LOOPFCT __to_utf8_loop_vx +-# define BODY BODY_TO_VX +-# define LOOP_NEED_FLAGS +-# include +-#endif +- +-/* Generate ifunc'ed loop function. 
*/ +-__typeof(__to_utf8_loop_c) +-__attribute__ ((ifunc ("__to_utf8_loop_resolver"))) +-__to_utf8_loop; +- +-static void * +-__to_utf8_loop_resolver (unsigned long int dl_hwcap) +-{ +-#if defined HAVE_S390_VX_ASM_SUPPORT +- if (dl_hwcap & HWCAP_S390_VX) +- return __to_utf8_loop_vx; +- else +-#endif +- if (dl_hwcap & HWCAP_S390_ETF3EH) +- return __to_utf8_loop_etf3eh; +- else +- return __to_utf8_loop_c; +-} +- +-strong_alias (__to_utf8_loop_c_single, __to_utf8_loop_single) +- +- +-#include +diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c +new file mode 100644 +index 0000000..8d42ab8 +--- /dev/null ++++ b/sysdeps/s390/utf16-utf32-z9.c +@@ -0,0 +1,636 @@ ++/* Conversion between UTF-16 and UTF-32 BE/internal. ++ ++ This module uses the Z9-109 variants of the Convert Unicode ++ instructions. ++ Copyright (C) 1997-2016 Free Software Foundation, Inc. ++ ++ Author: Andreas Krebbel ++ Based on the work by Ulrich Drepper , 1997. ++ ++ Thanks to Daniel Appich who covered the relevant performance work ++ in his diploma thesis. ++ ++ This is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ This is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#if defined HAVE_S390_VX_GCC_SUPPORT ++# define ASM_CLOBBER_VR(NR) , NR ++#else ++# define ASM_CLOBBER_VR(NR) ++#endif ++ ++#if defined __s390x__ ++# define CONVERT_32BIT_SIZE_T(REG) ++#else ++# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t" ++#endif ++ ++/* UTF-32 big endian byte order mark. */ ++#define BOM_UTF32 0x0000feffu ++ ++/* UTF-16 big endian byte order mark. */ ++#define BOM_UTF16 0xfeff ++ ++#define DEFINE_INIT 0 ++#define DEFINE_FINI 0 ++#define MIN_NEEDED_FROM 2 ++#define MAX_NEEDED_FROM 4 ++#define MIN_NEEDED_TO 4 ++#define FROM_LOOP __from_utf16_loop ++#define TO_LOOP __to_utf16_loop ++#define FROM_DIRECTION (dir == from_utf16) ++#define ONE_DIRECTION 0 ++ ++/* Direction of the transformation. */ ++enum direction ++{ ++ illegal_dir, ++ to_utf16, ++ from_utf16 ++}; ++ ++struct utf16_data ++{ ++ enum direction dir; ++ int emit_bom; ++}; ++ ++ ++extern int gconv_init (struct __gconv_step *step); ++int ++gconv_init (struct __gconv_step *step) ++{ ++ /* Determine which direction. 
*/ ++ struct utf16_data *new_data; ++ enum direction dir = illegal_dir; ++ int emit_bom; ++ int result; ++ ++ emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0 ++ || __strcasecmp (step->__to_name, "UTF-16//") == 0); ++ ++ if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0 ++ && (__strcasecmp (step->__to_name, "UTF-32//") == 0 ++ || __strcasecmp (step->__to_name, "UTF-32BE//") == 0 ++ || __strcasecmp (step->__to_name, "INTERNAL") == 0)) ++ { ++ dir = from_utf16; ++ } ++ else if ((__strcasecmp (step->__to_name, "UTF-16//") == 0 ++ || __strcasecmp (step->__to_name, "UTF-16BE//") == 0) ++ && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0 ++ || __strcasecmp (step->__from_name, "INTERNAL") == 0)) ++ { ++ dir = to_utf16; ++ } ++ ++ result = __GCONV_NOCONV; ++ if (dir != illegal_dir) ++ { ++ new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data)); ++ ++ result = __GCONV_NOMEM; ++ if (new_data != NULL) ++ { ++ new_data->dir = dir; ++ new_data->emit_bom = emit_bom; ++ step->__data = new_data; ++ ++ if (dir == from_utf16) ++ { ++ step->__min_needed_from = MIN_NEEDED_FROM; ++ step->__max_needed_from = MIN_NEEDED_FROM; ++ step->__min_needed_to = MIN_NEEDED_TO; ++ step->__max_needed_to = MIN_NEEDED_TO; ++ } ++ else ++ { ++ step->__min_needed_from = MIN_NEEDED_TO; ++ step->__max_needed_from = MIN_NEEDED_TO; ++ step->__min_needed_to = MIN_NEEDED_FROM; ++ step->__max_needed_to = MIN_NEEDED_FROM; ++ } ++ ++ step->__stateful = 0; ++ ++ result = __GCONV_OK; ++ } ++ } ++ ++ return result; ++} ++ ++ ++extern void gconv_end (struct __gconv_step *data); ++void ++gconv_end (struct __gconv_step *data) ++{ ++ free (data->__data); ++} ++ ++/* The macro for the hardware loop. This is used for both ++ directions. 
*/ ++#define HARDWARE_CONVERT(INSTRUCTION) \ ++ { \ ++ register const unsigned char* pInput __asm__ ("8") = inptr; \ ++ register size_t inlen __asm__ ("9") = inend - inptr; \ ++ register unsigned char* pOutput __asm__ ("10") = outptr; \ ++ register size_t outlen __asm__("11") = outend - outptr; \ ++ unsigned long cc = 0; \ ++ \ ++ __asm__ __volatile__ (".machine push \n\t" \ ++ ".machine \"z9-109\" \n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ "0: " INSTRUCTION " \n\t" \ ++ ".machine pop \n\t" \ ++ " jo 0b \n\t" \ ++ " ipm %2 \n" \ ++ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ ++ "+d" (outlen), "+d" (inlen) \ ++ : \ ++ : "cc", "memory"); \ ++ \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ cc >>= 28; \ ++ \ ++ if (cc == 1) \ ++ { \ ++ result = __GCONV_FULL_OUTPUT; \ ++ } \ ++ else if (cc == 2) \ ++ { \ ++ result = __GCONV_ILLEGAL_INPUT; \ ++ } \ ++ } ++ ++#define PREPARE_LOOP \ ++ enum direction dir = ((struct utf16_data *) step->__data)->dir; \ ++ int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \ ++ \ ++ if (emit_bom && !data->__internal_use \ ++ && data->__invocation_counter == 0) \ ++ { \ ++ if (dir == to_utf16) \ ++ { \ ++ /* Emit the UTF-16 Byte Order Mark. */ \ ++ if (__glibc_unlikely (outbuf + 2 > outend)) \ ++ return __GCONV_FULL_OUTPUT; \ ++ \ ++ put16u (outbuf, BOM_UTF16); \ ++ outbuf += 2; \ ++ } \ ++ else \ ++ { \ ++ /* Emit the UTF-32 Byte Order Mark. */ \ ++ if (__glibc_unlikely (outbuf + 4 > outend)) \ ++ return __GCONV_FULL_OUTPUT; \ ++ \ ++ put32u (outbuf, BOM_UTF32); \ ++ outbuf += 4; \ ++ } \ ++ } ++ ++/* Conversion function from UTF-16 to UTF-32 internal/BE. */ ++ ++/* The software routine is copied from utf-16.c (minus bytes ++ swapping). */ ++#define BODY_FROM_C \ ++ { \ ++ uint16_t u1 = get16 (inptr); \ ++ \ ++ if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \ ++ { \ ++ /* No surrogate. */ \ ++ put32 (outptr, u1); \ ++ inptr += 2; \ ++ } \ ++ else \ ++ { \ ++ /* An isolated low-surrogate was found. 
This has to be \ ++ considered ill-formed. */ \ ++ if (__glibc_unlikely (u1 >= 0xdc00)) \ ++ { \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (2); \ ++ } \ ++ /* It's a surrogate character. At least the first word says \ ++ it is. */ \ ++ if (__glibc_unlikely (inptr + 4 > inend)) \ ++ { \ ++ /* We don't have enough input for another complete input \ ++ character. */ \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ \ ++ inptr += 2; \ ++ uint16_t u2 = get16 (inptr); \ ++ if (__builtin_expect (u2 < 0xdc00, 0) \ ++ || __builtin_expect (u2 > 0xdfff, 0)) \ ++ { \ ++ /* This is no valid second word for a surrogate. */ \ ++ inptr -= 2; \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (2); \ ++ } \ ++ \ ++ put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \ ++ inptr += 2; \ ++ } \ ++ outptr += 4; \ ++ } ++ ++#define BODY_FROM_VX \ ++ { \ ++ size_t inlen = inend - inptr; \ ++ size_t outlen = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ /* Setup to check for surrogates. */ \ ++ " larl %[R_TMP],9f\n\t" \ ++ " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_INLEN]) \ ++ CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \ ++ /* Loop which handles UTF-16 chars <0xd800, >0xdfff. */ \ ++ "0: clgijl %[R_INLEN],16,2f\n\t" \ ++ " clgijl %[R_OUTLEN],32,2f\n\t" \ ++ "1: vl %%v16,0(%[R_IN])\n\t" \ ++ /* Check for surrogate chars. */ \ ++ " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" \ ++ /* Enlarge to UTF-32. */ \ ++ " vuplhh %%v17,%%v16\n\t" \ ++ " la %[R_IN],16(%[R_IN])\n\t" \ ++ " vupllh %%v18,%%v16\n\t" \ ++ " aghi %[R_INLEN],-16\n\t" \ ++ /* Store 32 bytes to buf_out. */ \ ++ " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \ ++ " aghi %[R_OUTLEN],-32\n\t" \ ++ " la %[R_OUT],32(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],16,2f\n\t" \ ++ " clgijl %[R_OUTLEN],32,2f\n\t" \ ++ " j 1b\n\t" \ ++ /* Setup to check for ch >= 0xd800 && ch <= 0xdfff. 
(v30, v31) */ \ ++ "9: .short 0xd800,0xdfff,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ " .short 0xa000,0xc000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ /* At least one uint16_t is in range of surrogates. \ ++ Store the preceding chars. */ \ ++ "10: vlgvb %[R_TMP],%%v19,7\n\t" \ ++ " vuplhh %%v17,%%v16\n\t" \ ++ " sllg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ ++ " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ ++ " jl 12f\n\t" \ ++ " vstl %%v17,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ " vupllh %%v18,%%v16\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vstl %%v18,%[R_TMP2],16(%[R_OUT])\n\t" \ ++ "11: \n\t" /* Update pointers. */ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ /* Calculate remaining uint16_t values in loaded vrs. */ \ ++ "12: lghi %[R_TMP2],16\n\t" \ ++ " sgr %[R_TMP2],%[R_TMP]\n\t" \ ++ " srl %[R_TMP2],1\n\t" \ ++ " llh %[R_TMP],0(%[R_IN])\n\t" \ ++ " aghi %[R_OUTLEN],-4\n\t" \ ++ " j 16f\n\t" \ ++ /* Handle remaining bytes. */ \ ++ "2: \n\t" \ ++ /* Zero, one or more bytes available? */ \ ++ " clgfi %[R_INLEN],1\n\t" \ ++ " je 97f\n\t" /* Only one byte available. */ \ ++ " jl 99f\n\t" /* End if no bytes available. */ \ ++ /* Calculate remaining uint16_t values in inptr. */ \ ++ " srlg %[R_TMP2],%[R_INLEN],1\n\t" \ ++ /* Handle remaining uint16_t values. */ \ ++ "13: llh %[R_TMP],0(%[R_IN])\n\t" \ ++ " slgfi %[R_OUTLEN],4\n\t" \ ++ " jl 96f \n\t" \ ++ " clfi %[R_TMP],0xd800\n\t" \ ++ " jhe 15f\n\t" \ ++ "14: st %[R_TMP],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],2(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-2\n\t" \ ++ " la %[R_OUT],4(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],13b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Handle UTF-16 surrogate pair. */ \ ++ "15: clfi %[R_TMP],0xdfff\n\t" \ ++ " jh 14b\n\t" /* Jump away if ch > 0xdfff. 
*/ \ ++ "16: clfi %[R_TMP],0xdc00\n\t" \ ++ " jhe 98f\n\t" /* Jump away in case of low-surrogate. */ \ ++ " slgfi %[R_INLEN],4\n\t" \ ++ " jl 97f\n\t" /* Big enough input? */ \ ++ " llh %[R_TMP3],2(%[R_IN])\n\t" /* Load low surrogate. */ \ ++ " slfi %[R_TMP],0xd7c0\n\t" \ ++ " sll %[R_TMP],10\n\t" \ ++ " risbgn %[R_TMP],%[R_TMP3],54,63,0\n\t" /* Insert klmnopqrst. */ \ ++ " nilf %[R_TMP3],0xfc00\n\t" \ ++ " clfi %[R_TMP3],0xdc00\n\t" /* Check if it starts with 0xdc00. */ \ ++ " jne 98f\n\t" \ ++ " st %[R_TMP],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " la %[R_OUT],4(%[R_OUT])\n\t" \ ++ " aghi %[R_TMP2],-2\n\t" \ ++ " jh 13b\n\t" /* Handle remaining uint16_t values. */ \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ "96: \n\t" /* Return full output. */ \ ++ " lghi %[R_RES],%[RES_OUT_FULL]\n\t" \ ++ " j 99f\n\t" \ ++ "97: \n\t" /* Return incomplete input. */ \ ++ " lghi %[R_RES],%[RES_IN_FULL]\n\t" \ ++ " j 99f\n\t" \ ++ "98:\n\t" /* Return Illegal character. */ \ ++ " lghi %[R_RES],%[RES_IN_ILL]\n\t" \ ++ "99:\n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ if (__glibc_likely (inptr == inend) \ ++ || result != __GCONV_ILLEGAL_INPUT) \ ++ break; \ ++ \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (2); \ ++ } ++ ++ ++/* Generate loop-function with software routing. 
*/ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#if defined HAVE_S390_VX_ASM_SUPPORT ++# define LOOPFCT __from_utf16_loop_c ++# define LOOP_NEED_FLAGS ++# define BODY BODY_FROM_C ++# include ++ ++/* Generate loop-function with hardware vector instructions. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define LOOPFCT __from_utf16_loop_vx ++# define LOOP_NEED_FLAGS ++# define BODY BODY_FROM_VX ++# include ++ ++/* Generate ifunc'ed loop function. */ ++__typeof(__from_utf16_loop_c) ++__attribute__ ((ifunc ("__from_utf16_loop_resolver"))) ++__from_utf16_loop; ++ ++static void * ++__from_utf16_loop_resolver (unsigned long int dl_hwcap) ++{ ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __from_utf16_loop_vx; ++ else ++ return __from_utf16_loop_c; ++} ++ ++strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) ++#else ++# define LOOPFCT FROM_LOOP ++# define LOOP_NEED_FLAGS ++# define BODY BODY_FROM_C ++# include ++#endif ++ ++/* Conversion from UTF-32 internal/BE to UTF-16. */ ++ ++/* The software routine is copied from utf-16.c (minus bytes ++ swapping). */ ++#define BODY_TO_C \ ++ { \ ++ uint32_t c = get32 (inptr); \ ++ \ ++ if (__builtin_expect (c <= 0xd7ff, 1) \ ++ || (c >=0xdc00 && c <= 0xffff)) \ ++ { \ ++ /* Two UTF-16 chars. */ \ ++ put16 (outptr, c); \ ++ } \ ++ else if (__builtin_expect (c >= 0x10000, 1) \ ++ && __builtin_expect (c <= 0x10ffff, 1)) \ ++ { \ ++ /* Four UTF-16 chars. */ \ ++ uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \ ++ uint16_t out; \ ++ \ ++ /* Generate a surrogate character. */ \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ ++ { \ ++ /* Overflow in the output buffer. 
*/ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ \ ++ out = 0xd800; \ ++ out |= (zabcd & 0xff) << 6; \ ++ out |= (c >> 10) & 0x3f; \ ++ put16 (outptr, out); \ ++ outptr += 2; \ ++ \ ++ out = 0xdc00; \ ++ out |= c & 0x3ff; \ ++ put16 (outptr, out); \ ++ } \ ++ else \ ++ { \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } \ ++ outptr += 2; \ ++ inptr += 4; \ ++ } ++ ++#define BODY_TO_ETF3EH \ ++ { \ ++ HARDWARE_CONVERT ("cu42 %0, %1"); \ ++ \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ ++ \ ++ if (inptr + 4 > inend) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } ++ ++#define BODY_TO_VX \ ++ { \ ++ register const unsigned char* pInput asm ("8") = inptr; \ ++ register size_t inlen asm ("9") = inend - inptr; \ ++ register unsigned char* pOutput asm ("10") = outptr; \ ++ register size_t outlen asm("11") = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ /* Setup to check for surrogates. */ \ ++ " larl %[R_TMP],9f\n\t" \ ++ " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_INLEN]) \ ++ CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \ ++ /* Loop which handles UTF-16 chars \ ++ ch < 0xd800 || (ch > 0xdfff && ch < 0x10000). */ \ ++ "0: clgijl %[R_INLEN],32,20f\n\t" \ ++ " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \ ++ " lghi %[R_TMP2],0\n\t" \ ++ /* Shorten to UTF-16. */ \ ++ " vpkf %%v18,%%v16,%%v17\n\t" \ ++ /* Check for surrogate chars. */ \ ++ " vstrcfs %%v19,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" \ ++ " vstrcfs %%v19,%%v17,%%v30,%%v31\n\t" \ ++ " jno 11f\n\t" \ ++ /* Store 16 bytes to buf_out. 
*/ \ ++ " vst %%v18,0(%[R_OUT])\n\t" \ ++ " la %[R_IN],32(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-32\n\t" \ ++ " aghi %[R_OUTLEN],-16\n\t" \ ++ " la %[R_OUT],16(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],32,20f\n\t" \ ++ " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ " j 1b\n\t" \ ++ /* Setup to check for ch >= 0xd800 && ch <= 0xdfff \ ++ and check for ch >= 0x10000. (v30, v31) */ \ ++ "9: .long 0xd800,0xdfff,0x10000,0x10000\n\t" \ ++ " .long 0xa0000000,0xc0000000, 0xa0000000,0xa0000000\n\t" \ ++ /* At least one UTF32 char is in range of surrogates. \ ++ Store the preceding characters. */ \ ++ "11: ahi %[R_TMP2],16\n\t" \ ++ "10: vlgvb %[R_TMP],%%v19,7\n\t" \ ++ " agr %[R_TMP],%[R_TMP2]\n\t" \ ++ " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ ++ " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ ++ " jl 20f\n\t" \ ++ " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ /* Update pointers. */ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ /* Handles UTF16 surrogates with convert instruction. */ \ ++ "20: cu42 %[R_OUT],%[R_IN]\n\t" \ ++ " jo 0b\n\t" /* Try vector implementation again. */ \ ++ " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ ++ " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. 
*/ \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (pInput) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ ++ if (inptr + 4 > inend) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } ++ ++/* Generate loop-function with software routine. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#define LOOPFCT __to_utf16_loop_c ++#define LOOP_NEED_FLAGS ++#define BODY BODY_TO_C ++#include ++ ++/* Generate loop-function with hardware utf-convert instruction. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#define LOOPFCT __to_utf16_loop_etf3eh ++#define LOOP_NEED_FLAGS ++#define BODY BODY_TO_ETF3EH ++#include ++ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++/* Generate loop-function with hardware vector instructions. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_TO ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++# define LOOPFCT __to_utf16_loop_vx ++# define LOOP_NEED_FLAGS ++# define BODY BODY_TO_VX ++# include ++#endif ++ ++/* Generate ifunc'ed loop function. 
*/ ++__typeof(__to_utf16_loop_c) ++__attribute__ ((ifunc ("__to_utf16_loop_resolver"))) ++__to_utf16_loop; ++ ++static void * ++__to_utf16_loop_resolver (unsigned long int dl_hwcap) ++{ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __to_utf16_loop_vx; ++ else ++#endif ++ if (dl_hwcap & HWCAP_S390_ZARCH && dl_hwcap & HWCAP_S390_HIGH_GPRS ++ && dl_hwcap & HWCAP_S390_ETF3EH) ++ return __to_utf16_loop_etf3eh; ++ else ++ return __to_utf16_loop_c; ++} ++ ++strong_alias (__to_utf16_loop_c_single, __to_utf16_loop_single) ++ ++ ++#include +diff --git a/sysdeps/s390/utf8-utf16-z9.c b/sysdeps/s390/utf8-utf16-z9.c +new file mode 100644 +index 0000000..d3dc9bd +--- /dev/null ++++ b/sysdeps/s390/utf8-utf16-z9.c +@@ -0,0 +1,818 @@ ++/* Conversion between UTF-8 and UTF-16. ++ ++ This module uses the Z9-109 variants of the Convert Unicode ++ instructions. ++ Copyright (C) 1997-2016 Free Software Foundation, Inc. ++ ++ Author: Andreas Krebbel ++ Based on the work by Ulrich Drepper , 1997. ++ ++ Thanks to Daniel Appich who covered the relevant performance work ++ in his diploma thesis. ++ ++ This is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ This is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#if defined HAVE_S390_VX_GCC_SUPPORT ++# define ASM_CLOBBER_VR(NR) , NR ++#else ++# define ASM_CLOBBER_VR(NR) ++#endif ++ ++#if defined __s390x__ ++# define CONVERT_32BIT_SIZE_T(REG) ++#else ++# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t" ++#endif ++ ++/* Defines for skeleton.c. */ ++#define DEFINE_INIT 0 ++#define DEFINE_FINI 0 ++#define MIN_NEEDED_FROM 1 ++#define MAX_NEEDED_FROM 4 ++#define MIN_NEEDED_TO 2 ++#define MAX_NEEDED_TO 4 ++#define FROM_LOOP __from_utf8_loop ++#define TO_LOOP __to_utf8_loop ++#define FROM_DIRECTION (dir == from_utf8) ++#define ONE_DIRECTION 0 ++ ++ ++/* UTF-16 big endian byte order mark. */ ++#define BOM_UTF16 0xfeff ++ ++/* Direction of the transformation. */ ++enum direction ++{ ++ illegal_dir, ++ to_utf8, ++ from_utf8 ++}; ++ ++struct utf8_data ++{ ++ enum direction dir; ++ int emit_bom; ++}; ++ ++ ++extern int gconv_init (struct __gconv_step *step); ++int ++gconv_init (struct __gconv_step *step) ++{ ++ /* Determine which direction. 
*/ ++ struct utf8_data *new_data; ++ enum direction dir = illegal_dir; ++ int emit_bom; ++ int result; ++ ++ emit_bom = (__strcasecmp (step->__to_name, "UTF-16//") == 0); ++ ++ if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0 ++ && (__strcasecmp (step->__to_name, "UTF-16//") == 0 ++ || __strcasecmp (step->__to_name, "UTF-16BE//") == 0)) ++ { ++ dir = from_utf8; ++ } ++ else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0 ++ && __strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0) ++ { ++ dir = to_utf8; ++ } ++ ++ result = __GCONV_NOCONV; ++ if (dir != illegal_dir) ++ { ++ new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data)); ++ ++ result = __GCONV_NOMEM; ++ if (new_data != NULL) ++ { ++ new_data->dir = dir; ++ new_data->emit_bom = emit_bom; ++ step->__data = new_data; ++ ++ if (dir == from_utf8) ++ { ++ step->__min_needed_from = MIN_NEEDED_FROM; ++ step->__max_needed_from = MIN_NEEDED_FROM; ++ step->__min_needed_to = MIN_NEEDED_TO; ++ step->__max_needed_to = MIN_NEEDED_TO; ++ } ++ else ++ { ++ step->__min_needed_from = MIN_NEEDED_TO; ++ step->__max_needed_from = MIN_NEEDED_TO; ++ step->__min_needed_to = MIN_NEEDED_FROM; ++ step->__max_needed_to = MIN_NEEDED_FROM; ++ } ++ ++ step->__stateful = 0; ++ ++ result = __GCONV_OK; ++ } ++ } ++ ++ return result; ++} ++ ++ ++extern void gconv_end (struct __gconv_step *data); ++void ++gconv_end (struct __gconv_step *data) ++{ ++ free (data->__data); ++} ++ ++/* The macro for the hardware loop. This is used for both ++ directions. 
*/ ++#define HARDWARE_CONVERT(INSTRUCTION) \ ++ { \ ++ register const unsigned char* pInput __asm__ ("8") = inptr; \ ++ register size_t inlen __asm__ ("9") = inend - inptr; \ ++ register unsigned char* pOutput __asm__ ("10") = outptr; \ ++ register size_t outlen __asm__("11") = outend - outptr; \ ++ unsigned long cc = 0; \ ++ \ ++ __asm__ __volatile__ (".machine push \n\t" \ ++ ".machine \"z9-109\" \n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ "0: " INSTRUCTION " \n\t" \ ++ ".machine pop \n\t" \ ++ " jo 0b \n\t" \ ++ " ipm %2 \n" \ ++ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ ++ "+d" (outlen), "+d" (inlen) \ ++ : \ ++ : "cc", "memory"); \ ++ \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ cc >>= 28; \ ++ \ ++ if (cc == 1) \ ++ { \ ++ result = __GCONV_FULL_OUTPUT; \ ++ } \ ++ else if (cc == 2) \ ++ { \ ++ result = __GCONV_ILLEGAL_INPUT; \ ++ } \ ++ } ++ ++#define PREPARE_LOOP \ ++ enum direction dir = ((struct utf8_data *) step->__data)->dir; \ ++ int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ ++ \ ++ if (emit_bom && !data->__internal_use \ ++ && data->__invocation_counter == 0) \ ++ { \ ++ /* Emit the UTF-16 Byte Order Mark. */ \ ++ if (__glibc_unlikely (outbuf + 2 > outend)) \ ++ return __GCONV_FULL_OUTPUT; \ ++ \ ++ put16u (outbuf, BOM_UTF16); \ ++ outbuf += 2; \ ++ } ++ ++/* Conversion function from UTF-8 to UTF-16. 
*/ ++#define BODY_FROM_HW(ASM) \ ++ { \ ++ ASM; \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ ++ \ ++ int i; \ ++ for (i = 1; inptr + i < inend && i < 5; ++i) \ ++ if ((inptr[i] & 0xc0) != 0x80) \ ++ break; \ ++ \ ++ if (__glibc_likely (inptr + i == inend \ ++ && result == __GCONV_EMPTY_INPUT)) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (i); \ ++ } ++ ++#define BODY_FROM_ETF3EH BODY_FROM_HW (HARDWARE_CONVERT ("cu12 %0, %1, 1")) ++ ++#define HW_FROM_VX \ ++ { \ ++ register const unsigned char* pInput asm ("8") = inptr; \ ++ register size_t inlen asm ("9") = inend - inptr; \ ++ register unsigned char* pOutput asm ("10") = outptr; \ ++ register size_t outlen asm("11") = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \ ++ " vrepib %%v31,0x20\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_INLEN]) \ ++ CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \ ++ /* Loop which handles UTF-8 chars <=0x7f. */ \ ++ "0: clgijl %[R_INLEN],16,20f\n\t" \ ++ " clgijl %[R_OUTLEN],32,20f\n\t" \ ++ "1: vl %%v16,0(%[R_IN])\n\t" \ ++ " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" /* Jump away if not all bytes are 1byte \ ++ UTF8 chars. */ \ ++ /* Enlarge to UTF-16. */ \ ++ " vuplhb %%v18,%%v16\n\t" \ ++ " la %[R_IN],16(%[R_IN])\n\t" \ ++ " vupllb %%v19,%%v16\n\t" \ ++ " aghi %[R_INLEN],-16\n\t" \ ++ /* Store 32 bytes to buf_out. */ \ ++ " vstm %%v18,%%v19,0(%[R_OUT])\n\t" \ ++ " aghi %[R_OUTLEN],-32\n\t" \ ++ " la %[R_OUT],32(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],16,20f\n\t" \ ++ " clgijl %[R_OUTLEN],32,20f\n\t" \ ++ " j 1b\n\t" \ ++ "10:\n\t" \ ++ /* At least one byte is > 0x7f. \ ++ Store the preceding 1-byte chars. 
*/ \ ++ " vlgvb %[R_TMP],%%v17,7\n\t" \ ++ " sllk %[R_TMP2],%[R_TMP],1\n\t" /* Compute highest \ ++ index to store. */ \ ++ " llgfr %[R_TMP3],%[R_TMP2]\n\t" \ ++ " ahi %[R_TMP2],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " vuplhb %%v18,%%v16\n\t" \ ++ " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllb %%v19,%%v16\n\t" \ ++ " vstl %%v19,%[R_TMP2],16(%[R_OUT])\n\t" \ ++ "11: \n\t" /* Update pointers. */ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ /* Handle multibyte utf8-char with convert instruction. */ \ ++ "20: cu12 %[R_OUT],%[R_IN],1\n\t" \ ++ " jo 0b\n\t" /* Try vector implementation again. */ \ ++ " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ ++ " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (pInput) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ } ++#define BODY_FROM_VX BODY_FROM_HW (HW_FROM_VX) ++ ++ ++/* The software implementation is based on the code in gconv_simple.c. */ ++#define BODY_FROM_C \ ++ { \ ++ /* Next input byte. */ \ ++ uint16_t ch = *inptr; \ ++ \ ++ if (__glibc_likely (ch < 0x80)) \ ++ { \ ++ /* One byte sequence. */ \ ++ ++inptr; \ ++ } \ ++ else \ ++ { \ ++ uint_fast32_t cnt; \ ++ uint_fast32_t i; \ ++ \ ++ if (ch >= 0xc2 && ch < 0xe0) \ ++ { \ ++ /* We expect two bytes. 
The first byte cannot be 0xc0 \ ++ or 0xc1, otherwise the wide character could have been \ ++ represented using a single byte. */ \ ++ cnt = 2; \ ++ ch &= 0x1f; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ ++ { \ ++ /* We expect three bytes. */ \ ++ cnt = 3; \ ++ ch &= 0x0f; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ ++ { \ ++ /* We expect four bytes. */ \ ++ cnt = 4; \ ++ ch &= 0x07; \ ++ } \ ++ else \ ++ { \ ++ /* Search the end of this ill-formed UTF-8 character. This \ ++ is the next byte with (x & 0xc0) != 0x80. */ \ ++ i = 0; \ ++ do \ ++ ++i; \ ++ while (inptr + i < inend \ ++ && (*(inptr + i) & 0xc0) == 0x80 \ ++ && i < 5); \ ++ \ ++ errout: \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (i); \ ++ } \ ++ \ ++ if (__glibc_unlikely (inptr + cnt > inend)) \ ++ { \ ++ /* We don't have enough input. But before we report \ ++ that check that all the bytes are correct. */ \ ++ for (i = 1; inptr + i < inend; ++i) \ ++ if ((inptr[i] & 0xc0) != 0x80) \ ++ break; \ ++ \ ++ if (__glibc_likely (inptr + i == inend)) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ \ ++ goto errout; \ ++ } \ ++ \ ++ if (cnt == 4) \ ++ { \ ++ /* For 4 byte UTF-8 chars two UTF-16 chars (high and \ ++ low) are needed. */ \ ++ uint16_t zabcd, high, low; \ ++ \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ ++ { \ ++ /* Overflow in the output buffer. */ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ \ ++ /* Check if tail-bytes >= 0x80, < 0xc0. */ \ ++ for (i = 1; i < cnt; ++i) \ ++ { \ ++ if ((inptr[i] & 0xc0) != 0x80) \ ++ /* This is an illegal encoding. */ \ ++ goto errout; \ ++ } \ ++ \ ++ /* See Principles of Operations cu12. */ \ ++ zabcd = (((inptr[0] & 0x7) << 2) | \ ++ ((inptr[1] & 0x30) >> 4)) - 1; \ ++ \ ++ /* z-bit must be zero after subtracting 1. 
*/ \ ++ if (zabcd & 0x10) \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (4) \ ++ \ ++ high = (uint16_t)(0xd8 << 8); /* high surrogate id */ \ ++ high |= zabcd << 6; /* abcd bits */ \ ++ high |= (inptr[1] & 0xf) << 2; /* efgh bits */ \ ++ high |= (inptr[2] & 0x30) >> 4; /* ij bits */ \ ++ \ ++ low = (uint16_t)(0xdc << 8); /* low surrogate id */ \ ++ low |= ((uint16_t)inptr[2] & 0xc) << 6; /* kl bits */ \ ++ low |= (inptr[2] & 0x3) << 6; /* mn bits */ \ ++ low |= inptr[3] & 0x3f; /* opqrst bits */ \ ++ \ ++ put16 (outptr, high); \ ++ outptr += 2; \ ++ put16 (outptr, low); \ ++ outptr += 2; \ ++ inptr += 4; \ ++ continue; \ ++ } \ ++ else \ ++ { \ ++ /* Read the possible remaining bytes. */ \ ++ for (i = 1; i < cnt; ++i) \ ++ { \ ++ uint16_t byte = inptr[i]; \ ++ \ ++ if ((byte & 0xc0) != 0x80) \ ++ /* This is an illegal encoding. */ \ ++ break; \ ++ \ ++ ch <<= 6; \ ++ ch |= byte & 0x3f; \ ++ } \ ++ \ ++ /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ ++ If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ ++ have been represented with fewer than cnt bytes. */ \ ++ if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ ++ /* Do not accept UTF-16 surrogates. */ \ ++ || (ch >= 0xd800 && ch <= 0xdfff)) \ ++ { \ ++ /* This is an illegal encoding. */ \ ++ goto errout; \ ++ } \ ++ \ ++ inptr += cnt; \ ++ } \ ++ } \ ++ /* Now adjust the pointers and store the result. */ \ ++ *((uint16_t *) outptr) = ch; \ ++ outptr += sizeof (uint16_t); \ ++ } ++ ++/* Generate loop-function with software implementation. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO ++#define LOOPFCT __from_utf8_loop_c ++#define LOOP_NEED_FLAGS ++#define BODY BODY_FROM_C ++#include ++ ++/* Generate loop-function with hardware utf-convert instruction. 
*/ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO ++#define LOOPFCT __from_utf8_loop_etf3eh ++#define LOOP_NEED_FLAGS ++#define BODY BODY_FROM_ETF3EH ++#include ++ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++/* Generate loop-function with hardware vector and utf-convert instructions. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define MAX_NEEDED_OUTPUT MAX_NEEDED_TO ++# define LOOPFCT __from_utf8_loop_vx ++# define LOOP_NEED_FLAGS ++# define BODY BODY_FROM_VX ++# include ++#endif ++ ++ ++/* Generate ifunc'ed loop function. */ ++__typeof(__from_utf8_loop_c) ++__attribute__ ((ifunc ("__from_utf8_loop_resolver"))) ++__from_utf8_loop; ++ ++static void * ++__from_utf8_loop_resolver (unsigned long int dl_hwcap) ++{ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __from_utf8_loop_vx; ++ else ++#endif ++ if (dl_hwcap & HWCAP_S390_ZARCH && dl_hwcap & HWCAP_S390_HIGH_GPRS ++ && dl_hwcap & HWCAP_S390_ETF3EH) ++ return __from_utf8_loop_etf3eh; ++ else ++ return __from_utf8_loop_c; ++} ++ ++strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) ++ ++/* Conversion from UTF-16 to UTF-8. */ ++ ++/* The software routine is based on the functionality of the S/390 ++ hardware instruction (cu21) as described in the Principles of ++ Operation. */ ++#define BODY_TO_C \ ++ { \ ++ uint16_t c = get16 (inptr); \ ++ \ ++ if (__glibc_likely (c <= 0x007f)) \ ++ { \ ++ /* Single byte UTF-8 char. */ \ ++ *outptr = c & 0xff; \ ++ outptr++; \ ++ } \ ++ else if (c >= 0x0080 && c <= 0x07ff) \ ++ { \ ++ /* Two byte UTF-8 char. */ \ ++ \ ++ if (__glibc_unlikely (outptr + 2 > outend)) \ ++ { \ ++ /* Overflow in the output buffer. 
*/ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ \ ++ outptr[0] = 0xc0; \ ++ outptr[0] |= c >> 6; \ ++ \ ++ outptr[1] = 0x80; \ ++ outptr[1] |= c & 0x3f; \ ++ \ ++ outptr += 2; \ ++ } \ ++ else if ((c >= 0x0800 && c <= 0xd7ff) || c > 0xdfff) \ ++ { \ ++ /* Three byte UTF-8 char. */ \ ++ \ ++ if (__glibc_unlikely (outptr + 3 > outend)) \ ++ { \ ++ /* Overflow in the output buffer. */ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ outptr[0] = 0xe0; \ ++ outptr[0] |= c >> 12; \ ++ \ ++ outptr[1] = 0x80; \ ++ outptr[1] |= (c >> 6) & 0x3f; \ ++ \ ++ outptr[2] = 0x80; \ ++ outptr[2] |= c & 0x3f; \ ++ \ ++ outptr += 3; \ ++ } \ ++ else if (c >= 0xd800 && c <= 0xdbff) \ ++ { \ ++ /* Four byte UTF-8 char. */ \ ++ uint16_t low, uvwxy; \ ++ \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ ++ { \ ++ /* Overflow in the output buffer. */ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ if (__glibc_unlikely (inptr + 4 > inend)) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ \ ++ inptr += 2; \ ++ low = get16 (inptr); \ ++ \ ++ if ((low & 0xfc00) != 0xdc00) \ ++ { \ ++ inptr -= 2; \ ++ STANDARD_TO_LOOP_ERR_HANDLER (2); \ ++ } \ ++ uvwxy = ((c >> 6) & 0xf) + 1; \ ++ outptr[0] = 0xf0; \ ++ outptr[0] |= uvwxy >> 2; \ ++ \ ++ outptr[1] = 0x80; \ ++ outptr[1] |= (uvwxy << 4) & 0x30; \ ++ outptr[1] |= (c >> 2) & 0x0f; \ ++ \ ++ outptr[2] = 0x80; \ ++ outptr[2] |= (c & 0x03) << 4; \ ++ outptr[2] |= (low >> 6) & 0x0f; \ ++ \ ++ outptr[3] = 0x80; \ ++ outptr[3] |= low & 0x3f; \ ++ \ ++ outptr += 4; \ ++ } \ ++ else \ ++ { \ ++ STANDARD_TO_LOOP_ERR_HANDLER (2); \ ++ } \ ++ inptr += 2; \ ++ } ++ ++#define BODY_TO_VX \ ++ { \ ++ size_t inlen = inend - inptr; \ ++ size_t outlen = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ /* Setup to check for values <= 0x7f. 
*/ \ ++ " larl %[R_TMP],9f\n\t" \ ++ " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_INLEN]) \ ++ CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \ ++ /* Loop which handles UTF-16 chars <=0x7f. */ \ ++ "0: clgijl %[R_INLEN],32,2f\n\t" \ ++ " clgijl %[R_OUTLEN],16,2f\n\t" \ ++ "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \ ++ " lghi %[R_TMP2],0\n\t" \ ++ /* Check for > 1byte UTF-8 chars. */ \ ++ " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" /* Jump away if not all bytes are 1byte \ ++ UTF8 chars. */ \ ++ " vstrchs %%v19,%%v17,%%v30,%%v31\n\t" \ ++ " jno 11f\n\t" /* Jump away if not all bytes are 1byte \ ++ UTF8 chars. */ \ ++ /* Shorten to UTF-8. */ \ ++ " vpkh %%v18,%%v16,%%v17\n\t" \ ++ " la %[R_IN],32(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-32\n\t" \ ++ /* Store 16 bytes to buf_out. */ \ ++ " vst %%v18,0(%[R_OUT])\n\t" \ ++ " aghi %[R_OUTLEN],-16\n\t" \ ++ " la %[R_OUT],16(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],32,2f\n\t" \ ++ " clgijl %[R_OUTLEN],16,2f\n\t" \ ++ " j 1b\n\t" \ ++ /* Setup to check for ch > 0x7f. (v30, v31) */ \ ++ "9: .short 0x7f,0x7f,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ " .short 0x2000,0x2000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ /* At least one byte is > 0x7f. \ ++ Store the preceding 1-byte chars. */ \ ++ "11: lghi %[R_TMP2],16\n\t" /* match was found in v17. */ \ ++ "10:\n\t" \ ++ " vlgvb %[R_TMP],%%v19,7\n\t" \ ++ /* Shorten to UTF-8. */ \ ++ " vpkh %%v18,%%v16,%%v17\n\t" \ ++ " ar %[R_TMP],%[R_TMP2]\n\t" /* Number of in bytes. */ \ ++ " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ ++ " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ ++ " jl 13f\n\t" \ ++ " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ /* Update pointers. */ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ "13: \n\t" \ ++ /* Calculate remaining uint16_t values in loaded vrs. 
*/ \ ++ " lghi %[R_TMP2],16\n\t" \ ++ " slgr %[R_TMP2],%[R_TMP3]\n\t" \ ++ " llh %[R_TMP],0(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-2\n\t" \ ++ " j 22f\n\t" \ ++ /* Handle remaining bytes. */ \ ++ "2: \n\t" \ ++ /* Zero, one or more bytes available? */ \ ++ " clgfi %[R_INLEN],1\n\t" \ ++ " locghie %[R_RES],%[RES_IN_FULL]\n\t" /* Only one byte. */ \ ++ " jle 99f\n\t" /* End if less than two bytes. */ \ ++ /* Calculate remaining uint16_t values in inptr. */ \ ++ " srlg %[R_TMP2],%[R_INLEN],1\n\t" \ ++ /* Handle multibyte utf8-char. */ \ ++ "20: llh %[R_TMP],0(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-2\n\t" \ ++ /* Test if ch is 1-byte UTF-8 char. */ \ ++ "21: clijh %[R_TMP],0x7f,22f\n\t" \ ++ /* Handle 1-byte UTF-8 char. */ \ ++ "31: slgfi %[R_OUTLEN],1\n\t" \ ++ " jl 90f \n\t" \ ++ " stc %[R_TMP],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],2(%[R_IN])\n\t" \ ++ " la %[R_OUT],1(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 2-byte UTF-8 char. */ \ ++ "22: clfi %[R_TMP],0x7ff\n\t" \ ++ " jh 23f\n\t" \ ++ /* Handle 2-byte UTF-8 char. */ \ ++ "32: slgfi %[R_OUTLEN],2\n\t" \ ++ " jl 90f \n\t" \ ++ " llill %[R_TMP3],0xc080\n\t" \ ++ " la %[R_IN],2(%[R_IN])\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],51,55,2\n\t" /* 1. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 2. byte. */ \ ++ " sth %[R_TMP3],0(%[R_OUT])\n\t" \ ++ " la %[R_OUT],2(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 3-byte UTF-8 char. */ \ ++ "23: clfi %[R_TMP],0xd7ff\n\t" \ ++ " jh 24f\n\t" \ ++ /* Handle 3-byte UTF-8 char. */ \ ++ "33: slgfi %[R_OUTLEN],3\n\t" \ ++ " jl 90f \n\t" \ ++ " llilf %[R_TMP3],0xe08080\n\t" \ ++ " la %[R_IN],2(%[R_IN])\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],44,47,4\n\t" /* 1. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],50,55,2\n\t" /* 2. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 3. byte. 
*/ \ ++ " stcm %[R_TMP3],7,0(%[R_OUT])\n\t" \ ++ " la %[R_OUT],3(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 4-byte UTF-8 char. */ \ ++ "24: clfi %[R_TMP],0xdfff\n\t" \ ++ " jh 33b\n\t" /* Handle this 3-byte UTF-8 char. */ \ ++ " clfi %[R_TMP],0xdbff\n\t" \ ++ " locghih %[R_RES],%[RES_IN_ILL]\n\t" \ ++ " jh 99f\n\t" /* Jump away if this is a low surrogate \ ++ without a preceding high surrogate. */ \ ++ /* Handle 4-byte UTF-8 char. */ \ ++ "34: slgfi %[R_OUTLEN],4\n\t" \ ++ " jl 90f \n\t" \ ++ " slgfi %[R_INLEN],2\n\t" \ ++ " locghil %[R_RES],%[RES_IN_FULL]\n\t" \ ++ " jl 99f\n\t" /* Jump away if low surrogate is missing. */ \ ++ " llilf %[R_TMP3],0xf0808080\n\t" \ ++ " aghi %[R_TMP],0x40\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],37,39,16\n\t" /* 1. byte: uvw */ \ ++ " risbgn %[R_TMP3],%[R_TMP],42,43,14\n\t" /* 2. byte: xy */ \ ++ " risbgn %[R_TMP3],%[R_TMP],44,47,14\n\t" /* 2. byte: efgh */ \ ++ " risbgn %[R_TMP3],%[R_TMP],50,51,12\n\t" /* 3. byte: ij */ \ ++ " llh %[R_TMP],2(%[R_IN])\n\t" /* Load low surrogate. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],52,55,2\n\t" /* 3. byte: klmn */ \ ++ " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 4. byte: opqrst */ \ ++ " nilf %[R_TMP],0xfc00\n\t" \ ++ " clfi %[R_TMP],0xdc00\n\t" /* Check if it starts with 0xdc00. */ \ ++ " locghine %[R_RES],%[RES_IN_ILL]\n\t" \ ++ " jne 99f\n\t" /* Jump away if low surrogate is invalid. */ \ ++ " st %[R_TMP3],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " la %[R_OUT],4(%[R_OUT])\n\t" \ ++ " aghi %[R_TMP2],-2\n\t" \ ++ " jh 20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Exit with __GCONV_FULL_OUTPUT. 
*/ \ ++ "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t" \ ++ "99: \n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ if (__glibc_likely (inptr == inend) \ ++ || result != __GCONV_ILLEGAL_INPUT) \ ++ break; \ ++ \ ++ STANDARD_TO_LOOP_ERR_HANDLER (2); \ ++ } ++ ++/* Generate loop-function with software implementation. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MAX_NEEDED_INPUT MAX_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#if defined HAVE_S390_VX_ASM_SUPPORT ++# define LOOPFCT __to_utf8_loop_c ++# define BODY BODY_TO_C ++# define LOOP_NEED_FLAGS ++# include ++ ++/* Generate loop-function with software implementation. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_TO ++# define MAX_NEEDED_INPUT MAX_NEEDED_TO ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++# define LOOPFCT __to_utf8_loop_vx ++# define BODY BODY_TO_VX ++# define LOOP_NEED_FLAGS ++# include ++ ++/* Generate ifunc'ed loop function. 
*/ ++__typeof(__to_utf8_loop_c) ++__attribute__ ((ifunc ("__to_utf8_loop_resolver"))) ++__to_utf8_loop; ++ ++static void * ++__to_utf8_loop_resolver (unsigned long int dl_hwcap) ++{ ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __to_utf8_loop_vx; ++ else ++ return __to_utf8_loop_c; ++} ++ ++strong_alias (__to_utf8_loop_c_single, __to_utf8_loop_single) ++ ++#else ++# define LOOPFCT TO_LOOP ++# define BODY BODY_TO_C ++# define LOOP_NEED_FLAGS ++# include ++#endif /* !HAVE_S390_VX_ASM_SUPPORT */ ++ ++#include +diff --git a/sysdeps/s390/utf8-utf32-z9.c b/sysdeps/s390/utf8-utf32-z9.c +new file mode 100644 +index 0000000..e39e0a7 +--- /dev/null ++++ b/sysdeps/s390/utf8-utf32-z9.c +@@ -0,0 +1,820 @@ ++/* Conversion between UTF-8 and UTF-32 BE/internal. ++ ++ This module uses the Z9-109 variants of the Convert Unicode ++ instructions. ++ Copyright (C) 1997-2016 Free Software Foundation, Inc. ++ ++ Author: Andreas Krebbel ++ Based on the work by Ulrich Drepper , 1997. ++ ++ Thanks to Daniel Appich who covered the relevant performance work ++ in his diploma thesis. ++ ++ This is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ This is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#if defined HAVE_S390_VX_GCC_SUPPORT ++# define ASM_CLOBBER_VR(NR) , NR ++#else ++# define ASM_CLOBBER_VR(NR) ++#endif ++ ++#if defined __s390x__ ++# define CONVERT_32BIT_SIZE_T(REG) ++#else ++# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t" ++#endif ++ ++/* Defines for skeleton.c. */ ++#define DEFINE_INIT 0 ++#define DEFINE_FINI 0 ++#define MIN_NEEDED_FROM 1 ++#define MAX_NEEDED_FROM 6 ++#define MIN_NEEDED_TO 4 ++#define FROM_LOOP __from_utf8_loop ++#define TO_LOOP __to_utf8_loop ++#define FROM_DIRECTION (dir == from_utf8) ++#define ONE_DIRECTION 0 ++ ++/* UTF-32 big endian byte order mark. */ ++#define BOM 0x0000feffu ++ ++/* Direction of the transformation. */ ++enum direction ++{ ++ illegal_dir, ++ to_utf8, ++ from_utf8 ++}; ++ ++struct utf8_data ++{ ++ enum direction dir; ++ int emit_bom; ++}; ++ ++ ++extern int gconv_init (struct __gconv_step *step); ++int ++gconv_init (struct __gconv_step *step) ++{ ++ /* Determine which direction. 
*/ ++ struct utf8_data *new_data; ++ enum direction dir = illegal_dir; ++ int emit_bom; ++ int result; ++ ++ emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0); ++ ++ if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0 ++ && (__strcasecmp (step->__to_name, "UTF-32//") == 0 ++ || __strcasecmp (step->__to_name, "UTF-32BE//") == 0 ++ || __strcasecmp (step->__to_name, "INTERNAL") == 0)) ++ { ++ dir = from_utf8; ++ } ++ else if (__strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0 ++ && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0 ++ || __strcasecmp (step->__from_name, "INTERNAL") == 0)) ++ { ++ dir = to_utf8; ++ } ++ ++ result = __GCONV_NOCONV; ++ if (dir != illegal_dir) ++ { ++ new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data)); ++ ++ result = __GCONV_NOMEM; ++ if (new_data != NULL) ++ { ++ new_data->dir = dir; ++ new_data->emit_bom = emit_bom; ++ step->__data = new_data; ++ ++ if (dir == from_utf8) ++ { ++ step->__min_needed_from = MIN_NEEDED_FROM; ++ step->__max_needed_from = MIN_NEEDED_FROM; ++ step->__min_needed_to = MIN_NEEDED_TO; ++ step->__max_needed_to = MIN_NEEDED_TO; ++ } ++ else ++ { ++ step->__min_needed_from = MIN_NEEDED_TO; ++ step->__max_needed_from = MIN_NEEDED_TO; ++ step->__min_needed_to = MIN_NEEDED_FROM; ++ step->__max_needed_to = MIN_NEEDED_FROM; ++ } ++ ++ step->__stateful = 0; ++ ++ result = __GCONV_OK; ++ } ++ } ++ ++ return result; ++} ++ ++ ++extern void gconv_end (struct __gconv_step *data); ++void ++gconv_end (struct __gconv_step *data) ++{ ++ free (data->__data); ++} ++ ++/* The macro for the hardware loop. This is used for both ++ directions. 
*/ ++#define HARDWARE_CONVERT(INSTRUCTION) \ ++ { \ ++ register const unsigned char* pInput __asm__ ("8") = inptr; \ ++ register size_t inlen __asm__ ("9") = inend - inptr; \ ++ register unsigned char* pOutput __asm__ ("10") = outptr; \ ++ register size_t outlen __asm__("11") = outend - outptr; \ ++ unsigned long cc = 0; \ ++ \ ++ __asm__ __volatile__ (".machine push \n\t" \ ++ ".machine \"z9-109\" \n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ "0: " INSTRUCTION " \n\t" \ ++ ".machine pop \n\t" \ ++ " jo 0b \n\t" \ ++ " ipm %2 \n" \ ++ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ ++ "+d" (outlen), "+d" (inlen) \ ++ : \ ++ : "cc", "memory"); \ ++ \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ cc >>= 28; \ ++ \ ++ if (cc == 1) \ ++ { \ ++ result = __GCONV_FULL_OUTPUT; \ ++ } \ ++ else if (cc == 2) \ ++ { \ ++ result = __GCONV_ILLEGAL_INPUT; \ ++ } \ ++ } ++ ++#define PREPARE_LOOP \ ++ enum direction dir = ((struct utf8_data *) step->__data)->dir; \ ++ int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ ++ \ ++ if (emit_bom && !data->__internal_use \ ++ && data->__invocation_counter == 0) \ ++ { \ ++ /* Emit the Byte Order Mark. */ \ ++ if (__glibc_unlikely (outbuf + 4 > outend)) \ ++ return __GCONV_FULL_OUTPUT; \ ++ \ ++ put32u (outbuf, BOM); \ ++ outbuf += 4; \ ++ } ++ ++/* Conversion function from UTF-8 to UTF-32 internal/BE. */ ++ ++#define STORE_REST_COMMON \ ++ { \ ++ /* We store the remaining bytes while converting them into the UCS4 \ ++ format. We can assume that the first byte in the buffer is \ ++ correct and that it requires a larger number of bytes than there \ ++ are in the input buffer. */ \ ++ wint_t ch = **inptrp; \ ++ size_t cnt, r; \ ++ \ ++ state->__count = inend - *inptrp; \ ++ \ ++ assert (ch != 0xc0 && ch != 0xc1); \ ++ if (ch >= 0xc2 && ch < 0xe0) \ ++ { \ ++ /* We expect two bytes. The first byte cannot be 0xc0 or \ ++ 0xc1, otherwise the wide character could have been \ ++ represented using a single byte. 
*/ \ ++ cnt = 2; \ ++ ch &= 0x1f; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ ++ { \ ++ /* We expect three bytes. */ \ ++ cnt = 3; \ ++ ch &= 0x0f; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ ++ { \ ++ /* We expect four bytes. */ \ ++ cnt = 4; \ ++ ch &= 0x07; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ ++ { \ ++ /* We expect five bytes. */ \ ++ cnt = 5; \ ++ ch &= 0x03; \ ++ } \ ++ else \ ++ { \ ++ /* We expect six bytes. */ \ ++ cnt = 6; \ ++ ch &= 0x01; \ ++ } \ ++ \ ++ /* The first byte is already consumed. */ \ ++ r = cnt - 1; \ ++ while (++(*inptrp) < inend) \ ++ { \ ++ ch <<= 6; \ ++ ch |= **inptrp & 0x3f; \ ++ --r; \ ++ } \ ++ \ ++ /* Shift for the so far missing bytes. */ \ ++ ch <<= r * 6; \ ++ \ ++ /* Store the number of bytes expected for the entire sequence. */ \ ++ state->__count |= cnt << 8; \ ++ \ ++ /* Store the value. */ \ ++ state->__value.__wch = ch; \ ++ } ++ ++#define UNPACK_BYTES_COMMON \ ++ { \ ++ static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \ ++ wint_t wch = state->__value.__wch; \ ++ size_t ntotal = state->__count >> 8; \ ++ \ ++ inlen = state->__count & 255; \ ++ \ ++ bytebuf[0] = inmask[ntotal - 2]; \ ++ \ ++ do \ ++ { \ ++ if (--ntotal < inlen) \ ++ bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ ++ wch >>= 6; \ ++ } \ ++ while (ntotal > 1); \ ++ \ ++ bytebuf[0] |= wch; \ ++ } ++ ++#define CLEAR_STATE_COMMON \ ++ state->__count = 0 ++ ++#define BODY_FROM_HW(ASM) \ ++ { \ ++ ASM; \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ ++ \ ++ int i; \ ++ for (i = 1; inptr + i < inend && i < 5; ++i) \ ++ if ((inptr[i] & 0xc0) != 0x80) \ ++ break; \ ++ \ ++ if (__glibc_likely (inptr + i == inend \ ++ && result == __GCONV_EMPTY_INPUT)) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (i); \ ++ } ++ ++/* This hardware routine uses the Convert UTF8 to UTF32 (cu14) instruction. 
*/ ++#define BODY_FROM_ETF3EH BODY_FROM_HW (HARDWARE_CONVERT ("cu14 %0, %1, 1")) ++ ++ ++/* The software routine is copied from gconv_simple.c. */ ++#define BODY_FROM_C \ ++ { \ ++ /* Next input byte. */ \ ++ uint32_t ch = *inptr; \ ++ \ ++ if (__glibc_likely (ch < 0x80)) \ ++ { \ ++ /* One byte sequence. */ \ ++ ++inptr; \ ++ } \ ++ else \ ++ { \ ++ uint_fast32_t cnt; \ ++ uint_fast32_t i; \ ++ \ ++ if (ch >= 0xc2 && ch < 0xe0) \ ++ { \ ++ /* We expect two bytes. The first byte cannot be 0xc0 or \ ++ 0xc1, otherwise the wide character could have been \ ++ represented using a single byte. */ \ ++ cnt = 2; \ ++ ch &= 0x1f; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ ++ { \ ++ /* We expect three bytes. */ \ ++ cnt = 3; \ ++ ch &= 0x0f; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ ++ { \ ++ /* We expect four bytes. */ \ ++ cnt = 4; \ ++ ch &= 0x07; \ ++ } \ ++ else \ ++ { \ ++ /* Search the end of this ill-formed UTF-8 character. This \ ++ is the next byte with (x & 0xc0) != 0x80. */ \ ++ i = 0; \ ++ do \ ++ ++i; \ ++ while (inptr + i < inend \ ++ && (*(inptr + i) & 0xc0) == 0x80 \ ++ && i < 5); \ ++ \ ++ errout: \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (i); \ ++ } \ ++ \ ++ if (__glibc_unlikely (inptr + cnt > inend)) \ ++ { \ ++ /* We don't have enough input. But before we report \ ++ that check that all the bytes are correct. */ \ ++ for (i = 1; inptr + i < inend; ++i) \ ++ if ((inptr[i] & 0xc0) != 0x80) \ ++ break; \ ++ \ ++ if (__glibc_likely (inptr + i == inend)) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ \ ++ goto errout; \ ++ } \ ++ \ ++ /* Read the possible remaining bytes. */ \ ++ for (i = 1; i < cnt; ++i) \ ++ { \ ++ uint32_t byte = inptr[i]; \ ++ \ ++ if ((byte & 0xc0) != 0x80) \ ++ /* This is an illegal encoding. */ \ ++ break; \ ++ \ ++ ch <<= 6; \ ++ ch |= byte & 0x3f; \ ++ } \ ++ \ ++ /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. 
\ ++ If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ ++ have been represented with fewer than cnt bytes. */ \ ++ if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ ++ /* Do not accept UTF-16 surrogates. */ \ ++ || (ch >= 0xd800 && ch <= 0xdfff) \ ++ || (ch > 0x10ffff)) \ ++ { \ ++ /* This is an illegal encoding. */ \ ++ goto errout; \ ++ } \ ++ \ ++ inptr += cnt; \ ++ } \ ++ \ ++ /* Now adjust the pointers and store the result. */ \ ++ *((uint32_t *) outptr) = ch; \ ++ outptr += sizeof (uint32_t); \ ++ } ++ ++#define HW_FROM_VX \ ++ { \ ++ register const unsigned char* pInput asm ("8") = inptr; \ ++ register size_t inlen asm ("9") = inend - inptr; \ ++ register unsigned char* pOutput asm ("10") = outptr; \ ++ register size_t outlen asm("11") = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \ ++ " vrepib %%v31,0x20\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_INLEN]) \ ++ CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \ ++ /* Loop which handles UTF-8 chars <=0x7f. */ \ ++ "0: clgijl %[R_INLEN],16,20f\n\t" \ ++ " clgijl %[R_OUTLEN],64,20f\n\t" \ ++ "1: vl %%v16,0(%[R_IN])\n\t" \ ++ " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" /* Jump away if not all bytes are 1byte \ ++ UTF8 chars. */ \ ++ /* Enlarge to UCS4. */ \ ++ " vuplhb %%v18,%%v16\n\t" \ ++ " vupllb %%v19,%%v16\n\t" \ ++ " la %[R_IN],16(%[R_IN])\n\t" \ ++ " vuplhh %%v20,%%v18\n\t" \ ++ " aghi %[R_INLEN],-16\n\t" \ ++ " vupllh %%v21,%%v18\n\t" \ ++ " aghi %[R_OUTLEN],-64\n\t" \ ++ " vuplhh %%v22,%%v19\n\t" \ ++ " vupllh %%v23,%%v19\n\t" \ ++ /* Store 64 bytes to buf_out. */ \ ++ " vstm %%v20,%%v23,0(%[R_OUT])\n\t" \ ++ " la %[R_OUT],64(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],16,20f\n\t" \ ++ " clgijl %[R_OUTLEN],64,20f\n\t" \ ++ " j 1b\n\t" \ ++ "10: \n\t" \ ++ /* At least one byte is > 0x7f. 
\ ++ Store the preceding 1-byte chars. */ \ ++ " vlgvb %[R_TMP],%%v17,7\n\t" \ ++ " sllk %[R_TMP2],%[R_TMP],2\n\t" /* Compute highest \ ++ index to store. */ \ ++ " llgfr %[R_TMP3],%[R_TMP2]\n\t" \ ++ " ahi %[R_TMP2],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " vuplhb %%v18,%%v16\n\t" \ ++ " vuplhh %%v20,%%v18\n\t" \ ++ " vstl %%v20,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllh %%v21,%%v18\n\t" \ ++ " vstl %%v21,%[R_TMP2],16(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllb %%v19,%%v16\n\t" \ ++ " vuplhh %%v22,%%v19\n\t" \ ++ " vstl %%v22,%[R_TMP2],32(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllh %%v23,%%v19\n\t" \ ++ " vstl %%v23,%[R_TMP2],48(%[R_OUT])\n\t" \ ++ "11: \n\t" \ ++ /* Update pointers. */ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ /* Handle multibyte utf8-char with convert instruction. */ \ ++ "20: cu14 %[R_OUT],%[R_IN],1\n\t" \ ++ " jo 0b\n\t" /* Try vector implemenation again. */ \ ++ " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ ++ " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. 
*/ \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (pInput) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30") \ ++ ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ } ++#define BODY_FROM_VX BODY_FROM_HW (HW_FROM_VX) ++ ++/* These definitions apply to the UTF-8 to UTF-32 direction. The ++ software implementation for UTF-8 still supports multibyte ++ characters up to 6 bytes whereas the hardware variant does not. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define LOOPFCT __from_utf8_loop_c ++ ++#define LOOP_NEED_FLAGS ++ ++#define STORE_REST STORE_REST_COMMON ++#define UNPACK_BYTES UNPACK_BYTES_COMMON ++#define CLEAR_STATE CLEAR_STATE_COMMON ++#define BODY BODY_FROM_C ++#include ++ ++ ++/* Generate loop-function with hardware utf-convert instruction. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define LOOPFCT __from_utf8_loop_etf3eh ++ ++#define LOOP_NEED_FLAGS ++ ++#define STORE_REST STORE_REST_COMMON ++#define UNPACK_BYTES UNPACK_BYTES_COMMON ++#define CLEAR_STATE CLEAR_STATE_COMMON ++#define BODY BODY_FROM_ETF3EH ++#include ++ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++/* Generate loop-function with hardware vector instructions. 
*/ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define LOOPFCT __from_utf8_loop_vx ++ ++# define LOOP_NEED_FLAGS ++ ++# define STORE_REST STORE_REST_COMMON ++# define UNPACK_BYTES UNPACK_BYTES_COMMON ++# define CLEAR_STATE CLEAR_STATE_COMMON ++# define BODY BODY_FROM_VX ++# include ++#endif ++ ++ ++/* Generate ifunc'ed loop function. */ ++__typeof(__from_utf8_loop_c) ++__attribute__ ((ifunc ("__from_utf8_loop_resolver"))) ++__from_utf8_loop; ++ ++static void * ++__from_utf8_loop_resolver (unsigned long int dl_hwcap) ++{ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __from_utf8_loop_vx; ++ else ++#endif ++ if (dl_hwcap & HWCAP_S390_ZARCH && dl_hwcap & HWCAP_S390_HIGH_GPRS ++ && dl_hwcap & HWCAP_S390_ETF3EH) ++ return __from_utf8_loop_etf3eh; ++ else ++ return __from_utf8_loop_c; ++} ++ ++strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) ++ ++ ++/* Conversion from UTF-32 internal/BE to UTF-8. */ ++#define BODY_TO_HW(ASM) \ ++ { \ ++ ASM; \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ ++ if (inptr + 4 > inend) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } ++ ++/* The hardware routine uses the S/390 cu41 instruction. */ ++#define BODY_TO_ETF3EH BODY_TO_HW (HARDWARE_CONVERT ("cu41 %0, %1")) ++ ++/* The hardware routine uses the S/390 vector and cu41 instructions. */ ++#define BODY_TO_VX BODY_TO_HW (HW_TO_VX) ++ ++/* The software routine mimics the S/390 cu41 instruction. */ ++#define BODY_TO_C \ ++ { \ ++ uint32_t wc = *((const uint32_t *) inptr); \ ++ \ ++ if (__glibc_likely (wc <= 0x7f)) \ ++ { \ ++ /* Single UTF-8 char. */ \ ++ *outptr = (uint8_t)wc; \ ++ outptr++; \ ++ } \ ++ else if (wc <= 0x7ff) \ ++ { \ ++ /* Two UTF-8 chars. 
*/ \ ++ if (__glibc_unlikely (outptr + 2 > outend)) \ ++ { \ ++ /* Overflow in the output buffer. */ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ \ ++ outptr[0] = 0xc0; \ ++ outptr[0] |= wc >> 6; \ ++ \ ++ outptr[1] = 0x80; \ ++ outptr[1] |= wc & 0x3f; \ ++ \ ++ outptr += 2; \ ++ } \ ++ else if (wc <= 0xffff) \ ++ { \ ++ /* Three UTF-8 chars. */ \ ++ if (__glibc_unlikely (outptr + 3 > outend)) \ ++ { \ ++ /* Overflow in the output buffer. */ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ if (wc >= 0xd800 && wc < 0xdc00) \ ++ { \ ++ /* Do not accept UTF-16 surrogates. */ \ ++ result = __GCONV_ILLEGAL_INPUT; \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } \ ++ outptr[0] = 0xe0; \ ++ outptr[0] |= wc >> 12; \ ++ \ ++ outptr[1] = 0x80; \ ++ outptr[1] |= (wc >> 6) & 0x3f; \ ++ \ ++ outptr[2] = 0x80; \ ++ outptr[2] |= wc & 0x3f; \ ++ \ ++ outptr += 3; \ ++ } \ ++ else if (wc <= 0x10ffff) \ ++ { \ ++ /* Four UTF-8 chars. */ \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ ++ { \ ++ /* Overflow in the output buffer. 
*/ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ outptr[0] = 0xf0; \ ++ outptr[0] |= wc >> 18; \ ++ \ ++ outptr[1] = 0x80; \ ++ outptr[1] |= (wc >> 12) & 0x3f; \ ++ \ ++ outptr[2] = 0x80; \ ++ outptr[2] |= (wc >> 6) & 0x3f; \ ++ \ ++ outptr[3] = 0x80; \ ++ outptr[3] |= wc & 0x3f; \ ++ \ ++ outptr += 4; \ ++ } \ ++ else \ ++ { \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } \ ++ inptr += 4; \ ++ } ++ ++#define HW_TO_VX \ ++ { \ ++ register const unsigned char* pInput asm ("8") = inptr; \ ++ register size_t inlen asm ("9") = inend - inptr; \ ++ register unsigned char* pOutput asm ("10") = outptr; \ ++ register size_t outlen asm("11") = outend - outptr; \ ++ unsigned long tmp, tmp2; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ " vleif %%v20,127,0\n\t" /* element 0: 127 */ \ ++ " vzero %%v21\n\t" \ ++ " vleih %%v21,8192,0\n\t" /* element 0: > */ \ ++ " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \ ++ CONVERT_32BIT_SIZE_T ([R_INLEN]) \ ++ CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \ ++ /* Loop which handles UTF-32 chars <=0x7f. */ \ ++ "0: clgijl %[R_INLEN],64,20f\n\t" \ ++ " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ "1: vlm %%v16,%%v19,0(%[R_IN])\n\t" \ ++ " lghi %[R_TMP],0\n\t" \ ++ /* Shorten to byte values. */ \ ++ " vpkf %%v23,%%v16,%%v17\n\t" \ ++ " vpkf %%v24,%%v18,%%v19\n\t" \ ++ " vpkh %%v23,%%v23,%%v24\n\t" \ ++ /* Checking for values > 0x7f. */ \ ++ " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \ ++ " jno 10f\n\t" \ ++ " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \ ++ " jno 11f\n\t" \ ++ " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \ ++ " jno 12f\n\t" \ ++ " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \ ++ " jno 13f\n\t" \ ++ /* Store 16bytes to outptr. 
*/ \ ++ " vst %%v23,0(%[R_OUT])\n\t" \ ++ " aghi %[R_INLEN],-64\n\t" \ ++ " aghi %[R_OUTLEN],-16\n\t" \ ++ " la %[R_IN],64(%[R_IN])\n\t" \ ++ " la %[R_OUT],16(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],64,20f\n\t" \ ++ " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ " j 1b\n\t" \ ++ /* Found a value > 0x7f. */ \ ++ "13: ahi %[R_TMP],4\n\t" \ ++ "12: ahi %[R_TMP],4\n\t" \ ++ "11: ahi %[R_TMP],4\n\t" \ ++ "10: vlgvb %[R_I],%%v22,7\n\t" \ ++ " srlg %[R_I],%[R_I],2\n\t" \ ++ " agr %[R_I],%[R_TMP]\n\t" \ ++ " je 20f\n\t" \ ++ /* Store characters before invalid one... */ \ ++ " slgr %[R_OUTLEN],%[R_I]\n\t" \ ++ "15: aghi %[R_I],-1\n\t" \ ++ " vstl %%v23,%[R_I],0(%[R_OUT])\n\t" \ ++ /* ... and update pointers. */ \ ++ " aghi %[R_I],1\n\t" \ ++ " la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ ++ " sllg %[R_I],%[R_I],2\n\t" \ ++ " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_I]\n\t" \ ++ /* Handle multibyte utf8-char with convert instruction. */ \ ++ "20: cu41 %[R_OUT],%[R_IN]\n\t" \ ++ " jo 0b\n\t" /* Try vector implemenation again. */ \ ++ " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ ++ " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (pInput) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=d" (tmp) \ ++ , [R_I] "=a" (tmp2) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ ++ ASM_CLOBBER_VR ("v24") \ ++ ); \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ } ++ ++/* Generate loop-function with software routing. 
*/ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#define LOOPFCT __to_utf8_loop_c ++#define BODY BODY_TO_C ++#define LOOP_NEED_FLAGS ++#include ++ ++/* Generate loop-function with hardware utf-convert instruction. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#define LOOPFCT __to_utf8_loop_etf3eh ++#define LOOP_NEED_FLAGS ++#define BODY BODY_TO_ETF3EH ++#include ++ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++/* Generate loop-function with hardware vector and utf-convert instructions. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_TO ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++# define LOOPFCT __to_utf8_loop_vx ++# define BODY BODY_TO_VX ++# define LOOP_NEED_FLAGS ++# include ++#endif ++ ++/* Generate ifunc'ed loop function. */ ++__typeof(__to_utf8_loop_c) ++__attribute__ ((ifunc ("__to_utf8_loop_resolver"))) ++__to_utf8_loop; ++ ++static void * ++__to_utf8_loop_resolver (unsigned long int dl_hwcap) ++{ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __to_utf8_loop_vx; ++ else ++#endif ++ if (dl_hwcap & HWCAP_S390_ZARCH && dl_hwcap & HWCAP_S390_HIGH_GPRS ++ && dl_hwcap & HWCAP_S390_ETF3EH) ++ return __to_utf8_loop_etf3eh; ++ else ++ return __to_utf8_loop_c; ++} ++ ++strong_alias (__to_utf8_loop_c_single, __to_utf8_loop_single) ++ ++ ++#include +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-14.patch b/SOURCES/glibc-rh1380680-14.patch new file mode 100644 index 0000000..b1ed24e --- /dev/null +++ b/SOURCES/glibc-rh1380680-14.patch @@ -0,0 +1,288 @@ +From 1ae5597025b342ee8fec59e04970b44fc1361744 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 17:17:51 +0100 +Subject: [PATCH 14/17] S390: Fix utf32 to utf8 handling of low surrogates + (disable cu41). 
+ +Upstream commit 52f8a48e24563daa807f94824ce9782b9a9eece9 + +According to the latest Unicode standard, a conversion from/to UTF-xx has +to report an error if the character value is in range of an utf16 surrogate +(0xd800..0xdfff). See https://sourceware.org/ml/libc-help/2015-12/msg00015.html. + +Thus the cu41 instruction, which converts from utf32 to utf8, has to be +disabled because it does not report an error in case of a value in range of +a low surrogate (0xdc00..0xdfff). The etf3eh variant is removed and the c, +vector variant is adjusted to handle the value in range of an utf16 low +surrogate correctly. + +ChangeLog: + + * sysdeps/s390/utf8-utf32-z9.c: Disable cu41 instruction and report + an error in case of a value in range of an utf16 low surrogate. +--- + sysdeps/s390/utf8-utf32-z9.c | 188 ++++++++++++++++++++++++++----------------- + 1 file changed, 115 insertions(+), 73 deletions(-) + +diff --git a/sysdeps/s390/utf8-utf32-z9.c b/sysdeps/s390/utf8-utf32-z9.c +index e39e0a7..efae745 100644 +--- a/sysdeps/s390/utf8-utf32-z9.c ++++ b/sysdeps/s390/utf8-utf32-z9.c +@@ -572,28 +572,6 @@ __from_utf8_loop_resolver (unsigned long int dl_hwcap) + + strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) + +- +-/* Conversion from UTF-32 internal/BE to UTF-8. */ +-#define BODY_TO_HW(ASM) \ +- { \ +- ASM; \ +- if (__glibc_likely (inptr == inend) \ +- || result == __GCONV_FULL_OUTPUT) \ +- break; \ +- if (inptr + 4 > inend) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- STANDARD_TO_LOOP_ERR_HANDLER (4); \ +- } +- +-/* The hardware routine uses the S/390 cu41 instruction. */ +-#define BODY_TO_ETF3EH BODY_TO_HW (HARDWARE_CONVERT ("cu41 %0, %1")) +- +-/* The hardware routine uses the S/390 vector and cu41 instructions. */ +-#define BODY_TO_VX BODY_TO_HW (HW_TO_VX) +- + /* The software routine mimics the S/390 cu41 instruction. 
*/ + #define BODY_TO_C \ + { \ +@@ -632,7 +610,7 @@ strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ +- if (wc >= 0xd800 && wc < 0xdc00) \ ++ if (wc >= 0xd800 && wc <= 0xdfff) \ + { \ + /* Do not accept UTF-16 surrogates. */ \ + result = __GCONV_ILLEGAL_INPUT; \ +@@ -679,13 +657,12 @@ strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) + inptr += 4; \ + } + +-#define HW_TO_VX \ ++/* The hardware routine uses the S/390 vector instructions. */ ++#define BODY_TO_VX \ + { \ +- register const unsigned char* pInput asm ("8") = inptr; \ +- register size_t inlen asm ("9") = inend - inptr; \ +- register unsigned char* pOutput asm ("10") = outptr; \ +- register size_t outlen asm("11") = outend - outptr; \ +- unsigned long tmp, tmp2; \ ++ size_t inlen = inend - inptr; \ ++ size_t outlen = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ + asm volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ +@@ -696,10 +673,10 @@ strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) + CONVERT_32BIT_SIZE_T ([R_INLEN]) \ + CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \ + /* Loop which handles UTF-32 chars <=0x7f. */ \ +- "0: clgijl %[R_INLEN],64,20f\n\t" \ +- " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ "0: clgijl %[R_INLEN],64,2f\n\t" \ ++ " clgijl %[R_OUTLEN],16,2f\n\t" \ + "1: vlm %%v16,%%v19,0(%[R_IN])\n\t" \ +- " lghi %[R_TMP],0\n\t" \ ++ " lghi %[R_TMP2],0\n\t" \ + /* Shorten to byte values. */ \ + " vpkf %%v23,%%v16,%%v17\n\t" \ + " vpkf %%v24,%%v18,%%v19\n\t" \ +@@ -719,41 +696,116 @@ strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) + " aghi %[R_OUTLEN],-16\n\t" \ + " la %[R_IN],64(%[R_IN])\n\t" \ + " la %[R_OUT],16(%[R_OUT])\n\t" \ +- " clgijl %[R_INLEN],64,20f\n\t" \ +- " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ " clgijl %[R_INLEN],64,2f\n\t" \ ++ " clgijl %[R_OUTLEN],16,2f\n\t" \ + " j 1b\n\t" \ + /* Found a value > 0x7f. 
*/ \ +- "13: ahi %[R_TMP],4\n\t" \ +- "12: ahi %[R_TMP],4\n\t" \ +- "11: ahi %[R_TMP],4\n\t" \ +- "10: vlgvb %[R_I],%%v22,7\n\t" \ +- " srlg %[R_I],%[R_I],2\n\t" \ +- " agr %[R_I],%[R_TMP]\n\t" \ +- " je 20f\n\t" \ ++ "13: ahi %[R_TMP2],4\n\t" \ ++ "12: ahi %[R_TMP2],4\n\t" \ ++ "11: ahi %[R_TMP2],4\n\t" \ ++ "10: vlgvb %[R_TMP],%%v22,7\n\t" \ ++ " srlg %[R_TMP],%[R_TMP],2\n\t" \ ++ " agr %[R_TMP],%[R_TMP2]\n\t" \ ++ " je 16f\n\t" \ + /* Store characters before invalid one... */ \ +- " slgr %[R_OUTLEN],%[R_I]\n\t" \ +- "15: aghi %[R_I],-1\n\t" \ +- " vstl %%v23,%[R_I],0(%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP]\n\t" \ ++ "15: aghi %[R_TMP],-1\n\t" \ ++ " vstl %%v23,%[R_TMP],0(%[R_OUT])\n\t" \ + /* ... and update pointers. */ \ +- " aghi %[R_I],1\n\t" \ +- " la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ +- " sllg %[R_I],%[R_I],2\n\t" \ +- " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ +- " slgr %[R_INLEN],%[R_I]\n\t" \ +- /* Handle multibyte utf8-char with convert instruction. */ \ +- "20: cu41 %[R_OUT],%[R_IN]\n\t" \ +- " jo 0b\n\t" /* Try vector implemenation again. */ \ +- " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ +- " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \ ++ " aghi %[R_TMP],1\n\t" \ ++ " la %[R_OUT],0(%[R_TMP],%[R_OUT])\n\t" \ ++ " sllg %[R_TMP2],%[R_TMP],2\n\t" \ ++ " la %[R_IN],0(%[R_TMP2],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP2]\n\t" \ ++ /* Calculate remaining uint32_t values in loaded vrs. */ \ ++ "16: lghi %[R_TMP2],16\n\t" \ ++ " sgr %[R_TMP2],%[R_TMP]\n\t" \ ++ " l %[R_TMP],0(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-4\n\t" \ ++ " j 22f\n\t" \ ++ /* Handle remaining bytes. */ \ ++ "2: clgije %[R_INLEN],0,99f\n\t" \ ++ " clgijl %[R_INLEN],4,92f\n\t" \ ++ /* Calculate remaining uint32_t values in inptr. */ \ ++ " srlg %[R_TMP2],%[R_INLEN],2\n\t" \ ++ /* Handle multibyte utf8-char. */ \ ++ "20: l %[R_TMP],0(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-4\n\t" \ ++ /* Test if ch is 1byte UTF-8 char. 
*/ \ ++ "21: clijh %[R_TMP],0x7f,22f\n\t" \ ++ /* Handle 1-byte UTF-8 char. */ \ ++ "31: slgfi %[R_OUTLEN],1\n\t" \ ++ " jl 90f \n\t" \ ++ " stc %[R_TMP],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " la %[R_OUT],1(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 2byte UTF-8 char. */ \ ++ "22: clfi %[R_TMP],0x7ff\n\t" \ ++ " jh 23f\n\t" \ ++ /* Handle 2-byte UTF-8 char. */ \ ++ "32: slgfi %[R_OUTLEN],2\n\t" \ ++ " jl 90f \n\t" \ ++ " llill %[R_TMP3],0xc080\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],51,55,2\n\t" /* 1. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 2. byte. */ \ ++ " sth %[R_TMP3],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " la %[R_OUT],2(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 3-byte UTF-8 char. */ \ ++ "23: clfi %[R_TMP],0xffff\n\t" \ ++ " jh 24f\n\t" \ ++ /* Handle 3-byte UTF-8 char. */ \ ++ "33: slgfi %[R_OUTLEN],3\n\t" \ ++ " jl 90f \n\t" \ ++ " llilf %[R_TMP3],0xe08080\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],44,47,4\n\t" /* 1. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],50,55,2\n\t" /* 2. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 3. byte. */ \ ++ /* Test if ch is a UTF-16 surrogate: ch & 0xf800 == 0xd800 */ \ ++ " nilf %[R_TMP],0xf800\n\t" \ ++ " clfi %[R_TMP],0xd800\n\t" \ ++ " je 91f\n\t" /* Do not accept UTF-16 surrogates. */ \ ++ " stcm %[R_TMP3],7,0(%[R_OUT])\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " la %[R_OUT],3(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 4-byte UTF-8 char. */ \ ++ "24: clfi %[R_TMP],0x10ffff\n\t" \ ++ " jh 91f\n\t" /* ch > 0x10ffff is not allowed! */ \ ++ /* Handle 4-byte UTF-8 char. */ \ ++ "34: slgfi %[R_OUTLEN],4\n\t" \ ++ " jl 90f \n\t" \ ++ " llilf %[R_TMP3],0xf0808080\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],37,39,6\n\t" /* 1. byte. 
*/ \ ++ " risbgn %[R_TMP3],%[R_TMP],42,47,4\n\t" /* 2. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],50,55,2\n\t" /* 3. byte. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 4. byte. */ \ ++ " st %[R_TMP3],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " la %[R_OUT],4(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ "92: lghi %[R_RES],%[RES_IN_FULL]\n\t" \ ++ " j 99f\n\t" \ ++ "91: lghi %[R_RES],%[RES_IN_ILL]\n\t" \ ++ " j 99f\n\t" \ ++ "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t" \ ++ "99: \n\t" \ + ".machine pop" \ +- : /* outputs */ [R_IN] "+a" (pInput) \ +- , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ +- , [R_OUTLEN] "+d" (outlen), [R_TMP] "=d" (tmp) \ +- , [R_I] "=a" (tmp2) \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=a" (tmp2), [R_TMP3] "=d" (tmp3) \ + , [R_RES] "+d" (result) \ + : /* inputs */ \ + [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ + , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \ + : /* clobber list */ "memory", "cc" \ + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ +@@ -761,8 +813,11 @@ strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) + ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ + ASM_CLOBBER_VR ("v24") \ + ); \ +- inptr = pInput; \ +- outptr = pOutput; \ ++ if (__glibc_likely (inptr == inend) \ ++ || result != __GCONV_ILLEGAL_INPUT) \ ++ break; \ ++ \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } + + /* Generate loop-function with software routing. */ +@@ -774,15 +829,6 @@ strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) + #define LOOP_NEED_FLAGS + #include + +-/* Generate loop-function with hardware utf-convert instruction. 
*/ +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#define LOOPFCT __to_utf8_loop_etf3eh +-#define LOOP_NEED_FLAGS +-#define BODY BODY_TO_ETF3EH +-#include +- + #if defined HAVE_S390_VX_ASM_SUPPORT + /* Generate loop-function with hardware vector and utf-convert instructions. */ + # define MIN_NEEDED_INPUT MIN_NEEDED_TO +@@ -807,10 +853,6 @@ __to_utf8_loop_resolver (unsigned long int dl_hwcap) + return __to_utf8_loop_vx; + else + #endif +- if (dl_hwcap & HWCAP_S390_ZARCH && dl_hwcap & HWCAP_S390_HIGH_GPRS +- && dl_hwcap & HWCAP_S390_ETF3EH) +- return __to_utf8_loop_etf3eh; +- else + return __to_utf8_loop_c; + } + +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-15.patch b/SOURCES/glibc-rh1380680-15.patch new file mode 100644 index 0000000..42a47fd --- /dev/null +++ b/SOURCES/glibc-rh1380680-15.patch @@ -0,0 +1,270 @@ +From cddea07761373ce92dc75e8306212d71fa2043ba Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 17:18:39 +0100 +Subject: [PATCH 15/17] S390: Fix utf32 to utf16 handling of low surrogates + (disable cu42). + +Upstream commit a42a95c43133d69b1108f582cffa0f6986a9c3da + +According to the latest Unicode standard, a conversion from/to UTF-xx has +to report an error if the character value is in range of an utf16 surrogate +(0xd800..0xdfff). See https://sourceware.org/ml/libc-help/2015-12/msg00015.html. + +Thus the cu42 instruction, which converts from utf32 to utf16, has to be +disabled because it does not report an error in case of a value in range of +a low surrogate (0xdc00..0xdfff). The etf3eh variant is removed and the c, +vector variant is adjusted to handle the value in range of an utf16 low +surrogate correctly. + +ChangeLog: + + * sysdeps/s390/utf16-utf32-z9.c: Disable cu42 instruction and report + an error in case of a value in range of an utf16 low surrogate. 
+--- + sysdeps/s390/utf16-utf32-z9.c | 155 +++++++++++++++++------------------------- + 1 file changed, 62 insertions(+), 93 deletions(-) + +diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c +index 8d42ab8..5d2ac44 100644 +--- a/sysdeps/s390/utf16-utf32-z9.c ++++ b/sysdeps/s390/utf16-utf32-z9.c +@@ -145,42 +145,6 @@ gconv_end (struct __gconv_step *data) + free (data->__data); + } + +-/* The macro for the hardware loop. This is used for both +- directions. */ +-#define HARDWARE_CONVERT(INSTRUCTION) \ +- { \ +- register const unsigned char* pInput __asm__ ("8") = inptr; \ +- register size_t inlen __asm__ ("9") = inend - inptr; \ +- register unsigned char* pOutput __asm__ ("10") = outptr; \ +- register size_t outlen __asm__("11") = outend - outptr; \ +- unsigned long cc = 0; \ +- \ +- __asm__ __volatile__ (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- ".machinemode \"zarch_nohighgprs\"\n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ +- \ +- inptr = pInput; \ +- outptr = pOutput; \ +- cc >>= 28; \ +- \ +- if (cc == 1) \ +- { \ +- result = __GCONV_FULL_OUTPUT; \ +- } \ +- else if (cc == 2) \ +- { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- } \ +- } +- + #define PREPARE_LOOP \ + enum direction dir = ((struct utf16_data *) step->__data)->dir; \ + int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \ +@@ -310,7 +274,7 @@ gconv_end (struct __gconv_step *data) + " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ + /* Calculate remaining uint16_t values in loaded vrs. 
*/ \ + "12: lghi %[R_TMP2],16\n\t" \ +- " sgr %[R_TMP2],%[R_TMP]\n\t" \ ++ " slgr %[R_TMP2],%[R_TMP]\n\t" \ + " srl %[R_TMP2],1\n\t" \ + " llh %[R_TMP],0(%[R_IN])\n\t" \ + " aghi %[R_OUTLEN],-4\n\t" \ +@@ -437,7 +401,7 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) + uint32_t c = get32 (inptr); \ + \ + if (__builtin_expect (c <= 0xd7ff, 1) \ +- || (c >=0xdc00 && c <= 0xffff)) \ ++ || (c > 0xdfff && c <= 0xffff)) \ + { \ + /* Two UTF-16 chars. */ \ + put16 (outptr, c); \ +@@ -475,29 +439,10 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) + inptr += 4; \ + } + +-#define BODY_TO_ETF3EH \ +- { \ +- HARDWARE_CONVERT ("cu42 %0, %1"); \ +- \ +- if (__glibc_likely (inptr == inend) \ +- || result == __GCONV_FULL_OUTPUT) \ +- break; \ +- \ +- if (inptr + 4 > inend) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- \ +- STANDARD_TO_LOOP_ERR_HANDLER (4); \ +- } +- + #define BODY_TO_VX \ + { \ +- register const unsigned char* pInput asm ("8") = inptr; \ +- register size_t inlen asm ("9") = inend - inptr; \ +- register unsigned char* pOutput asm ("10") = outptr; \ +- register size_t outlen asm("11") = outend - outptr; \ ++ size_t inlen = inend - inptr; \ ++ size_t outlen = outend - outptr; \ + unsigned long tmp, tmp2, tmp3; \ + asm volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ +@@ -509,8 +454,8 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) + CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \ + /* Loop which handles UTF-16 chars \ + ch < 0xd800 || (ch > 0xdfff && ch < 0x10000). */ \ +- "0: clgijl %[R_INLEN],32,20f\n\t" \ +- " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ "0: clgijl %[R_INLEN],32,2f\n\t" \ ++ " clgijl %[R_OUTLEN],16,2f\n\t" \ + "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \ + " lghi %[R_TMP2],0\n\t" \ + /* Shorten to UTF-16. 
*/ \ +@@ -526,9 +471,15 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) + " aghi %[R_INLEN],-32\n\t" \ + " aghi %[R_OUTLEN],-16\n\t" \ + " la %[R_OUT],16(%[R_OUT])\n\t" \ +- " clgijl %[R_INLEN],32,20f\n\t" \ +- " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ " clgijl %[R_INLEN],32,2f\n\t" \ ++ " clgijl %[R_OUTLEN],16,2f\n\t" \ + " j 1b\n\t" \ ++ /* Calculate remaining uint32_t values in inptr. */ \ ++ "2: \n\t" \ ++ " clgije %[R_INLEN],0,99f\n\t" \ ++ " clgijl %[R_INLEN],4,92f\n\t" \ ++ " srlg %[R_TMP2],%[R_INLEN],2\n\t" \ ++ " j 20f\n\t" \ + /* Setup to check for ch >= 0xd800 && ch <= 0xdfff \ + and check for ch >= 0x10000. (v30, v31) */ \ + "9: .long 0xd800,0xdfff,0x10000,0x10000\n\t" \ +@@ -540,21 +491,59 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) + " agr %[R_TMP],%[R_TMP2]\n\t" \ + " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \ + " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \ +- " jl 20f\n\t" \ ++ " jl 12f\n\t" \ + " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \ + /* Update pointers. */ \ + " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ + " slgr %[R_INLEN],%[R_TMP]\n\t" \ + " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ + " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ +- /* Handles UTF16 surrogates with convert instruction. */ \ +- "20: cu42 %[R_OUT],%[R_IN]\n\t" \ +- " jo 0b\n\t" /* Try vector implemenation again. */ \ +- " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ +- " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \ ++ /* Calculate remaining uint32_t values in vrs. */ \ ++ "12: lghi %[R_TMP2],8\n\t" \ ++ " srlg %[R_TMP3],%[R_TMP3],1\n\t" \ ++ " slgr %[R_TMP2],%[R_TMP3]\n\t" \ ++ /* Handle remaining UTF-32 characters. */ \ ++ "20: l %[R_TMP],0(%[R_IN])\n\t" \ ++ " aghi %[R_INLEN],-4\n\t" \ ++ /* Test if ch is 2byte UTF-16 char. */ \ ++ " clfi %[R_TMP],0xffff\n\t" \ ++ " jh 21f\n\t" \ ++ /* Handle 2 byte UTF16 char. 
*/ \ ++ " lgr %[R_TMP3],%[R_TMP]\n\t" \ ++ " nilf %[R_TMP],0xf800\n\t" \ ++ " clfi %[R_TMP],0xd800\n\t" \ ++ " je 91f\n\t" /* Do not accept UTF-16 surrogates. */ \ ++ " slgfi %[R_OUTLEN],2\n\t" \ ++ " jl 90f \n\t" \ ++ " sth %[R_TMP3],0(%[R_OUT])\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " la %[R_OUT],2(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. */ \ ++ /* Test if ch is 4byte UTF-16 char. */ \ ++ "21: clfi %[R_TMP],0x10ffff\n\t" \ ++ " jh 91f\n\t" /* ch > 0x10ffff is not allowed! */ \ ++ /* Handle 4 byte UTF16 char. */ \ ++ " slgfi %[R_OUTLEN],4\n\t" \ ++ " jl 90f \n\t" \ ++ " slfi %[R_TMP],0x10000\n\t" /* zabcd = uvwxy - 1. */ \ ++ " llilf %[R_TMP3],0xd800dc00\n\t" \ ++ " la %[R_IN],4(%[R_IN])\n\t" \ ++ " risbgn %[R_TMP3],%[R_TMP],38,47,6\n\t" /* High surrogate. */ \ ++ " risbgn %[R_TMP3],%[R_TMP],54,63,0\n\t" /* Low surrogate. */ \ ++ " st %[R_TMP3],0(%[R_OUT])\n\t" \ ++ " la %[R_OUT],4(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP2],20b\n\t" \ ++ " j 0b\n\t" /* Switch to vx-loop. 
*/ \ ++ "92: lghi %[R_RES],%[RES_IN_FULL]\n\t" \ ++ " j 99f\n\t" \ ++ "91: lghi %[R_RES],%[RES_IN_ILL]\n\t" \ ++ " j 99f\n\t" \ ++ "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t" \ ++ "99: \n\t" \ + ".machine pop" \ +- : /* outputs */ [R_IN] "+a" (pInput) \ +- , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \ + , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ + , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ + , [R_RES] "+d" (result) \ +@@ -567,17 +556,10 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ + ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \ + ); \ +- inptr = pInput; \ +- outptr = pOutput; \ +- \ + if (__glibc_likely (inptr == inend) \ +- || result == __GCONV_FULL_OUTPUT) \ ++ || result != __GCONV_ILLEGAL_INPUT) \ + break; \ +- if (inptr + 4 > inend) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ ++ \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } + +@@ -590,15 +572,6 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single) + #define BODY BODY_TO_C + #include + +-/* Generate loop-function with hardware utf-convert instruction. */ +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#define LOOPFCT __to_utf16_loop_etf3eh +-#define LOOP_NEED_FLAGS +-#define BODY BODY_TO_ETF3EH +-#include +- + #if defined HAVE_S390_VX_ASM_SUPPORT + /* Generate loop-function with hardware vector instructions. 
*/ + # define MIN_NEEDED_INPUT MIN_NEEDED_TO +@@ -623,10 +596,6 @@ __to_utf16_loop_resolver (unsigned long int dl_hwcap) + return __to_utf16_loop_vx; + else + #endif +- if (dl_hwcap & HWCAP_S390_ZARCH && dl_hwcap & HWCAP_S390_HIGH_GPRS +- && dl_hwcap & HWCAP_S390_ETF3EH) +- return __to_utf16_loop_etf3eh; +- else + return __to_utf16_loop_c; + } + +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-16.patch b/SOURCES/glibc-rh1380680-16.patch new file mode 100644 index 0000000..948eb9c --- /dev/null +++ b/SOURCES/glibc-rh1380680-16.patch @@ -0,0 +1,183 @@ +From e4613df21e25e063d120ee23a650c65cd16df4be Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 17:30:22 +0100 +Subject: [PATCH 16/17] Fix ucs4le_internal_loop in error case. [BZ #19726] + +Upstream commit 8f25676c83eef5c85db98f9cf027890fbe810447 + +When converting from UCS4LE to INTERNAL, the input-value is checked for a too +large value and the iconv() call sets errno to EILSEQ. In this case the inbuf +argument of the iconv() call should point to the invalid character, but it +points to the beginning of the inbuf. +Thus this patch updates the pointers inptrp and outptrp before returning in +this error case. + +This patch also adds a new testcase for this issue. +The new test was tested on a s390, power, intel machine. + +ChangeLog: + + [BZ #19726] + * iconv/gconv_simple.c (ucs4le_internal_loop): Update inptrp and + outptrp in case of an illegal input. + * iconv/tst-iconv6.c: New file. + * iconv/Makefile (tests): Add tst-iconv6. 
+--- + iconv/Makefile | 2 +- + iconv/gconv_simple.c | 2 + + iconv/tst-iconv6.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 120 insertions(+), 1 deletion(-) + create mode 100644 iconv/tst-iconv6.c + +diff --git a/iconv/Makefile b/iconv/Makefile +index 3e7f567..4d34c3f 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -43,7 +43,7 @@ CFLAGS-charmap.c = -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \ + CFLAGS-linereader.c = -DNO_TRANSLITERATION + CFLAGS-simple-hash.c = -I../locale + +-tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 ++tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 + + others = iconv_prog iconvconfig + install-others-programs = $(inst_bindir)/iconv +diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c +index 8697309..b9f846d 100644 +--- a/iconv/gconv_simple.c ++++ b/iconv/gconv_simple.c +@@ -634,6 +634,8 @@ ucs4le_internal_loop (struct __gconv_step *step, + continue; + } + ++ *inptrp = inptr; ++ *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + +diff --git a/iconv/tst-iconv6.c b/iconv/tst-iconv6.c +new file mode 100644 +index 0000000..57d7f38 +--- /dev/null ++++ b/iconv/tst-iconv6.c +@@ -0,0 +1,117 @@ ++/* Testing ucs4le_internal_loop() in gconv_simple.c. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ iconv_t cd; ++ char *inptr; ++ size_t inlen; ++ char *outptr; ++ size_t outlen; ++ size_t n; ++ int e; ++ int result = 0; ++ ++#if __BYTE_ORDER == __BIG_ENDIAN ++ /* On big-endian machines, ucs4le_internal_loop() swaps the bytes before ++ error checking. Thus the input values has to be swapped. */ ++# define VALUE(val) bswap_32 (val) ++#else ++# define VALUE(val) val ++#endif ++ uint32_t inbuf[3] = { VALUE (0x41), VALUE (0x80000000), VALUE (0x42) }; ++ uint32_t outbuf[3] = { 0, 0, 0 }; ++ ++ cd = iconv_open ("WCHAR_T", "UCS-4LE"); ++ if (cd == (iconv_t) -1) ++ { ++ printf ("cannot convert from UCS4LE to wchar_t: %m\n"); ++ return 1; ++ } ++ ++ inptr = (char *) inbuf; ++ inlen = sizeof (inbuf); ++ outptr = (char *) outbuf; ++ outlen = sizeof (outbuf); ++ ++ n = iconv (cd, &inptr, &inlen, &outptr, &outlen); ++ e = errno; ++ ++ if (n != (size_t) -1) ++ { ++ printf ("incorrect iconv() return value: %zd, expected -1\n", n); ++ result = 1; ++ } ++ ++ if (e != EILSEQ) ++ { ++ printf ("incorrect error value: %s, expected %s\n", ++ strerror (e), strerror (EILSEQ)); ++ result = 1; ++ } ++ ++ if (inptr != (char *) &inbuf[1]) ++ { ++ printf ("inptr=0x%p does not point to invalid character! Expected=0x%p\n" ++ , inptr, &inbuf[1]); ++ result = 1; ++ } ++ ++ if (inlen != sizeof (inbuf) - sizeof (uint32_t)) ++ { ++ printf ("inlen=%zd != %zd\n" ++ , inlen, sizeof (inbuf) - sizeof (uint32_t)); ++ result = 1; ++ } ++ ++ if (outptr != (char *) &outbuf[1]) ++ { ++ printf ("outptr=0x%p does not point to invalid character in inbuf! 
" ++ "Expected=0x%p\n" ++ , outptr, &outbuf[1]); ++ result = 1; ++ } ++ ++ if (outlen != sizeof (inbuf) - sizeof (uint32_t)) ++ { ++ printf ("outlen=%zd != %zd\n" ++ , outlen, sizeof (outbuf) - sizeof (uint32_t)); ++ result = 1; ++ } ++ ++ if (outbuf[0] != 0x41 || outbuf[1] != 0 || outbuf[2] != 0) ++ { ++ puts ("Characters conversion is incorrect!"); ++ result = 1; ++ } ++ ++ iconv_close (cd); ++ ++ return result; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-17.patch b/SOURCES/glibc-rh1380680-17.patch new file mode 100644 index 0000000..4d316db --- /dev/null +++ b/SOURCES/glibc-rh1380680-17.patch @@ -0,0 +1,400 @@ +From dd5820453c8c8c6521e45e1cb229f70a5ab5f6b0 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 17:45:56 +0100 +Subject: [PATCH 17/17] Fix UTF-16 surrogate handling. [BZ #19727] + +Upstream commit 7ab1de21067d72460ac14089bf6541b10fc14c80 + +According to the latest Unicode standard, a conversion from/to UTF-xx has +to report an error if the character value is in range of an utf16 surrogate +(0xd800..0xdfff). See https://sourceware.org/ml/libc-help/2015-12/msg00015.html. +Thus this patch fixes this behaviour for converting from utf32 to internal and +from internal to utf8. + +Furthermore the conversion from utf16 to internal does not report an error if the +input-stream consists of two low-surrogate values. If an uint16_t value is in the +range of 0xd800 .. 0xdfff, the next uint16_t value is checked, if it is in the +range of a low surrogate (0xdc00 .. 0xdfff). Afterwards these two uint16_t +values are interpreted as a high- and low-surrogates pair. But there is no test +if the first uint16_t value is really in the range of a high-surrogate +(0xd800 .. 0xdbff). If there would be two uint16_t values in the range of a low +surrogate, then they will be treated as a valid high- and low-surrogates pair. +This patch adds this test. 
+ +This patch also adds a new testcase, which checks UTF conversions with input +values in range of UTF16 surrogates. The test converts from UTF-xx to INTERNAL, +INTERNAL to UTF-xx and directly between UTF-xx and UTF-yy. The latter conversion +is needed because s390 has iconv-modules, which convert from/to UTF in one step. +The new testcase was tested on a s390, power and intel machine. + +ChangeLog: + + [BZ #19727] + * iconvdata/utf-16.c (BODY): Report an error if first word is not a + valid high surrogate. + * iconvdata/utf-32.c (BODY): Report an error if the value is in range + of an utf16 surrogate. + * iconv/gconv_simple.c (BODY): Likewise. + * iconvdata/bug-iconv12.c: New file. + * iconvdata/Makefile (tests): Add bug-iconv12. +--- + iconv/gconv_simple.c | 3 +- + iconvdata/Makefile | 4 +- + iconvdata/bug-iconv12.c | 263 ++++++++++++++++++++++++++++++++++++++++++++++++ + iconvdata/utf-16.c | 12 +++ + iconvdata/utf-32.c | 3 +- + 5 files changed, 282 insertions(+), 3 deletions(-) + create mode 100644 iconvdata/bug-iconv12.c + +diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c +index b9f846d..48932ee 100644 +--- a/iconv/gconv_simple.c ++++ b/iconv/gconv_simple.c +@@ -888,7 +888,8 @@ ucs4le_internal_loop_single (struct __gconv_step *step, + if (__builtin_expect (wc < 0x80, 1)) \ + /* It's an one byte sequence. 
*/ \ + *outptr++ = (unsigned char) wc; \ +- else if (__builtin_expect (wc <= 0x7fffffff, 1)) \ ++ else if (__builtin_expect (wc <= 0x7fffffff \ ++ && (wc < 0xd800 || wc > 0xdfff), 1)) \ + { \ + size_t step; \ + unsigned char *start; \ +diff --git a/iconvdata/Makefile b/iconvdata/Makefile +index e2624de..0ec6755 100644 +--- a/iconvdata/Makefile ++++ b/iconvdata/Makefile +@@ -68,7 +68,7 @@ include ../Makeconfig + ifeq (yes,$(build-shared)) + tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ +- bug-iconv10 ++ bug-iconv10 bug-iconv12 + ifeq ($(have-thread-library),yes) + tests += bug-iconv3 + endif +@@ -294,6 +294,8 @@ $(objpfx)bug-iconv5.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) + $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) ++$(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \ ++ $(addprefix $(objpfx),$(modules.so)) + $(objpfx)tst-loading.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) + $(objpfx)tst-iconv4.out: $(objpfx)gconv-modules \ +diff --git a/iconvdata/bug-iconv12.c b/iconvdata/bug-iconv12.c +new file mode 100644 +index 0000000..49f5208 +--- /dev/null ++++ b/iconvdata/bug-iconv12.c +@@ -0,0 +1,263 @@ ++/* bug 19727: Testing UTF conversions with UTF16 surrogates as input. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int ++run_conversion (const char *from, const char *to, char *inbuf, size_t inbuflen, ++ int exp_errno, int line) ++{ ++ char outbuf[16]; ++ iconv_t cd; ++ char *inptr; ++ size_t inlen; ++ char *outptr; ++ size_t outlen; ++ size_t n; ++ int e; ++ int fails = 0; ++ ++ cd = iconv_open (to, from); ++ if (cd == (iconv_t) -1) ++ { ++ printf ("line %d: cannot convert from %s to %s: %m\n", line, from, to); ++ return 1; ++ } ++ ++ inptr = (char *) inbuf; ++ inlen = inbuflen; ++ outptr = outbuf; ++ outlen = sizeof (outbuf); ++ ++ errno = 0; ++ n = iconv (cd, &inptr, &inlen, &outptr, &outlen); ++ e = errno; ++ ++ if (exp_errno == 0) ++ { ++ if (n == (size_t) -1) ++ { ++ puts ("n should be >= 0, but n == -1"); ++ fails ++; ++ } ++ ++ if (e != 0) ++ { ++ printf ("errno should be 0: 'Success', but errno == %d: '%s'\n" ++ , e, strerror(e)); ++ fails ++; ++ } ++ } ++ else ++ { ++ if (n != (size_t) -1) ++ { ++ printf ("n should be -1, but n == %zd\n", n); ++ fails ++; ++ } ++ ++ if (e != exp_errno) ++ { ++ printf ("errno should be %d: '%s', but errno == %d: '%s'\n" ++ , exp_errno, strerror (exp_errno), e, strerror (e)); ++ fails ++; ++ } ++ } ++ ++ iconv_close (cd); ++ ++ if (fails > 0) ++ { ++ printf ("Errors in line %d while converting %s to %s.\n\n" ++ , line, from, to); ++ } ++ ++ return fails; ++} ++ ++static int ++do_test (void) ++{ ++ int fails = 0; ++ char buf[4]; ++ ++ /* This test runs iconv() with UTF character in range of an UTF16 surrogate. ++ UTF-16 high surrogate is in range 0xD800..0xDBFF and ++ UTF-16 low surrogate is in range 0xDC00..0xDFFF. ++ Converting from or to UTF-xx has to report errors in those cases. 
++ In UTF-16, surrogate pairs with a high surrogate in front of a low ++ surrogate are valid. */ ++ ++ /* Use RUN_UCS4_UTF32_INPUT to test conversion ... ++ ++ ... from INTERNAL to UTF-xx[LE|BE]: ++ Converting from UCS4 to UTF-xx[LE|BE] first converts UCS4 to INTERNAL ++ without checking for UTF-16 surrogate values ++ and then converts from INTERNAL to UTF-xx[LE|BE]. ++ The latter conversion has to report an error in those cases. ++ ++ ... from UTF-32[LE|BE] to INTERNAL: ++ Converting directly from UTF-32LE to UTF-8|16 is needed, ++ because e.g. s390x has iconv-modules which convert directly. */ ++#define RUN_UCS4_UTF32_INPUT(b0, b1, b2, b3, err, line) \ ++ buf[0] = b0; \ ++ buf[1] = b1; \ ++ buf[2] = b2; \ ++ buf[3] = b3; \ ++ fails += run_conversion ("UCS4", "UTF-8", buf, 4, err, line); \ ++ fails += run_conversion ("UCS4", "UTF-16LE", buf, 4, err, line); \ ++ fails += run_conversion ("UCS4", "UTF-16BE", buf, 4, err, line); \ ++ fails += run_conversion ("UCS4", "UTF-32LE", buf, 4, err, line); \ ++ fails += run_conversion ("UCS4", "UTF-32BE", buf, 4, err, line); \ ++ fails += run_conversion ("UTF-32BE", "WCHAR_T", buf, 4, err, line); \ ++ fails += run_conversion ("UTF-32BE", "UTF-8", buf, 4, err, line); \ ++ fails += run_conversion ("UTF-32BE", "UTF-16LE", buf, 4, err, line); \ ++ fails += run_conversion ("UTF-32BE", "UTF-16BE", buf, 4, err, line); \ ++ buf[0] = b3; \ ++ buf[1] = b2; \ ++ buf[2] = b1; \ ++ buf[3] = b0; \ ++ fails += run_conversion ("UTF-32LE", "WCHAR_T", buf, 4, err, line); \ ++ fails += run_conversion ("UTF-32LE", "UTF-8", buf, 4, err, line); \ ++ fails += run_conversion ("UTF-32LE", "UTF-16LE", buf, 4, err, line); \ ++ fails += run_conversion ("UTF-32LE", "UTF-16BE", buf, 4, err, line); ++ ++ /* Use UCS4/UTF32 input of 0xD7FF. */ ++ RUN_UCS4_UTF32_INPUT (0x0, 0x0, 0xD7, 0xFF, 0, __LINE__); ++ ++ /* Use UCS4/UTF32 input of 0xD800. */ ++ RUN_UCS4_UTF32_INPUT (0x0, 0x0, 0xD8, 0x00, EILSEQ, __LINE__); ++ ++ /* Use UCS4/UTF32 input of 0xDBFF. 
*/ ++ RUN_UCS4_UTF32_INPUT (0x0, 0x0, 0xDB, 0xFF, EILSEQ, __LINE__); ++ ++ /* Use UCS4/UTF32 input of 0xDC00. */ ++ RUN_UCS4_UTF32_INPUT (0x0, 0x0, 0xDC, 0x00, EILSEQ, __LINE__); ++ ++ /* Use UCS4/UTF32 input of 0xDFFF. */ ++ RUN_UCS4_UTF32_INPUT (0x0, 0x0, 0xDF, 0xFF, EILSEQ, __LINE__); ++ ++ /* Use UCS4/UTF32 input of 0xE000. */ ++ RUN_UCS4_UTF32_INPUT (0x0, 0x0, 0xE0, 0x00, 0, __LINE__); ++ ++ ++ /* Use RUN_UTF16_INPUT to test conversion from UTF16[LE|BE] to INTERNAL. ++ Converting directly from UTF-16 to UTF-8|32 is needed, ++ because e.g. s390x has iconv-modules which converts directly. ++ Use len == 2 or 4 to specify one or two UTF-16 characters. */ ++#define RUN_UTF16_INPUT(b0, b1, b2, b3, len, err, line) \ ++ buf[0] = b0; \ ++ buf[1] = b1; \ ++ buf[2] = b2; \ ++ buf[3] = b3; \ ++ fails += run_conversion ("UTF-16BE", "WCHAR_T", buf, len, err, line); \ ++ fails += run_conversion ("UTF-16BE", "UTF-8", buf, len, err, line); \ ++ fails += run_conversion ("UTF-16BE", "UTF-32LE", buf, len, err, line); \ ++ fails += run_conversion ("UTF-16BE", "UTF-32BE", buf, len, err, line); \ ++ buf[0] = b1; \ ++ buf[1] = b0; \ ++ buf[2] = b3; \ ++ buf[3] = b2; \ ++ fails += run_conversion ("UTF-16LE", "WCHAR_T", buf, len, err, line); \ ++ fails += run_conversion ("UTF-16LE", "UTF-8", buf, len, err, line); \ ++ fails += run_conversion ("UTF-16LE", "UTF-32LE", buf, len, err, line); \ ++ fails += run_conversion ("UTF-16LE", "UTF-32BE", buf, len, err, line); ++ ++ /* Use UTF16 input of 0xD7FF. */ ++ RUN_UTF16_INPUT (0xD7, 0xFF, 0xD7, 0xFF, 4, 0, __LINE__); ++ ++ /* Use [single] UTF16 high surrogate 0xD800 [with a valid character behind]. ++ And check an UTF16 surrogate pair [without valid low surrogate]. 
*/ ++ RUN_UTF16_INPUT (0xD8, 0x0, 0x0, 0x0, 2, EINVAL, __LINE__); ++ RUN_UTF16_INPUT (0xD8, 0x0, 0xD7, 0xFF, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xD8, 0x0, 0xD8, 0x0, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xD8, 0x0, 0xE0, 0x0, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xD8, 0x0, 0xDC, 0x0, 4, 0, __LINE__); ++ ++ /* Use [single] UTF16 high surrogate 0xDBFF [with a valid character behind]. ++ And check an UTF16 surrogate pair [without valid low surrogate]. */ ++ RUN_UTF16_INPUT (0xDB, 0xFF, 0x0, 0x0, 2, EINVAL, __LINE__); ++ RUN_UTF16_INPUT (0xDB, 0xFF, 0xD7, 0xFF, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xDB, 0xFF, 0xDB, 0xFF, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xDB, 0xFF, 0xE0, 0x0, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xDB, 0xFF, 0xDF, 0xFF, 4, 0, __LINE__); ++ ++ /* Use single UTF16 low surrogate 0xDC00 [with a valid character behind]. ++ And check an UTF16 surrogate pair [without valid high surrogate]. */ ++ RUN_UTF16_INPUT (0xDC, 0x0, 0x0, 0x0, 2, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xDC, 0x0, 0xD7, 0xFF, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xD8, 0x0, 0xDC, 0x0, 4, 0, __LINE__); ++ RUN_UTF16_INPUT (0xD7, 0xFF, 0xDC, 0x0, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xDC, 0x0, 0xDC, 0x0, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xE0, 0x0, 0xDC, 0x0, 4, EILSEQ, __LINE__); ++ ++ /* Use single UTF16 low surrogate 0xDFFF [with a valid character behind]. ++ And check an UTF16 surrogate pair [without valid high surrogate]. */ ++ RUN_UTF16_INPUT (0xDF, 0xFF, 0x0, 0x0, 2, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xDF, 0xFF, 0xD7, 0xFF, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xDB, 0xFF, 0xDF, 0xFF, 4, 0, __LINE__); ++ RUN_UTF16_INPUT (0xD7, 0xFF, 0xDF, 0xFF, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xDF, 0xFF, 0xDF, 0xFF, 4, EILSEQ, __LINE__); ++ RUN_UTF16_INPUT (0xE0, 0x0, 0xDF, 0xFF, 4, EILSEQ, __LINE__); ++ ++ /* Use UCS4/UTF32 input of 0xE000. 
*/ ++ RUN_UTF16_INPUT (0xE0, 0x0, 0xE0, 0x0, 4, 0, __LINE__); ++ ++ ++ /* Use RUN_UTF8_3BYTE_INPUT to test conversion from UTF-8 to INTERNAL. ++ Converting directly from UTF-8 to UTF-16|32 is needed, ++ because e.g. s390x has iconv-modules which converts directly. */ ++#define RUN_UTF8_3BYTE_INPUT(b0, b1, b2, err, line) \ ++ buf[0] = b0; \ ++ buf[1] = b1; \ ++ buf[2] = b2; \ ++ fails += run_conversion ("UTF-8", "WCHAR_T", buf, 3, err, line); \ ++ fails += run_conversion ("UTF-8", "UTF-16LE", buf, 3, err, line); \ ++ fails += run_conversion ("UTF-8", "UTF-16BE", buf, 3, err, line); \ ++ fails += run_conversion ("UTF-8", "UTF-32LE", buf, 3, err, line); \ ++ fails += run_conversion ("UTF-8", "UTF-32BE", buf, 3, err, line); ++ ++ /* Use UTF-8 input of 0xD7FF. */ ++ RUN_UTF8_3BYTE_INPUT (0xED, 0x9F, 0xBF, 0, __LINE__); ++ ++ /* Use UTF-8 input of 0xD800. */ ++ RUN_UTF8_3BYTE_INPUT (0xED, 0xA0, 0x80, EILSEQ, __LINE__); ++ ++ /* Use UTF-8 input of 0xDBFF. */ ++ RUN_UTF8_3BYTE_INPUT (0xED, 0xAF, 0xBF, EILSEQ, __LINE__); ++ ++ /* Use UTF-8 input of 0xDC00. */ ++ RUN_UTF8_3BYTE_INPUT (0xED, 0xB0, 0x80, EILSEQ, __LINE__); ++ ++ /* Use UTF-8 input of 0xDFFF. */ ++ RUN_UTF8_3BYTE_INPUT (0xED, 0xBF, 0xBF, EILSEQ, __LINE__); ++ ++ /* Use UTF-8 input of 0xF000. */ ++ RUN_UTF8_3BYTE_INPUT (0xEF, 0x80, 0x80, 0, __LINE__); ++ ++ return fails > 0 ? EXIT_FAILURE : EXIT_SUCCESS; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" +diff --git a/iconvdata/utf-16.c b/iconvdata/utf-16.c +index b4ddfeb..e833d3c 100644 +--- a/iconvdata/utf-16.c ++++ b/iconvdata/utf-16.c +@@ -294,6 +294,12 @@ gconv_end (struct __gconv_step *data) + { \ + uint16_t u2; \ + \ ++ if (__glibc_unlikely (u1 >= 0xdc00)) \ ++ { \ ++ /* This is no valid first word for a surrogate. */ \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (2); \ ++ } \ ++ \ + /* It's a surrogate character. At least the first word says \ + it is. 
*/ \ + if (__builtin_expect (inptr + 4 > inend, 0)) \ +@@ -328,6 +334,12 @@ gconv_end (struct __gconv_step *data) + } \ + else \ + { \ ++ if (__glibc_unlikely (u1 >= 0xdc00)) \ ++ { \ ++ /* This is no valid first word for a surrogate. */ \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (2); \ ++ } \ ++ \ + /* It's a surrogate character. At least the first word says \ + it is. */ \ + if (__builtin_expect (inptr + 4 > inend, 0)) \ +diff --git a/iconvdata/utf-32.c b/iconvdata/utf-32.c +index e0c4b19..1173d6f 100644 +--- a/iconvdata/utf-32.c ++++ b/iconvdata/utf-32.c +@@ -238,7 +238,8 @@ gconv_end (struct __gconv_step *data) + if (swap) \ + u1 = bswap_32 (u1); \ + \ +- if (__builtin_expect (u1 >= 0x110000, 0)) \ ++ if (__builtin_expect (u1 >= 0x110000 \ ++ || (u1 >= 0xd800 && u1 < 0xe000), 0)) \ + { \ + /* This is illegal. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (4); \ +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-2.patch b/SOURCES/glibc-rh1380680-2.patch new file mode 100644 index 0000000..4341f93 --- /dev/null +++ b/SOURCES/glibc-rh1380680-2.patch @@ -0,0 +1,114 @@ +From 29f8926b153d59ba18f67ce5445dce66bacc0cbf Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 15:25:04 +0100 +Subject: [PATCH 02/17] S390: Configure check for vector support in gcc. + +Upstream commit 9b7f05599a92dead97d6683bc838a57bc63ac52b + +The S390 specific test checks if the gcc has support for vector registers +by compiling an inline assembly which clobbers vector registers. +On success the macro HAVE_S390_VX_GCC_SUPPORT is defined. +This macro can be used to determine if e.g. clobbering vector registers +is allowed or not. + +ChangeLog: + + * config.h.in (HAVE_S390_VX_GCC_SUPPORT): New macro undefine. + * sysdeps/s390/configure.in: Add test for S390 vector register + support in gcc. + * sysdeps/s390/configure: Regenerated. 
+--- + config.h.in | 4 ++++ + sysdeps/s390/configure | 33 +++++++++++++++++++++++++++++++++ + sysdeps/s390/configure.in | 22 ++++++++++++++++++++++ + 3 files changed, 59 insertions(+) + +diff --git a/config.h.in b/config.h.in +index f7f2388..62e04c7 100644 +--- a/config.h.in ++++ b/config.h.in +@@ -113,6 +113,10 @@ + /* Define if assembler supports vector instructions on S390. */ + #undef HAVE_S390_VX_ASM_SUPPORT + ++/* Define if gcc supports vector registers as clobbers in inline assembly ++ on S390. */ ++#undef HAVE_S390_VX_GCC_SUPPORT ++ + /* Define if gcc supports FMA4. */ + #undef HAVE_FMA4_SUPPORT + +diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure +index cc4c1e0..eb49a4c 100644 +--- a/sysdeps/s390/configure ++++ b/sysdeps/s390/configure +@@ -42,3 +42,36 @@ else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Use binutils with vector-support in order to use optimized implementations." >&5 + $as_echo "$as_me: WARNING: Use binutils with vector-support in order to use optimized implementations." >&2;} + fi ++ ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for S390 vector support in gcc" >&5 ++$as_echo_n "checking for S390 vector support in gcc... " >&6; } ++if ${libc_cv_gcc_s390_vx+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ cat > conftest.c <<\EOF ++void testvecclobber () ++{ ++ __asm__ ("" : : : "v16"); ++} ++EOF ++if { ac_try='${CC-cc} --shared conftest.c -o conftest.o &> /dev/null' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 ++ test $ac_status = 0; }; } ; ++then ++ libc_cv_gcc_s390_vx=yes ++else ++ libc_cv_gcc_s390_vx=no ++fi ++rm -f conftest* ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_gcc_s390_vx" >&5 ++$as_echo "$libc_cv_gcc_s390_vx" >&6; } ++ ++if test "$libc_cv_gcc_s390_vx" = yes ; ++then ++ $as_echo "#define HAVE_S390_VX_GCC_SUPPORT 1" >>confdefs.h ++ ++fi +diff --git a/sysdeps/s390/configure.in b/sysdeps/s390/configure.in +index 733e356..a3b1f25 100644 +--- a/sysdeps/s390/configure.in ++++ b/sysdeps/s390/configure.in +@@ -31,3 +31,25 @@ then + else + AC_MSG_WARN([Use binutils with vector-support in order to use optimized implementations.]) + fi ++ ++AC_CACHE_CHECK(for S390 vector support in gcc, libc_cv_gcc_s390_vx, [dnl ++cat > conftest.c <<\EOF ++void testvecclobber () ++{ ++ __asm__ ("" : : : "v16"); ++} ++EOF ++dnl ++dnl test, if gcc supports S390 vector registers as clobber in inline assembly ++if AC_TRY_COMMAND([${CC-cc} --shared conftest.c -o conftest.o &> /dev/null]) ; ++then ++ libc_cv_gcc_s390_vx=yes ++else ++ libc_cv_gcc_s390_vx=no ++fi ++rm -f conftest* ]) ++ ++if test "$libc_cv_gcc_s390_vx" = yes ; ++then ++ AC_DEFINE(HAVE_S390_VX_GCC_SUPPORT) ++fi +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-3.patch b/SOURCES/glibc-rh1380680-3.patch new file mode 100644 index 0000000..5687447 --- /dev/null +++ b/SOURCES/glibc-rh1380680-3.patch @@ -0,0 +1,525 @@ +From c3fd92047b3cc1b66b9b241be0765fe1e72678a1 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 15:29:26 +0100 +Subject: [PATCH 03/17] S390: Optimize 8bit-generic iconv modules. + +Upstream commit 4690dab084f854bf0013b5eaabcf90c2d5b692ff + +This patch introduces a s390 specific 8bit-generic.c file which provides an +optimized version for z13 with translate-/vector-instructions, which will be +chosen at runtime via ifunc. +If the build-environment lacks vector support, then iconvdata/8bit-generic.c +is used without any change. 
Otherwise iconvdata/8bit-generic.c is used to create +conversion loop routines without vector instructions as fallback, if vector +instructions aren't available at runtime. + +The vector routines can only be used with charsets where the maximum UCS4 value +fits in 1 byte size. Then the hardware translate-instruction is used +to translate between up to 256 generic characters and "1 byte UCS4" +characters at once. The vector instructions are used to convert between +the "1 byte UCS4" and UCS4. + +The gen-8bit.sh script in sysdeps/s390/multiarch generates the conversion +table to_ucs1. Therefore in sysdeps/s390/multiarch/Makefile is added an +override define generate-8bit-table, which is originally defined in +iconvdata/Makefile. This version calls the gen-8bit.sh in iconvdata folder +and the s390 one. + +ChangeLog: + + * sysdeps/s390/multiarch/8bit-generic.c: New File. + * sysdeps/s390/multiarch/gen-8bit.sh: New File. + * sysdeps/s390/multiarch/Makefile (generate-8bit-table): + New override define. + * sysdeps/s390/multiarch/iconv/skeleton.c: Likewise. +--- + sysdeps/s390/multiarch/8bit-generic.c | 415 ++++++++++++++++++++++++++++++++ + sysdeps/s390/multiarch/Makefile | 10 + + sysdeps/s390/multiarch/gen-8bit.sh | 6 + + sysdeps/s390/multiarch/iconv/skeleton.c | 21 ++ + 4 files changed, 452 insertions(+) + create mode 100644 sysdeps/s390/multiarch/8bit-generic.c + create mode 100644 sysdeps/s390/multiarch/gen-8bit.sh + create mode 100644 sysdeps/s390/multiarch/iconv/skeleton.c + +diff --git a/sysdeps/s390/multiarch/8bit-generic.c b/sysdeps/s390/multiarch/8bit-generic.c +new file mode 100644 +index 0000000..93565e1 +--- /dev/null ++++ b/sysdeps/s390/multiarch/8bit-generic.c +@@ -0,0 +1,415 @@ ++/* Generic conversion to and from 8bit charsets - S390 version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++ ++# if defined HAVE_S390_VX_GCC_SUPPORT ++# define ASM_CLOBBER_VR(NR) , NR ++# else ++# define ASM_CLOBBER_VR(NR) ++# endif ++ ++/* Generate the conversion loop routines without vector instructions as ++ fallback, if vector instructions aren't available at runtime. */ ++# define IGNORE_ICONV_SKELETON ++# define from_generic __from_generic_c ++# define to_generic __to_generic_c ++# include "iconvdata/8bit-generic.c" ++# undef IGNORE_ICONV_SKELETON ++# undef from_generic ++# undef to_generic ++ ++/* Generate the conversion routines with vector instructions. The vector ++ routines can only be used with charsets where the maximum UCS4 value ++ fits in 1 byte size. Then the hardware translate-instruction is used ++ to translate between multiple generic characters and "1 byte UCS4" ++ characters at once. The vector instructions are used to convert between ++ the "1 byte UCS4" and UCS4. */ ++# include ++# include ++ ++# undef FROM_LOOP ++# undef TO_LOOP ++# define FROM_LOOP __from_generic_vx ++# define TO_LOOP __to_generic_vx ++ ++# define MIN_NEEDED_FROM 1 ++# define MIN_NEEDED_TO 4 ++# define ONE_DIRECTION 0 ++ ++/* First define the conversion function from the 8bit charset to UCS4. 
*/ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define LOOPFCT FROM_LOOP ++# define BODY_FROM_ORIG \ ++ { \ ++ uint32_t ch = to_ucs4[*inptr]; \ ++ \ ++ if (HAS_HOLES && __builtin_expect (ch == L'\0', 0) && *inptr != '\0') \ ++ { \ ++ /* This is an illegal character. */ \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ ++ } \ ++ \ ++ put32 (outptr, ch); \ ++ outptr += 4; \ ++ ++inptr; \ ++ } ++ ++# define BODY \ ++ { \ ++ if (__builtin_expect (inend - inptr < 16, 1) \ ++ || outend - outptr < 64) \ ++ /* Convert remaining bytes with c code. */ \ ++ BODY_FROM_ORIG \ ++ else \ ++ { \ ++ /* Convert 16 ... 256 bytes at once with tr-instruction. */ \ ++ size_t index; \ ++ char buf[256]; \ ++ size_t loop_count = (inend - inptr) / 16; \ ++ if (loop_count > (outend - outptr) / 64) \ ++ loop_count = (outend - outptr) / 64; \ ++ if (loop_count > 16) \ ++ loop_count = 16; \ ++ __asm__ volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ " sllk %[R_I],%[R_LI],4\n\t" \ ++ " ahi %[R_I],-1\n\t" \ ++ /* Execute mvc and tr with correct len. */ \ ++ " exrl %[R_I],21f\n\t" \ ++ " exrl %[R_I],22f\n\t" \ ++ /* Post-processing. */ \ ++ " lghi %[R_I],0\n\t" \ ++ " vzero %%v0\n\t" \ ++ "0: \n\t" \ ++ /* Find invalid character - value is zero. */ \ ++ " vl %%v16,0(%[R_I],%[R_BUF])\n\t" \ ++ " vceqbs %%v23,%%v0,%%v16\n\t" \ ++ " jle 10f\n\t" \ ++ "1: \n\t" \ ++ /* Enlarge to UCS4. */ \ ++ " vuplhb %%v17,%%v16\n\t" \ ++ " vupllb %%v18,%%v16\n\t" \ ++ " vuplhh %%v19,%%v17\n\t" \ ++ " vupllh %%v20,%%v17\n\t" \ ++ " vuplhh %%v21,%%v18\n\t" \ ++ " vupllh %%v22,%%v18\n\t" \ ++ /* Store 64bytes to buf_out. 
*/ \ ++ " vstm %%v19,%%v22,0(%[R_OUT])\n\t" \ ++ " aghi %[R_I],16\n\t" \ ++ " la %[R_OUT],64(%[R_OUT])\n\t" \ ++ " brct %[R_LI],0b\n\t" \ ++ " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ ++ " j 20f\n\t" \ ++ "21: mvc 0(1,%[R_BUF]),0(%[R_IN])\n\t" \ ++ "22: tr 0(1,%[R_BUF]),0(%[R_TBL])\n\t" \ ++ /* Possibly invalid character found. */ \ ++ "10: \n\t" \ ++ /* Test if input was zero, too. */ \ ++ " vl %%v24,0(%[R_I],%[R_IN])\n\t" \ ++ " vceqb %%v24,%%v0,%%v24\n\t" \ ++ /* Zeros in buf (v23) and inptr (v24) are marked \ ++ with one bits. After xor, invalid characters \ ++ are marked as one bits. Proceed, if no \ ++ invalid characters are found. */ \ ++ " vx %%v24,%%v23,%%v24\n\t" \ ++ " vfenebs %%v24,%%v24,%%v0\n\t" \ ++ " jo 1b\n\t" \ ++ /* Found an invalid translation. \ ++ Store the preceding chars. */ \ ++ " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ ++ " vlgvb %[R_I],%%v24,7\n\t" \ ++ " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ ++ " sll %[R_I],2\n\t" \ ++ " ahi %[R_I],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " lgr %[R_LI],%[R_I]\n\t" \ ++ " vuplhb %%v17,%%v16\n\t" \ ++ " vuplhh %%v19,%%v17\n\t" \ ++ " vstl %%v19,%[R_I],0(%[R_OUT])\n\t" \ ++ " ahi %[R_I],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllh %%v20,%%v17\n\t" \ ++ " vstl %%v20,%[R_I],16(%[R_OUT])\n\t" \ ++ " ahi %[R_I],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllb %%v18,%%v16\n\t" \ ++ " vuplhh %%v21,%%v18\n\t" \ ++ " vstl %%v21,%[R_I],32(%[R_OUT])\n\t" \ ++ " ahi %[R_I],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllh %%v22,%%v18\n\t" \ ++ " vstl %%v22,%[R_I],48(%[R_OUT])\n\t" \ ++ "11: \n\t" \ ++ " la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t" \ ++ "20: \n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_OUT] "+a" (outptr), [R_I] "=&a" (index) \ ++ , [R_LI] "+a" (loop_count) \ ++ : /* inputs */ [R_BUF] "a" (buf) \ ++ , [R_TBL] "a" (to_ucs1) \ ++ : /* clobber list*/ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v0") ASM_CLOBBER_VR ("v16") \ ++ ASM_CLOBBER_VR ("v17") ASM_CLOBBER_VR ("v18") \ ++ ASM_CLOBBER_VR ("v19") ASM_CLOBBER_VR ("v20") \ ++ 
ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22") \ ++ ASM_CLOBBER_VR ("v23") ASM_CLOBBER_VR ("v24") \ ++ ); \ ++ /* Error occured? */ \ ++ if (loop_count != 0) \ ++ { \ ++ /* Found an invalid character! */ \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ ++ } \ ++ } \ ++ } ++ ++# define LOOP_NEED_FLAGS ++# include ++ ++/* Next, define the other direction - from UCS4 to 8bit charset. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_TO ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++# define LOOPFCT TO_LOOP ++# define BODY_TO_ORIG \ ++ { \ ++ uint32_t ch = get32 (inptr); \ ++ \ ++ if (__builtin_expect (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0]), 0)\ ++ || (__builtin_expect (from_ucs4[ch], '\1') == '\0' && ch != 0)) \ ++ { \ ++ UNICODE_TAG_HANDLER (ch, 4); \ ++ \ ++ /* This is an illegal character. */ \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } \ ++ \ ++ *outptr++ = from_ucs4[ch]; \ ++ inptr += 4; \ ++ } ++# define BODY \ ++ { \ ++ if (__builtin_expect (inend - inptr < 64, 1) \ ++ || outend - outptr < 16) \ ++ /* Convert remaining bytes with c code. */ \ ++ BODY_TO_ORIG \ ++ else \ ++ { \ ++ /* Convert 64 ... 1024 bytes at once with tr-instruction. */ \ ++ size_t index, tmp; \ ++ char buf[256]; \ ++ size_t loop_count = (inend - inptr) / 64; \ ++ uint32_t max = sizeof (from_ucs4) / sizeof (from_ucs4[0]); \ ++ if (loop_count > (outend - outptr) / 16) \ ++ loop_count = (outend - outptr) / 16; \ ++ if (loop_count > 16) \ ++ loop_count = 16; \ ++ size_t remaining_loop_count = loop_count; \ ++ /* Step 1: Check for ch>=max, ch == 0 and shorten to bytes. \ ++ (ch == 0 is no error, but is handled differently) */ \ ++ __asm__ volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ /* Setup to check for ch >= max. 
*/ \ ++ " vzero %%v21\n\t" \ ++ " vleih %%v21,-24576,0\n\t" /* element 0: > */ \ ++ " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \ ++ " vlvgf %%v20,%[R_MAX],0\n\t" /* element 0: val */ \ ++ /* Process in 64byte - 16 characters blocks. */ \ ++ " lghi %[R_I],0\n\t" \ ++ " lghi %[R_TMP],0\n\t" \ ++ "0: \n\t" \ ++ " vlm %%v16,%%v19,0(%[R_IN])\n\t" \ ++ /* Test for ch >= max and ch == 0. */ \ ++ " vstrczfs %%v22,%%v16,%%v20,%%v21\n\t" \ ++ " jno 10f\n\t" \ ++ " vstrczfs %%v22,%%v17,%%v20,%%v21\n\t" \ ++ " jno 11f\n\t" \ ++ " vstrczfs %%v22,%%v18,%%v20,%%v21\n\t" \ ++ " jno 12f\n\t" \ ++ " vstrczfs %%v22,%%v19,%%v20,%%v21\n\t" \ ++ " jno 13f\n\t" \ ++ /* Shorten to byte values. */ \ ++ " vpkf %%v16,%%v16,%%v17\n\t" \ ++ " vpkf %%v18,%%v18,%%v19\n\t" \ ++ " vpkh %%v16,%%v16,%%v18\n\t" \ ++ /* Store 16bytes to buf. */ \ ++ " vst %%v16,0(%[R_I],%[R_BUF])\n\t" \ ++ /* Loop until all blocks are processed. */ \ ++ " la %[R_IN],64(%[R_IN])\n\t" \ ++ " aghi %[R_I],16\n\t" \ ++ " brct %[R_LI],0b\n\t" \ ++ " j 20f\n\t" \ ++ /* Found error ch >= max or ch == 0. */ \ ++ "13: aghi %[R_TMP],4\n\t" \ ++ "12: aghi %[R_TMP],4\n\t" \ ++ "11: aghi %[R_TMP],4\n\t" \ ++ "10: vlgvb %[R_I],%%v22,7\n\t" \ ++ " srlg %[R_I],%[R_I],2\n\t" \ ++ " agr %[R_I],%[R_TMP]\n\t" \ ++ "20: \n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_I] "=&a" (index) \ ++ , [R_TMP] "=d" (tmp) \ ++ , [R_LI] "+d" (remaining_loop_count) \ ++ : /* inputs */ [R_BUF] "a" (buf) \ ++ , [R_MAX] "d" (max) \ ++ : /* clobber list*/ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") \ ++ ); \ ++ /* Error occured in step 1? An error (ch >= max || ch == 0) \ ++ occured, if remaining_loop_count > 0. The error occured \ ++ at character-index (index) after already processed blocks. 
*/ \ ++ loop_count -= remaining_loop_count; \ ++ if (loop_count > 0) \ ++ { \ ++ /* Step 2: Translate already processed blocks in buf and \ ++ check for errors (from_ucs4[ch] == 0). */ \ ++ __asm__ volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ " sllk %[R_I],%[R_LI],4\n\t" \ ++ " ahi %[R_I],-1\n\t" \ ++ /* Execute tr with correct len. */ \ ++ " exrl %[R_I],21f\n\t" \ ++ /* Post-processing. */ \ ++ " lghi %[R_I],0\n\t" \ ++ "0: \n\t" \ ++ /* Find invalid character - value == 0. */ \ ++ " vl %%v16,0(%[R_I],%[R_BUF])\n\t" \ ++ " vfenezbs %%v17,%%v16,%%v16\n\t" \ ++ " je 10f\n\t" \ ++ /* Store 16bytes to buf_out. */ \ ++ " vst %%v16,0(%[R_I],%[R_OUT])\n\t" \ ++ " aghi %[R_I],16\n\t" \ ++ " brct %[R_LI],0b\n\t" \ ++ " la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ ++ " j 20f\n\t" \ ++ "21: tr 0(1,%[R_BUF]),0(%[R_TBL])\n\t" \ ++ /* Found an error: from_ucs4[ch] == 0. */ \ ++ "10: la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ ++ " vlgvb %[R_I],%%v17,7\n\t" \ ++ "20: \n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_OUT] "+a" (outptr) \ ++ , [R_I] "=&a" (tmp) \ ++ , [R_LI] "+d" (loop_count) \ ++ : /* inputs */ [R_BUF] "a" (buf) \ ++ , [R_TBL] "a" (from_ucs4) \ ++ : /* clobber list*/ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") \ ++ ASM_CLOBBER_VR ("v17") \ ++ ); \ ++ /* Error occured in processed bytes of step 2? \ ++ Thus possible error in step 1 is obselete.*/ \ ++ if (tmp < 16) \ ++ { \ ++ index = tmp; \ ++ inptr -= loop_count * 64; \ ++ } \ ++ } \ ++ /* Error occured in step 1/2? */ \ ++ if (index < 16) \ ++ { \ ++ /* Found an invalid character (see step 2) or zero \ ++ (see step 1) at index! Convert the chars before index \ ++ manually. If there is a zero at index detected by step 1, \ ++ there could be invalid characters before this zero. 
*/ \ ++ int i; \ ++ uint32_t ch; \ ++ for (i = 0; i < index; i++) \ ++ { \ ++ ch = get32 (inptr); \ ++ if (__builtin_expect (from_ucs4[ch], '\1') == '\0') \ ++ break; \ ++ *outptr++ = from_ucs4[ch]; \ ++ inptr += 4; \ ++ } \ ++ if (i == index) \ ++ { \ ++ ch = get32 (inptr); \ ++ if (ch == 0) \ ++ { \ ++ /* This is no error, but handled differently. */ \ ++ *outptr++ = from_ucs4[ch]; \ ++ inptr += 4; \ ++ continue; \ ++ } \ ++ } \ ++ \ ++ UNICODE_TAG_HANDLER (ch, 4); \ ++ \ ++ /* This is an illegal character. */ \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } \ ++ } \ ++ } ++ ++# define LOOP_NEED_FLAGS ++# include ++ ++ ++/* Generate ifunc'ed loop function. */ ++__typeof(__from_generic_c) ++__attribute__ ((ifunc ("__from_generic_resolver"))) ++__from_generic; ++ ++static void * ++__from_generic_resolver (unsigned long int dl_hwcap) ++{ ++ if (sizeof (from_ucs4) / sizeof (from_ucs4[0]) <= 256 ++ && dl_hwcap & HWCAP_S390_VX) ++ return &__from_generic_vx; ++ else ++ return &__from_generic_c; ++} ++ ++__typeof(__to_generic_c) ++__attribute__ ((ifunc ("__to_generic_resolver"))) ++__to_generic; ++ ++static void * ++__to_generic_resolver (unsigned long int dl_hwcap) ++{ ++ if (sizeof (from_ucs4) / sizeof (from_ucs4[0]) <= 256 ++ && dl_hwcap & HWCAP_S390_VX) ++ return &__to_generic_vx; ++ else ++ return &__to_generic_c; ++} ++ ++strong_alias (__to_generic_c_single, __to_generic_single) ++ ++# undef FROM_LOOP ++# undef TO_LOOP ++# define FROM_LOOP __from_generic ++# define TO_LOOP __to_generic ++# include ++ ++#else ++/* Generate this module without ifunc if build environment lacks vector ++ support. Instead the common 8bit-generic.c is used. 
*/ ++# include "iconvdata/8bit-generic.c" ++#endif /* !defined HAVE_S390_VX_ASM_SUPPORT */ +diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile +index 0805b07..11ad2b9 100644 +--- a/sysdeps/s390/multiarch/Makefile ++++ b/sysdeps/s390/multiarch/Makefile +@@ -42,3 +42,13 @@ sysdep_routines += wcslen wcslen-vx wcslen-c \ + wmemset wmemset-vx wmemset-c \ + wmemcmp wmemcmp-vx wmemcmp-c + endif ++ ++ifeq ($(subdir),iconvdata) ++override define generate-8bit-table ++$(make-target-directory) ++LC_ALL=C $(SHELL) ./gen-8bit.sh $< > $(@:stmp=T) ++LC_ALL=C $(SHELL) ../sysdeps/s390/multiarch/gen-8bit.sh $< >> $(@:stmp=T) ++$(move-if-change) $(@:stmp=T) $(@:stmp=h) ++touch $@ ++endef ++endif +diff --git a/sysdeps/s390/multiarch/gen-8bit.sh b/sysdeps/s390/multiarch/gen-8bit.sh +new file mode 100644 +index 0000000..6f88c4b +--- /dev/null ++++ b/sysdeps/s390/multiarch/gen-8bit.sh +@@ -0,0 +1,6 @@ ++#!/bin/sh ++echo "static const uint8_t to_ucs1[256] = {" ++sed -ne '/^[^[:space:]]*[[:space:]]*.x00/d;/^END/q' \ ++ -e 's/^[[:space:]]*.x\(..\).*/ [0x\2] = 0x\1,/p' \ ++ "$@" | sort -u ++echo "};" +diff --git a/sysdeps/s390/multiarch/iconv/skeleton.c b/sysdeps/s390/multiarch/iconv/skeleton.c +new file mode 100644 +index 0000000..3a90031 +--- /dev/null ++++ b/sysdeps/s390/multiarch/iconv/skeleton.c +@@ -0,0 +1,21 @@ ++/* Skeleton for a conversion module - S390 version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef IGNORE_ICONV_SKELETON ++# include_next ++#endif +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-4.patch b/SOURCES/glibc-rh1380680-4.patch new file mode 100644 index 0000000..e92c75b --- /dev/null +++ b/SOURCES/glibc-rh1380680-4.patch @@ -0,0 +1,1314 @@ +From 53f860e80162b09c44b48f207342c1452289072c Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 15:37:29 +0100 +Subject: [PATCH 04/17] S390: Optimize builtin iconv-modules. + +Upstream commit 3b704e26b33e35d99de920f8462d8e438f89be39 + +This patch introduces a s390 specific gconv_simple.c file which provides +optimized versions for z13 with vector instructions, which will be chosen at +runtime via ifunc. +The optimized conversions can convert between internal and ascii, ucs4, ucs4le, +ucs2, ucs2le. +If the build-environment lacks vector support, then iconv/gconv_simple.c +is used without any change. Otherwise iconv/gconv_simple.c is used to create +conversion loop routines without vector instructions as fallback, if vector +instructions aren't available at runtime. + +ChangeLog: + + * sysdeps/s390/multiarch/gconv_simple.c: New File. + * sysdeps/s390/multiarch/Makefile (sysdep_routines): Add gconv_simple. 
+--- + sysdeps/s390/multiarch/Makefile | 4 + + sysdeps/s390/multiarch/gconv_simple.c | 1266 +++++++++++++++++++++++++++++++++ + 2 files changed, 1270 insertions(+) + create mode 100644 sysdeps/s390/multiarch/gconv_simple.c + +diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile +index 11ad2b9..24949cd 100644 +--- a/sysdeps/s390/multiarch/Makefile ++++ b/sysdeps/s390/multiarch/Makefile +@@ -52,3 +52,7 @@ $(move-if-change) $(@:stmp=T) $(@:stmp=h) + touch $@ + endef + endif ++ ++ifeq ($(subdir),iconv) ++sysdep_routines += gconv_simple ++endif +diff --git a/sysdeps/s390/multiarch/gconv_simple.c b/sysdeps/s390/multiarch/gconv_simple.c +new file mode 100644 +index 0000000..dc53a48 +--- /dev/null ++++ b/sysdeps/s390/multiarch/gconv_simple.c +@@ -0,0 +1,1266 @@ ++/* Simple transformations functions - s390 version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++# include ++ ++# if defined HAVE_S390_VX_GCC_SUPPORT ++# define ASM_CLOBBER_VR(NR) , NR ++# else ++# define ASM_CLOBBER_VR(NR) ++# endif ++ ++# define ICONV_C_NAME(NAME) __##NAME##_c ++# define ICONV_VX_NAME(NAME) __##NAME##_vx ++# define ICONV_VX_IFUNC(FUNC) \ ++ extern __typeof (ICONV_C_NAME (FUNC)) __##FUNC; \ ++ s390_vx_libc_ifunc (__##FUNC) \ ++ int FUNC (struct __gconv_step *step, struct __gconv_step_data *data, \ ++ const unsigned char **inptrp, const unsigned char *inend, \ ++ unsigned char **outbufstart, size_t *irreversible, \ ++ int do_flush, int consume_incomplete) \ ++ { \ ++ return __##FUNC (step, data, inptrp, inend,outbufstart, \ ++ irreversible, do_flush, consume_incomplete); \ ++ } ++# define ICONV_VX_SINGLE(NAME) \ ++ static __typeof (NAME##_single) __##NAME##_vx_single __attribute__((alias(#NAME "_single"))); ++ ++/* Generate the transformations which are used, if the target machine does not ++ support vector instructions. 
*/ ++# define __gconv_transform_ascii_internal \ ++ ICONV_C_NAME (__gconv_transform_ascii_internal) ++# define __gconv_transform_internal_ascii \ ++ ICONV_C_NAME (__gconv_transform_internal_ascii) ++# define __gconv_transform_internal_ucs4le \ ++ ICONV_C_NAME (__gconv_transform_internal_ucs4le) ++# define __gconv_transform_ucs4_internal \ ++ ICONV_C_NAME (__gconv_transform_ucs4_internal) ++# define __gconv_transform_ucs4le_internal \ ++ ICONV_C_NAME (__gconv_transform_ucs4le_internal) ++# define __gconv_transform_ucs2_internal \ ++ ICONV_C_NAME (__gconv_transform_ucs2_internal) ++# define __gconv_transform_ucs2reverse_internal \ ++ ICONV_C_NAME (__gconv_transform_ucs2reverse_internal) ++# define __gconv_transform_internal_ucs2 \ ++ ICONV_C_NAME (__gconv_transform_internal_ucs2) ++# define __gconv_transform_internal_ucs2reverse \ ++ ICONV_C_NAME (__gconv_transform_internal_ucs2reverse) ++ ++ ++# include ++ ++# undef __gconv_transform_ascii_internal ++# undef __gconv_transform_internal_ascii ++# undef __gconv_transform_internal_ucs4le ++# undef __gconv_transform_ucs4_internal ++# undef __gconv_transform_ucs4le_internal ++# undef __gconv_transform_ucs2_internal ++# undef __gconv_transform_ucs2reverse_internal ++# undef __gconv_transform_internal_ucs2 ++# undef __gconv_transform_internal_ucs2reverse ++ ++/* Now define the functions with vector support. */ ++# if defined __s390x__ ++# define CONVERT_32BIT_SIZE_T(REG) ++# else ++# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t" ++# endif ++ ++/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */ ++# define DEFINE_INIT 0 ++# define DEFINE_FINI 0 ++# define MIN_NEEDED_FROM 1 ++# define MIN_NEEDED_TO 4 ++# define FROM_DIRECTION 1 ++# define FROM_LOOP ICONV_VX_NAME (ascii_internal_loop) ++# define TO_LOOP ICONV_VX_NAME (ascii_internal_loop) /* This is not used. 
*/ ++# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ascii_internal) ++# define ONE_DIRECTION 1 ++ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define LOOPFCT FROM_LOOP ++# define BODY_ORIG_ERROR \ ++ /* The value is too large. We don't try transliteration here since \ ++ this is not an error because of the lack of possibilities to \ ++ represent the result. This is a genuine bug in the input since \ ++ ASCII does not allow such values. */ \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (1); ++ ++# define BODY_ORIG \ ++ { \ ++ if (__glibc_unlikely (*inptr > '\x7f')) \ ++ { \ ++ BODY_ORIG_ERROR \ ++ } \ ++ else \ ++ { \ ++ /* It's an one byte sequence. */ \ ++ *((uint32_t *) outptr) = *inptr++; \ ++ outptr += sizeof (uint32_t); \ ++ } \ ++ } ++# define BODY \ ++ { \ ++ size_t len = inend - inptr; \ ++ if (len > (outend - outptr) / 4) \ ++ len = (outend - outptr) / 4; \ ++ size_t loop_count, tmp; \ ++ __asm__ volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_LEN]) \ ++ " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \ ++ " srlg %[R_LI],%[R_LEN],4\n\t" \ ++ " vrepib %%v31,0x20\n\t" \ ++ " clgije %[R_LI],0,1f\n\t" \ ++ "0: \n\t" /* Handle 16-byte blocks. */ \ ++ " vl %%v16,0(%[R_IN])\n\t" \ ++ /* Checking for values > 0x7f. */ \ ++ " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" \ ++ /* Enlarge to UCS4. */ \ ++ " vuplhb %%v17,%%v16\n\t" \ ++ " vupllb %%v18,%%v16\n\t" \ ++ " vuplhh %%v19,%%v17\n\t" \ ++ " vupllh %%v20,%%v17\n\t" \ ++ " vuplhh %%v21,%%v18\n\t" \ ++ " vupllh %%v22,%%v18\n\t" \ ++ /* Store 64bytes to buf_out. */ \ ++ " vstm %%v19,%%v22,0(%[R_OUT])\n\t" \ ++ " la %[R_IN],16(%[R_IN])\n\t" \ ++ " la %[R_OUT],64(%[R_OUT])\n\t" \ ++ " brctg %[R_LI],0b\n\t" \ ++ " lghi %[R_LI],15\n\t" \ ++ " ngr %[R_LEN],%[R_LI]\n\t" \ ++ " je 20f\n\t" /* Jump away if no remaining bytes. */ \ ++ /* Handle remaining bytes. 
*/ \ ++ "1: aghik %[R_LI],%[R_LEN],-1\n\t" \ ++ " jl 20f\n\t" /* Jump away if no remaining bytes. */ \ ++ " vll %%v16,%[R_LI],0(%[R_IN])\n\t" \ ++ /* Checking for values > 0x7f. */ \ ++ " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ ++ " vlgvb %[R_TMP],%%v17,7\n\t" \ ++ " clr %[R_TMP],%[R_LI]\n\t" \ ++ " locrh %[R_TMP],%[R_LEN]\n\t" \ ++ " locghih %[R_LEN],0\n\t" \ ++ " j 12f\n\t" \ ++ "10:\n\t" \ ++ /* Found a value > 0x7f. \ ++ Store the preceding chars. */ \ ++ " vlgvb %[R_TMP],%%v17,7\n\t" \ ++ "12: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " sllk %[R_TMP],%[R_TMP],2\n\t" \ ++ " ahi %[R_TMP],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " lgr %[R_LI],%[R_TMP]\n\t" \ ++ " vuplhb %%v17,%%v16\n\t" \ ++ " vuplhh %%v19,%%v17\n\t" \ ++ " vstl %%v19,%[R_LI],0(%[R_OUT])\n\t" \ ++ " ahi %[R_LI],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllh %%v20,%%v17\n\t" \ ++ " vstl %%v20,%[R_LI],16(%[R_OUT])\n\t" \ ++ " ahi %[R_LI],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllb %%v18,%%v16\n\t" \ ++ " vuplhh %%v21,%%v18\n\t" \ ++ " vstl %%v21,%[R_LI],32(%[R_OUT])\n\t" \ ++ " ahi %[R_LI],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllh %%v22,%%v18\n\t" \ ++ " vstl %%v22,%[R_LI],48(%[R_OUT])\n\t" \ ++ "11:\n\t" \ ++ " la %[R_OUT],1(%[R_TMP],%[R_OUT])\n\t" \ ++ "20:\n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_OUT] "+a" (outptr) \ ++ , [R_IN] "+a" (inptr) \ ++ , [R_LEN] "+d" (len) \ ++ , [R_LI] "=d" (loop_count) \ ++ , [R_TMP] "=a" (tmp) \ ++ : /* inputs */ \ ++ : /* clobber list*/ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30") \ ++ ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ if (len > 0) \ ++ { \ ++ /* Found an invalid character at the next input byte. 
*/ \ ++ BODY_ORIG_ERROR \ ++ } \ ++ } ++ ++# define LOOP_NEED_FLAGS ++# include ++# include ++# undef BODY_ORIG ++# undef BODY_ORIG_ERROR ++ICONV_VX_IFUNC (__gconv_transform_ascii_internal) ++ ++/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */ ++# define DEFINE_INIT 0 ++# define DEFINE_FINI 0 ++# define MIN_NEEDED_FROM 4 ++# define MIN_NEEDED_TO 1 ++# define FROM_DIRECTION 1 ++# define FROM_LOOP ICONV_VX_NAME (internal_ascii_loop) ++# define TO_LOOP ICONV_VX_NAME (internal_ascii_loop) /* This is not used. */ ++# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ascii) ++# define ONE_DIRECTION 1 ++ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define LOOPFCT FROM_LOOP ++# define BODY_ORIG_ERROR \ ++ UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); ++ ++# define BODY_ORIG \ ++ { \ ++ if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \ ++ { \ ++ BODY_ORIG_ERROR \ ++ } \ ++ else \ ++ { \ ++ /* It's an one byte sequence. */ \ ++ *outptr++ = *((const uint32_t *) inptr); \ ++ inptr += sizeof (uint32_t); \ ++ } \ ++ } ++# define BODY \ ++ { \ ++ size_t len = (inend - inptr) / 4; \ ++ if (len > outend - outptr) \ ++ len = outend - outptr; \ ++ size_t loop_count, tmp, tmp2; \ ++ __asm__ volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_LEN]) \ ++ /* Setup to check for ch > 0x7f. */ \ ++ " vzero %%v21\n\t" \ ++ " srlg %[R_LI],%[R_LEN],4\n\t" \ ++ " vleih %%v21,8192,0\n\t" /* element 0: > */ \ ++ " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \ ++ " vleif %%v20,127,0\n\t" /* element 0: 127 */ \ ++ " lghi %[R_TMP],0\n\t" \ ++ " clgije %[R_LI],0,1f\n\t" \ ++ "0:\n\t" \ ++ " vlm %%v16,%%v19,0(%[R_IN])\n\t" \ ++ /* Shorten to byte values. 
*/ \ ++ " vpkf %%v23,%%v16,%%v17\n\t" \ ++ " vpkf %%v24,%%v18,%%v19\n\t" \ ++ " vpkh %%v23,%%v23,%%v24\n\t" \ ++ /* Checking for values > 0x7f. */ \ ++ " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \ ++ " jno 10f\n\t" \ ++ " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \ ++ " jno 11f\n\t" \ ++ " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \ ++ " jno 12f\n\t" \ ++ " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \ ++ " jno 13f\n\t" \ ++ /* Store 16bytes to outptr. */ \ ++ " vst %%v23,0(%[R_OUT])\n\t" \ ++ " la %[R_IN],64(%[R_IN])\n\t" \ ++ " la %[R_OUT],16(%[R_OUT])\n\t" \ ++ " brctg %[R_LI],0b\n\t" \ ++ " lghi %[R_LI],15\n\t" \ ++ " ngr %[R_LEN],%[R_LI]\n\t" \ ++ " je 20f\n\t" /* Jump away if no remaining bytes. */ \ ++ /* Handle remaining bytes. */ \ ++ "1: sllg %[R_LI],%[R_LEN],2\n\t" \ ++ " aghi %[R_LI],-1\n\t" \ ++ " jl 20f\n\t" /* Jump away if no remaining bytes. */ \ ++ /* Load remaining 1...63 bytes. */ \ ++ " vll %%v16,%[R_LI],0(%[R_IN])\n\t" \ ++ " ahi %[R_LI],-16\n\t" \ ++ " jl 2f\n\t" \ ++ " vll %%v17,%[R_LI],16(%[R_IN])\n\t" \ ++ " ahi %[R_LI],-16\n\t" \ ++ " jl 2f\n\t" \ ++ " vll %%v18,%[R_LI],32(%[R_IN])\n\t" \ ++ " ahi %[R_LI],-16\n\t" \ ++ " jl 2f\n\t" \ ++ " vll %%v19,%[R_LI],48(%[R_IN])\n\t" \ ++ "2:\n\t" \ ++ /* Shorten to byte values. */ \ ++ " vpkf %%v23,%%v16,%%v17\n\t" \ ++ " vpkf %%v24,%%v18,%%v19\n\t" \ ++ " vpkh %%v23,%%v23,%%v24\n\t" \ ++ " sllg %[R_LI],%[R_LEN],2\n\t" \ ++ " aghi %[R_LI],-16\n\t" \ ++ " jl 3f\n\t" /* v16 is not fully loaded. */ \ ++ " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \ ++ " jno 10f\n\t" \ ++ " aghi %[R_LI],-16\n\t" \ ++ " jl 4f\n\t" /* v17 is not fully loaded. */ \ ++ " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \ ++ " jno 11f\n\t" \ ++ " aghi %[R_LI],-16\n\t" \ ++ " jl 5f\n\t" /* v18 is not fully loaded. */ \ ++ " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \ ++ " jno 12f\n\t" \ ++ " aghi %[R_LI],-16\n\t" \ ++ /* v19 is not fully loaded. 
*/ \ ++ " lghi %[R_TMP],12\n\t" \ ++ " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \ ++ "6: vlgvb %[R_I],%%v22,7\n\t" \ ++ " aghi %[R_LI],16\n\t" \ ++ " clrjl %[R_I],%[R_LI],14f\n\t" \ ++ " lgr %[R_I],%[R_LEN]\n\t" \ ++ " lghi %[R_LEN],0\n\t" \ ++ " j 15f\n\t" \ ++ "3: vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" /* Check the partially loaded v16. */ \ ++ " j 6b\n\t" \ ++ "4: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" /* Check the partially loaded v17. */ \ ++ " lghi %[R_TMP],4\n\t" \ ++ " j 6b\n\t" \ ++ "5: vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" /* Check the partially loaded v18 (v17 was already validated). */ \ ++ " lghi %[R_TMP],8\n\t" \ ++ " j 6b\n\t" \ ++ /* Found a value > 0x7f. */ \ ++ "13: ahi %[R_TMP],4\n\t" \ ++ "12: ahi %[R_TMP],4\n\t" \ ++ "11: ahi %[R_TMP],4\n\t" \ ++ "10: vlgvb %[R_I],%%v22,7\n\t" \ ++ "14: srlg %[R_I],%[R_I],2\n\t" \ ++ " agr %[R_I],%[R_TMP]\n\t" \ ++ " je 20f\n\t" \ ++ /* Store characters before invalid one... */ \ ++ "15: aghi %[R_I],-1\n\t" \ ++ " vstl %%v23,%[R_I],0(%[R_OUT])\n\t" \ ++ /* ... and update pointers. */ \ ++ " la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \ ++ " sllg %[R_I],%[R_I],2\n\t" \ ++ " la %[R_IN],4(%[R_I],%[R_IN])\n\t" \ ++ "20:\n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_OUT] "+a" (outptr) \ ++ , [R_IN] "+a" (inptr) \ ++ , [R_LEN] "+d" (len) \ ++ , [R_LI] "=d" (loop_count) \ ++ , [R_I] "=a" (tmp2) \ ++ , [R_TMP] "=d" (tmp) \ ++ : /* inputs */ \ ++ : /* clobber list*/ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ ++ ASM_CLOBBER_VR ("v24") \ ++ ); \ ++ if (len > 0) \ ++ { \ ++ /* Found an invalid character > 0x7f at next character. */ \ ++ BODY_ORIG_ERROR \ ++ } \ ++ } ++# define LOOP_NEED_FLAGS ++# include ++# include ++# undef BODY_ORIG ++# undef BODY_ORIG_ERROR ++ICONV_VX_IFUNC (__gconv_transform_internal_ascii) ++ ++ ++/* Convert from internal UCS4 to UCS4 little endian form. 
*/ ++# define DEFINE_INIT 0 ++# define DEFINE_FINI 0 ++# define MIN_NEEDED_FROM 4 ++# define MIN_NEEDED_TO 4 ++# define FROM_DIRECTION 1 ++# define FROM_LOOP ICONV_VX_NAME (internal_ucs4le_loop) ++# define TO_LOOP ICONV_VX_NAME (internal_ucs4le_loop) /* This is not used. */ ++# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ucs4le) ++# define ONE_DIRECTION 0 ++ ++static inline int ++__attribute ((always_inline)) ++ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step, ++ struct __gconv_step_data *step_data, ++ const unsigned char **inptrp, ++ const unsigned char *inend, ++ unsigned char **outptrp, ++ unsigned char *outend, ++ size_t *irreversible) ++{ ++ const unsigned char *inptr = *inptrp; ++ unsigned char *outptr = *outptrp; ++ int result; ++ size_t len = MIN (inend - inptr, outend - outptr) / 4; ++ size_t loop_count; ++ __asm__ volatile (".machine push\n\t" ++ ".machine \"z13\"\n\t" ++ ".machinemode \"zarch_nohighgprs\"\n\t" ++ CONVERT_32BIT_SIZE_T ([R_LEN]) ++ " bras %[R_LI],1f\n\t" ++ /* Vector permute mask: */ ++ " .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t" ++ "1: vl %%v20,0(%[R_LI])\n\t" ++ /* Process 64byte (16char) blocks. */ ++ " srlg %[R_LI],%[R_LEN],4\n\t" ++ " clgije %[R_LI],0,10f\n\t" ++ "0: vlm %%v16,%%v19,0(%[R_IN])\n\t" ++ " vperm %%v16,%%v16,%%v16,%%v20\n\t" ++ " vperm %%v17,%%v17,%%v17,%%v20\n\t" ++ " vperm %%v18,%%v18,%%v18,%%v20\n\t" ++ " vperm %%v19,%%v19,%%v19,%%v20\n\t" ++ " vstm %%v16,%%v19,0(%[R_OUT])\n\t" ++ " la %[R_IN],64(%[R_IN])\n\t" ++ " la %[R_OUT],64(%[R_OUT])\n\t" ++ " brctg %[R_LI],0b\n\t" ++ " llgfr %[R_LEN],%[R_LEN]\n\t" ++ " nilf %[R_LEN],15\n\t" ++ /* Process 16byte (4char) blocks. 
*/ ++ "10: srlg %[R_LI],%[R_LEN],2\n\t" ++ " clgije %[R_LI],0,20f\n\t" ++ "11: vl %%v16,0(%[R_IN])\n\t" ++ " vperm %%v16,%%v16,%%v16,%%v20\n\t" ++ " vst %%v16,0(%[R_OUT])\n\t" ++ " la %[R_IN],16(%[R_IN])\n\t" ++ " la %[R_OUT],16(%[R_OUT])\n\t" ++ " brctg %[R_LI],11b\n\t" ++ " nill %[R_LEN],3\n\t" ++ /* Process <16bytes. */ ++ "20: sll %[R_LEN],2\n\t" ++ " ahi %[R_LEN],-1\n\t" ++ " jl 30f\n\t" ++ " vll %%v16,%[R_LEN],0(%[R_IN])\n\t" ++ " vperm %%v16,%%v16,%%v16,%%v20\n\t" ++ " vstl %%v16,%[R_LEN],0(%[R_OUT])\n\t" ++ " la %[R_IN],1(%[R_LEN],%[R_IN])\n\t" ++ " la %[R_OUT],1(%[R_LEN],%[R_OUT])\n\t" ++ "30: \n\t" ++ ".machine pop" ++ : /* outputs */ [R_OUT] "+a" (outptr) ++ , [R_IN] "+a" (inptr) ++ , [R_LI] "=a" (loop_count) ++ , [R_LEN] "+a" (len) ++ : /* inputs */ ++ : /* clobber list*/ "memory", "cc" ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") ++ ASM_CLOBBER_VR ("v20") ++ ); ++ *inptrp = inptr; ++ *outptrp = outptr; ++ ++ /* Determine the status. */ ++ if (*inptrp == inend) ++ result = __GCONV_EMPTY_INPUT; ++ else if (*outptrp + 4 > outend) ++ result = __GCONV_FULL_OUTPUT; ++ else ++ result = __GCONV_INCOMPLETE_INPUT; ++ ++ return result; ++} ++ ++ICONV_VX_SINGLE (internal_ucs4le_loop) ++# include ++ICONV_VX_IFUNC (__gconv_transform_internal_ucs4le) ++ ++ ++/* Transform from UCS4 to the internal, UCS4-like format. Unlike ++ for the other direction we have to check for correct values here. */ ++# define DEFINE_INIT 0 ++# define DEFINE_FINI 0 ++# define MIN_NEEDED_FROM 4 ++# define MIN_NEEDED_TO 4 ++# define FROM_DIRECTION 1 ++# define FROM_LOOP ICONV_VX_NAME (ucs4_internal_loop) ++# define TO_LOOP ICONV_VX_NAME (ucs4_internal_loop) /* This is not used. 
*/ ++# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs4_internal) ++# define ONE_DIRECTION 0 ++ ++ ++static inline int ++__attribute ((always_inline)) ++ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step, ++ struct __gconv_step_data *step_data, ++ const unsigned char **inptrp, ++ const unsigned char *inend, ++ unsigned char **outptrp, ++ unsigned char *outend, ++ size_t *irreversible) ++{ ++ int flags = step_data->__flags; ++ const unsigned char *inptr = *inptrp; ++ unsigned char *outptr = *outptrp; ++ int result; ++ size_t len, loop_count; ++ do ++ { ++ len = MIN (inend - inptr, outend - outptr) / 4; ++ __asm__ volatile (".machine push\n\t" ++ ".machine \"z13\"\n\t" ++ ".machinemode \"zarch_nohighgprs\"\n\t" ++ CONVERT_32BIT_SIZE_T ([R_LEN]) ++ /* Setup to check for ch > 0x7fffffff. */ ++ " larl %[R_LI],9f\n\t" ++ " vlm %%v20,%%v21,0(%[R_LI])\n\t" ++ " srlg %[R_LI],%[R_LEN],2\n\t" ++ " clgije %[R_LI],0,1f\n\t" ++ /* Process 16byte (4char) blocks. */ ++ "0: vl %%v16,0(%[R_IN])\n\t" ++ " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" ++ " jno 10f\n\t" ++ " vst %%v16,0(%[R_OUT])\n\t" ++ " la %[R_IN],16(%[R_IN])\n\t" ++ " la %[R_OUT],16(%[R_OUT])\n\t" ++ " brctg %[R_LI],0b\n\t" ++ " llgfr %[R_LEN],%[R_LEN]\n\t" ++ " nilf %[R_LEN],3\n\t" ++ /* Process <16bytes. */ ++ "1: sll %[R_LEN],2\n\t" ++ " ahik %[R_LI],%[R_LEN],-1\n\t" ++ " jl 20f\n\t" /* No further bytes available. */ ++ " vll %%v16,%[R_LI],0(%[R_IN])\n\t" ++ " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" ++ " vlgvb %[R_LI],%%v22,7\n\t" ++ " clr %[R_LI],%[R_LEN]\n\t" ++ " locgrhe %[R_LI],%[R_LEN]\n\t" ++ " locghihe %[R_LEN],0\n\t" ++ " j 11f\n\t" ++ /* v20: Vector string range compare values. */ ++ "9: .long 0x7fffffff,0x0,0x0,0x0\n\t" ++ /* v21: Vector string range compare control-bits. ++ element 0: >; element 1: =<> (always true) */ ++ " .long 0x20000000,0xE0000000,0x0,0x0\n\t" ++ /* Found a value > 0x7fffffff. */ ++ "10: vlgvb %[R_LI],%%v22,7\n\t" ++ /* Store characters before invalid one. 
*/ ++ "11: aghi %[R_LI],-1\n\t" ++ " jl 20f\n\t" ++ " vstl %%v16,%[R_LI],0(%[R_OUT])\n\t" ++ " la %[R_IN],1(%[R_LI],%[R_IN])\n\t" ++ " la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t" ++ "20:\n\t" ++ ".machine pop" ++ : /* outputs */ [R_OUT] "+a" (outptr) ++ , [R_IN] "+a" (inptr) ++ , [R_LI] "=a" (loop_count) ++ , [R_LEN] "+d" (len) ++ : /* inputs */ ++ : /* clobber list*/ "memory", "cc" ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20") ++ ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22") ++ ); ++ if (len > 0) ++ { ++ /* The value is too large. We don't try transliteration here since ++ this is not an error because of the lack of possibilities to ++ represent the result. This is a genuine bug in the input since ++ UCS4 does not allow such values. */ ++ if (irreversible == NULL) ++ /* We are transliterating, don't try to correct anything. */ ++ return __GCONV_ILLEGAL_INPUT; ++ ++ if (flags & __GCONV_IGNORE_ERRORS) ++ { ++ /* Just ignore this character. */ ++ ++*irreversible; ++ inptr += 4; ++ continue; ++ } ++ ++ *inptrp = inptr; ++ *outptrp = outptr; ++ return __GCONV_ILLEGAL_INPUT; ++ } ++ } ++ while (len > 0); ++ ++ *inptrp = inptr; ++ *outptrp = outptr; ++ ++ /* Determine the status. */ ++ if (*inptrp == inend) ++ result = __GCONV_EMPTY_INPUT; ++ else if (*outptrp + 4 > outend) ++ result = __GCONV_FULL_OUTPUT; ++ else ++ result = __GCONV_INCOMPLETE_INPUT; ++ ++ return result; ++} ++ ++ICONV_VX_SINGLE (ucs4_internal_loop) ++# include ++ICONV_VX_IFUNC (__gconv_transform_ucs4_internal) ++ ++ ++/* Transform from UCS4-LE to the internal encoding. */ ++# define DEFINE_INIT 0 ++# define DEFINE_FINI 0 ++# define MIN_NEEDED_FROM 4 ++# define MIN_NEEDED_TO 4 ++# define FROM_DIRECTION 1 ++# define FROM_LOOP ICONV_VX_NAME (ucs4le_internal_loop) ++# define TO_LOOP ICONV_VX_NAME (ucs4le_internal_loop) /* This is not used. 
*/ ++# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs4le_internal) ++# define ONE_DIRECTION 0 ++ ++static inline int ++__attribute ((always_inline)) ++ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step, ++ struct __gconv_step_data *step_data, ++ const unsigned char **inptrp, ++ const unsigned char *inend, ++ unsigned char **outptrp, ++ unsigned char *outend, ++ size_t *irreversible) ++{ ++ int flags = step_data->__flags; ++ const unsigned char *inptr = *inptrp; ++ unsigned char *outptr = *outptrp; ++ int result; ++ size_t len, loop_count; ++ do ++ { ++ len = MIN (inend - inptr, outend - outptr) / 4; ++ __asm__ volatile (".machine push\n\t" ++ ".machine \"z13\"\n\t" ++ ".machinemode \"zarch_nohighgprs\"\n\t" ++ CONVERT_32BIT_SIZE_T ([R_LEN]) ++ /* Setup to check for ch > 0x7fffffff. */ ++ " larl %[R_LI],9f\n\t" ++ " vlm %%v20,%%v22,0(%[R_LI])\n\t" ++ " srlg %[R_LI],%[R_LEN],2\n\t" ++ " clgije %[R_LI],0,1f\n\t" ++ /* Process 16byte (4char) blocks. */ ++ "0: vl %%v16,0(%[R_IN])\n\t" ++ " vperm %%v16,%%v16,%%v16,%%v22\n\t" ++ " vstrcfs %%v23,%%v16,%%v20,%%v21\n\t" ++ " jno 10f\n\t" ++ " vst %%v16,0(%[R_OUT])\n\t" ++ " la %[R_IN],16(%[R_IN])\n\t" ++ " la %[R_OUT],16(%[R_OUT])\n\t" ++ " brctg %[R_LI],0b\n\t" ++ " llgfr %[R_LEN],%[R_LEN]\n\t" ++ " nilf %[R_LEN],3\n\t" ++ /* Process <16bytes. */ ++ "1: sll %[R_LEN],2\n\t" ++ " ahik %[R_LI],%[R_LEN],-1\n\t" ++ " jl 20f\n\t" /* No further bytes available. */ ++ " vll %%v16,%[R_LI],0(%[R_IN])\n\t" ++ " vperm %%v16,%%v16,%%v16,%%v22\n\t" ++ " vstrcfs %%v23,%%v16,%%v20,%%v21\n\t" ++ " vlgvb %[R_LI],%%v23,7\n\t" ++ " clr %[R_LI],%[R_LEN]\n\t" ++ " locgrhe %[R_LI],%[R_LEN]\n\t" ++ " locghihe %[R_LEN],0\n\t" ++ " j 11f\n\t" ++ /* v20: Vector string range compare values. */ ++ "9: .long 0x7fffffff,0x0,0x0,0x0\n\t" ++ /* v21: Vector string range compare control-bits. ++ element 0: >; element 1: =<> (always true) */ ++ " .long 0x20000000,0xE0000000,0x0,0x0\n\t" ++ /* v22: Vector permute mask. 
*/ ++ " .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t" ++ /* Found a value > 0x7fffffff. */ ++ "10: vlgvb %[R_LI],%%v23,7\n\t" ++ /* Store characters before invalid one. */ ++ "11: aghi %[R_LI],-1\n\t" ++ " jl 20f\n\t" ++ " vstl %%v16,%[R_LI],0(%[R_OUT])\n\t" ++ " la %[R_IN],1(%[R_LI],%[R_IN])\n\t" ++ " la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t" ++ "20:\n\t" ++ ".machine pop" ++ : /* outputs */ [R_OUT] "+a" (outptr) ++ , [R_IN] "+a" (inptr) ++ , [R_LI] "=a" (loop_count) ++ , [R_LEN] "+d" (len) ++ : /* inputs */ ++ : /* clobber list*/ "memory", "cc" ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20") ++ ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22") ++ ASM_CLOBBER_VR ("v23") ++ ); ++ if (len > 0) ++ { ++ /* The value is too large. We don't try transliteration here since ++ this is not an error because of the lack of possibilities to ++ represent the result. This is a genuine bug in the input since ++ UCS4 does not allow such values. */ ++ if (irreversible == NULL) ++ /* We are transliterating, don't try to correct anything. */ ++ return __GCONV_ILLEGAL_INPUT; ++ ++ if (flags & __GCONV_IGNORE_ERRORS) ++ { ++ /* Just ignore this character. */ ++ ++*irreversible; ++ inptr += 4; ++ continue; ++ } ++ ++ *inptrp = inptr; ++ *outptrp = outptr; ++ return __GCONV_ILLEGAL_INPUT; ++ } ++ } ++ while (len > 0); ++ ++ *inptrp = inptr; ++ *outptrp = outptr; ++ ++ /* Determine the status. */ ++ if (*inptrp == inend) ++ result = __GCONV_EMPTY_INPUT; ++ else if (*inptrp + 4 > inend) ++ result = __GCONV_INCOMPLETE_INPUT; ++ else ++ { ++ assert (*outptrp + 4 > outend); ++ result = __GCONV_FULL_OUTPUT; ++ } ++ ++ return result; ++} ++ICONV_VX_SINGLE (ucs4le_internal_loop) ++# include ++ICONV_VX_IFUNC (__gconv_transform_ucs4le_internal) ++ ++/* Convert from UCS2 to the internal (UCS4-like) format. 
*/ ++# define DEFINE_INIT 0 ++# define DEFINE_FINI 0 ++# define MIN_NEEDED_FROM 2 ++# define MIN_NEEDED_TO 4 ++# define FROM_DIRECTION 1 ++# define FROM_LOOP ICONV_VX_NAME (ucs2_internal_loop) ++# define TO_LOOP ICONV_VX_NAME (ucs2_internal_loop) /* This is not used. */ ++# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs2_internal) ++# define ONE_DIRECTION 1 ++ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define LOOPFCT FROM_LOOP ++# define BODY_ORIG_ERROR \ ++ /* Surrogate characters in UCS-2 input are not valid. Reject \ ++ them. (Catching this here is not security relevant.) */ \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (2); ++# define BODY_ORIG \ ++ { \ ++ uint16_t u1 = get16 (inptr); \ ++ \ ++ if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ ++ { \ ++ BODY_ORIG_ERROR \ ++ } \ ++ \ ++ *((uint32_t *) outptr) = u1; \ ++ outptr += sizeof (uint32_t); \ ++ inptr += 2; \ ++ } ++# define BODY \ ++ { \ ++ size_t len, tmp, tmp2; \ ++ len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); \ ++ __asm__ volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_LEN]) \ ++ /* Setup to check for ch >= 0xd800 && ch < 0xe000. */ \ ++ " larl %[R_TMP],9f\n\t" \ ++ " vlm %%v20,%%v21,0(%[R_TMP])\n\t" \ ++ " srlg %[R_TMP],%[R_LEN],3\n\t" \ ++ " clgije %[R_TMP],0,1f\n\t" \ ++ /* Process 16byte (8char) blocks. */ \ ++ "0: vl %%v16,0(%[R_IN])\n\t" \ ++ " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \ ++ /* Enlarge UCS2 to UCS4. */ \ ++ " vuplhh %%v17,%%v16\n\t" \ ++ " vupllh %%v18,%%v16\n\t" \ ++ " jno 10f\n\t" \ ++ /* Store 32bytes to buf_out. */ \ ++ " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \ ++ " la %[R_IN],16(%[R_IN])\n\t" \ ++ " la %[R_OUT],32(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP],0b\n\t" \ ++ " llgfr %[R_LEN],%[R_LEN]\n\t" \ ++ " nilf %[R_LEN],7\n\t" \ ++ /* Process <16bytes. 
*/ \ ++ "1: sll %[R_LEN],1\n\t" \ ++ " ahik %[R_TMP],%[R_LEN],-1\n\t" \ ++ " jl 20f\n\t" /* No further bytes available. */ \ ++ " vll %%v16,%[R_TMP],0(%[R_IN])\n\t" \ ++ " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \ ++ /* Enlarge UCS2 to UCS4. */ \ ++ " vuplhh %%v17,%%v16\n\t" \ ++ " vupllh %%v18,%%v16\n\t" \ ++ " vlgvb %[R_TMP],%%v19,7\n\t" \ ++ " clr %[R_TMP],%[R_LEN]\n\t" \ ++ " locgrhe %[R_TMP],%[R_LEN]\n\t" \ ++ " locghihe %[R_LEN],0\n\t" \ ++ " j 11f\n\t" \ ++ /* v20: Vector string range compare values. */ \ ++ "9: .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ /* v21: Vector string range compare control-bits. \ ++ element 0: =>; element 1: < */ \ ++ " .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ /* Found an element: ch >= 0xd800 && ch < 0xe000 */ \ ++ "10: vlgvb %[R_TMP],%%v19,7\n\t" \ ++ "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " sll %[R_TMP],1\n\t" \ ++ " lgr %[R_TMP2],%[R_TMP]\n\t" \ ++ " ahi %[R_TMP],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP],-16\n\t" \ ++ " jl 19f\n\t" \ ++ " vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t" \ ++ "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" \ ++ "20: \n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_OUT] "+a" (outptr) \ ++ , [R_IN] "+a" (inptr) \ ++ , [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=a" (tmp2) \ ++ , [R_LEN] "+d" (len) \ ++ : /* inputs */ \ ++ : /* clobber list*/ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ); \ ++ if (len > 0) \ ++ { \ ++ /* Found an invalid character at next input-char. */ \ ++ BODY_ORIG_ERROR \ ++ } \ ++ } ++ ++# define LOOP_NEED_FLAGS ++# include ++# include ++# undef BODY_ORIG ++# undef BODY_ORIG_ERROR ++ICONV_VX_IFUNC (__gconv_transform_ucs2_internal) ++ ++/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. 
*/ ++# define DEFINE_INIT 0 ++# define DEFINE_FINI 0 ++# define MIN_NEEDED_FROM 2 ++# define MIN_NEEDED_TO 4 ++# define FROM_DIRECTION 1 ++# define FROM_LOOP ICONV_VX_NAME (ucs2reverse_internal_loop) ++# define TO_LOOP ICONV_VX_NAME (ucs2reverse_internal_loop) /* This is not used.*/ ++# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs2reverse_internal) ++# define ONE_DIRECTION 1 ++ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define LOOPFCT FROM_LOOP ++# define BODY_ORIG_ERROR \ ++ /* Surrogate characters in UCS-2 input are not valid. Reject \ ++ them. (Catching this here is not security relevant.) */ \ ++ if (! ignore_errors_p ()) \ ++ { \ ++ result = __GCONV_ILLEGAL_INPUT; \ ++ break; \ ++ } \ ++ inptr += 2; \ ++ ++*irreversible; \ ++ continue; ++ ++# define BODY_ORIG \ ++ { \ ++ uint16_t u1 = bswap_16 (get16 (inptr)); \ ++ \ ++ if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ ++ { \ ++ BODY_ORIG_ERROR \ ++ } \ ++ \ ++ *((uint32_t *) outptr) = u1; \ ++ outptr += sizeof (uint32_t); \ ++ inptr += 2; \ ++ } ++# define BODY \ ++ { \ ++ size_t len, tmp, tmp2; \ ++ len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); \ ++ __asm__ volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_LEN]) \ ++ /* Setup to check for ch >= 0xd800 && ch < 0xe000. */ \ ++ " larl %[R_TMP],9f\n\t" \ ++ " vlm %%v20,%%v22,0(%[R_TMP])\n\t" \ ++ " srlg %[R_TMP],%[R_LEN],3\n\t" \ ++ " clgije %[R_TMP],0,1f\n\t" \ ++ /* Process 16byte (8char) blocks. */ \ ++ "0: vl %%v16,0(%[R_IN])\n\t" \ ++ " vperm %%v16,%%v16,%%v16,%%v22\n\t" \ ++ " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \ ++ /* Enlarge UCS2 to UCS4. */ \ ++ " vuplhh %%v17,%%v16\n\t" \ ++ " vupllh %%v18,%%v16\n\t" \ ++ " jno 10f\n\t" \ ++ /* Store 32bytes to buf_out. 
*/ \ ++ " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \ ++ " la %[R_IN],16(%[R_IN])\n\t" \ ++ " la %[R_OUT],32(%[R_OUT])\n\t" \ ++ " brctg %[R_TMP],0b\n\t" \ ++ " llgfr %[R_LEN],%[R_LEN]\n\t" \ ++ " nilf %[R_LEN],7\n\t" \ ++ /* Process <16bytes. */ \ ++ "1: sll %[R_LEN],1\n\t" \ ++ " ahik %[R_TMP],%[R_LEN],-1\n\t" \ ++ " jl 20f\n\t" /* No further bytes available. */ \ ++ " vll %%v16,%[R_TMP],0(%[R_IN])\n\t" \ ++ " vperm %%v16,%%v16,%%v16,%%v22\n\t" \ ++ " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \ ++ /* Enlarge UCS2 to UCS4. */ \ ++ " vuplhh %%v17,%%v16\n\t" \ ++ " vupllh %%v18,%%v16\n\t" \ ++ " vlgvb %[R_TMP],%%v19,7\n\t" \ ++ " clr %[R_TMP],%[R_LEN]\n\t" \ ++ " locgrhe %[R_TMP],%[R_LEN]\n\t" \ ++ " locghihe %[R_LEN],0\n\t" \ ++ " j 11f\n\t" \ ++ /* v20: Vector string range compare values. */ \ ++ "9: .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ /* v21: Vector string range compare control-bits. \ ++ element 0: =>; element 1: < */ \ ++ " .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ ++ /* v22: Vector permute mask. 
*/ \ ++ " .short 0x0100,0x0302,0x0504,0x0706\n\t" \ ++ " .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t" \ ++ /* Found an element: ch >= 0xd800 && ch < 0xe000 */ \ ++ "10: vlgvb %[R_TMP],%%v19,7\n\t" \ ++ "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " sll %[R_TMP],1\n\t" \ ++ " lgr %[R_TMP2],%[R_TMP]\n\t" \ ++ " ahi %[R_TMP],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP],-16\n\t" \ ++ " jl 19f\n\t" \ ++ " vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t" \ ++ "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" \ ++ "20: \n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_OUT] "+a" (outptr) \ ++ , [R_IN] "+a" (inptr) \ ++ , [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=a" (tmp2) \ ++ , [R_LEN] "+d" (len) \ ++ : /* inputs */ \ ++ : /* clobber list*/ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") \ ++ ); \ ++ if (len > 0) \ ++ { \ ++ /* Found an invalid character at next input-char. */ \ ++ BODY_ORIG_ERROR \ ++ } \ ++ } ++# define LOOP_NEED_FLAGS ++# include ++# include ++# undef BODY_ORIG ++# undef BODY_ORIG_ERROR ++ICONV_VX_IFUNC (__gconv_transform_ucs2reverse_internal) ++ ++/* Convert from the internal (UCS4-like) format to UCS2. */ ++#define DEFINE_INIT 0 ++#define DEFINE_FINI 0 ++#define MIN_NEEDED_FROM 4 ++#define MIN_NEEDED_TO 2 ++#define FROM_DIRECTION 1 ++#define FROM_LOOP ICONV_VX_NAME (internal_ucs2_loop) ++#define TO_LOOP ICONV_VX_NAME (internal_ucs2_loop) /* This is not used. 
*/ ++#define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ucs2) ++#define ONE_DIRECTION 1 ++ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define LOOPFCT FROM_LOOP ++#define BODY_ORIG \ ++ { \ ++ uint32_t val = *((const uint32_t *) inptr); \ ++ \ ++ if (__glibc_unlikely (val >= 0x10000)) \ ++ { \ ++ UNICODE_TAG_HANDLER (val, 4); \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } \ ++ else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ ++ { \ ++ /* Surrogate characters in UCS-4 input are not valid. \ ++ We must catch this, because the UCS-2 output might be \ ++ interpreted as UTF-16 by other programs. If we let \ ++ surrogates pass through, attackers could make a security \ ++ hole exploit by synthesizing any desired plane 1-16 \ ++ character. */ \ ++ result = __GCONV_ILLEGAL_INPUT; \ ++ if (! ignore_errors_p ()) \ ++ break; \ ++ inptr += 4; \ ++ ++*irreversible; \ ++ continue; \ ++ } \ ++ else \ ++ { \ ++ put16 (outptr, val); \ ++ outptr += sizeof (uint16_t); \ ++ inptr += 4; \ ++ } \ ++ } ++# define BODY \ ++ { \ ++ if (__builtin_expect (inend - inptr < 32, 1) \ ++ || outend - outptr < 16) \ ++ /* Convert remaining bytes with c code. */ \ ++ BODY_ORIG \ ++ else \ ++ { \ ++ /* Convert in 32 byte blocks. */ \ ++ size_t loop_count = (inend - inptr) / 32; \ ++ size_t tmp, tmp2; \ ++ if (loop_count > (outend - outptr) / 16) \ ++ loop_count = (outend - outptr) / 16; \ ++ __asm__ volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_LI]) \ ++ " larl %[R_I],3f\n\t" \ ++ " vlm %%v20,%%v23,0(%[R_I])\n\t" \ ++ "0: \n\t" \ ++ " vlm %%v16,%%v17,0(%[R_IN])\n\t" \ ++ /* Shorten UCS4 to UCS2. */ \ ++ " vpkf %%v18,%%v16,%%v17\n\t" \ ++ " vstrcfs %%v19,%%v16,%%v20,%%v21\n\t" \ ++ " jno 11f\n\t" \ ++ "1: vstrcfs %%v19,%%v17,%%v20,%%v21\n\t" \ ++ " jno 10f\n\t" \ ++ /* Store 16bytes to buf_out. 
*/ \ ++ "2: vst %%v18,0(%[R_OUT])\n\t" \ ++ " la %[R_IN],32(%[R_IN])\n\t" \ ++ " la %[R_OUT],16(%[R_OUT])\n\t" \ ++ " brctg %[R_LI],0b\n\t" \ ++ " j 20f\n\t" \ ++ /* Setup to check for ch >= 0xd800. (v20, v21) */ \ ++ "3: .long 0xd800,0xd800,0x0,0x0\n\t" \ ++ " .long 0xa0000000,0xa0000000,0x0,0x0\n\t" \ ++ /* Setup to check for ch >= 0xe000 \ ++ && ch < 0x10000. (v22,v23) */ \ ++ " .long 0xe000,0x10000,0x0,0x0\n\t" \ ++ " .long 0xa0000000,0x40000000,0x0,0x0\n\t" \ ++ /* v16 contains only valid chars. Check in v17: \ ++ ch >= 0xe000 && ch <= 0xffff. */ \ ++ "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t" \ ++ " jo 2b\n\t" /* All ch's in this range, proceed. */ \ ++ " lghi %[R_TMP],16\n\t" \ ++ " j 12f\n\t" \ ++ /* Maybe v16 contains invalid chars. \ ++ Check ch >= 0xe000 && ch <= 0xffff. */ \ ++ "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t" \ ++ " jo 1b\n\t" /* All ch's in this range, proceed. */ \ ++ " lghi %[R_TMP],0\n\t" \ ++ "12: vlgvb %[R_I],%%v19,7\n\t" \ ++ " agr %[R_I],%[R_TMP]\n\t" \ ++ " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ ++ " srl %[R_I],1\n\t" \ ++ " ahi %[R_I],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " vstl %%v18,%[R_I],0(%[R_OUT])\n\t" \ ++ " la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \ ++ "20:\n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_OUT] "+a" (outptr) \ ++ , [R_IN] "+a" (inptr) \ ++ , [R_LI] "+d" (loop_count) \ ++ , [R_I] "=a" (tmp2) \ ++ , [R_TMP] "=d" (tmp) \ ++ : /* inputs */ \ ++ : /* clobber list*/ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ ++ ); \ ++ if (loop_count > 0) \ ++ { \ ++ /* Found an invalid character at next character. */ \ ++ BODY_ORIG \ ++ } \ ++ } \ ++ } ++#define LOOP_NEED_FLAGS ++#include ++#include ++# undef BODY_ORIG ++ICONV_VX_IFUNC (__gconv_transform_internal_ucs2) ++ ++/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. 
*/ ++#define DEFINE_INIT 0 ++#define DEFINE_FINI 0 ++#define MIN_NEEDED_FROM 4 ++#define MIN_NEEDED_TO 2 ++#define FROM_DIRECTION 1 ++#define FROM_LOOP ICONV_VX_NAME (internal_ucs2reverse_loop) ++#define TO_LOOP ICONV_VX_NAME (internal_ucs2reverse_loop)/* This is not used.*/ ++#define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ucs2reverse) ++#define ONE_DIRECTION 1 ++ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define LOOPFCT FROM_LOOP ++#define BODY_ORIG \ ++ { \ ++ uint32_t val = *((const uint32_t *) inptr); \ ++ if (__glibc_unlikely (val >= 0x10000)) \ ++ { \ ++ UNICODE_TAG_HANDLER (val, 4); \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } \ ++ else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ ++ { \ ++ /* Surrogate characters in UCS-4 input are not valid. \ ++ We must catch this, because the UCS-2 output might be \ ++ interpreted as UTF-16 by other programs. If we let \ ++ surrogates pass through, attackers could make a security \ ++ hole exploit by synthesizing any desired plane 1-16 \ ++ character. */ \ ++ if (! ignore_errors_p ()) \ ++ { \ ++ result = __GCONV_ILLEGAL_INPUT; \ ++ break; \ ++ } \ ++ inptr += 4; \ ++ ++*irreversible; \ ++ continue; \ ++ } \ ++ else \ ++ { \ ++ put16 (outptr, bswap_16 (val)); \ ++ outptr += sizeof (uint16_t); \ ++ inptr += 4; \ ++ } \ ++ } ++# define BODY \ ++ { \ ++ if (__builtin_expect (inend - inptr < 32, 1) \ ++ || outend - outptr < 16) \ ++ /* Convert remaining bytes with c code. */ \ ++ BODY_ORIG \ ++ else \ ++ { \ ++ /* Convert in 32 byte blocks. 
*/ \ ++ size_t loop_count = (inend - inptr) / 32; \ ++ size_t tmp, tmp2; \ ++ if (loop_count > (outend - outptr) / 16) \ ++ loop_count = (outend - outptr) / 16; \ ++ __asm__ volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ CONVERT_32BIT_SIZE_T ([R_LI]) \ ++ " larl %[R_I],3f\n\t" \ ++ " vlm %%v20,%%v24,0(%[R_I])\n\t" \ ++ "0: \n\t" \ ++ " vlm %%v16,%%v17,0(%[R_IN])\n\t" \ ++ /* Shorten UCS4 to UCS2 and byteswap. */ \ ++ " vpkf %%v18,%%v16,%%v17\n\t" \ ++ " vperm %%v18,%%v18,%%v18,%%v24\n\t" \ ++ " vstrcfs %%v19,%%v16,%%v20,%%v21\n\t" \ ++ " jno 11f\n\t" \ ++ "1: vstrcfs %%v19,%%v17,%%v20,%%v21\n\t" \ ++ " jno 10f\n\t" \ ++ /* Store 16bytes to buf_out. */ \ ++ "2: vst %%v18,0(%[R_OUT])\n\t" \ ++ " la %[R_IN],32(%[R_IN])\n\t" \ ++ " la %[R_OUT],16(%[R_OUT])\n\t" \ ++ " brctg %[R_LI],0b\n\t" \ ++ " j 20f\n\t" \ ++ /* Setup to check for ch >= 0xd800. (v20, v21) */ \ ++ "3: .long 0xd800,0xd800,0x0,0x0\n\t" \ ++ " .long 0xa0000000,0xa0000000,0x0,0x0\n\t" \ ++ /* Setup to check for ch >= 0xe000 \ ++ && ch < 0x10000. (v22,v23) */ \ ++ " .long 0xe000,0x10000,0x0,0x0\n\t" \ ++ " .long 0xa0000000,0x40000000,0x0,0x0\n\t" \ ++ /* Vector permute mask (v24) */ \ ++ " .short 0x0100,0x0302,0x0504,0x0706\n\t" \ ++ " .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t" \ ++ /* v16 contains only valid chars. Check in v17: \ ++ ch >= 0xe000 && ch <= 0xffff. */ \ ++ "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t" \ ++ " jo 2b\n\t" /* All ch's in this range, proceed. */ \ ++ " lghi %[R_TMP],16\n\t" \ ++ " j 12f\n\t" \ ++ /* Maybe v16 contains invalid chars. \ ++ Check ch >= 0xe000 && ch <= 0xffff. */ \ ++ "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t" \ ++ " jo 1b\n\t" /* All ch's in this range, proceed. 
*/ \ ++ " lghi %[R_TMP],0\n\t" \ ++ "12: vlgvb %[R_I],%%v19,7\n\t" \ ++ " agr %[R_I],%[R_TMP]\n\t" \ ++ " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ ++ " srl %[R_I],1\n\t" \ ++ " ahi %[R_I],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " vstl %%v18,%[R_I],0(%[R_OUT])\n\t" \ ++ " la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \ ++ "20:\n\t" \ ++ ".machine pop" \ ++ : /* outputs */ [R_OUT] "+a" (outptr) \ ++ , [R_IN] "+a" (inptr) \ ++ , [R_LI] "+d" (loop_count) \ ++ , [R_I] "=a" (tmp2) \ ++ , [R_TMP] "=d" (tmp) \ ++ : /* inputs */ \ ++ : /* clobber list*/ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ ++ ASM_CLOBBER_VR ("v24") \ ++ ); \ ++ if (loop_count > 0) \ ++ { \ ++ /* Found an invalid character at next character. */ \ ++ BODY_ORIG \ ++ } \ ++ } \ ++ } ++#define LOOP_NEED_FLAGS ++#include ++#include ++# undef BODY_ORIG ++ICONV_VX_IFUNC (__gconv_transform_internal_ucs2reverse) ++ ++ ++#else ++/* Generate the internal transformations without ifunc if build environment ++ lacks vector support. Instead simply include the common version. */ ++# include ++#endif /* !defined HAVE_S390_VX_ASM_SUPPORT */ +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-5.patch b/SOURCES/glibc-rh1380680-5.patch new file mode 100644 index 0000000..117968b --- /dev/null +++ b/SOURCES/glibc-rh1380680-5.patch @@ -0,0 +1,151 @@ +From 832572eac8a661d25efe0f2bcc6a861e2c29c3b8 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 15:50:46 +0100 +Subject: [PATCH 05/17] S390: Optimize iso-8859-1 to ibm037 iconv-module. + +Upstream commit 81c6380887c6d62c56e5f0f85a241f759f58b2fd + +This patch reworks the s390 specific module which used the z900 +translate one to one instruction. Now the g5 translate instruction is used, +because it outperforms the troo instruction. 
+ +ChangeLog: + + * sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c (TROO_LOOP): + Rename to TR_LOOP and usage of tr instead of troo instruction. +--- + sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c | 95 +++++++++++++++++----------- + 1 file changed, 57 insertions(+), 38 deletions(-) + +diff --git a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c +index 58641f5..3b63e6a 100644 +--- a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c ++++ b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c +@@ -1,8 +1,7 @@ + /* Conversion between ISO 8859-1 and IBM037. + +- This module uses the Z900 variant of the Translate One To One +- instruction. +- Copyright (C) 1997-2009 Free Software Foundation, Inc. ++ This module uses the translate instruction. ++ Copyright (C) 1997-2016 Free Software Foundation, Inc. + + Author: Andreas Krebbel + Based on the work by Ulrich Drepper , 1997. +@@ -176,50 +175,70 @@ __attribute__ ((aligned (8))) = + #define MIN_NEEDED_FROM 1 + #define MIN_NEEDED_TO 1 + +-/* The Z900 variant of troo forces us to always specify a test +- character which ends the translation. So if we run into the +- situation where the translation has been interrupted due to the +- test character we translate the character by hand and jump back +- into the instruction. */ +- +-#define TROO_LOOP(TABLE) \ ++#define TR_LOOP(TABLE) \ + { \ +- register const unsigned char test asm ("0") = 0; \ +- register const unsigned char *pTable asm ("1") = TABLE; \ +- register unsigned char *pOutput asm ("2") = outptr; \ +- register uint64_t length asm ("3"); \ +- const unsigned char* pInput = inptr; \ +- uint64_t tmp; \ +- \ +- length = (inend - inptr < outend - outptr \ +- ? inend - inptr : outend - outptr); \ ++ size_t length = (inend - inptr < outend - outptr \ ++ ? 
inend - inptr : outend - outptr); \ + \ +- asm volatile ("0: \n\t" \ +- " troo %0,%1 \n\t" \ +- " jz 1f \n\t" \ +- " jo 0b \n\t" \ +- " llgc %3,0(%1) \n\t" \ +- " la %3,0(%3,%4) \n\t" \ +- " mvc 0(1,%0),0(%3) \n\t" \ +- " aghi %1,1 \n\t" \ +- " aghi %0,1 \n\t" \ +- " aghi %2,-1 \n\t" \ +- " j 0b \n\t" \ +- "1: \n" \ ++ /* Process in 256 byte blocks. */ \ ++ if (__builtin_expect (length >= 256, 0)) \ ++ { \ ++ size_t blocks = length / 256; \ ++ __asm__ __volatile__("0: mvc 0(256,%[R_OUT]),0(%[R_IN])\n\t" \ ++ " tr 0(256,%[R_OUT]),0(%[R_TBL])\n\t" \ ++ " la %[R_IN],256(%[R_IN])\n\t" \ ++ " la %[R_OUT],256(%[R_OUT])\n\t" \ ++ " brctg %[R_LI],0b\n\t" \ ++ : /* outputs */ [R_IN] "+a" (inptr) \ ++ , [R_OUT] "+a" (outptr), [R_LI] "+d" (blocks) \ ++ : /* inputs */ [R_TBL] "a" (TABLE) \ ++ : /* clobber list */ "memory" \ ++ ); \ ++ length = length % 256; \ ++ } \ + \ +- : "+a" (pOutput), "+a" (pInput), "+d" (length), "=&a" (tmp) \ +- : "a" (pTable), "d" (test) \ +- : "cc"); \ ++ /* Process remaining 0...248 bytes in 8byte blocks. */ \ ++ if (length >= 8) \ ++ { \ ++ size_t blocks = length / 8; \ ++ for (int i = 0; i < blocks; i++) \ ++ { \ ++ outptr[0] = TABLE[inptr[0]]; \ ++ outptr[1] = TABLE[inptr[1]]; \ ++ outptr[2] = TABLE[inptr[2]]; \ ++ outptr[3] = TABLE[inptr[3]]; \ ++ outptr[4] = TABLE[inptr[4]]; \ ++ outptr[5] = TABLE[inptr[5]]; \ ++ outptr[6] = TABLE[inptr[6]]; \ ++ outptr[7] = TABLE[inptr[7]]; \ ++ inptr += 8; \ ++ outptr += 8; \ ++ } \ ++ length = length % 8; \ ++ } \ + \ +- inptr = pInput; \ +- outptr = pOutput; \ ++ /* Process remaining 0...7 bytes. 
*/ \ ++ switch (length) \ ++ { \ ++ case 7: outptr[6] = TABLE[inptr[6]]; \ ++ case 6: outptr[5] = TABLE[inptr[5]]; \ ++ case 5: outptr[4] = TABLE[inptr[4]]; \ ++ case 4: outptr[3] = TABLE[inptr[3]]; \ ++ case 3: outptr[2] = TABLE[inptr[2]]; \ ++ case 2: outptr[1] = TABLE[inptr[1]]; \ ++ case 1: outptr[0] = TABLE[inptr[0]]; \ ++ case 0: break; \ ++ } \ ++ inptr += length; \ ++ outptr += length; \ + } + ++ + /* First define the conversion function from ISO 8859-1 to CP037. */ + #define MIN_NEEDED_INPUT MIN_NEEDED_FROM + #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO + #define LOOPFCT FROM_LOOP +-#define BODY TROO_LOOP (table_iso8859_1_to_cp037) ++#define BODY TR_LOOP (table_iso8859_1_to_cp037) + + #include + +@@ -228,7 +247,7 @@ __attribute__ ((aligned (8))) = + #define MIN_NEEDED_INPUT MIN_NEEDED_TO + #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM + #define LOOPFCT TO_LOOP +-#define BODY TROO_LOOP (table_cp037_iso8859_1); ++#define BODY TR_LOOP (table_cp037_iso8859_1); + + #include + +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-6.patch b/SOURCES/glibc-rh1380680-6.patch new file mode 100644 index 0000000..9a9cee4 --- /dev/null +++ b/SOURCES/glibc-rh1380680-6.patch @@ -0,0 +1,320 @@ +From 6806b6f3b4870204737e5d465bab2fdbc1c15de0 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 16:03:46 +0100 +Subject: [PATCH 06/17] Use glibc_likely instead __builtin_expect. + +Upstream commit a1ffb40e32741f992c743e7b16c061fefa3747ac + +This part is a prerequirement for the s390 iconv patches. 
+--- + sysdeps/s390/s390-64/utf16-utf32-z9.c | 10 +++++----- + sysdeps/s390/s390-64/utf8-utf16-z9.c | 26 +++++++++++++------------- + sysdeps/s390/s390-64/utf8-utf32-z9.c | 32 ++++++++++++++++---------------- + 3 files changed, 34 insertions(+), 34 deletions(-) + +diff --git a/sysdeps/s390/s390-64/utf16-utf32-z9.c b/sysdeps/s390/s390-64/utf16-utf32-z9.c +index 9eaa1a5..94a1a33 100644 +--- a/sysdeps/s390/s390-64/utf16-utf32-z9.c ++++ b/sysdeps/s390/s390-64/utf16-utf32-z9.c +@@ -54,7 +54,7 @@ + if (dir == to_utf16) \ + { \ + /* Emit the UTF-16 Byte Order Mark. */ \ +- if (__builtin_expect (outbuf + 2 > outend, 0)) \ ++ if (__glibc_unlikely (outbuf + 2 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ + put16u (outbuf, BOM_UTF16); \ +@@ -63,7 +63,7 @@ + else \ + { \ + /* Emit the UTF-32 Byte Order Mark. */ \ +- if (__builtin_expect (outbuf + 4 > outend, 0)) \ ++ if (__glibc_unlikely (outbuf + 4 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ + put32u (outbuf, BOM_UTF32); \ +@@ -236,13 +236,13 @@ gconv_end (struct __gconv_step *data) + { \ + /* An isolated low-surrogate was found. This has to be \ + considered ill-formed. */ \ +- if (__builtin_expect (u1 >= 0xdc00, 0)) \ ++ if (__glibc_unlikely (u1 >= 0xdc00)) \ + { \ + STANDARD_FROM_LOOP_ERR_HANDLER (2); \ + } \ + /* It's a surrogate character. At least the first word says \ + it is. */ \ +- if (__builtin_expect (inptr + 4 > inend, 0)) \ ++ if (__glibc_unlikely (inptr + 4 > inend)) \ + { \ + /* We don't have enough input for another complete input \ + character. */ \ +@@ -306,7 +306,7 @@ gconv_end (struct __gconv_step *data) + uint16_t out; \ + \ + /* Generate a surrogate character. */ \ +- if (__builtin_expect (outptr + 4 > outend, 0)) \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ + { \ + /* Overflow in the output buffer. 
*/ \ + result = __GCONV_FULL_OUTPUT; \ +diff --git a/sysdeps/s390/s390-64/utf8-utf16-z9.c b/sysdeps/s390/s390-64/utf8-utf16-z9.c +index 9f59177..8e0515c 100644 +--- a/sysdeps/s390/s390-64/utf8-utf16-z9.c ++++ b/sysdeps/s390/s390-64/utf8-utf16-z9.c +@@ -50,7 +50,7 @@ + && data->__invocation_counter == 0) \ + { \ + /* Emit the UTF-16 Byte Order Mark. */ \ +- if (__builtin_expect (outbuf + 2 > outend, 0)) \ ++ if (__glibc_unlikely (outbuf + 2 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ + put16u (outbuf, BOM_UTF16); \ +@@ -197,7 +197,7 @@ gconv_end (struct __gconv_step *data) + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + \ +- if (__builtin_expect (inptr + i == inend, 1)) \ ++ if (__glibc_likely (inptr + i == inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ +@@ -210,7 +210,7 @@ gconv_end (struct __gconv_step *data) + /* Next input byte. */ \ + uint16_t ch = *inptr; \ + \ +- if (__builtin_expect (ch < 0x80, 1)) \ ++ if (__glibc_likely (ch < 0x80)) \ + { \ + /* One byte sequence. */ \ + ++inptr; \ +@@ -228,13 +228,13 @@ gconv_end (struct __gconv_step *data) + cnt = 2; \ + ch &= 0x1f; \ + } \ +- else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ ++ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ +- else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ ++ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ +@@ -255,7 +255,7 @@ gconv_end (struct __gconv_step *data) + STANDARD_FROM_LOOP_ERR_HANDLER (i); \ + } \ + \ +- if (__builtin_expect (inptr + cnt > inend, 0)) \ ++ if (__glibc_unlikely (inptr + cnt > inend)) \ + { \ + /* We don't have enough input. But before we report \ + that check that all the bytes are correct. 
*/ \ +@@ -263,7 +263,7 @@ gconv_end (struct __gconv_step *data) + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + \ +- if (__builtin_expect (inptr + i == inend, 1)) \ ++ if (__glibc_likely (inptr + i == inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ +@@ -278,7 +278,7 @@ gconv_end (struct __gconv_step *data) + low) are needed. */ \ + uint16_t zabcd, high, low; \ + \ +- if (__builtin_expect (outptr + 4 > outend, 0)) \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ +@@ -368,7 +368,7 @@ gconv_end (struct __gconv_step *data) + \ + uint16_t c = get16 (inptr); \ + \ +- if (__builtin_expect (c <= 0x007f, 1)) \ ++ if (__glibc_likely (c <= 0x007f)) \ + { \ + /* Single byte UTF-8 char. */ \ + *outptr = c & 0xff; \ +@@ -378,7 +378,7 @@ gconv_end (struct __gconv_step *data) + { \ + /* Two byte UTF-8 char. */ \ + \ +- if (__builtin_expect (outptr + 2 > outend, 0)) \ ++ if (__glibc_unlikely (outptr + 2 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ +@@ -397,7 +397,7 @@ gconv_end (struct __gconv_step *data) + { \ + /* Three byte UTF-8 char. */ \ + \ +- if (__builtin_expect (outptr + 3 > outend, 0)) \ ++ if (__glibc_unlikely (outptr + 3 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ +@@ -419,14 +419,14 @@ gconv_end (struct __gconv_step *data) + /* Four byte UTF-8 char. */ \ + uint16_t low, uvwxy; \ + \ +- if (__builtin_expect (outptr + 4 > outend, 0)) \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ + { \ + /* Overflow in the output buffer. 
*/ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + inptr += 2; \ +- if (__builtin_expect (inptr + 2 > inend, 0)) \ ++ if (__glibc_unlikely (inptr + 2 > inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ +diff --git a/sysdeps/s390/s390-64/utf8-utf32-z9.c b/sysdeps/s390/s390-64/utf8-utf32-z9.c +index a807980..c657a38 100644 +--- a/sysdeps/s390/s390-64/utf8-utf32-z9.c ++++ b/sysdeps/s390/s390-64/utf8-utf32-z9.c +@@ -52,7 +52,7 @@ + && data->__invocation_counter == 0) \ + { \ + /* Emit the Byte Order Mark. */ \ +- if (__builtin_expect (outbuf + 4 > outend, 0)) \ ++ if (__glibc_unlikely (outbuf + 4 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ + put32u (outbuf, BOM); \ +@@ -201,7 +201,7 @@ gconv_end (struct __gconv_step *data) + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + \ +- if (__builtin_expect (inptr + i == inend, 1)) \ ++ if (__glibc_likely (inptr + i == inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ +@@ -214,7 +214,7 @@ gconv_end (struct __gconv_step *data) + /* Next input byte. */ \ + uint32_t ch = *inptr; \ + \ +- if (__builtin_expect (ch < 0x80, 1)) \ ++ if (__glibc_likely (ch < 0x80)) \ + { \ + /* One byte sequence. */ \ + ++inptr; \ +@@ -232,25 +232,25 @@ gconv_end (struct __gconv_step *data) + cnt = 2; \ + ch &= 0x1f; \ + } \ +- else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ ++ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ +- else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ ++ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ +- else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ ++ else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ + ch &= 0x03; \ + } \ +- else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \ ++ else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \ + { \ + /* We expect six bytes. 
*/ \ + cnt = 6; \ +@@ -271,7 +271,7 @@ gconv_end (struct __gconv_step *data) + STANDARD_FROM_LOOP_ERR_HANDLER (i); \ + } \ + \ +- if (__builtin_expect (inptr + cnt > inend, 0)) \ ++ if (__glibc_unlikely (inptr + cnt > inend)) \ + { \ + /* We don't have enough input. But before we report \ + that check that all the bytes are correct. */ \ +@@ -279,7 +279,7 @@ gconv_end (struct __gconv_step *data) + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + \ +- if (__builtin_expect (inptr + i == inend, 1)) \ ++ if (__glibc_likely (inptr + i == inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ +@@ -338,19 +338,19 @@ gconv_end (struct __gconv_step *data) + cnt = 2; \ + ch &= 0x1f; \ + } \ +- else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ ++ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ +- else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ ++ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ +- else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ ++ else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ +@@ -431,7 +431,7 @@ gconv_end (struct __gconv_step *data) + \ + uint32_t wc = *((const uint32_t *) inptr); \ + \ +- if (__builtin_expect (wc <= 0x7f, 1)) \ ++ if (__glibc_likely (wc <= 0x7f)) \ + { \ + /* Single UTF-8 char. */ \ + *outptr = (uint8_t)wc; \ +@@ -440,7 +440,7 @@ gconv_end (struct __gconv_step *data) + else if (wc <= 0x7ff) \ + { \ + /* Two UTF-8 chars. */ \ +- if (__builtin_expect (outptr + 2 > outend, 0)) \ ++ if (__glibc_unlikely (outptr + 2 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ +@@ -458,7 +458,7 @@ gconv_end (struct __gconv_step *data) + else if (wc <= 0xffff) \ + { \ + /* Three UTF-8 chars. 
*/ \ +- if (__builtin_expect (outptr + 3 > outend, 0)) \ ++ if (__glibc_unlikely (outptr + 3 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ +@@ -478,7 +478,7 @@ gconv_end (struct __gconv_step *data) + else if (wc <= 0x10ffff) \ + { \ + /* Four UTF-8 chars. */ \ +- if (__builtin_expect (outptr + 4 > outend, 0)) \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-7.patch b/SOURCES/glibc-rh1380680-7.patch new file mode 100644 index 0000000..3ea9f2b --- /dev/null +++ b/SOURCES/glibc-rh1380680-7.patch @@ -0,0 +1,63 @@ +From d2cd5e641efe08a4f5f467ec16297a704a391e81 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 16:06:01 +0100 +Subject: [PATCH 07/17] S390: Fix remaining ONE_DIRECTION warning messages + +upstream commit f349489e7e5b5341a9c1a590e9a41c2e07d3bdbb + +This patch fixes the remaining ONE_DIRECTION warnings for s390 specific conversions. +It defines ONE_DIRECTION to 0 like the patch from Steve Ellcey: +https://www.sourceware.org/ml/libc-alpha/2014-05/msg00039.html + + Changelog: + * sysdeps/s390/s390-64/utf16-utf32-z9.c + (ONE_DIRECTION): Define. + * sysdeps/s390/s390-64/utf8-utf16-z9.c + (ONE_DIRECTION): Define. + * sysdeps/s390/s390-64/utf8-utf32-z9.c + (ONE_DIRECTION): Define. 
+--- + sysdeps/s390/s390-64/utf16-utf32-z9.c | 1 + + sysdeps/s390/s390-64/utf8-utf16-z9.c | 1 + + sysdeps/s390/s390-64/utf8-utf32-z9.c | 1 + + 3 files changed, 3 insertions(+) + +diff --git a/sysdeps/s390/s390-64/utf16-utf32-z9.c b/sysdeps/s390/s390-64/utf16-utf32-z9.c +index 94a1a33..ddc42fe 100644 +--- a/sysdeps/s390/s390-64/utf16-utf32-z9.c ++++ b/sysdeps/s390/s390-64/utf16-utf32-z9.c +@@ -44,6 +44,7 @@ + #define FROM_LOOP from_utf16_loop + #define TO_LOOP to_utf16_loop + #define FROM_DIRECTION (dir == from_utf16) ++#define ONE_DIRECTION 0 + #define PREPARE_LOOP \ + enum direction dir = ((struct utf16_data *) step->__data)->dir; \ + int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \ +diff --git a/sysdeps/s390/s390-64/utf8-utf16-z9.c b/sysdeps/s390/s390-64/utf8-utf16-z9.c +index 8e0515c..2c2d92c 100644 +--- a/sysdeps/s390/s390-64/utf8-utf16-z9.c ++++ b/sysdeps/s390/s390-64/utf8-utf16-z9.c +@@ -42,6 +42,7 @@ + #define FROM_LOOP from_utf8_loop + #define TO_LOOP to_utf8_loop + #define FROM_DIRECTION (dir == from_utf8) ++#define ONE_DIRECTION 0 + #define PREPARE_LOOP \ + enum direction dir = ((struct utf8_data *) step->__data)->dir; \ + int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ +diff --git a/sysdeps/s390/s390-64/utf8-utf32-z9.c b/sysdeps/s390/s390-64/utf8-utf32-z9.c +index c657a38..c582155 100644 +--- a/sysdeps/s390/s390-64/utf8-utf32-z9.c ++++ b/sysdeps/s390/s390-64/utf8-utf32-z9.c +@@ -44,6 +44,7 @@ + #define FROM_LOOP from_utf8_loop + #define TO_LOOP to_utf8_loop + #define FROM_DIRECTION (dir == from_utf8) ++#define ONE_DIRECTION 0 + #define PREPARE_LOOP \ + enum direction dir = ((struct utf8_data *) step->__data)->dir; \ + int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-8.patch b/SOURCES/glibc-rh1380680-8.patch new file mode 100644 index 0000000..92d5889 --- /dev/null +++ b/SOURCES/glibc-rh1380680-8.patch @@ -0,0 +1,140 @@ +From 
a6e4fa635fea4576c65747cc4b9e3a1fe9c9911f Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 16:12:24 +0100 +Subject: [PATCH 08/17] S390: Use __asm__ instead of asm. + +upstream commit 31cf39421bae23ffc7b6c6a229e14f8faa41608f + +This part is a prerequirement for the s390 iconv patches. +--- + sysdeps/s390/s390-64/utf16-utf32-z9.c | 28 ++++++++++++++-------------- + sysdeps/s390/s390-64/utf8-utf16-z9.c | 28 ++++++++++++++-------------- + sysdeps/s390/s390-64/utf8-utf32-z9.c | 28 ++++++++++++++-------------- + 3 files changed, 42 insertions(+), 42 deletions(-) + +diff --git a/sysdeps/s390/s390-64/utf16-utf32-z9.c b/sysdeps/s390/s390-64/utf16-utf32-z9.c +index ddc42fe..e6a033d 100644 +--- a/sysdeps/s390/s390-64/utf16-utf32-z9.c ++++ b/sysdeps/s390/s390-64/utf16-utf32-z9.c +@@ -163,22 +163,22 @@ gconv_end (struct __gconv_step *data) + directions. */ + #define HARDWARE_CONVERT(INSTRUCTION) \ + { \ +- register const unsigned char* pInput asm ("8") = inptr; \ +- register unsigned long long inlen asm ("9") = inend - inptr; \ +- register unsigned char* pOutput asm ("10") = outptr; \ +- register unsigned long long outlen asm("11") = outend - outptr; \ ++ register const unsigned char* pInput __asm__ ("8") = inptr; \ ++ register unsigned long long inlen __asm__ ("9") = inend - inptr; \ ++ register unsigned char* pOutput __asm__ ("10") = outptr; \ ++ register unsigned long long outlen __asm__("11") = outend - outptr; \ + uint64_t cc = 0; \ + \ +- asm volatile (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ ++ __asm__ volatile (".machine push \n\t" \ ++ ".machine \"z9-109\" \n\t" \ ++ "0: " INSTRUCTION " \n\t" \ ++ ".machine pop \n\t" \ ++ " jo 0b \n\t" \ ++ " ipm %2 \n" \ ++ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ ++ "+d" (outlen), "+d" (inlen) 
\ ++ : \ ++ : "cc", "memory"); \ + \ + inptr = pInput; \ + outptr = pOutput; \ +diff --git a/sysdeps/s390/s390-64/utf8-utf16-z9.c b/sysdeps/s390/s390-64/utf8-utf16-z9.c +index 2c2d92c..6dad1c2 100644 +--- a/sysdeps/s390/s390-64/utf8-utf16-z9.c ++++ b/sysdeps/s390/s390-64/utf8-utf16-z9.c +@@ -145,22 +145,22 @@ gconv_end (struct __gconv_step *data) + directions. */ + #define HARDWARE_CONVERT(INSTRUCTION) \ + { \ +- register const unsigned char* pInput asm ("8") = inptr; \ +- register unsigned long long inlen asm ("9") = inend - inptr; \ +- register unsigned char* pOutput asm ("10") = outptr; \ +- register unsigned long long outlen asm("11") = outend - outptr; \ ++ register const unsigned char* pInput __asm__ ("8") = inptr; \ ++ register unsigned long long inlen __asm__ ("9") = inend - inptr; \ ++ register unsigned char* pOutput __asm__ ("10") = outptr; \ ++ register unsigned long long outlen __asm__("11") = outend - outptr; \ + uint64_t cc = 0; \ + \ +- asm volatile (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ ++ __asm__ volatile (".machine push \n\t" \ ++ ".machine \"z9-109\" \n\t" \ ++ "0: " INSTRUCTION " \n\t" \ ++ ".machine pop \n\t" \ ++ " jo 0b \n\t" \ ++ " ipm %2 \n" \ ++ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ ++ "+d" (outlen), "+d" (inlen) \ ++ : \ ++ : "cc", "memory"); \ + \ + inptr = pInput; \ + outptr = pOutput; \ +diff --git a/sysdeps/s390/s390-64/utf8-utf32-z9.c b/sysdeps/s390/s390-64/utf8-utf32-z9.c +index c582155..721279e 100644 +--- a/sysdeps/s390/s390-64/utf8-utf32-z9.c ++++ b/sysdeps/s390/s390-64/utf8-utf32-z9.c +@@ -149,22 +149,22 @@ gconv_end (struct __gconv_step *data) + directions. 
*/ + #define HARDWARE_CONVERT(INSTRUCTION) \ + { \ +- register const unsigned char* pInput asm ("8") = inptr; \ +- register unsigned long long inlen asm ("9") = inend - inptr; \ +- register unsigned char* pOutput asm ("10") = outptr; \ +- register unsigned long long outlen asm("11") = outend - outptr; \ ++ register const unsigned char* pInput __asm__ ("8") = inptr; \ ++ register unsigned long long inlen __asm__ ("9") = inend - inptr; \ ++ register unsigned char* pOutput __asm__ ("10") = outptr; \ ++ register unsigned long long outlen __asm__("11") = outend - outptr; \ + uint64_t cc = 0; \ + \ +- asm volatile (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ ++ __asm__ volatile (".machine push \n\t" \ ++ ".machine \"z9-109\" \n\t" \ ++ "0: " INSTRUCTION " \n\t" \ ++ ".machine pop \n\t" \ ++ " jo 0b \n\t" \ ++ " ipm %2 \n" \ ++ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ ++ "+d" (outlen), "+d" (inlen) \ ++ : \ ++ : "cc", "memory"); \ + \ + inptr = pInput; \ + outptr = pOutput; \ +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1380680-9.patch b/SOURCES/glibc-rh1380680-9.patch new file mode 100644 index 0000000..2b2ac9e --- /dev/null +++ b/SOURCES/glibc-rh1380680-9.patch @@ -0,0 +1,840 @@ +From 835c3bf23a119a7fcb8c70734d4fdf49461d8195 Mon Sep 17 00:00:00 2001 +From: Stefan Liebler +Date: Mon, 7 Nov 2016 16:14:07 +0100 +Subject: [PATCH 09/17] S390: Optimize utf8-utf32 module. + +Upstream commit 421c5278d83e72740150259960a431706ac343f9 + +This patch reworks the s390 specific module to convert between utf8 and utf32. +Now ifunc is used to choose either the c or etf3eh (with convert utf +instruction) variants at runtime. +Furthermore a new vector variant for z13 is introduced which will be build +and chosen if vector support is available at build / runtime. 
+The vector variants optimize input of 1byte utf8 characters. The convert utf +instruction is used if a multibyte utf8 character is found. + +This patch also fixes some whitespace errors. The c variants are rejecting +UTF-16 surrogates and values above 0x10ffff now. +Furthermore, the etf3eh variants are handling the "UTF-xx//IGNORE" case now. +Before they ignored the ignore-case and always stopped at an error. + +ChangeLog: + + * sysdeps/s390/s390-64/utf8-utf32-z9.c: Use ifunc to select c, etf3eh + or new vector loop-variant. +--- + sysdeps/s390/s390-64/utf8-utf32-z9.c | 664 +++++++++++++++++++++++++---------- + 1 file changed, 480 insertions(+), 184 deletions(-) + +diff --git a/sysdeps/s390/s390-64/utf8-utf32-z9.c b/sysdeps/s390/s390-64/utf8-utf32-z9.c +index 721279e..1ce5ac5 100644 +--- a/sysdeps/s390/s390-64/utf8-utf32-z9.c ++++ b/sysdeps/s390/s390-64/utf8-utf32-z9.c +@@ -30,35 +30,25 @@ + #include + #include + +-/* UTF-32 big endian byte order mark. */ +-#define BOM 0x0000feffu ++#if defined HAVE_S390_VX_GCC_SUPPORT ++# define ASM_CLOBBER_VR(NR) , NR ++#else ++# define ASM_CLOBBER_VR(NR) ++#endif + ++/* Defines for skeleton.c. */ + #define DEFINE_INIT 0 + #define DEFINE_FINI 0 +-/* These definitions apply to the UTF-8 to UTF-32 direction. The +- software implementation for UTF-8 still supports multibyte +- characters up to 6 bytes whereas the hardware variant does not. */ + #define MIN_NEEDED_FROM 1 + #define MAX_NEEDED_FROM 6 + #define MIN_NEEDED_TO 4 +-#define FROM_LOOP from_utf8_loop +-#define TO_LOOP to_utf8_loop ++#define FROM_LOOP __from_utf8_loop ++#define TO_LOOP __to_utf8_loop + #define FROM_DIRECTION (dir == from_utf8) + #define ONE_DIRECTION 0 +-#define PREPARE_LOOP \ +- enum direction dir = ((struct utf8_data *) step->__data)->dir; \ +- int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ +- \ +- if (emit_bom && !data->__internal_use \ +- && data->__invocation_counter == 0) \ +- { \ +- /* Emit the Byte Order Mark. 
*/ \ +- if (__glibc_unlikely (outbuf + 4 > outend)) \ +- return __GCONV_FULL_OUTPUT; \ +- \ +- put32u (outbuf, BOM); \ +- outbuf += 4; \ +- } ++ ++/* UTF-32 big endian byte order mark. */ ++#define BOM 0x0000feffu + + /* Direction of the transformation. */ + enum direction +@@ -155,16 +145,16 @@ gconv_end (struct __gconv_step *data) + register unsigned long long outlen __asm__("11") = outend - outptr; \ + uint64_t cc = 0; \ + \ +- __asm__ volatile (".machine push \n\t" \ +- ".machine \"z9-109\" \n\t" \ +- "0: " INSTRUCTION " \n\t" \ +- ".machine pop \n\t" \ +- " jo 0b \n\t" \ +- " ipm %2 \n" \ +- : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ +- "+d" (outlen), "+d" (inlen) \ +- : \ +- : "cc", "memory"); \ ++ __asm__ __volatile__ (".machine push \n\t" \ ++ ".machine \"z9-109\" \n\t" \ ++ "0: " INSTRUCTION " \n\t" \ ++ ".machine pop \n\t" \ ++ " jo 0b \n\t" \ ++ " ipm %2 \n" \ ++ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \ ++ "+d" (outlen), "+d" (inlen) \ ++ : \ ++ : "cc", "memory"); \ + \ + inptr = pInput; \ + outptr = pOutput; \ +@@ -173,49 +163,150 @@ gconv_end (struct __gconv_step *data) + if (cc == 1) \ + { \ + result = __GCONV_FULL_OUTPUT; \ +- break; \ + } \ + else if (cc == 2) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ +- break; \ + } \ + } + ++#define PREPARE_LOOP \ ++ enum direction dir = ((struct utf8_data *) step->__data)->dir; \ ++ int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \ ++ \ ++ if (emit_bom && !data->__internal_use \ ++ && data->__invocation_counter == 0) \ ++ { \ ++ /* Emit the Byte Order Mark. */ \ ++ if (__glibc_unlikely (outbuf + 4 > outend)) \ ++ return __GCONV_FULL_OUTPUT; \ ++ \ ++ put32u (outbuf, BOM); \ ++ outbuf += 4; \ ++ } ++ + /* Conversion function from UTF-8 to UTF-32 internal/BE. */ + +-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +-#define LOOPFCT FROM_LOOP +-/* The software routine is copied from gconv_simple.c. 
*/ +-#define BODY \ ++#define STORE_REST_COMMON \ ++ { \ ++ /* We store the remaining bytes while converting them into the UCS4 \ ++ format. We can assume that the first byte in the buffer is \ ++ correct and that it requires a larger number of bytes than there \ ++ are in the input buffer. */ \ ++ wint_t ch = **inptrp; \ ++ size_t cnt, r; \ ++ \ ++ state->__count = inend - *inptrp; \ ++ \ ++ assert (ch != 0xc0 && ch != 0xc1); \ ++ if (ch >= 0xc2 && ch < 0xe0) \ ++ { \ ++ /* We expect two bytes. The first byte cannot be 0xc0 or \ ++ 0xc1, otherwise the wide character could have been \ ++ represented using a single byte. */ \ ++ cnt = 2; \ ++ ch &= 0x1f; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ ++ { \ ++ /* We expect three bytes. */ \ ++ cnt = 3; \ ++ ch &= 0x0f; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ ++ { \ ++ /* We expect four bytes. */ \ ++ cnt = 4; \ ++ ch &= 0x07; \ ++ } \ ++ else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ ++ { \ ++ /* We expect five bytes. */ \ ++ cnt = 5; \ ++ ch &= 0x03; \ ++ } \ ++ else \ ++ { \ ++ /* We expect six bytes. */ \ ++ cnt = 6; \ ++ ch &= 0x01; \ ++ } \ ++ \ ++ /* The first byte is already consumed. */ \ ++ r = cnt - 1; \ ++ while (++(*inptrp) < inend) \ ++ { \ ++ ch <<= 6; \ ++ ch |= **inptrp & 0x3f; \ ++ --r; \ ++ } \ ++ \ ++ /* Shift for the so far missing bytes. */ \ ++ ch <<= r * 6; \ ++ \ ++ /* Store the number of bytes expected for the entire sequence. */ \ ++ state->__count |= cnt << 8; \ ++ \ ++ /* Store the value. 
*/ \ ++ state->__value.__wch = ch; \ ++ } ++ ++#define UNPACK_BYTES_COMMON \ ++ { \ ++ static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \ ++ wint_t wch = state->__value.__wch; \ ++ size_t ntotal = state->__count >> 8; \ ++ \ ++ inlen = state->__count & 255; \ ++ \ ++ bytebuf[0] = inmask[ntotal - 2]; \ ++ \ ++ do \ ++ { \ ++ if (--ntotal < inlen) \ ++ bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ ++ wch >>= 6; \ ++ } \ ++ while (ntotal > 1); \ ++ \ ++ bytebuf[0] |= wch; \ ++ } ++ ++#define CLEAR_STATE_COMMON \ ++ state->__count = 0 ++ ++#define BODY_FROM_HW(ASM) \ + { \ +- if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \ +- { \ +- HARDWARE_CONVERT ("cu14 %0, %1, 1"); \ ++ ASM; \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ + \ +- if (inptr != inend) \ +- { \ +- int i; \ +- for (i = 1; inptr + i < inend; ++i) \ +- if ((inptr[i] & 0xc0) != 0x80) \ +- break; \ ++ int i; \ ++ for (i = 1; inptr + i < inend && i < 5; ++i) \ ++ if ((inptr[i] & 0xc0) != 0x80) \ ++ break; \ + \ +- if (__glibc_likely (inptr + i == inend)) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- STANDARD_FROM_LOOP_ERR_HANDLER (i); \ +- } \ +- continue; \ ++ if (__glibc_likely (inptr + i == inend \ ++ && result == __GCONV_EMPTY_INPUT)) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ + } \ +- \ ++ STANDARD_FROM_LOOP_ERR_HANDLER (i); \ ++ } ++ ++/* This hardware routine uses the Convert UTF8 to UTF32 (cu14) instruction. */ ++#define BODY_FROM_ETF3EH BODY_FROM_HW (HARDWARE_CONVERT ("cu14 %0, %1, 1")) ++ ++ ++/* The software routine is copied from gconv_simple.c. */ ++#define BODY_FROM_C \ ++ { \ + /* Next input byte. */ \ + uint32_t ch = *inptr; \ + \ +- if (__glibc_likely (ch < 0x80)) \ ++ if (__glibc_likely (ch < 0x80)) \ + { \ + /* One byte sequence. 
*/ \ + ++inptr; \ +@@ -233,30 +324,18 @@ gconv_end (struct __gconv_step *data) + cnt = 2; \ + ch &= 0x1f; \ + } \ +- else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ ++ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ +- else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ ++ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ +- else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ +- { \ +- /* We expect five bytes. */ \ +- cnt = 5; \ +- ch &= 0x03; \ +- } \ +- else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \ +- { \ +- /* We expect six bytes. */ \ +- cnt = 6; \ +- ch &= 0x01; \ +- } \ + else \ + { \ + /* Search the end of this ill-formed UTF-8 character. This \ +@@ -272,7 +351,7 @@ gconv_end (struct __gconv_step *data) + STANDARD_FROM_LOOP_ERR_HANDLER (i); \ + } \ + \ +- if (__glibc_unlikely (inptr + cnt > inend)) \ ++ if (__glibc_unlikely (inptr + cnt > inend)) \ + { \ + /* We don't have enough input. But before we report \ + that check that all the bytes are correct. */ \ +@@ -280,7 +359,7 @@ gconv_end (struct __gconv_step *data) + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + \ +- if (__glibc_likely (inptr + i == inend)) \ ++ if (__glibc_likely (inptr + i == inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ +@@ -305,7 +384,10 @@ gconv_end (struct __gconv_step *data) + /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ + If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ + have been represented with fewer than cnt bytes. */ \ +- if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \ ++ if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ ++ /* Do not accept UTF-16 surrogates. */ \ ++ || (ch >= 0xd800 && ch <= 0xdfff) \ ++ || (ch > 0x10ffff)) \ + { \ + /* This is an illegal encoding. 
*/ \ + goto errout; \ +@@ -318,137 +400,212 @@ gconv_end (struct __gconv_step *data) + *((uint32_t *) outptr) = ch; \ + outptr += sizeof (uint32_t); \ + } +-#define LOOP_NEED_FLAGS + +-#define STORE_REST \ +- { \ +- /* We store the remaining bytes while converting them into the UCS4 \ +- format. We can assume that the first byte in the buffer is \ +- correct and that it requires a larger number of bytes than there \ +- are in the input buffer. */ \ +- wint_t ch = **inptrp; \ +- size_t cnt, r; \ +- \ +- state->__count = inend - *inptrp; \ +- \ +- if (ch >= 0xc2 && ch < 0xe0) \ +- { \ +- /* We expect two bytes. The first byte cannot be 0xc0 or \ +- 0xc1, otherwise the wide character could have been \ +- represented using a single byte. */ \ +- cnt = 2; \ +- ch &= 0x1f; \ +- } \ +- else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ +- { \ +- /* We expect three bytes. */ \ +- cnt = 3; \ +- ch &= 0x0f; \ +- } \ +- else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ +- { \ +- /* We expect four bytes. */ \ +- cnt = 4; \ +- ch &= 0x07; \ +- } \ +- else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ +- { \ +- /* We expect five bytes. */ \ +- cnt = 5; \ +- ch &= 0x03; \ +- } \ +- else \ +- { \ +- /* We expect six bytes. */ \ +- cnt = 6; \ +- ch &= 0x01; \ +- } \ +- \ +- /* The first byte is already consumed. */ \ +- r = cnt - 1; \ +- while (++(*inptrp) < inend) \ +- { \ +- ch <<= 6; \ +- ch |= **inptrp & 0x3f; \ +- --r; \ +- } \ +- \ +- /* Shift for the so far missing bytes. */ \ +- ch <<= r * 6; \ +- \ +- /* Store the number of bytes expected for the entire sequence. */ \ +- state->__count |= cnt << 8; \ +- \ +- /* Store the value. 
*/ \ +- state->__value.__wch = ch; \ ++#define HW_FROM_VX \ ++ { \ ++ register const unsigned char* pInput asm ("8") = inptr; \ ++ register size_t inlen asm ("9") = inend - inptr; \ ++ register unsigned char* pOutput asm ("10") = outptr; \ ++ register size_t outlen asm("11") = outend - outptr; \ ++ unsigned long tmp, tmp2, tmp3; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \ ++ " vrepib %%v31,0x20\n\t" \ ++ /* Loop which handles UTF-8 chars <=0x7f. */ \ ++ "0: clgijl %[R_INLEN],16,20f\n\t" \ ++ " clgijl %[R_OUTLEN],64,20f\n\t" \ ++ "1: vl %%v16,0(%[R_IN])\n\t" \ ++ " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ ++ " jno 10f\n\t" /* Jump away if not all bytes are 1byte \ ++ UTF8 chars. */ \ ++ /* Enlarge to UCS4. */ \ ++ " vuplhb %%v18,%%v16\n\t" \ ++ " vupllb %%v19,%%v16\n\t" \ ++ " la %[R_IN],16(%[R_IN])\n\t" \ ++ " vuplhh %%v20,%%v18\n\t" \ ++ " aghi %[R_INLEN],-16\n\t" \ ++ " vupllh %%v21,%%v18\n\t" \ ++ " aghi %[R_OUTLEN],-64\n\t" \ ++ " vuplhh %%v22,%%v19\n\t" \ ++ " vupllh %%v23,%%v19\n\t" \ ++ /* Store 64 bytes to buf_out. */ \ ++ " vstm %%v20,%%v23,0(%[R_OUT])\n\t" \ ++ " la %[R_OUT],64(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],16,20f\n\t" \ ++ " clgijl %[R_OUTLEN],64,20f\n\t" \ ++ " j 1b\n\t" \ ++ "10: \n\t" \ ++ /* At least one byte is > 0x7f. \ ++ Store the preceding 1-byte chars. */ \ ++ " vlgvb %[R_TMP],%%v17,7\n\t" \ ++ " sllk %[R_TMP2],%[R_TMP],2\n\t" /* Compute highest \ ++ index to store. 
*/ \ ++ " llgfr %[R_TMP3],%[R_TMP2]\n\t" \ ++ " ahi %[R_TMP2],-1\n\t" \ ++ " jl 20f\n\t" \ ++ " vuplhb %%v18,%%v16\n\t" \ ++ " vuplhh %%v20,%%v18\n\t" \ ++ " vstl %%v20,%[R_TMP2],0(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllh %%v21,%%v18\n\t" \ ++ " vstl %%v21,%[R_TMP2],16(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllb %%v19,%%v16\n\t" \ ++ " vuplhh %%v22,%%v19\n\t" \ ++ " vstl %%v22,%[R_TMP2],32(%[R_OUT])\n\t" \ ++ " ahi %[R_TMP2],-16\n\t" \ ++ " jl 11f\n\t" \ ++ " vupllh %%v23,%%v19\n\t" \ ++ " vstl %%v23,%[R_TMP2],48(%[R_OUT])\n\t" \ ++ "11: \n\t" \ ++ /* Update pointers. */ \ ++ " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_TMP]\n\t" \ ++ " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \ ++ " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \ ++ /* Handle multibyte utf8-char with convert instruction. */ \ ++ "20: cu14 %[R_OUT],%[R_IN],1\n\t" \ ++ " jo 0b\n\t" /* Try vector implementation again. */ \ ++ " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ ++ " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. 
*/ \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (pInput) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \ ++ , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30") \ ++ ASM_CLOBBER_VR ("v31") \ ++ ); \ ++ inptr = pInput; \ ++ outptr = pOutput; \ + } ++#define BODY_FROM_VX BODY_FROM_HW (HW_FROM_VX) + +-#define UNPACK_BYTES \ +- { \ +- static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \ +- wint_t wch = state->__value.__wch; \ +- size_t ntotal = state->__count >> 8; \ +- \ +- inlen = state->__count & 255; \ +- \ +- bytebuf[0] = inmask[ntotal - 2]; \ +- \ +- do \ +- { \ +- if (--ntotal < inlen) \ +- bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ +- wch >>= 6; \ +- } \ +- while (ntotal > 1); \ +- \ +- bytebuf[0] |= wch; \ +- } ++/* These definitions apply to the UTF-8 to UTF-32 direction. The ++ software implementation for UTF-8 still supports multibyte ++ characters up to 6 bytes whereas the hardware variant does not. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define LOOPFCT __from_utf8_loop_c + +-#define CLEAR_STATE \ +- state->__count = 0 ++#define LOOP_NEED_FLAGS + ++#define STORE_REST STORE_REST_COMMON ++#define UNPACK_BYTES UNPACK_BYTES_COMMON ++#define CLEAR_STATE CLEAR_STATE_COMMON ++#define BODY BODY_FROM_C + #include + ++ ++/* Generate loop-function with hardware utf-convert instruction. 
*/ ++#define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++#define LOOPFCT __from_utf8_loop_etf3eh ++ ++#define LOOP_NEED_FLAGS ++ ++#define STORE_REST STORE_REST_COMMON ++#define UNPACK_BYTES UNPACK_BYTES_COMMON ++#define CLEAR_STATE CLEAR_STATE_COMMON ++#define BODY BODY_FROM_ETF3EH ++#include ++ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++/* Generate loop-function with hardware vector instructions. */ ++# define MIN_NEEDED_INPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_INPUT MAX_NEEDED_FROM ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO ++# define LOOPFCT __from_utf8_loop_vx ++ ++# define LOOP_NEED_FLAGS ++ ++# define STORE_REST STORE_REST_COMMON ++# define UNPACK_BYTES UNPACK_BYTES_COMMON ++# define CLEAR_STATE CLEAR_STATE_COMMON ++# define BODY BODY_FROM_VX ++# include ++#endif ++ ++ ++/* Generate ifunc'ed loop function. */ ++__typeof(__from_utf8_loop_c) ++__attribute__ ((ifunc ("__from_utf8_loop_resolver"))) ++__from_utf8_loop; ++ ++static void * ++__from_utf8_loop_resolver (unsigned long int dl_hwcap) ++{ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __from_utf8_loop_vx; ++ else ++#endif ++ if (dl_hwcap & HWCAP_S390_ETF3EH) ++ return __from_utf8_loop_etf3eh; ++ else ++ return __from_utf8_loop_c; ++} ++ ++strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single) ++ ++ + /* Conversion from UTF-32 internal/BE to UTF-8. */ ++#define BODY_TO_HW(ASM) \ ++ { \ ++ ASM; \ ++ if (__glibc_likely (inptr == inend) \ ++ || result == __GCONV_FULL_OUTPUT) \ ++ break; \ ++ if (inptr + 4 > inend) \ ++ { \ ++ result = __GCONV_INCOMPLETE_INPUT; \ ++ break; \ ++ } \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } ++ ++/* The hardware routine uses the S/390 cu41 instruction. */ ++#define BODY_TO_ETF3EH BODY_TO_HW (HARDWARE_CONVERT ("cu41 %0, %1")) ++ ++/* The hardware routine uses the S/390 vector and cu41 instructions. 
*/ ++#define BODY_TO_VX BODY_TO_HW (HW_TO_VX) + +-#define MIN_NEEDED_INPUT MIN_NEEDED_TO +-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +-#define LOOPFCT TO_LOOP + /* The software routine mimics the S/390 cu41 instruction. */ +-#define BODY \ ++#define BODY_TO_C \ + { \ +- if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \ +- { \ +- HARDWARE_CONVERT ("cu41 %0, %1"); \ +- \ +- if (inptr != inend) \ +- { \ +- result = __GCONV_INCOMPLETE_INPUT; \ +- break; \ +- } \ +- continue; \ +- } \ +- \ + uint32_t wc = *((const uint32_t *) inptr); \ + \ +- if (__glibc_likely (wc <= 0x7f)) \ ++ if (__glibc_likely (wc <= 0x7f)) \ + { \ +- /* Single UTF-8 char. */ \ +- *outptr = (uint8_t)wc; \ ++ /* Single UTF-8 char. */ \ ++ *outptr = (uint8_t)wc; \ + outptr++; \ + } \ + else if (wc <= 0x7ff) \ + { \ +- /* Two UTF-8 chars. */ \ +- if (__glibc_unlikely (outptr + 2 > outend)) \ ++ /* Two UTF-8 chars. */ \ ++ if (__glibc_unlikely (outptr + 2 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ +- outptr[0] = 0xc0; \ ++ outptr[0] = 0xc0; \ + outptr[0] |= wc >> 6; \ + \ + outptr[1] = 0x80; \ +@@ -459,12 +616,18 @@ gconv_end (struct __gconv_step *data) + else if (wc <= 0xffff) \ + { \ + /* Three UTF-8 chars. */ \ +- if (__glibc_unlikely (outptr + 3 > outend)) \ ++ if (__glibc_unlikely (outptr + 3 > outend)) \ + { \ + /* Overflow in the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ ++ if (wc >= 0xd800 && wc < 0xdc00) \ ++ { \ ++ /* Do not accept UTF-16 surrogates. */ \ ++ result = __GCONV_ILLEGAL_INPUT; \ ++ STANDARD_TO_LOOP_ERR_HANDLER (4); \ ++ } \ + outptr[0] = 0xe0; \ + outptr[0] |= wc >> 12; \ + \ +@@ -479,7 +642,7 @@ gconv_end (struct __gconv_step *data) + else if (wc <= 0x10ffff) \ + { \ + /* Four UTF-8 chars. */ \ +- if (__glibc_unlikely (outptr + 4 > outend)) \ ++ if (__glibc_unlikely (outptr + 4 > outend)) \ + { \ + /* Overflow in the output buffer. 
*/ \ + result = __GCONV_FULL_OUTPUT; \ +@@ -505,7 +668,140 @@ gconv_end (struct __gconv_step *data) + } \ + inptr += 4; \ + } ++ ++#define HW_TO_VX \ ++ { \ ++ register const unsigned char* pInput asm ("8") = inptr; \ ++ register size_t inlen asm ("9") = inend - inptr; \ ++ register unsigned char* pOutput asm ("10") = outptr; \ ++ register size_t outlen asm("11") = outend - outptr; \ ++ unsigned long tmp, tmp2; \ ++ asm volatile (".machine push\n\t" \ ++ ".machine \"z13\"\n\t" \ ++ ".machinemode \"zarch_nohighgprs\"\n\t" \ ++ " vleif %%v20,127,0\n\t" /* element 0: 127 */ \ ++ " vzero %%v21\n\t" \ ++ " vleih %%v21,8192,0\n\t" /* element 0: > */ \ ++ " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \ ++ /* Loop which handles UTF-32 chars <=0x7f. */ \ ++ "0: clgijl %[R_INLEN],64,20f\n\t" \ ++ " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ "1: vlm %%v16,%%v19,0(%[R_IN])\n\t" \ ++ " lghi %[R_TMP],0\n\t" \ ++ /* Shorten to byte values. */ \ ++ " vpkf %%v23,%%v16,%%v17\n\t" \ ++ " vpkf %%v24,%%v18,%%v19\n\t" \ ++ " vpkh %%v23,%%v23,%%v24\n\t" \ ++ /* Checking for values > 0x7f. */ \ ++ " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \ ++ " jno 10f\n\t" \ ++ " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \ ++ " jno 11f\n\t" \ ++ " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \ ++ " jno 12f\n\t" \ ++ " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \ ++ " jno 13f\n\t" \ ++ /* Store 16bytes to outptr. */ \ ++ " vst %%v23,0(%[R_OUT])\n\t" \ ++ " aghi %[R_INLEN],-64\n\t" \ ++ " aghi %[R_OUTLEN],-16\n\t" \ ++ " la %[R_IN],64(%[R_IN])\n\t" \ ++ " la %[R_OUT],16(%[R_OUT])\n\t" \ ++ " clgijl %[R_INLEN],64,20f\n\t" \ ++ " clgijl %[R_OUTLEN],16,20f\n\t" \ ++ " j 1b\n\t" \ ++ /* Found a value > 0x7f. */ \ ++ "13: ahi %[R_TMP],4\n\t" \ ++ "12: ahi %[R_TMP],4\n\t" \ ++ "11: ahi %[R_TMP],4\n\t" \ ++ "10: vlgvb %[R_I],%%v22,7\n\t" \ ++ " srlg %[R_I],%[R_I],2\n\t" \ ++ " agr %[R_I],%[R_TMP]\n\t" \ ++ " je 20f\n\t" \ ++ /* Store characters before invalid one... 
*/ \ ++ " slgr %[R_OUTLEN],%[R_I]\n\t" \ ++ "15: aghi %[R_I],-1\n\t" \ ++ " vstl %%v23,%[R_I],0(%[R_OUT])\n\t" \ ++ /* ... and update pointers. */ \ ++ " aghi %[R_I],1\n\t" \ ++ " la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ ++ " sllg %[R_I],%[R_I],2\n\t" \ ++ " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ ++ " slgr %[R_INLEN],%[R_I]\n\t" \ ++ /* Handle multibyte utf8-char with convert instruction. */ \ ++ "20: cu41 %[R_OUT],%[R_IN]\n\t" \ ++ " jo 0b\n\t" /* Try vector implementation again. */ \ ++ " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \ ++ " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \ ++ ".machine pop" \ ++ : /* outputs */ [R_IN] "+a" (pInput) \ ++ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \ ++ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=d" (tmp) \ ++ , [R_I] "=a" (tmp2) \ ++ , [R_RES] "+d" (result) \ ++ : /* inputs */ \ ++ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \ ++ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \ ++ : /* clobber list */ "memory", "cc" \ ++ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ ++ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ ++ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ ++ ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ ++ ASM_CLOBBER_VR ("v24") \ ++ ); \ ++ inptr = pInput; \ ++ outptr = pOutput; \ ++ } ++ ++/* Generate loop-function with software routine. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#define LOOPFCT __to_utf8_loop_c ++#define BODY BODY_TO_C ++#define LOOP_NEED_FLAGS ++#include ++ ++/* Generate loop-function with hardware utf-convert instruction. */ ++#define MIN_NEEDED_INPUT MIN_NEEDED_TO ++#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++#define LOOPFCT __to_utf8_loop_etf3eh + #define LOOP_NEED_FLAGS ++#define BODY BODY_TO_ETF3EH + #include + ++#if defined HAVE_S390_VX_ASM_SUPPORT ++/* Generate loop-function with hardware vector and utf-convert instructions. 
*/ ++# define MIN_NEEDED_INPUT MIN_NEEDED_TO ++# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM ++# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM ++# define LOOPFCT __to_utf8_loop_vx ++# define BODY BODY_TO_VX ++# define LOOP_NEED_FLAGS ++# include ++#endif ++ ++/* Generate ifunc'ed loop function. */ ++__typeof(__to_utf8_loop_c) ++__attribute__ ((ifunc ("__to_utf8_loop_resolver"))) ++__to_utf8_loop; ++ ++static void * ++__to_utf8_loop_resolver (unsigned long int dl_hwcap) ++{ ++#if defined HAVE_S390_VX_ASM_SUPPORT ++ if (dl_hwcap & HWCAP_S390_VX) ++ return __to_utf8_loop_vx; ++ else ++#endif ++ if (dl_hwcap & HWCAP_S390_ETF3EH) ++ return __to_utf8_loop_etf3eh; ++ else ++ return __to_utf8_loop_c; ++} ++ ++strong_alias (__to_utf8_loop_c_single, __to_utf8_loop_single) ++ ++ + #include +-- +1.8.3.1 + diff --git a/SOURCES/glibc-rh1383951.patch b/SOURCES/glibc-rh1383951.patch new file mode 100644 index 0000000..0a323ff --- /dev/null +++ b/SOURCES/glibc-rh1383951.patch @@ -0,0 +1,71 @@ +From a014cecd82b71b70a6a843e250e06b541ad524f7 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Thu, 15 Oct 2015 09:23:07 +0200 +Subject: [PATCH] Always enable pointer guard [BZ #18928] + +Honoring the LD_POINTER_GUARD environment variable in AT_SECURE mode +has security implications. This commit enables pointer guard +unconditionally, and the environment variable is now ignored. + + [BZ #18928] + * sysdeps/generic/ldsodefs.h (struct rtld_global_ro): Remove + _dl_pointer_guard member. + * elf/rtld.c (_rtld_global_ro): Remove _dl_pointer_guard + initializer. + (security_init): Always set up pointer guard. + (process_envvars): Do not process LD_POINTER_GUARD. 
+ +diff -rup a/elf/rtld.c b/elf/rtld.c +--- a/elf/rtld.c 2017-03-06 14:38:55.000000000 -0500 ++++ b/elf/rtld.c 2017-03-06 14:41:19.502556336 -0500 +@@ -160,7 +160,6 @@ struct rtld_global_ro _rtld_global_ro at + ._dl_hwcap_mask = HWCAP_IMPORTANT, + ._dl_lazy = 1, + ._dl_fpu_control = _FPU_DEFAULT, +- ._dl_pointer_guard = 1, + ._dl_pagesize = EXEC_PAGESIZE, + ._dl_inhibit_cache = 0, + +@@ -844,15 +843,12 @@ security_init (void) + #endif + + /* Set up the pointer guard as well, if necessary. */ +- if (GLRO(dl_pointer_guard)) +- { +- uintptr_t pointer_chk_guard = _dl_setup_pointer_guard (_dl_random, +- stack_chk_guard); ++ uintptr_t pointer_chk_guard ++ = _dl_setup_pointer_guard (_dl_random, stack_chk_guard); + #ifdef THREAD_SET_POINTER_GUARD +- THREAD_SET_POINTER_GUARD (pointer_chk_guard); ++ THREAD_SET_POINTER_GUARD (pointer_chk_guard); + #endif +- __pointer_chk_guard_local = pointer_chk_guard; +- } ++ __pointer_chk_guard_local = pointer_chk_guard; + + /* We do not need the _dl_random value anymore. The less + information we leave behind, the better, so clear the +@@ -2599,9 +2595,6 @@ process_envvars (enum mode *modep) + GLRO(dl_use_load_bias) = envline[14] == '1' ? -1 : 0; + break; + } +- +- if (memcmp (envline, "POINTER_GUARD", 13) == 0) +- GLRO(dl_pointer_guard) = envline[14] != '0'; + break; + + case 14: +diff -rup a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +--- a/sysdeps/generic/ldsodefs.h 2017-03-06 14:38:57.000000000 -0500 ++++ b/sysdeps/generic/ldsodefs.h 2017-03-06 14:41:19.506556361 -0500 +@@ -588,9 +588,6 @@ struct rtld_global_ro + /* List of auditing interfaces. */ + struct audit_ifaces *_dl_audit; + unsigned int _dl_naudit; +- +- /* 0 if internal pointer values should not be guarded, 1 if they should. 
*/ +- EXTERN int _dl_pointer_guard; + }; + # define __rtld_global_attribute__ + # if IS_IN (rtld) diff --git a/SOURCES/glibc-rh1385003.patch b/SOURCES/glibc-rh1385003.patch new file mode 100644 index 0000000..4eba78c --- /dev/null +++ b/SOURCES/glibc-rh1385003.patch @@ -0,0 +1,41 @@ +commit 26011b5cfa6a1a8d8005d65f11d97498444a4e95 +Author: Stefan Liebler +Date: Mon Mar 24 16:46:51 2014 +0100 + + S390: Define SIZE_MAX as unsigned long (BZ #16712). + +--- glibc-2.17-c758a686/sysdeps/generic/stdint.h ++++ glibc-2.17-c758a686/sysdeps/generic/stdint.h +@@ -264,7 +264,11 @@ + # if __WORDSIZE == 64 + # define SIZE_MAX (18446744073709551615UL) + # else +-# define SIZE_MAX (4294967295U) ++# ifdef __WORDSIZE32_SIZE_ULONG ++# define SIZE_MAX (4294967295UL) ++# else ++# define SIZE_MAX (4294967295U) ++# endif + # endif + + /* Limits of `wchar_t'. */ +--- glibc-2.17-c758a686/sysdeps/s390/s390-32/bits/wordsize.h ++++ glibc-2.17-c758a686/sysdeps/s390/s390-32/bits/wordsize.h +@@ -4,6 +4,7 @@ + # define __WORDSIZE 64 + #else + # define __WORDSIZE 32 ++# define __WORDSIZE32_SIZE_ULONG 1 + #endif + + #if !defined __NO_LONG_DOUBLE_MATH && !defined __LONG_DOUBLE_MATH_OPTIONAL +--- glibc-2.17-c758a686/sysdeps/s390/s390-64/bits/wordsize.h ++++ glibc-2.17-c758a686/sysdeps/s390/s390-64/bits/wordsize.h +@@ -4,6 +4,7 @@ + # define __WORDSIZE 64 + #else + # define __WORDSIZE 32 ++# define __WORDSIZE32_SIZE_ULONG 1 + #endif + + #if !defined __NO_LONG_DOUBLE_MATH && !defined __LONG_DOUBLE_MATH_OPTIONAL diff --git a/SOURCES/glibc-rh1385004-1.patch b/SOURCES/glibc-rh1385004-1.patch new file mode 100644 index 0000000..dad246d --- /dev/null +++ b/SOURCES/glibc-rh1385004-1.patch @@ -0,0 +1,711 @@ +From 5db5f0071917cf14b454596f847fc7ff6e63e317 Mon Sep 17 00:00:00 2001 +From: Rajalakshmi Srinivasaraghavan +Date: Wed, 24 Jun 2015 02:08:21 -0400 +Subject: [PATCH] powerpc: strstr optimization + +This patch optimizes strstr function for power >= 7 systems. 
Performance +gain is obtained using aligned memory access and usage of cmpb +instruction for quicker comparison. The average improvement of this +optimization is ~40%. Tested on ppc64 and ppc64le. + +2015-07-16 Rajalakshmi Srinivasaraghavan + + * sysdeps/powerpc/powerpc64/multiarch/Makefile: Add strstr(). + * sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c: Likewise. + * sysdeps/powerpc/powerpc64/power7/strstr.S: New File. + * sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S: New File. + * sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c: New File. + * sysdeps/powerpc/powerpc64/multiarch/strstr.c: New File. + +(cherry picked from commit b42f8cad52ebfbfd43ebf6e42e606b489ffbd466) +--- + ChangeLog | 10 + + sysdeps/powerpc/powerpc64/multiarch/Makefile | 2 +- + .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 9 + + .../powerpc/powerpc64/multiarch/strstr-power7.S | 44 ++ + sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c | 29 ++ + sysdeps/powerpc/powerpc64/multiarch/strstr.c | 34 ++ + sysdeps/powerpc/powerpc64/power7/strstr.S | 509 +++++++++++++++++++++ + 7 files changed, 636 insertions(+), 1 deletion(-) + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strstr.c + create mode 100644 sysdeps/powerpc/powerpc64/power7/strstr.S + +diff --git a/ChangeLog b/ChangeLog +index 1aeb84b..8e98192 100644 + +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 17265bd..3b0e3a0 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -19,6 +19,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \ + strncpy-power8 strncpy-power7 strncpy-ppc64 \ + strncat-power7 \ ++ strstr-power7 strstr-ppc64 \ + rawmemchr-ppc64 strlen-power7 strlen-ppc64 
strnlen-power7 \ + strnlen-ppc64 strcasecmp-power7 strcasecmp_l-power7 \ + strncase-power7 strncase_l-power7 \ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index f5fdea5..364385b 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -322,6 +322,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strcat, 1, + __strcat_ppc)) + ++ /* Support sysdeps/powerpc/powerpc64/multiarch/strstr.c. */ ++ IFUNC_IMPL (i, name, strstr, ++ IFUNC_IMPL_ADD (array, i, strstr, ++ hwcap & PPC_FEATURE_HAS_VSX, ++ __strstr_power7) ++ IFUNC_IMPL_ADD (array, i, strstr, 1, ++ __strstr_ppc)) ++ + /* Support sysdeps/powerpc/powerpc64/multiarch/wcschr.c. */ + IFUNC_IMPL (i, name, wcschr, + IFUNC_IMPL_ADD (array, i, wcschr, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S +new file mode 100644 +index 0000000..94ce95b +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S +@@ -0,0 +1,44 @@ ++/* Optimized strstr implementation for POWER7. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#undef EALIGN ++#define EALIGN(name, alignt, words) \ ++ .section ".text"; \ ++ ENTRY_2(__strstr_power7) \ ++ .align ALIGNARG(alignt); \ ++ EALIGN_W_##words; \ ++ BODY_LABEL(__strstr_power7): \ ++ cfi_startproc; \ ++ LOCALENTRY(__strstr_power7) ++ ++#undef END ++#define END(name) \ ++ cfi_endproc; \ ++ TRACEBACK(__strstr_power7) \ ++ END_2(__strstr_power7) ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#define STRLEN __strlen_power7 ++#define STRNLEN __strnlen_power7 ++#define STRCHR __strchr_power7 ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c +new file mode 100644 +index 0000000..7fa2ace +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c +@@ -0,0 +1,29 @@ ++/* Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#define STRSTR __strstr_ppc ++#if IS_IN (libc) && defined(SHARED) ++# undef libc_hidden_builtin_def ++# define libc_hidden_builtin_def(name) \ ++ __hidden_ver1(__strstr_ppc, __GI_strstr, __strstr_ppc); ++#endif ++ ++extern __typeof (strstr) __strstr_ppc attribute_hidden; ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strstr.c b/sysdeps/powerpc/powerpc64/multiarch/strstr.c +new file mode 100644 +index 0000000..2be7646 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strstr.c +@@ -0,0 +1,34 @@ ++/* Multiple versions of strstr. PowerPC64 version. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for definition in libc. */ ++#if IS_IN (libc) ++# include ++# include ++# include "init-arch.h" ++ ++extern __typeof (strstr) __strstr_ppc attribute_hidden; ++extern __typeof (strstr) __strstr_power7 attribute_hidden; ++ ++/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ++ ifunc symbol properly. */ ++libc_ifunc (strstr, ++ (hwcap & PPC_FEATURE_HAS_VSX) ++ ? 
__strstr_power7 ++ : __strstr_ppc); ++#endif +diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S +new file mode 100644 +index 0000000..8dca31c +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power7/strstr.S +@@ -0,0 +1,509 @@ ++/* Optimized strstr implementation for PowerPC64/POWER7. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++/* Char * [r3] strstr (char *s [r3], char * pat[r4]) */ ++ ++/* The performance gain is obtained using aligned memory access, load ++ * doubleword and usage of cmpb instruction for quicker comparison. */ ++ ++#ifndef STRLEN ++/* For builds with no IFUNC support, local calls should be made to internal ++ GLIBC symbol (created by libc_hidden_builtin_def). */ ++# ifdef SHARED ++# define STRLEN __GI_strlen ++# else ++# define STRLEN strlen ++# endif ++#endif ++ ++#ifndef STRNLEN ++/* For builds with no IFUNC support, local calls should be made to internal ++ GLIBC symbol (created by libc_hidden_builtin_def). 
*/ ++# ifdef SHARED ++# define STRNLEN __GI_strnlen ++# else ++# define STRNLEN strnlen ++# endif ++#endif ++ ++#ifndef STRCHR ++# ifdef SHARED ++# define STRCHR __GI_strchr ++# else ++# define STRCHR strchr ++# endif ++#endif ++ ++#define FRAMESIZE (FRAME_MIN_SIZE+32) ++ .machine power7 ++EALIGN (strstr, 4, 0) ++ CALL_MCOUNT 2 ++ mflr r0 /* Load link register LR to r0. */ ++ std r31, -8(r1) /* Save callers register r31. */ ++ cfi_offset(r31, -8) ++ std r30, -16(r1) /* Save callers register r30. */ ++ cfi_offset(r30, -16) ++ std r29, -24(r1) /* Save callers register r29. */ ++ cfi_offset(r29, -24) ++ std r0, 16(r1) /* Store the link register. */ ++ cfi_offset(lr, 16) ++ stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ ++ cfi_adjust_cfa_offset(FRAMESIZE) ++ ++ dcbt 0, r3 ++ dcbt 0, r4 ++ ++ cmpdi cr7, r3, 0 ++ beq cr7, L(retnull) ++ cmpdi cr7, r4, 0 ++ beq cr7, L(retnull) ++ ++ mr r29, r3 ++ mr r30, r4 ++ mr r3, r4 ++ bl STRLEN ++ nop ++ ++ cmpdi cr7, r3, 0 /* If search str is null. */ ++ beq cr7, L(ret_r3) ++ ++ /* Call __strstr_ppc if needle len > 2048 */ ++ cmpdi cr7, r3, 2048 ++ bgt cr7, L(default) ++ ++ mr r31, r3 ++ mr r4, r3 ++ mr r3, r29 ++ bl STRNLEN ++ nop ++ ++ cmpd cr7, r3, r31 /* If len(r3) < len(r4). */ ++ blt cr7, L(retnull) ++ mr r3, r29 ++ lbz r4, 0(r30) ++ bl STRCHR ++ nop ++ ++ mr r11, r3 ++ /* If first char of search str is not present. */ ++ cmpdi cr7, r3, 0 ++ ble cr7, L(end) ++ ++ rldicl r8, r3, 0, 52 /* Page cross check. */ ++ cmpldi cr7, r8, 4096-16 ++ bgt cr7, L(bytebybyte) ++ ++ rldicl r8, r30, 0, 52 ++ cmpldi cr7, r8, 4096-16 ++ bgt cr7, L(bytebybyte) ++ ++ /* If len(r4) < 8 handle in a different way. */ ++ /* Shift position based on null and use cmpb. */ ++ cmpdi cr7, r31, 8 ++ blt cr7, L(lessthan8) ++ ++ /* Len(r4) >= 8 reaches here. */ ++ mr r8, r3 /* Save r3 for future use. */ ++ mr r4, r30 /* Restore r4. */ ++ li r0, 0 ++ rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ ++ clrrdi r4, r4, 3 /* Make r4 aligned to 8. 
*/ ++ ld r6, 0(r4) ++ addi r4, r4, 8 ++ cmpdi cr7, r10, 0 /* Check if its already aligned? */ ++ beq cr7, L(begin1) ++#ifdef __LITTLE_ENDIAN__ ++ srd r6, r6, r10 /* Discard unwanted bits. */ ++#else ++ sld r6, r6, r10 ++#endif ++ ld r9, 0(r4) ++ subfic r10, r10, 64 ++#ifdef __LITTLE_ENDIAN__ ++ sld r9, r9, r10 /* Discard unwanted bits. */ ++#else ++ srd r9, r9, r10 ++#endif ++ or r6, r6, r9 /* Form complete search str. */ ++L(begin1): ++ mr r29, r6 ++ rlwinm r10, r3, 3, 26, 28 ++ clrrdi r3, r3, 3 ++ ld r5, 0(r3) ++ cmpb r9, r0, r6 /* Check if input has null. */ ++ cmpdi cr7, r9, 0 ++ bne cr7, L(return3) ++ cmpb r9, r0, r5 /* Check if input has null. */ ++#ifdef __LITTLE_ENDIAN__ ++ srd r9, r9, r10 ++#else ++ sld r9, r9, r10 ++#endif ++ cmpdi cr7, r9, 0 ++ bne cr7, L(retnull) ++ ++ li r12, -8 /* Shift values. */ ++ li r11, 72 /* Shift values. */ ++ cmpdi cr7, r10, 0 ++ beq cr7, L(nextbyte1) ++ mr r12, r10 ++ addi r12, r12, -8 ++ subfic r11, r12, 64 ++ ++L(nextbyte1): ++ ldu r7, 8(r3) /* Load next dw. */ ++ addi r12, r12, 8 /* Shift one byte and compare. */ ++ addi r11, r11, -8 ++#ifdef __LITTLE_ENDIAN__ ++ srd r9, r5, r12 /* Rotate based on mask. */ ++ sld r10, r7, r11 ++#else ++ sld r9, r5, r12 ++ srd r10, r7, r11 ++#endif ++ /* Form single dw from few bytes on first load and second load. */ ++ or r10, r9, r10 ++ /* Check for null in the formed dw. */ ++ cmpb r9, r0, r10 ++ cmpdi cr7, r9, 0 ++ bne cr7, L(retnull) ++ /* Cmpb search str and input str. */ ++ cmpb r9, r10, r6 ++ cmpdi cr7, r9, -1 ++ beq cr7, L(match) ++ addi r8, r8, 1 ++ b L(begin) ++ ++ .align 4 ++L(match): ++ /* There is a match of 8 bytes, check next bytes. */ ++ cmpdi cr7, r31, 8 ++ beq cr7, L(return) ++ /* Update next starting point r8. */ ++ srdi r9, r11, 3 ++ subf r9, r9, r3 ++ mr r8, r9 ++ ++L(secondmatch): ++ mr r5, r7 ++ rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ ++ ld r6, 0(r4) ++ addi r4, r4, 8 ++ cmpdi cr7, r10, 0 /* Check if its already aligned? 
*/ ++ beq cr7, L(proceed3) ++#ifdef __LITTLE_ENDIAN__ ++ srd r6, r6, r10 /* Discard unwanted bits. */ ++ cmpb r9, r0, r6 ++ sld r9, r9, r10 ++#else ++ sld r6, r6, r10 ++ cmpb r9, r0, r6 ++ srd r9, r9, r10 ++#endif ++ cmpdi cr7, r9, 0 ++ bne cr7, L(proceed3) ++ ld r9, 0(r4) ++ subfic r10, r10, 64 ++#ifdef __LITTLE_ENDIAN__ ++ sld r9, r9, r10 /* Discard unwanted bits. */ ++#else ++ srd r9, r9, r10 ++#endif ++ or r6, r6, r9 /* Form complete search str. */ ++ ++L(proceed3): ++ li r7, 0 ++ addi r3, r3, 8 ++ cmpb r9, r0, r5 ++ cmpdi cr7, r9, 0 ++ bne cr7, L(proceed4) ++ ld r7, 0(r3) ++L(proceed4): ++#ifdef __LITTLE_ENDIAN__ ++ srd r9, r5, r12 ++ sld r10, r7, r11 ++#else ++ sld r9, r5, r12 ++ srd r10, r7, r11 ++#endif ++ /* Form single dw with few bytes from first and second load. */ ++ or r10, r9, r10 ++ cmpb r9, r0, r6 ++ cmpdi cr7, r9, 0 ++ bne cr7, L(return4) ++ /* Check for null in the formed dw. */ ++ cmpb r9, r0, r10 ++ cmpdi cr7, r9, 0 ++ bne cr7, L(retnull) ++ /* If the next 8 bytes dont match, start search again. */ ++ cmpb r9, r10, r6 ++ cmpdi cr7, r9, -1 ++ bne cr7, L(reset) ++ /* If the next 8 bytes match, load and compare next 8. */ ++ b L(secondmatch) ++ ++ .align 4 ++L(reset): ++ /* Start the search again. */ ++ addi r8, r8, 1 ++ b L(begin) ++ ++ .align 4 ++L(return3): ++ /* Count leading zeros and compare partial dw. */ ++#ifdef __LITTLE_ENDIAN__ ++ addi r7, r9, -1 ++ andc r7, r7, r9 ++ popcntd r7, r7 ++ subfic r7, r7, 64 ++ sld r10, r5, r7 ++ sld r6, r6, r7 ++#else ++ cntlzd r7, r9 ++ subfic r7, r7, 64 ++ srd r10, r5, r7 ++ srd r6, r6, r7 ++#endif ++ cmpb r9, r10, r6 ++ cmpdi cr7, r9, -1 ++ addi r8, r8, 1 ++ /* Start search again if there is no match. */ ++ bne cr7, L(begin) ++ /* If the words match, update return values. */ ++ subfic r7, r7, 64 ++ srdi r7, r7, 3 ++ add r3, r3, r7 ++ subf r3, r31, r3 ++ b L(end) ++ ++ .align 4 ++L(return4): ++ /* Count leading zeros and compare partial dw. 
*/ ++#ifdef __LITTLE_ENDIAN__ ++ addi r7, r9, -1 ++ andc r7, r7, r9 ++ popcntd r7, r7 ++ subfic r7, r7, 64 ++ sld r10, r10, r7 ++ sld r6, r6, r7 ++#else ++ cntlzd r7, r9 ++ subfic r7, r7, 64 ++ srd r10, r10, r7 ++ srd r6, r6, r7 ++#endif ++ cmpb r9, r10, r6 ++ cmpdi cr7, r9, -1 ++ addi r8, r8, 1 ++ bne cr7, L(begin) ++ subfic r7, r7, 64 ++ srdi r11, r11, 3 ++ subf r3, r11, r3 ++ srdi r7, r7, 3 ++ add r3, r3, r7 ++ subf r3, r31, r3 ++ b L(end) ++ ++ .align 4 ++L(begin): ++ mr r3, r8 ++ lbz r4, 0(r30) ++ bl STRCHR ++ nop ++ /* If first char of search str is not present. */ ++ cmpdi cr7, r3, 0 ++ ble cr7, L(end) ++ mr r8, r3 ++ mr r4, r30 /* Restore r4. */ ++ li r0, 0 ++ mr r6, r29 ++ clrrdi r4, r4, 3 ++ addi r4, r4, 8 ++ b L(begin1) ++ ++ /* Handle less than 8 search string. */ ++ .align 4 ++L(lessthan8): ++ mr r4, r3 ++ mr r9, r30 ++ li r0, 0 ++ ++ rlwinm r10, r9, 3, 26, 28 /* Calculate padding in bits. */ ++ srdi r8, r10, 3 /* Padding in bytes. */ ++ clrrdi r9, r9, 3 /* Make r4 aligned to 8. */ ++ ld r6, 0(r9) ++ cmpdi cr7, r10, 0 /* Check if its already aligned? */ ++ beq cr7, L(proceed2) ++#ifdef __LITTLE_ENDIAN__ ++ srd r6, r6, r10 /* Discard unwanted bits. */ ++#else ++ sld r6, r6, r10 ++#endif ++ subfic r8, r8, 8 ++ cmpd cr7, r8, r31 /* Next load needed? */ ++ bge cr7, L(proceed2) ++ ld r7, 8(r9) ++ subfic r10, r10, 64 ++#ifdef __LITTLE_ENDIAN__ ++ sld r7, r7, r10 /* Discard unwanted bits. */ ++#else ++ srd r7, r7, r10 ++#endif ++ or r6, r6, r7 /* Form complete search str. */ ++L(proceed2): ++ mr r29, r6 ++ rlwinm r10, r3, 3, 26, 28 ++ clrrdi r7, r3, 3 /* Make r3 aligned. 
*/ ++ ld r5, 0(r7) ++ sldi r8, r31, 3 ++ subfic r8, r8, 64 ++#ifdef __LITTLE_ENDIAN__ ++ sld r6, r6, r8 ++ cmpb r9, r0, r5 ++ srd r9, r9, r10 ++#else ++ srd r6, r6, r8 ++ cmpb r9, r0, r5 ++ sld r9, r9, r10 ++#endif ++ cmpdi cr7, r9, 0 ++ bne cr7, L(noload) ++ cmpdi cr7, r10, 0 ++ beq cr7, L(continue) ++ ld r7, 8(r7) ++L(continue1): ++ mr r12, r10 ++ addi r12, r12, -8 ++ subfic r11, r12, 64 ++ b L(nextbyte) ++ ++ .align 4 ++L(continue): ++ ld r7, 8(r7) ++ li r12, -8 /* Shift values. */ ++ li r11, 72 /* Shift values. */ ++L(nextbyte): ++ addi r12, r12, 8 /* Mask for rotation. */ ++ addi r11, r11, -8 ++#ifdef __LITTLE_ENDIAN__ ++ srd r9, r5, r12 ++ sld r10, r7, r11 ++ or r10, r9, r10 ++ sld r10, r10, r8 ++ cmpb r9, r0, r10 ++ srd r9, r9, r8 ++#else ++ sld r9, r5, r12 ++ srd r10, r7, r11 ++ or r10, r9, r10 ++ srd r10, r10, r8 ++ cmpb r9, r0, r10 ++ sld r9, r9, r8 ++#endif ++ cmpdi cr7, r9, 0 ++ bne cr7, L(retnull) ++ cmpb r9, r10, r6 ++ cmpdi cr7, r9, -1 ++ beq cr7, L(end) ++ addi r3, r4, 1 ++ lbz r4, 0(r30) ++ bl STRCHR ++ nop ++ /* If first char of search str is not present. */ ++ cmpdi cr7, r3, 0 ++ ble cr7, L(end) ++ mr r4, r3 ++ mr r6, r29 ++ li r0, 0 ++ b L(proceed2) ++ ++ .align 4 ++L(noload): ++ /* Reached null in r3, so skip next load. */ ++ li r7, 0 ++ b L(continue1) ++ ++ .align 4 ++L(return): ++ /* Update return values. */ ++ srdi r9, r11, 3 ++ subf r3, r9, r3 ++ b L(end) ++ ++ /* Handling byte by byte. */ ++ .align 4 ++L(bytebybyte): ++ mr r8, r3 ++ addi r8, r8, -1 ++L(loop1): ++ addi r8, r8, 1 ++ mr r3, r8 ++ mr r4, r30 ++ lbz r6, 0(r4) ++ cmpdi cr7, r6, 0 ++ beq cr7, L(updater3) ++L(loop): ++ lbz r5, 0(r3) ++ cmpdi cr7, r5, 0 ++ beq cr7, L(retnull) ++ cmpld cr7, r6, r5 ++ bne cr7, L(loop1) ++ addi r3, r3, 1 ++ addi r4, r4, 1 ++ lbz r6, 0(r4) ++ cmpdi cr7, r6, 0 ++ beq cr7, L(updater3) ++ b L(loop) ++ ++ /* Handling return values. */ ++ .align 4 ++L(updater3): ++ subf r3, r31, r3 /* Reduce len of r4 from r3. 
*/ ++ b L(end) ++ ++ .align 4 ++L(ret_r3): ++ mr r3, r29 /* Return r3. */ ++ b L(end) ++ ++ .align 4 ++L(retnull): ++ li r3, 0 /* Return NULL. */ ++ b L(end) ++ ++ .align 4 ++L(default): ++ mr r3, r29 ++ mr r4, r30 ++ bl __strstr_ppc ++ nop ++ ++ .align 4 ++L(end): ++ addi r1, r1, FRAMESIZE /* Restore stack pointer. */ ++ cfi_adjust_cfa_offset(-FRAMESIZE) ++ ld r0, 16(r1) /* Restore the saved link register. */ ++ ld r29, -24(r1) /* Restore callers save register r29. */ ++ ld r30, -16(r1) /* Restore callers save register r30. */ ++ ld r31, -8(r1) /* Restore callers save register r31. */ ++ mtlr r0 /* Branch to link register. */ ++ blr ++END (strstr) ++libc_hidden_builtin_def (strstr) +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-10.patch b/SOURCES/glibc-rh1385004-10.patch new file mode 100644 index 0000000..9dd1614 --- /dev/null +++ b/SOURCES/glibc-rh1385004-10.patch @@ -0,0 +1,34 @@ +From cecfeab756960639684ce7b95c39d8f1a80b71cf Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Fri, 16 Sep 2016 17:31:58 -0300 +Subject: [PATCH] powerpc: Fix POWER9 implies + +Fix multiarch build for POWER9 by correcting the order of the +directories listed at sysnames configure variable. 
+ +(cherry picked from commit 1850ce5a2ea3b908b26165e7e951cd4334129f07) +--- + ChangeLog | 8 ++++++++ + sysdeps/powerpc/powerpc32/power9/multiarch/Implies | 2 +- + sysdeps/powerpc/powerpc64/power9/fpu/Implies | 1 - + 3 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index 565da33..496ef12 100644 +diff --git a/sysdeps/powerpc/powerpc32/power9/multiarch/Implies b/sysdeps/powerpc/powerpc32/power9/multiarch/Implies +index 4393b56..1a46ef0 100644 +--- a/sysdeps/powerpc/powerpc32/power9/multiarch/Implies ++++ b/sysdeps/powerpc/powerpc32/power9/multiarch/Implies +@@ -1 +1 @@ +-powerpc/powerpc32/power8/fpu/multiarch ++powerpc/powerpc32/power8/multiarch +diff --git a/sysdeps/powerpc/powerpc64/power9/fpu/Implies b/sysdeps/powerpc/powerpc64/power9/fpu/Implies +index fad2505..ae0dbaf 100644 +--- a/sysdeps/powerpc/powerpc64/power9/fpu/Implies ++++ b/sysdeps/powerpc/powerpc64/power9/fpu/Implies +@@ -1,2 +1 @@ + powerpc/powerpc64/power8/fpu +-powerpc/powerpc64/power8 +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-11.patch b/SOURCES/glibc-rh1385004-11.patch new file mode 100644 index 0000000..ca50829 --- /dev/null +++ b/SOURCES/glibc-rh1385004-11.patch @@ -0,0 +1,364 @@ +From 1cf3bb5ec15f28245a6840b5b0443685c828a467 Mon Sep 17 00:00:00 2001 +From: "Paul E. Murphy" +Date: Mon, 14 Mar 2016 17:40:46 -0400 +Subject: [PATCH] powerpc: Add optimized P8 strspn + +This utilizes vectors and bitmasks. For small needle, large +haystack, the performance improvement is upto 8x. For short +strings (0-4B), the cost of computing the bitmask dominates, +and is a tad slower. 
+ +(cherry picked from commit 25dba0ad054723196fb633ba5d8a463ef5cb775c) +--- + ChangeLog | 15 ++ + sysdeps/powerpc/powerpc64/multiarch/Makefile | 3 +- + .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 8 + + .../powerpc/powerpc64/multiarch/strspn-power8.S | 40 +++++ + sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c | 25 +++ + sysdeps/powerpc/powerpc64/multiarch/strspn.c | 35 ++++ + sysdeps/powerpc/powerpc64/power8/strspn.S | 179 +++++++++++++++++++++ + 7 files changed, 304 insertions(+), 1 deletion(-) + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strspn.c + create mode 100644 sysdeps/powerpc/powerpc64/power8/strspn.S + +diff --git a/ChangeLog b/ChangeLog +index 496ef12..f030b68 100644 +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 3b0e3a0..7ed56bf 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -19,6 +19,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + strncpy-power8 strncpy-power7 strncpy-ppc64 \ + strncat-power7 \ + strstr-power7 strstr-ppc64 \ ++ strspn-power8 strspn-ppc64 \ + rawmemchr-ppc64 strlen-power7 strlen-ppc64 strnlen-power7 \ + strnlen-ppc64 strcasecmp-power7 strcasecmp_l-power7 \ + strncase-power7 strncase_l-power7 \ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index 364385b..f6c70ba 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -322,6 +322,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strcat, 1, + __strcat_ppc)) + ++ /* Support sysdeps/powerpc/powerpc64/multiarch/strspn.c. 
*/ ++ IFUNC_IMPL (i, name, strspn, ++ IFUNC_IMPL_ADD (array, i, strspn, ++ hwcap2 & PPC_FEATURE2_ARCH_2_07, ++ __strspn_power8) ++ IFUNC_IMPL_ADD (array, i, strspn, 1, ++ __strspn_ppc)) ++ + /* Support sysdeps/powerpc/powerpc64/multiarch/strstr.c. */ + IFUNC_IMPL (i, name, strstr, + IFUNC_IMPL_ADD (array, i, strstr, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S +new file mode 100644 +index 0000000..86a4e09 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S +@@ -0,0 +1,40 @@ ++/* Optimized strspn implementation for POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#undef EALIGN ++#define EALIGN(name, alignt, words) \ ++ .section ".text"; \ ++ ENTRY_2(__strspn_power8) \ ++ .align ALIGNARG(alignt); \ ++ EALIGN_W_##words; \ ++ BODY_LABEL(__strspn_power8): \ ++ cfi_startproc; \ ++ LOCALENTRY(__strspn_power8) ++ ++#undef END ++#define END(name) \ ++ cfi_endproc; \ ++ TRACEBACK(__strspn_power8) \ ++ END_2(__strspn_power8) ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c +new file mode 100644 +index 0000000..4c63665 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c +@@ -0,0 +1,25 @@ ++/* Default strspn implementation for PowerPC64. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define STRSPN __strspn_ppc ++#ifdef SHARED ++#undef libc_hidden_def ++#define libc_hidden_def(name) ++#endif ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn.c b/sysdeps/powerpc/powerpc64/multiarch/strspn.c +new file mode 100644 +index 0000000..0e653f3 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strspn.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strspn. PowerPC64 version. 
++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++# include ++# include ++# include "init-arch.h" ++ ++#undef strspn ++extern __typeof (strspn) __libc_strspn; ++ ++extern __typeof (strspn) __strspn_ppc attribute_hidden; ++extern __typeof (strspn) __strspn_power8 attribute_hidden; ++ ++libc_ifunc (__libc_strspn, ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __strspn_power8 ++ : __strspn_ppc); ++ ++weak_alias (__libc_strspn, strspn) ++libc_hidden_builtin_def (strspn) +diff --git a/sysdeps/powerpc/powerpc64/power8/strspn.S b/sysdeps/powerpc/powerpc64/power8/strspn.S +new file mode 100644 +index 0000000..0dda437 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/strspn.S +@@ -0,0 +1,179 @@ ++/* Optimized strspn implementation for Power8. ++ ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* size_t [r3] strspn (const char *string [r3], ++ const char *needleAccept [r4]) */ ++ ++/* This takes a novel approach by computing a 256 bit mask whereby ++ each set bit implies the byte is "accepted". P8 vector hardware ++ has extremely efficient hardware for selecting bits from a mask. ++ ++ One might ask "why not use bpermd for short strings"? It is ++ so slow that its performance about matches the generic PPC64 ++ variant without any fancy masking, with the added expense of ++ making the mask. That was the first variant of this. */ ++ ++ ++ ++#include "sysdep.h" ++ ++/* Simple macro to use VSX instructions in overlapping VR's. */ ++#define XXVR(insn, vrt, vra, vrb) \ ++ insn 32+vrt, 32+vra, 32+vrb ++ ++/* ISA 2.07B instructions are not all defined for older binutils. ++ Macros are defined below for these newer instructions in order ++ to maintain compatibility. */ ++ ++/* Note, TX/SX is always set as VMX regs are the high 32 VSX regs. */ ++#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) ++#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) ++ ++#define VBPERMQ(t,a,b) .long (0x1000054c \ ++ | ((t)<<(32-11)) \ ++ | ((a)<<(32-16)) \ ++ | ((b)<<(32-21)) ) ++ ++ /* This can be updated to power8 once the minimum version of ++ binutils supports power8 and the above instructions. */ ++ .machine power7 ++EALIGN(strspn, 4, 0) ++ CALL_MCOUNT 2 ++ ++ /* Generate useful constants for later on. */ ++ vspltisb v1, 7 ++ vspltisb v2, -1 ++ vslb v1, v1, v1 /* 0x80 to swap high bit for vbpermq. 
*/ ++ vspltisb v10, 0 ++ vsldoi v4, v10, v2, 2 /* 0xFFFF into vr4. */ ++ XXVR(xxmrgld, v4, v4, v10) /* Mask for checking matches. */ ++ ++ /* Prepare to compute 256b mask. */ ++ addi r4, r4, -1 ++ li r5, 0 ++ li r6, 0 ++ li r7, 0 ++ li r8, 0 ++ li r11, 1 ++ sldi r11, r11, 63 ++ ++ /* Start interleaved Mask computation. ++ This will eventually or 1's into ignored bits from vbpermq. */ ++ lvsr v11, 0, r3 ++ vspltb v11, v11, 0 /* Splat shift constant. */ ++ ++ /* Build a 256b mask in r5-r8. */ ++ .align 4 ++L(next_needle): ++ lbzu r9, 1(r4) ++ ++ cmpldi cr0, r9, 0 ++ cmpldi cr1, r9, 128 ++ ++ /* This is a little tricky. srd only uses the first 7 bits, ++ and if bit 7 is set, value is always 0. So, we can ++ effectively shift 128b in this case. */ ++ xori r12, r9, 0x40 /* Invert bit 6. */ ++ srd r10, r11, r9 /* Mask for bits 0-63. */ ++ srd r12, r11, r12 /* Mask for bits 64-127. */ ++ ++ beq cr0, L(start_cmp) ++ ++ /* Now, or the value into the correct GPR. */ ++ bge cr1,L(needle_gt128) ++ or r5, r5, r10 /* 0 - 63. */ ++ or r6, r6, r12 /* 64 - 127. */ ++ b L(next_needle) ++ ++ .align 4 ++L(needle_gt128): ++ or r7, r7, r10 /* 128 - 191. */ ++ or r8, r8, r12 /* 192 - 255. */ ++ b L(next_needle) ++ ++ ++ .align 4 ++L(start_cmp): ++ /* Move and merge bitmap into 2 VRs. bpermd is slower on P8. */ ++ mr r0, r3 /* Save r3 for final length computation. */ ++ MTVRD (v5, r5) ++ MTVRD (v6, r6) ++ MTVRD (v7, r7) ++ MTVRD (v8, r8) ++ ++ /* Continue interleaved mask generation. */ ++#ifdef __LITTLE_ENDIAN__ ++ vsrw v11, v2, v11 /* Note, shift ignores higher order bits. */ ++ vsplth v11, v11, 0 /* Only care about the high 16 bits of v10. */ ++#else ++ vslw v11, v2, v11 /* Note, shift ignores higher order bits. */ ++ vsplth v11, v11, 1 /* Only care about the low 16 bits of v10. */ ++#endif ++ lvx v0, 0, r3 /* Note, unaligned load ignores lower bits. */ ++ ++ /* Do the merging of the bitmask. */ ++ XXVR(xxmrghd, v5, v5, v6) ++ XXVR(xxmrghd, v6, v7, v8) ++ ++ /* Finish mask generation. 
*/ ++ vand v11, v11, v4 /* Throwaway bits not in the mask. */ ++ ++ /* Compare the first 1-16B, while masking unwanted bytes. */ ++ clrrdi r3, r3, 4 /* Note, counts from qw boundaries. */ ++ vxor v9, v0, v1 /* Swap high bit. */ ++ VBPERMQ (v8, v5, v0) ++ VBPERMQ (v7, v6, v9) ++ vor v7, v7, v8 ++ vor v7, v7, v11 /* Ignore non-participating bytes. */ ++ vcmpequh. v8, v7, v4 ++ bnl cr6, L(done) ++ ++ addi r3, r3, 16 ++ ++ .align 4 ++L(vec): ++ lvx v0, 0, r3 ++ addi r3, r3, 16 ++ vxor v9, v0, v1 /* Swap high bit. */ ++ VBPERMQ (v8, v5, v0) ++ VBPERMQ (v7, v6, v9) ++ vor v7, v7, v8 ++ vcmpequh. v8, v7, v4 ++ blt cr6, L(vec) ++ ++ addi r3, r3, -16 ++L(done): ++ subf r3, r0, r3 ++ MFVRD (r10, v7) ++ ++#ifdef __LITTLE_ENDIAN__ ++ addi r0, r10, 1 /* Count the trailing 1's. */ ++ andc r10, r10, r0 ++ popcntd r10, r10 ++#else ++ xori r10, r10, 0xffff /* Count leading 1's by inverting. */ ++ addi r3, r3, -48 /* Account for the extra leading zeros. */ ++ cntlzd r10, r10 ++#endif ++ ++ add r3, r3, r10 ++ blr ++ ++END(strspn) ++libc_hidden_builtin_def (strspn) +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-12.patch b/SOURCES/glibc-rh1385004-12.patch new file mode 100644 index 0000000..35cb194 --- /dev/null +++ b/SOURCES/glibc-rh1385004-12.patch @@ -0,0 +1,421 @@ +From 0d3555b9b4d5cefe116c32bfa38ac70f1d6c25cb Mon Sep 17 00:00:00 2001 +From: Carlos Eduardo Seo +Date: Wed, 11 Nov 2015 17:31:28 -0200 +Subject: [PATCH] powerpc: Optimization for strlen for POWER8. + +This implementation takes advantage of vectorization to improve performance of +the loop over the current strlen implementation for POWER7. 
+ +(cherry picked from commit 1b045ee53e0b8bed75745b931b33f27d21c9ed22) +--- + ChangeLog | 13 + + sysdeps/powerpc/powerpc64/multiarch/Makefile | 2 +- + .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 2 + + .../powerpc/powerpc64/multiarch/strlen-power8.S | 39 +++ + sysdeps/powerpc/powerpc64/multiarch/strlen.c | 9 +- + sysdeps/powerpc/powerpc64/power8/strlen.S | 297 +++++++++++++++++++++ + 6 files changed, 358 insertions(+), 4 deletions(-) + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S + create mode 100644 sysdeps/powerpc/powerpc64/power8/strlen.S + +diff --git a/ChangeLog b/ChangeLog +index f030b68..e7ea58a 100644 +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 7ed56bf..57abe8f 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -20,7 +20,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + strncpy-power8 strncpy-power7 strncpy-ppc64 \ + strncat-power7 \ + strstr-power7 strstr-ppc64 \ +- strspn-power8 strspn-ppc64 \ ++ strspn-power8 strspn-ppc64 strlen-power8 \ + rawmemchr-ppc64 strlen-power7 strlen-ppc64 strnlen-power7 \ + strnlen-ppc64 strcasecmp-power7 strcasecmp_l-power7 \ + strncase-power7 strncase_l-power7 \ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index f6c70ba..583885c 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -101,6 +101,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/strlen.c. 
*/ + IFUNC_IMPL (i, name, strlen, ++ IFUNC_IMPL_ADD (array, i, strlen, hwcap2 & PPC_FEATURE2_ARCH_2_07, ++ __strlen_power8) + IFUNC_IMPL_ADD (array, i, strlen, hwcap & PPC_FEATURE_HAS_VSX, + __strlen_power7) + IFUNC_IMPL_ADD (array, i, strlen, 1, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S +new file mode 100644 +index 0000000..686dc3d +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S +@@ -0,0 +1,39 @@ ++/* Optimized strlen implementation for POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#undef EALIGN ++#define EALIGN(name, alignt, words) \ ++ .section ".text"; \ ++ ENTRY_2(__strlen_power8) \ ++ .align ALIGNARG(alignt); \ ++ EALIGN_W_##words; \ ++ BODY_LABEL(__strlen_power8): \ ++ cfi_startproc; \ ++ LOCALENTRY(__strlen_power8) ++#undef END ++#define END(name) \ ++ cfi_endproc; \ ++ TRACEBACK(__strlen_power8) \ ++ END_2(__strlen_power8) ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strlen.c b/sysdeps/powerpc/powerpc64/multiarch/strlen.c +index 79a53d9..4b400a5 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strlen.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/strlen.c +@@ -29,11 +29,14 @@ extern __typeof (__redirect_strlen) __libc_strlen; + + extern __typeof (__redirect_strlen) __strlen_ppc attribute_hidden; + extern __typeof (__redirect_strlen) __strlen_power7 attribute_hidden; ++extern __typeof (__redirect_strlen) __strlen_power8 attribute_hidden; + + libc_ifunc (__libc_strlen, +- (hwcap & PPC_FEATURE_HAS_VSX) +- ? __strlen_power7 +- : __strlen_ppc); ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __strlen_power8 : ++ (hwcap & PPC_FEATURE_HAS_VSX) ++ ? __strlen_power7 ++ : __strlen_ppc); + + #undef strlen + strong_alias (__libc_strlen, strlen) +diff --git a/sysdeps/powerpc/powerpc64/power8/strlen.S b/sysdeps/powerpc/powerpc64/power8/strlen.S +new file mode 100644 +index 0000000..0142747 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/strlen.S +@@ -0,0 +1,297 @@ ++/* Optimized strlen implementation for PowerPC64/POWER8 using a vectorized ++ loop. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++/* TODO: change these to the actual instructions when the minimum required ++ binutils allows it. */ ++#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) ++#define VBPERMQ(t,a,b) .long (0x1000054c \ ++ | ((t)<<(32-11)) \ ++ | ((a)<<(32-16)) \ ++ | ((b)<<(32-21)) ) ++ ++/* int [r3] strlen (char *s [r3]) */ ++ ++/* TODO: change this to .machine power8 when the minimum required binutils ++ allows it. */ ++ .machine power7 ++EALIGN (strlen, 4, 0) ++ CALL_MCOUNT 1 ++ dcbt 0,r3 ++ clrrdi r4,r3,3 /* Align the address to doubleword boundary. */ ++ rlwinm r6,r3,3,26,28 /* Calculate padding. */ ++ li r0,0 /* Doubleword with null chars to use ++ with cmpb. */ ++ li r5,-1 /* MASK = 0xffffffffffffffff. */ ++ ld r12,0(r4) /* Load doubleword from memory. */ ++#ifdef __LITTLE_ENDIAN__ ++ sld r5,r5,r6 ++#else ++ srd r5,r5,r6 /* MASK = MASK >> padding. */ ++#endif ++ orc r9,r12,r5 /* Mask bits that are not part of the string. */ ++ cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ ++ cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ ++ bne cr7,L(done) ++ ++ /* For shorter strings (< 64 bytes), we will not use vector registers, ++ as the overhead isn't worth it. So, let's use GPRs instead. This ++ will be done the same way as we do in the POWER7 implementation. ++ Let's see if we are aligned to a quadword boundary. If so, we can ++ jump to the first (non-vectorized) loop. Otherwise, we have to ++ handle the next DWORD first. 
*/ ++ mtcrf 0x01,r4 ++ mr r9,r4 ++ addi r9,r9,8 ++ bt 28,L(align64) ++ ++ /* Handle the next 8 bytes so we are aligned to a quadword ++ boundary. */ ++ ldu r5,8(r4) ++ cmpb r10,r5,r0 ++ cmpdi cr7,r10,0 ++ addi r9,r9,8 ++ bne cr7,L(done) ++ ++L(align64): ++ /* Proceed to the old (POWER7) implementation, checking two doublewords ++ per iteraction. For the first 56 bytes, we will just check for null ++ characters. After that, we will also check if we are 64-byte aligned ++ so we can jump to the vectorized implementation. We will unroll ++ these loops to avoid excessive branching. */ ++ ld r6,8(r4) ++ ldu r5,16(r4) ++ cmpb r10,r6,r0 ++ cmpb r11,r5,r0 ++ or r5,r10,r11 ++ cmpdi cr7,r5,0 ++ addi r9,r9,16 ++ bne cr7,L(dword_zero) ++ ++ ld r6,8(r4) ++ ldu r5,16(r4) ++ cmpb r10,r6,r0 ++ cmpb r11,r5,r0 ++ or r5,r10,r11 ++ cmpdi cr7,r5,0 ++ addi r9,r9,16 ++ bne cr7,L(dword_zero) ++ ++ ld r6,8(r4) ++ ldu r5,16(r4) ++ cmpb r10,r6,r0 ++ cmpb r11,r5,r0 ++ or r5,r10,r11 ++ cmpdi cr7,r5,0 ++ addi r9,r9,16 ++ bne cr7,L(dword_zero) ++ ++ /* Are we 64-byte aligned? If so, jump to the vectorized loop. ++ Note: aligning to 64-byte will necessarily slow down performance for ++ strings around 64 bytes in length due to the extra comparisons ++ required to check alignment for the vectorized loop. This is a ++ necessary tradeoff we are willing to take in order to speed up the ++ calculation for larger strings. */ ++ andi. r10,r9,63 ++ beq cr0,L(preloop) ++ ld r6,8(r4) ++ ldu r5,16(r4) ++ cmpb r10,r6,r0 ++ cmpb r11,r5,r0 ++ or r5,r10,r11 ++ cmpdi cr7,r5,0 ++ addi r9,r9,16 ++ bne cr7,L(dword_zero) ++ ++ andi. r10,r9,63 ++ beq cr0,L(preloop) ++ ld r6,8(r4) ++ ldu r5,16(r4) ++ cmpb r10,r6,r0 ++ cmpb r11,r5,r0 ++ or r5,r10,r11 ++ cmpdi cr7,r5,0 ++ addi r9,r9,16 ++ bne cr7,L(dword_zero) ++ ++ andi. r10,r9,63 ++ beq cr0,L(preloop) ++ ld r6,8(r4) ++ ldu r5,16(r4) ++ cmpb r10,r6,r0 ++ cmpb r11,r5,r0 ++ or r5,r10,r11 ++ cmpdi cr7,r5,0 ++ addi r9,r9,16 ++ bne cr7,L(dword_zero) ++ ++ andi. 
r10,r9,63 ++ beq cr0,L(preloop) ++ ld r6,8(r4) ++ ldu r5,16(r4) ++ cmpb r10,r6,r0 ++ cmpb r11,r5,r0 ++ or r5,r10,r11 ++ cmpdi cr7,r5,0 ++ addi r9,r9,16 ++ ++ /* At this point, we are necessarily 64-byte aligned. If no zeroes were ++ found, jump to the vectorized loop. */ ++ beq cr7,L(preloop) ++ ++L(dword_zero): ++ /* OK, one (or both) of the doublewords contains a null byte. Check ++ the first doubleword and decrement the address in case the first ++ doubleword really contains a null byte. */ ++ ++ cmpdi cr6,r10,0 ++ addi r4,r4,-8 ++ bne cr6,L(done) ++ ++ /* The null byte must be in the second doubleword. Adjust the address ++ again and move the result of cmpb to r10 so we can calculate the ++ length. */ ++ ++ mr r10,r11 ++ addi r4,r4,8 ++ ++ /* If the null byte was found in the non-vectorized code, compute the ++ final length. r10 has the output of the cmpb instruction, that is, ++ it contains 0xff in the same position as the null byte in the ++ original doubleword from the string. Use that to calculate the ++ length. */ ++L(done): ++#ifdef __LITTLE_ENDIAN__ ++ addi r9, r10,-1 /* Form a mask from trailing zeros. */ ++ andc r9, r9,r10 ++ popcntd r0, r9 /* Count the bits in the mask. */ ++#else ++ cntlzd r0,r10 /* Count leading zeros before the match. */ ++#endif ++ subf r5,r3,r4 ++ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ ++ add r3,r5,r0 /* Compute final length. */ ++ blr ++ ++ /* Vectorized implementation starts here. */ ++ .p2align 4 ++L(preloop): ++ /* Set up for the loop. */ ++ mr r4,r9 ++ li r7, 16 /* Load required offsets. */ ++ li r8, 32 ++ li r9, 48 ++ li r12, 8 ++ vxor v0,v0,v0 /* VR with null chars to use with ++ vcmpequb. */ ++ ++ /* Main loop to look for the end of the string. We will read in ++ 64-byte chunks. Align it to 32 bytes and unroll it 3 times to ++ leverage the icache performance. */ ++ .p2align 5 ++L(loop): ++ lvx v1,r4,r0 /* Load 4 quadwords. 
*/ ++ lvx v2,r4,r7 ++ lvx v3,r4,r8 ++ lvx v4,r4,r9 ++ vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ ++ vminub v6,v3,v4 ++ vminub v7,v5,v6 ++ vcmpequb. v7,v7,v0 /* Check for NULLs. */ ++ addi r4,r4,64 /* Adjust address for the next iteration. */ ++ bne cr6,L(vmx_zero) ++ ++ lvx v1,r4,r0 /* Load 4 quadwords. */ ++ lvx v2,r4,r7 ++ lvx v3,r4,r8 ++ lvx v4,r4,r9 ++ vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ ++ vminub v6,v3,v4 ++ vminub v7,v5,v6 ++ vcmpequb. v7,v7,v0 /* Check for NULLs. */ ++ addi r4,r4,64 /* Adjust address for the next iteration. */ ++ bne cr6,L(vmx_zero) ++ ++ lvx v1,r4,r0 /* Load 4 quadwords. */ ++ lvx v2,r4,r7 ++ lvx v3,r4,r8 ++ lvx v4,r4,r9 ++ vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ ++ vminub v6,v3,v4 ++ vminub v7,v5,v6 ++ vcmpequb. v7,v7,v0 /* Check for NULLs. */ ++ addi r4,r4,64 /* Adjust address for the next iteration. */ ++ beq cr6,L(loop) ++ ++L(vmx_zero): ++ /* OK, we found a null byte. Let's look for it in the current 64-byte ++ block and mark it in its corresponding VR. */ ++ vcmpequb v1,v1,v0 ++ vcmpequb v2,v2,v0 ++ vcmpequb v3,v3,v0 ++ vcmpequb v4,v4,v0 ++ ++ /* We will now 'compress' the result into a single doubleword, so it ++ can be moved to a GPR for the final calculation. First, we ++ generate an appropriate mask for vbpermq, so we can permute bits into ++ the first halfword. */ ++ vspltisb v10,3 ++ lvsl v11,r0,r0 ++ vslb v10,v11,v10 ++ ++ /* Permute the first bit of each byte into bits 48-63. */ ++ VBPERMQ(v1,v1,v10) ++ VBPERMQ(v2,v2,v10) ++ VBPERMQ(v3,v3,v10) ++ VBPERMQ(v4,v4,v10) ++ ++ /* Shift each component into its correct position for merging. */ ++#ifdef __LITTLE_ENDIAN__ ++ vsldoi v2,v2,v2,2 ++ vsldoi v3,v3,v3,4 ++ vsldoi v4,v4,v4,6 ++#else ++ vsldoi v1,v1,v1,6 ++ vsldoi v2,v2,v2,4 ++ vsldoi v3,v3,v3,2 ++#endif ++ ++ /* Merge the results and move to a GPR. 
*/ ++ vor v1,v2,v1 ++ vor v2,v3,v4 ++ vor v4,v1,v2 ++ MFVRD(r10,v4) ++ ++ /* Adjust address to the beginning of the current 64-byte block. */ ++ addi r4,r4,-64 ++ ++#ifdef __LITTLE_ENDIAN__ ++ addi r9, r10,-1 /* Form a mask from trailing zeros. */ ++ andc r9, r9,r10 ++ popcntd r0, r9 /* Count the bits in the mask. */ ++#else ++ cntlzd r0,r10 /* Count leading zeros before the match. */ ++#endif ++ subf r5,r3,r4 ++ add r3,r5,r0 /* Compute final length. */ ++ blr ++ ++END (strlen) ++libc_hidden_builtin_def (strlen) +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-13.patch b/SOURCES/glibc-rh1385004-13.patch new file mode 100644 index 0000000..cb14c95 --- /dev/null +++ b/SOURCES/glibc-rh1385004-13.patch @@ -0,0 +1,796 @@ +From 7cb28f3e21ff0c9658fad3d021e5a5548e1e49ae Mon Sep 17 00:00:00 2001 +From: Rajalakshmi Srinivasaraghavan +Date: Wed, 20 Apr 2016 23:10:42 +0530 +Subject: [PATCH] powerpc: strcasestr optmization for power8 + +This patch optimizes strcasestr function for power >= 8 systems. The average +improvement of this optimization is ~40% and compares 16 bytes at a time +using vector instructions. This patch is tested on powerpc64 and powerpc64le. 
+ +(cherry picked from commit e413b14e18ac635b5683ab7bbb1c901f79d1b06b) +--- + ChangeLog | 15 + + sysdeps/powerpc/locale-defines.sym | 4 + + sysdeps/powerpc/powerpc64/multiarch/Makefile | 3 +- + .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 8 + + .../powerpc64/multiarch/strcasestr-power8.S | 49 ++ + .../powerpc/powerpc64/multiarch/strcasestr-ppc64.c | 34 ++ + sysdeps/powerpc/powerpc64/multiarch/strcasestr.c | 37 ++ + sysdeps/powerpc/powerpc64/power8/Makefile | 3 + + .../powerpc/powerpc64/power8/strcasestr-ppc64.c | 29 ++ + sysdeps/powerpc/powerpc64/power8/strcasestr.S | 531 +++++++++++++++++++++ + 10 files changed, 712 insertions(+), 1 deletion(-) + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasestr.c + create mode 100644 sysdeps/powerpc/powerpc64/power8/Makefile + create mode 100644 sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c + create mode 100644 sysdeps/powerpc/powerpc64/power8/strcasestr.S + +diff --git a/ChangeLog b/ChangeLog +index e7ea58a..6677ea2 100644 +diff --git a/sysdeps/powerpc/locale-defines.sym b/sysdeps/powerpc/locale-defines.sym +index af64b92..5c5379c 100644 +--- a/sysdeps/powerpc/locale-defines.sym ++++ b/sysdeps/powerpc/locale-defines.sym +@@ -3,3 +3,7 @@ + -- + + LOCALE_CTYPE_TOLOWER offsetof (struct __locale_struct, __ctype_tolower) ++LOCALE_CTYPE_TOUPPER offsetof (struct __locale_struct, __ctype_toupper) ++_NL_CTYPE_NONASCII_CASE ++LOCALE_DATA_VALUES offsetof (struct __locale_data, values) ++SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0]) +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 57abe8f..7f70ceb 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -20,6 +20,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 
memcpy-cell \ + mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ + memrchr-power7 memrchr-ppc64 rawmemchr-power7 \ + stpcpy-power8 stpcpy-power7 stpcpy-ppc64 \ ++ strcasestr-power8 strcasestr-ppc64 \ + strcat-power8 strcat-power7 strcat-ppc64 \ + strcmp-power8 strcmp-power7 strcmp-ppc64 \ + strcpy-power8 strcpy-power7 strcpy-ppc64 \ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index 583885c..994e852 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -341,6 +341,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strstr, 1, + __strstr_ppc)) + ++ /* Support sysdeps/powerpc/powerpc64/multiarch/strcasestr.c. */ ++ IFUNC_IMPL (i, name, strcasestr, ++ IFUNC_IMPL_ADD (array, i, strcasestr, ++ hwcap2 & PPC_FEATURE2_ARCH_2_07, ++ __strcasestr_power8) ++ IFUNC_IMPL_ADD (array, i, strcasestr, 1, ++ __strcasestr_ppc)) ++ + /* Support sysdeps/powerpc/powerpc64/multiarch/wcschr.c. */ + IFUNC_IMPL (i, name, wcschr, + IFUNC_IMPL_ADD (array, i, wcschr, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S +new file mode 100644 +index 0000000..c77ff9f +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S +@@ -0,0 +1,49 @@ ++/* Optimized strcasestr implementation for POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#undef EALIGN ++#define EALIGN(name, alignt, words) \ ++ .section ".text"; \ ++ ENTRY_2(__strcasestr_power8) \ ++ .align ALIGNARG(alignt); \ ++ EALIGN_W_##words; \ ++ BODY_LABEL(__strcasestr_power8): \ ++ cfi_startproc; \ ++ LOCALENTRY(__strcasestr_power8) ++ ++#undef END ++#define END(name) \ ++ cfi_endproc; \ ++ TRACEBACK(__strcasestr_power8) \ ++ END_2(__strcasestr_power8) ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++/* The following definitions are used in strcasestr optimization. */ ++ ++/* strlen is used to calculate len of r4. */ ++#define STRLEN __strlen_power8 ++/* strnlen is used to check if len of r3 is more than r4. */ ++#define STRNLEN __strnlen_power7 ++/* strchr is used to check if first char of r4 is present in r3. */ ++#define STRCHR __strchr_power7 ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c +new file mode 100644 +index 0000000..7f7bb9e +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c +@@ -0,0 +1,34 @@ ++/* PowerPC64 default implementation of strcasestr. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#define STRCASESTR __strcasestr_ppc ++#if IS_IN (libc) && defined(SHARED) ++# undef libc_hidden_builtin_def ++# define libc_hidden_builtin_def(name) \ ++ __hidden_ver1(__strcasestr_ppc, __GI_strcasestr, __strcasestr_ppc); ++#endif ++ ++ ++#undef weak_alias ++#define weak_alias(a,b) ++ ++extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden; ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c b/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c +new file mode 100644 +index 0000000..17ba188 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of strcasestr. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#if IS_IN (libc) ++# include ++# include ++# include "init-arch.h" ++ ++extern __typeof (__strcasestr) __strcasestr_ppc attribute_hidden; ++extern __typeof (__strcasestr) __strcasestr_power8 attribute_hidden; ++ ++/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ++ ifunc symbol properly. */ ++libc_ifunc (__strcasestr, ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __strcasestr_power8 ++ : __strcasestr_ppc); ++ ++weak_alias (__strcasestr, strcasestr) ++#else ++#include ++#endif +diff --git a/sysdeps/powerpc/powerpc64/power8/Makefile b/sysdeps/powerpc/powerpc64/power8/Makefile +new file mode 100644 +index 0000000..71a5952 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/Makefile +@@ -0,0 +1,3 @@ ++ifeq ($(subdir),string) ++sysdep_routines += strcasestr-ppc64 ++endif +diff --git a/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c b/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c +new file mode 100644 +index 0000000..09a07b0 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c +@@ -0,0 +1,29 @@ ++/* Optimized strcasestr implementation for PowerPC64/POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#define STRCASESTR __strcasestr_ppc ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(__name) ++ ++#undef weak_alias ++#define weak_alias(a,b) ++extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden; ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/power8/strcasestr.S b/sysdeps/powerpc/powerpc64/power8/strcasestr.S +new file mode 100644 +index 0000000..24b2b76 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/strcasestr.S +@@ -0,0 +1,531 @@ ++/* Optimized strcasestr implementation for PowerPC64/POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++/* Char * [r3] strcasestr (char *s [r3], char * pat[r4]) */ ++ ++/* The performance gain is obtained by comparing 16 bytes. */ ++ ++/* When the first char of r4 is hit ITERATIONS times in r3 ++ fallback to default. */ ++#define ITERATIONS 64 ++ ++#ifndef STRLEN ++/* For builds without IFUNC support, local calls should be made to internal ++ GLIBC symbol (created by libc_hidden_builtin_def). 
*/ ++# ifdef SHARED ++# define STRLEN __GI_strlen ++# else ++# define STRLEN strlen ++# endif ++#endif ++ ++#ifndef STRNLEN ++/* For builds without IFUNC support, local calls should be made to internal ++ GLIBC symbol (created by libc_hidden_builtin_def). */ ++# ifdef SHARED ++# define STRNLEN __GI_strnlen ++# else ++# define STRNLEN __strnlen ++# endif ++#endif ++ ++#ifndef STRCHR ++# ifdef SHARED ++# define STRCHR __GI_strchr ++# else ++# define STRCHR strchr ++# endif ++#endif ++ ++/* Convert 16 bytes of v4 and reg to lowercase and compare. */ ++#define TOLOWER(reg) \ ++ vcmpgtub v6, v4, v1; \ ++ vcmpgtub v7, v2, v4; \ ++ vand v8, v7, v6; \ ++ vand v8, v8, v3; \ ++ vor v4, v8, v4; \ ++ vcmpgtub v6, reg, v1; \ ++ vcmpgtub v7, v2, reg; \ ++ vand v8, v7, v6; \ ++ vand v8, v8, v3; \ ++ vor reg, v8, reg; \ ++ vcmpequb. v6, reg, v4; ++ ++/* TODO: change these to the actual instructions when the minimum required ++ binutils allows it. */ ++#ifdef _ARCH_PWR8 ++#define VCLZD_V8_v7 vclzd v8, v7; ++#else ++#define VCLZD_V8_v7 .long 0x11003fc2 ++#endif ++ ++#define FRAMESIZE (FRAME_MIN_SIZE+48) ++/* TODO: change this to .machine power8 when the minimum required binutils ++ allows it. */ ++ .machine power7 ++EALIGN (strcasestr, 4, 0) ++ CALL_MCOUNT 2 ++ mflr r0 /* Load link register LR to r0. */ ++ std r31, -8(r1) /* Save callers register r31. */ ++ std r30, -16(r1) /* Save callers register r30. */ ++ std r29, -24(r1) /* Save callers register r29. */ ++ std r28, -32(r1) /* Save callers register r28. */ ++ std r27, -40(r1) /* Save callers register r27. */ ++ std r0, 16(r1) /* Store the link register. */ ++ cfi_offset(r31, -8) ++ cfi_offset(r30, -16) ++ cfi_offset(r29, -24) ++ cfi_offset(r28, -32) ++ cfi_offset(r27, -40) ++ cfi_offset(lr, 16) ++ stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ ++ cfi_adjust_cfa_offset(FRAMESIZE) ++ ++ dcbt 0, r3 ++ dcbt 0, r4 ++ cmpdi cr7, r3, 0 /* Input validation. 
*/ ++ beq cr7, L(retnull) ++ cmpdi cr7, r4, 0 ++ beq cr7, L(retnull) ++ ++ mr r29, r3 ++ mr r30, r4 ++ /* Load first byte from r4 and check if its null. */ ++ lbz r6, 0(r4) ++ cmpdi cr7, r6, 0 ++ beq cr7, L(ret_r3) ++ ++ ld r10, __libc_tsd_LOCALE@got@tprel(r2) ++ add r9, r10, __libc_tsd_LOCALE@tls ++ ld r9, 0(r9) ++ ld r9, LOCALE_CTYPE_TOUPPER(r9) ++ sldi r10, r6, 2 /* Convert to upper case. */ ++ lwzx r28, r9, r10 ++ ++ ld r10, __libc_tsd_LOCALE@got@tprel(r2) ++ add r11, r10, __libc_tsd_LOCALE@tls ++ ld r11, 0(r11) ++ ld r11, LOCALE_CTYPE_TOLOWER(r11) ++ sldi r10, r6, 2 /* Convert to lower case. */ ++ lwzx r27, r11, r10 ++ ++ /* Check if the first char is present. */ ++ mr r4, r27 ++ bl STRCHR ++ nop ++ mr r5, r3 ++ mr r3, r29 ++ mr r29, r5 ++ mr r4, r28 ++ bl STRCHR ++ nop ++ cmpdi cr7, r29, 0 ++ beq cr7, L(firstpos) ++ cmpdi cr7, r3, 0 ++ beq cr7, L(skipcheck) ++ cmpw cr7, r3, r29 ++ ble cr7, L(firstpos) ++ /* Move r3 to the first occurrence. */ ++L(skipcheck): ++ mr r3, r29 ++L(firstpos): ++ mr r29, r3 ++ ++ sldi r9, r27, 8 ++ or r28, r9, r28 ++ /* Reg r27 is used to count the number of iterations. */ ++ li r27, 0 ++ /* If first char of search str is not present. */ ++ cmpdi cr7, r3, 0 ++ ble cr7, L(end) ++ ++ /* Find the length of pattern. */ ++ mr r3, r30 ++ bl STRLEN ++ nop ++ ++ cmpdi cr7, r3, 0 /* If search str is null. */ ++ beq cr7, L(ret_r3) ++ ++ mr r31, r3 ++ mr r4, r3 ++ mr r3, r29 ++ bl STRNLEN ++ nop ++ ++ cmpd cr7, r3, r31 /* If len(r3) < len(r4). */ ++ blt cr7, L(retnull) ++ ++ mr r3, r29 ++ ++ /* Locales not matching ASCII for single bytes. */ ++ ld r10, __libc_tsd_LOCALE@got@tprel(r2) ++ add r9, r10, __libc_tsd_LOCALE@tls ++ ld r9, 0(r9) ++ ld r7, 0(r9) ++ addi r7, r7, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES ++ lwz r8, 0(r7) ++ cmpdi cr7, r8, 1 ++ beq cr7, L(bytebybyte) ++ ++ /* If len(r4) < 16 handle byte by byte. */ ++ /* For shorter strings we will not use vector registers. 
*/ ++ cmpdi cr7, r31, 16 ++ blt cr7, L(bytebybyte) ++ ++ /* Comparison values used for TOLOWER. */ ++ /* Load v1 = 64('A' - 1), v2 = 91('Z' + 1), v3 = 32 in each byte. */ ++ vspltish v0, 0 ++ vspltisb v5, 2 ++ vspltisb v4, 4 ++ vsl v3, v5, v4 ++ vaddubm v1, v3, v3 ++ vspltisb v5, 15 ++ vaddubm v2, v5, v5 ++ vaddubm v2, v1, v2 ++ vspltisb v4, -3 ++ vaddubm v2, v2, v4 ++ ++ /* ++ 1. Load 16 bytes from r3 and r4 ++ 2. Check if there is null, If yes, proceed byte by byte path. ++ 3. Else, convert both to lowercase and compare. ++ 4. If they are same proceed to 1. ++ 5. If they don't match, find if first char of r4 is present in the ++ loaded 16 byte of r3. ++ 6. If yes, move position, load next 16 bytes of r3 and proceed to 2. ++ */ ++ ++ mr r8, r3 /* Save r3 for future use. */ ++ mr r4, r30 /* Restore r4. */ ++ clrldi r10, r4, 60 ++ lvx v5, 0, r4 /* Load 16 bytes from r4. */ ++ cmpdi cr7, r10, 0 ++ beq cr7, L(begin2) ++ /* If r4 is unaligned, load another 16 bytes. */ ++#ifdef __LITTLE_ENDIAN__ ++ lvsr v7, 0, r4 ++#else ++ lvsl v7, 0, r4 ++#endif ++ addi r5, r4, 16 ++ lvx v9, 0, r5 ++#ifdef __LITTLE_ENDIAN__ ++ vperm v5, v9, v5, v7 ++#else ++ vperm v5, v5, v9, v7 ++#endif ++L(begin2): ++ lvx v4, 0, r3 ++ vcmpequb. v7, v0, v4 /* Check for null. */ ++ beq cr6, L(nullchk6) ++ b L(trailcheck) ++ ++ .align 4 ++L(nullchk6): ++ clrldi r10, r3, 60 ++ cmpdi cr7, r10, 0 ++ beq cr7, L(next16) ++#ifdef __LITTLE_ENDIAN__ ++ lvsr v7, 0, r3 ++#else ++ lvsl v7, 0, r3 ++#endif ++ addi r5, r3, 16 ++ /* If r3 is unaligned, load another 16 bytes. */ ++ lvx v10, 0, r5 ++#ifdef __LITTLE_ENDIAN__ ++ vperm v4, v10, v4, v7 ++#else ++ vperm v4, v4, v10, v7 ++#endif ++L(next16): ++ vcmpequb. v6, v0, v5 /* Check for null. */ ++ beq cr6, L(nullchk) ++ b L(trailcheck) ++ ++ .align 4 ++L(nullchk): ++ vcmpequb. v6, v0, v4 ++ beq cr6, L(nullchk1) ++ b L(retnull) ++ ++ .align 4 ++L(nullchk1): ++ /* Convert both v4 and v5 to lower. */ ++ TOLOWER(v5) ++ /* If both are same, branch to match. 
*/ ++ blt cr6, L(match) ++ /* Find if the first char is present in next 15 bytes. */ ++#ifdef __LITTLE_ENDIAN__ ++ vspltb v6, v5, 15 ++ vsldoi v7, v0, v4, 15 ++#else ++ vspltb v6, v5, 0 ++ vspltisb v7, 8 ++ vslo v7, v4, v7 ++#endif ++ vcmpequb v7, v6, v7 ++ vcmpequb. v6, v0, v7 ++ /* Shift r3 by 16 bytes and proceed. */ ++ blt cr6, L(shift16) ++ VCLZD_V8_v7 ++#ifdef __LITTLE_ENDIAN__ ++ vspltb v6, v8, 15 ++#else ++ vspltb v6, v8, 7 ++#endif ++ vcmpequb. v6, v6, v1 ++ /* Shift r3 by 8 bytes and proceed. */ ++ blt cr6, L(shift8) ++ b L(begin) ++ ++ .align 4 ++L(match): ++ /* There is a match of 16 bytes, check next bytes. */ ++ cmpdi cr7, r31, 16 ++ mr r29, r3 ++ beq cr7, L(ret_r3) ++ ++L(secondmatch): ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ /* Load next 16 bytes of r3 and r4 and compare. */ ++ clrldi r10, r4, 60 ++ cmpdi cr7, r10, 0 ++ beq cr7, L(nextload) ++ /* Handle unaligned case. */ ++ vor v6, v9, v9 ++ vcmpequb. v7, v0, v6 ++ beq cr6, L(nullchk2) ++ b L(trailcheck) ++ ++ .align 4 ++L(nullchk2): ++#ifdef __LITTLE_ENDIAN__ ++ lvsr v7, 0, r4 ++#else ++ lvsl v7, 0, r4 ++#endif ++ addi r5, r4, 16 ++ /* If r4 is unaligned, load another 16 bytes. */ ++ lvx v9, 0, r5 ++#ifdef __LITTLE_ENDIAN__ ++ vperm v11, v9, v6, v7 ++#else ++ vperm v11, v6, v9, v7 ++#endif ++ b L(compare) ++ ++ .align 4 ++L(nextload): ++ lvx v11, 0, r4 ++L(compare): ++ vcmpequb. v7, v0, v11 ++ beq cr6, L(nullchk3) ++ b L(trailcheck) ++ ++ .align 4 ++L(nullchk3): ++ clrldi r10, r3, 60 ++ cmpdi cr7, r10, 0 ++ beq cr7, L(nextload1) ++ /* Handle unaligned case. */ ++ vor v4, v10, v10 ++ vcmpequb. v7, v0, v4 ++ beq cr6, L(nullchk4) ++ b L(retnull) ++ ++ .align 4 ++L(nullchk4): ++#ifdef __LITTLE_ENDIAN__ ++ lvsr v7, 0, r3 ++#else ++ lvsl v7, 0, r3 ++#endif ++ addi r5, r3, 16 ++ /* If r3 is unaligned, load another 16 bytes. 
*/ ++ lvx v10, 0, r5 ++#ifdef __LITTLE_ENDIAN__ ++ vperm v4, v10, v4, v7 ++#else ++ vperm v4, v4, v10, v7 ++#endif ++ b L(compare1) ++ ++ .align 4 ++L(nextload1): ++ lvx v4, 0, r3 ++L(compare1): ++ vcmpequb. v7, v0, v4 ++ beq cr6, L(nullchk5) ++ b L(retnull) ++ ++ .align 4 ++L(nullchk5): ++ /* Convert both v4 and v11 to lower. */ ++ TOLOWER(v11) ++ /* If both are same, branch to secondmatch. */ ++ blt cr6, L(secondmatch) ++ /* Continue the search. */ ++ b L(begin) ++ ++ .align 4 ++L(trailcheck): ++ ld r10, __libc_tsd_LOCALE@got@tprel(r2) ++ add r11, r10, __libc_tsd_LOCALE@tls ++ ld r11, 0(r11) ++ ld r11, LOCALE_CTYPE_TOLOWER(r11) ++L(loop2): ++ lbz r5, 0(r3) /* Load byte from r3. */ ++ lbz r6, 0(r4) /* Load next byte from r4. */ ++ cmpdi cr7, r6, 0 /* Is it null? */ ++ beq cr7, L(updater3) ++ cmpdi cr7, r5, 0 /* Is it null? */ ++ beq cr7, L(retnull) /* If yes, return. */ ++ addi r3, r3, 1 ++ addi r4, r4, 1 /* Increment r4. */ ++ sldi r10, r5, 2 /* Convert to lower case. */ ++ lwzx r10, r11, r10 ++ sldi r7, r6, 2 /* Convert to lower case. */ ++ lwzx r7, r11, r7 ++ cmpw cr7, r7, r10 /* Compare with byte from r4. */ ++ bne cr7, L(begin) ++ b L(loop2) ++ ++ .align 4 ++L(shift8): ++ addi r8, r8, 7 ++ b L(begin) ++ .align 4 ++L(shift16): ++ addi r8, r8, 15 ++ .align 4 ++L(begin): ++ addi r8, r8, 1 ++ mr r3, r8 ++ /* When our iterations exceed ITERATIONS, fall back to default. */ ++ addi r27, r27, 1 ++ cmpdi cr7, r27, ITERATIONS ++ beq cr7, L(default) ++ mr r4, r30 /* Restore r4. */ ++ b L(begin2) ++ ++ /* Handling byte by byte. */ ++ .align 4 ++L(loop1): ++ mr r3, r8 ++ addi r27, r27, 1 ++ cmpdi cr7, r27, ITERATIONS ++ beq cr7, L(default) ++ mr r29, r8 ++ srdi r4, r28, 8 ++ /* Check if the first char is present. 
*/ ++ bl STRCHR ++ nop ++ mr r5, r3 ++ mr r3, r29 ++ mr r29, r5 ++ sldi r4, r28, 56 ++ srdi r4, r4, 56 ++ bl STRCHR ++ nop ++ cmpdi cr7, r29, 0 ++ beq cr7, L(nextpos) ++ cmpdi cr7, r3, 0 ++ beq cr7, L(skipcheck1) ++ cmpw cr7, r3, r29 ++ ble cr7, L(nextpos) ++ /* Move r3 to first occurrence. */ ++L(skipcheck1): ++ mr r3, r29 ++L(nextpos): ++ mr r29, r3 ++ cmpdi cr7, r3, 0 ++ ble cr7, L(retnull) ++L(bytebybyte): ++ ld r10, __libc_tsd_LOCALE@got@tprel(r2) ++ add r11, r10, __libc_tsd_LOCALE@tls ++ ld r11, 0(r11) ++ ld r11, LOCALE_CTYPE_TOLOWER(r11) ++ mr r4, r30 /* Restore r4. */ ++ mr r8, r3 /* Save r3. */ ++ addi r8, r8, 1 ++ ++L(loop): ++ addi r3, r3, 1 ++ lbz r5, 0(r3) /* Load byte from r3. */ ++ addi r4, r4, 1 /* Increment r4. */ ++ lbz r6, 0(r4) /* Load next byte from r4. */ ++ cmpdi cr7, r6, 0 /* Is it null? */ ++ beq cr7, L(updater3) ++ cmpdi cr7, r5, 0 /* Is it null? */ ++ beq cr7, L(retnull) /* If yes, return. */ ++ sldi r10, r5, 2 /* Convert to lower case. */ ++ lwzx r10, r11, r10 ++ sldi r7, r6, 2 /* Convert to lower case. */ ++ lwzx r7, r11, r7 ++ cmpw cr7, r7, r10 /* Compare with byte from r4. */ ++ bne cr7, L(loop1) ++ b L(loop) ++ ++ /* Handling return values. */ ++ .align 4 ++L(updater3): ++ subf r3, r31, r3 /* Reduce r31 (len of r4) from r3. */ ++ b L(end) ++ ++ .align 4 ++L(ret_r3): ++ mr r3, r29 /* Return point of match. */ ++ b L(end) ++ ++ .align 4 ++L(retnull): ++ li r3, 0 /* Substring was not found. */ ++ b L(end) ++ ++ .align 4 ++L(default): ++ mr r4, r30 ++ bl __strcasestr_ppc ++ nop ++ ++ .align 4 ++L(end): ++ addi r1, r1, FRAMESIZE /* Restore stack pointer. */ ++ cfi_adjust_cfa_offset(-FRAMESIZE) ++ ld r0, 16(r1) /* Restore the saved link register. */ ++ ld r27, -40(r1) ++ ld r28, -32(r1) ++ ld r29, -24(r1) /* Restore callers save register r29. */ ++ ld r30, -16(r1) /* Restore callers save register r30. */ ++ ld r31, -8(r1) /* Restore callers save register r31. 
*/ ++ cfi_restore(lr) ++ cfi_restore(r27) ++ cfi_restore(r28) ++ cfi_restore(r29) ++ cfi_restore(r30) ++ cfi_restore(r31) ++ mtlr r0 /* Branch to link register. */ ++ blr ++END (strcasestr) ++libc_hidden_builtin_def (strcasestr) +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-14.patch b/SOURCES/glibc-rh1385004-14.patch new file mode 100644 index 0000000..062dc70 --- /dev/null +++ b/SOURCES/glibc-rh1385004-14.patch @@ -0,0 +1,304 @@ +From f4f918430b6b74f1801ebe39a8824cc5437ba9d4 Mon Sep 17 00:00:00 2001 +From: "Paul E. Murphy" +Date: Mon, 25 Apr 2016 09:11:02 -0500 +Subject: [PATCH] powerpc: Add optimized strcspn for P8 + +A few minor adjustments to the P8 strspn gives us +an almost equally optimized P8 strcspn. + +(cherry picked from commit 8f1b841e452dbb083112fd036033b7f4af506ba0) +--- + ChangeLog | 25 ++++++++++++ + sysdeps/powerpc/powerpc64/multiarch/Makefile | 4 +- + .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 8 ++++ + .../powerpc/powerpc64/multiarch/strcspn-power8.S | 25 ++++++++++++ + .../powerpc/powerpc64/multiarch/strcspn-ppc64.c | 26 +++++++++++++ + sysdeps/powerpc/powerpc64/multiarch/strcspn.c | 35 +++++++++++++++++ + .../powerpc/powerpc64/multiarch/strspn-power8.S | 17 +------- + sysdeps/powerpc/powerpc64/power8/strcspn.S | 20 ++++++++++ + sysdeps/powerpc/powerpc64/power8/strspn.S | 45 ++++++++++++++++------ + 9 files changed, 176 insertions(+), 29 deletions(-) + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcspn.c + create mode 100644 sysdeps/powerpc/powerpc64/power8/strcspn.S + +diff --git a/ChangeLog b/ChangeLog +index 6677ea2..5537fc6 100644 +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 7f70ceb..9ee9bc2 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ 
-20,6 +20,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + strcat-power8 strcat-power7 strcat-ppc64 \ + strcmp-power8 strcmp-power7 strcmp-ppc64 \ + strcpy-power8 strcpy-power7 strcpy-ppc64 \ ++ strcspn-power8 strcspn-ppc64 \ + stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \ + strncpy-power8 strncpy-power7 strncpy-ppc64 \ + strncat-power7 \ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index 994e852..228891f 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -332,6 +332,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strspn, 1, + __strspn_ppc)) + ++ /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c. */ ++ IFUNC_IMPL (i, name, strcspn, ++ IFUNC_IMPL_ADD (array, i, strcspn, ++ hwcap2 & PPC_FEATURE2_ARCH_2_07, ++ __strcspn_power8) ++ IFUNC_IMPL_ADD (array, i, strcspn, 1, ++ __strcspn_ppc)) ++ + /* Support sysdeps/powerpc/powerpc64/multiarch/strstr.c. */ + IFUNC_IMPL (i, name, strstr, + IFUNC_IMPL_ADD (array, i, strstr, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S +new file mode 100644 +index 0000000..25545f8 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S +@@ -0,0 +1,25 @@ ++/* Optimized strcspn implementation for POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#define STRSPN __strcspn_power8 ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c +new file mode 100644 +index 0000000..4c16386 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c +@@ -0,0 +1,26 @@ ++/* Default strcspn implementation for PowerPC64. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define STRCSPN __strcspn_ppc ++ ++#ifdef SHARED ++# undef libc_hidden_def ++# define libc_hidden_def(name) ++#endif ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn.c +new file mode 100644 +index 0000000..e7343ee +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcspn. PowerPC64 version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include "init-arch.h" ++ ++#undef strcspn ++extern __typeof (strcspn) __libc_strcspn; ++ ++extern __typeof (strcspn) __strcspn_ppc attribute_hidden; ++extern __typeof (strcspn) __strcspn_power8 attribute_hidden; ++ ++libc_ifunc (__libc_strcspn, ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? 
__strcspn_power8 ++ : __strcspn_ppc); ++ ++weak_alias (__libc_strcspn, strcspn) ++libc_hidden_builtin_def (strcspn) +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S +index 86a4e09..27d25e0 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S ++++ b/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S +@@ -18,22 +18,7 @@ + + #include + +-#undef EALIGN +-#define EALIGN(name, alignt, words) \ +- .section ".text"; \ +- ENTRY_2(__strspn_power8) \ +- .align ALIGNARG(alignt); \ +- EALIGN_W_##words; \ +- BODY_LABEL(__strspn_power8): \ +- cfi_startproc; \ +- LOCALENTRY(__strspn_power8) +- +-#undef END +-#define END(name) \ +- cfi_endproc; \ +- TRACEBACK(__strspn_power8) \ +- END_2(__strspn_power8) +- ++#define STRSPN __strspn_power8 + #undef libc_hidden_builtin_def + #define libc_hidden_builtin_def(name) + +diff --git a/sysdeps/powerpc/powerpc64/power8/strcspn.S b/sysdeps/powerpc/powerpc64/power8/strcspn.S +new file mode 100644 +index 0000000..bfc58a8 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/strcspn.S +@@ -0,0 +1,20 @@ ++/* Optimized strcspn implementation for PowerPC64/POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define USE_AS_STRCSPN 1 ++#include +diff --git a/sysdeps/powerpc/powerpc64/power8/strspn.S b/sysdeps/powerpc/powerpc64/power8/strspn.S +index 0dda437..011081d 100644 +--- a/sysdeps/powerpc/powerpc64/power8/strspn.S ++++ b/sysdeps/powerpc/powerpc64/power8/strspn.S +@@ -33,6 +33,21 @@ + + #include "sysdep.h" + ++#ifndef USE_AS_STRCSPN ++# define USE_AS_STRCSPN 0 ++# ifndef STRSPN ++# define STRSPN strspn ++# endif ++# define INITIAL_MASK 0 ++# define UPDATE_MASK(RA, RS, RB) or RA, RS, RB ++#else ++# ifndef STRSPN ++# define STRSPN strcspn ++# endif ++# define INITIAL_MASK -1 ++# define UPDATE_MASK(RA, RS, RB) andc RA, RS, RB ++#endif ++ + /* Simple macro to use VSX instructions in overlapping VR's. */ + #define XXVR(insn, vrt, vra, vrb) \ + insn 32+vrt, 32+vra, 32+vrb +@@ -53,7 +68,7 @@ + /* This can be updated to power8 once the minimum version of + binutils supports power8 and the above instructions. */ + .machine power7 +-EALIGN(strspn, 4, 0) ++EALIGN(STRSPN, 4, 0) + CALL_MCOUNT 2 + + /* Generate useful constants for later on. */ +@@ -66,10 +81,18 @@ EALIGN(strspn, 4, 0) + + /* Prepare to compute 256b mask. */ + addi r4, r4, -1 +- li r5, 0 +- li r6, 0 +- li r7, 0 +- li r8, 0 ++ li r5, INITIAL_MASK ++ li r6, INITIAL_MASK ++ li r7, INITIAL_MASK ++ li r8, INITIAL_MASK ++ ++#if USE_AS_STRCSPN ++ /* Ensure the null character never matches by clearing ISA bit 0 in ++ in r5 which is the bit which will check for it in the later usage ++ of vbpermq. */ ++ srdi r5, r5, 1 ++#endif ++ + li r11, 1 + sldi r11, r11, 63 + +@@ -97,14 +120,14 @@ L(next_needle): + + /* Now, or the value into the correct GPR. */ + bge cr1,L(needle_gt128) +- or r5, r5, r10 /* 0 - 63. */ +- or r6, r6, r12 /* 64 - 127. */ ++ UPDATE_MASK (r5, r5, r10) /* 0 - 63. */ ++ UPDATE_MASK (r6, r6, r12) /* 64 - 127. */ + b L(next_needle) + + .align 4 + L(needle_gt128): +- or r7, r7, r10 /* 128 - 191. */ +- or r8, r8, r12 /* 192 - 255. */ ++ UPDATE_MASK (r7, r7, r10) /* 128 - 191. 
*/ ++ UPDATE_MASK (r8, r8, r12) /* 192 - 255. */ + b L(next_needle) + + +@@ -175,5 +198,5 @@ L(done): + add r3, r3, r10 + blr + +-END(strspn) +-libc_hidden_builtin_def (strspn) ++END(STRSPN) ++libc_hidden_builtin_def (STRSPN) +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-15.patch b/SOURCES/glibc-rh1385004-15.patch new file mode 100644 index 0000000..a885ca9 --- /dev/null +++ b/SOURCES/glibc-rh1385004-15.patch @@ -0,0 +1,633 @@ +From 71ae86478edc7b21872464f43fb29ff650c1681a Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Tue, 15 Jul 2014 12:19:09 -0400 +Subject: [PATCH] PowerPC: memset optimization for POWER8/PPC64 + +This patch adds an optimized memset implementation for POWER8. For +sizes from 0 to 255 bytes, a word/doubleword algorithm similar to the +POWER7 optimized one is used. + +For sizes larger than 255, two strategies are used: + +1. If the constant is different from 0, the memory is written with + altivec vector instructions; + +2. If the constant is 0, dcbz instructions are used. The loop is unrolled + to clear 512 bytes at a time. + +Using vector instructions increases throughput considerably, doubling +performance for sizes larger than 1024. Unrolling the dcbz loop +also shows a performance improvement, doubling throughput for sizes +larger than 8192 bytes. 
+--- + ChangeLog | 15 + + benchtests/bench-memset.c | 5 + + sysdeps/powerpc/powerpc64/multiarch/Makefile | 2 +- + sysdeps/powerpc/powerpc64/multiarch/bzero.c | 11 +- + .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 6 + + .../powerpc/powerpc64/multiarch/memset-power8.S | 43 ++ + sysdeps/powerpc/powerpc64/multiarch/memset.c | 11 +- + sysdeps/powerpc/powerpc64/power8/memset.S | 449 +++++++++++++++++++++ + 8 files changed, 533 insertions(+), 9 deletions(-) + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/memset-power8.S + create mode 100644 sysdeps/powerpc/powerpc64/power8/memset.S + +diff --git a/ChangeLog b/ChangeLog +index ddaf70f..dc61c87 100644 +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 0de3804..abc9d2e 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -1,7 +1,8 @@ ifeq ($(subdir),string) + ifeq ($(subdir),string) + sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + memcpy-power4 memcpy-ppc64 memcmp-power7 memcmp-power4 \ +- memcmp-ppc64 memset-power7 memset-power6 memset-power4 \ ++ memcmp-ppc64 memset-power8 memset-power7 memset-power6 \ ++ memset-power4 \ + memset-ppc64 bzero-power4 bzero-power6 bzero-power7 \ + mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ + memrchr-power7 memrchr-ppc64 rawmemchr-power7 \ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/bzero.c b/sysdeps/powerpc/powerpc64/multiarch/bzero.c +index ed83541..298cf00 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/bzero.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/bzero.c +@@ -26,14 +26,17 @@ extern __typeof (bzero) __bzero_ppc attribute_hidden; + extern __typeof (bzero) __bzero_power4 attribute_hidden; + extern __typeof (bzero) __bzero_power6 attribute_hidden; + extern __typeof (bzero) __bzero_power7 attribute_hidden; ++extern __typeof (bzero) __bzero_power8 attribute_hidden; + + libc_ifunc (__bzero, +- (hwcap & 
PPC_FEATURE_HAS_VSX) +- ? __bzero_power7 : +- (hwcap & PPC_FEATURE_ARCH_2_05) ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __bzero_power8 : ++ (hwcap & PPC_FEATURE_HAS_VSX) ++ ? __bzero_power7 : ++ (hwcap & PPC_FEATURE_ARCH_2_05) + ? __bzero_power6 : + (hwcap & PPC_FEATURE_POWER4) +- ? __bzero_power4 ++ ? __bzero_power4 + : __bzero_ppc); + + weak_alias (__bzero, bzero) +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index a574487..06d5be9 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -69,6 +71,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/memset.c. */ + IFUNC_IMPL (i, name, memset, ++ IFUNC_IMPL_ADD (array, i, memset, hwcap2 & PPC_FEATURE2_ARCH_2_07, ++ __memset_power8) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, + __memset_power7) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05, +@@ -134,6 +138,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/bzero.c. */ + IFUNC_IMPL (i, name, bzero, ++ IFUNC_IMPL_ADD (array, i, bzero, hwcap2 & PPC_FEATURE2_ARCH_2_07, ++ __bzero_power8) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_HAS_VSX, + __bzero_power7) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_ARCH_2_05, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S +new file mode 100644 +index 0000000..e8a604b +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S +@@ -0,0 +1,43 @@ ++/* Optimized memset implementation for PowerPC64/POWER8. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#undef EALIGN ++#define EALIGN(name, alignt, words) \ ++ .section ".text"; \ ++ ENTRY_2(__memset_power8) \ ++ .align ALIGNARG(alignt); \ ++ EALIGN_W_##words; \ ++ BODY_LABEL(__memset_power8): \ ++ cfi_startproc; \ ++ LOCALENTRY(__memset_power8) ++ ++#undef END_GEN_TB ++#define END_GEN_TB(name, mask) \ ++ cfi_endproc; \ ++ TRACEBACK_MASK(__memset_power8,mask) \ ++ END_2(__memset_power8) ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#undef __bzero ++#define __bzero __bzero_power8 ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset.c b/sysdeps/powerpc/powerpc64/multiarch/memset.c +index aa2ae70..9c7ed10 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/memset.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/memset.c +@@ -32,16 +32,19 @@ extern __typeof (__redirect_memset) __memset_ppc attribute_hidden; + extern __typeof (__redirect_memset) __memset_power4 attribute_hidden; + extern __typeof (__redirect_memset) __memset_power6 attribute_hidden; + extern __typeof (__redirect_memset) __memset_power7 attribute_hidden; ++extern __typeof (__redirect_memset) __memset_power8 attribute_hidden; + + /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ + libc_ifunc (__libc_memset, +- (hwcap & PPC_FEATURE_HAS_VSX) +- ? 
__memset_power7 : +- (hwcap & PPC_FEATURE_ARCH_2_05) ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __memset_power8 : ++ (hwcap & PPC_FEATURE_HAS_VSX) ++ ? __memset_power7 : ++ (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memset_power6 : + (hwcap & PPC_FEATURE_POWER4) +- ? __memset_power4 ++ ? __memset_power4 + : __memset_ppc); + + #undef memset +diff --git a/sysdeps/powerpc/powerpc64/power8/memset.S b/sysdeps/powerpc/powerpc64/power8/memset.S +new file mode 100644 +index 0000000..191a4df +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/memset.S +@@ -0,0 +1,449 @@ ++/* Optimized memset implementation for PowerPC64/POWER8. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); ++ Returns 's'. */ ++ ++ .machine power8 ++EALIGN (memset, 5, 0) ++ CALL_MCOUNT 3 ++ ++L(_memset): ++ cmpldi cr7,r5,31 ++ neg r0,r3 ++ mr r10,r3 ++ ++ insrdi r4,r4,8,48 ++ insrdi r4,r4,16,32 /* Replicate byte to word. */ ++ ble cr7,L(write_LT_32) ++ ++ andi. r11,r10,15 /* Check alignment of DST. */ ++ insrdi r4,r4,32,0 /* Replicate word to double word. */ ++ ++ beq L(big_aligned) ++ ++ mtocrf 0x01,r0 ++ clrldi r0,r0,60 ++ ++ /* Get DST aligned to 16 bytes. 
*/ ++1: bf 31,2f ++ stb r4,0(r10) ++ addi r10,r10,1 ++ ++2: bf 30,4f ++ sth r4,0(r10) ++ addi r10,r10,2 ++ ++4: bf 29,8f ++ stw r4,0(r10) ++ addi r10,r10,4 ++ ++8: bf 28,16f ++ std r4,0(r10) ++ addi r10,r10,8 ++ ++16: subf r5,r0,r5 ++ ++ .align 4 ++L(big_aligned): ++ /* For sizes larger than 255 two possible paths: ++ - if constant is '0', zero full cache lines with dcbz ++ - otherwise uses vector instructions. */ ++ cmpldi cr5,r5,255 ++ dcbtst 0,r10 ++ cmpldi cr6,r4,0 ++ crand 27,26,21 ++ bt 27,L(huge_dcbz) ++ bge cr5,L(huge_vector) ++ ++ ++ /* Size between 32 and 255 bytes with constant different than 0, use ++ doubleword store instruction to achieve best throughput. */ ++ srdi r8,r5,5 ++ clrldi r11,r5,59 ++ cmpldi cr6,r11,0 ++ cmpdi r8,0 ++ beq L(tail_bytes) ++ mtctr r8 ++ ++ /* Main aligned write loop, writes 32-bytes at a time. */ ++ .align 4 ++L(big_loop): ++ std r4,0(r10) ++ std r4,8(r10) ++ std r4,16(r10) ++ std r4,24(r10) ++ addi r10,r10,32 ++ bdz L(tail_bytes) ++ ++ std r4,0(r10) ++ std r4,8(r10) ++ std r4,16(r10) ++ std r4,24(r10) ++ addi r10,10,32 ++ bdnz L(big_loop) ++ ++ b L(tail_bytes) ++ ++ /* Write remaining 1~31 bytes. */ ++ .align 4 ++L(tail_bytes): ++ beqlr cr6 ++ ++ srdi r7,r11,4 ++ clrldi r8,r11,60 ++ mtocrf 0x01,r7 ++ ++ .align 4 ++ bf 31,8f ++ std r4,0(r10) ++ std r4,8(r10) ++ addi r10,r10,16 ++ ++ .align 4 ++8: mtocrf 0x1,r8 ++ bf 28,4f ++ std r4,0(r10) ++ addi r10,r10,8 ++ ++ .align 4 ++4: bf 29,2f ++ stw 4,0(10) ++ addi 10,10,4 ++ ++ .align 4 ++2: bf 30,1f ++ sth 4,0(10) ++ addi 10,10,2 ++ ++ .align 4 ++1: bflr 31 ++ stb 4,0(10) ++ blr ++ ++ /* Size larger than 255 bytes with constant different than 0, use ++ vector instruction to achieve best throughput. */ ++L(huge_vector): ++ /* Replicate set byte to quadword in VMX register. */ ++ mtvsrd v1,r4 ++ xxpermdi 32,v0,v1,0 ++ vspltb v2,v0,15 ++ ++ /* Main aligned write loop: 128 bytes at a time. 
*/ ++ li r6,16 ++ li r7,32 ++ li r8,48 ++ mtocrf 0x02,r5 ++ srdi r12,r5,7 ++ cmpdi r12,0 ++ beq L(aligned_tail) ++ mtctr r12 ++ b L(aligned_128loop) ++ ++ .align 4 ++L(aligned_128loop): ++ stvx v2,0,r10 ++ stvx v2,r10,r6 ++ stvx v2,r10,r7 ++ stvx v2,r10,r8 ++ addi r10,r10,64 ++ stvx v2,0,r10 ++ stvx v2,r10,r6 ++ stvx v2,r10,r7 ++ stvx v2,r10,r8 ++ addi r10,r10,64 ++ bdnz L(aligned_128loop) ++ ++ /* Write remaining 1~127 bytes. */ ++L(aligned_tail): ++ mtocrf 0x01,r5 ++ bf 25,32f ++ stvx v2,0,r10 ++ stvx v2,r10,r6 ++ stvx v2,r10,r7 ++ stvx v2,r10,r8 ++ addi r10,r10,64 ++ ++32: bf 26,16f ++ stvx v2,0,r10 ++ stvx v2,r10,r6 ++ addi r10,r10,32 ++ ++16: bf 27,8f ++ stvx v2,0,r10 ++ addi r10,r10,16 ++ ++8: bf 28,4f ++ std r4,0(r10) ++ addi r10,r10,8 ++ ++ /* Copies 4~7 bytes. */ ++4: bf 29,L(tail2) ++ stw r4,0(r10) ++ bf 30,L(tail5) ++ sth r4,4(r10) ++ bflr 31 ++ stb r4,6(r10) ++ /* Return original DST pointer. */ ++ blr ++ ++ /* Special case when value is 0 and we have a long length to deal ++ with. Use dcbz to zero out a full cacheline of 128 bytes at a time. ++ Before using dcbz though, we need to get the destination 128-byte ++ aligned. */ ++ .align 4 ++L(huge_dcbz): ++ andi. r11,r10,127 ++ neg r0,r10 ++ beq L(huge_dcbz_aligned) ++ ++ clrldi r0,r0,57 ++ subf r5,r0,r5 ++ srdi r0,r0,3 ++ mtocrf 0x01,r0 ++ ++ /* Write 1~128 bytes until DST is aligned to 128 bytes. */ ++8: bf 28,4f ++ ++ std r4,0(r10) ++ std r4,8(r10) ++ std r4,16(r10) ++ std r4,24(r10) ++ std r4,32(r10) ++ std r4,40(r10) ++ std r4,48(r10) ++ std r4,56(r10) ++ addi r10,r10,64 ++ ++ .align 4 ++4: bf 29,2f ++ std r4,0(r10) ++ std r4,8(r10) ++ std r4,16(r10) ++ std r4,24(r10) ++ addi r10,r10,32 ++ ++ .align 4 ++2: bf 30,1f ++ std r4,0(r10) ++ std r4,8(r10) ++ addi r10,r10,16 ++ ++ .align 4 ++1: bf 31,L(huge_dcbz_aligned) ++ std r4,0(r10) ++ addi r10,r10,8 ++ ++L(huge_dcbz_aligned): ++ /* Setup dcbz unroll offsets and count numbers. 
*/ ++ srdi r8,r5,9 ++ clrldi r11,r5,55 ++ cmpldi cr6,r11,0 ++ li r9,128 ++ cmpdi r8,0 ++ beq L(huge_tail) ++ li r7,256 ++ li r6,384 ++ mtctr r8 ++ ++ .align 4 ++L(huge_loop): ++ /* Sets 512 bytes to zero in each iteration, the loop unrolling shows ++ a throughput boost for large sizes (2048 bytes or higher). */ ++ dcbz 0,r10 ++ dcbz r9,r10 ++ dcbz r7,r10 ++ dcbz r6,r10 ++ addi r10,r10,512 ++ bdnz L(huge_loop) ++ ++ beqlr cr6 ++ ++L(huge_tail): ++ srdi r6,r11,8 ++ srdi r7,r11,4 ++ clrldi r8,r11,4 ++ cmpldi cr6,r8,0 ++ mtocrf 0x01,r6 ++ ++ beq cr6,L(tail) ++ ++ /* We have 1~511 bytes remaining. */ ++ .align 4 ++32: bf 31,16f ++ dcbz 0,r10 ++ dcbz r9,r10 ++ addi r10,r10,256 ++ ++ .align 4 ++16: mtocrf 0x01,r7 ++ bf 28,8f ++ dcbz 0,r10 ++ addi r10,r10,128 ++ ++ .align 4 ++8: bf 29,4f ++ std r4,0(r10) ++ std r4,8(r10) ++ std r4,16(r10) ++ std r4,24(r10) ++ std r4,32(r10) ++ std r4,40(r10) ++ std r4,48(r10) ++ std r4,56(r10) ++ addi r10,r10,64 ++ ++ .align 4 ++4: bf 30,2f ++ std r4,0(r10) ++ std r4,8(r10) ++ std r4,16(r10) ++ std r4,24(r10) ++ addi r10,r10,32 ++ ++ .align 4 ++2: bf 31,L(tail) ++ std r4,0(r10) ++ std r4,8(r10) ++ addi r10,r10,16 ++ .align 4 ++ ++ /* Remaining 1~15 bytes. */ ++L(tail): ++ mtocrf 0x01,r8 ++ ++ .align ++8: bf 28,4f ++ std r4,0(r10) ++ addi r10,r10,8 ++ ++ .align 4 ++4: bf 29,2f ++ stw r4,0(r10) ++ addi r10,r10,4 ++ ++ .align 4 ++2: bf 30,1f ++ sth r4,0(r10) ++ addi r10,r10,2 ++ ++ .align 4 ++1: bflr 31 ++ stb r4,0(r10) ++ blr ++ ++ /* Handle short copies of 0~31 bytes. Best throughput is achieved ++ by just unrolling all operations. */ ++ .align 4 ++L(write_LT_32): ++ cmpldi cr6,5,8 ++ mtocrf 0x01,r5 ++ ble cr6,L(write_LE_8) ++ ++ /* At least 9 bytes to go. */ ++ neg r8,r4 ++ andi. r0,r8,3 ++ cmpldi cr1,r5,16 ++ beq L(write_LT_32_aligned) ++ ++ /* Force 4-byte alignment for SRC. 
*/ ++ mtocrf 0x01,r0 ++ subf r5,r0,r5 ++ ++2: bf 30,1f ++ sth r4,0(r10) ++ addi r10,r10,2 ++ ++1: bf 31,L(end_4bytes_alignment) ++ stb r4,0(r10) ++ addi r10,r10,1 ++ ++ .align 4 ++L(end_4bytes_alignment): ++ cmpldi cr1,r5,16 ++ mtocrf 0x01,r5 ++ ++L(write_LT_32_aligned): ++ blt cr1,8f ++ ++ stw r4,0(r10) ++ stw r4,4(r10) ++ stw r4,8(r10) ++ stw r4,12(r10) ++ addi r10,r10,16 ++ ++8: bf 28,L(tail4) ++ stw r4,0(r10) ++ stw r4,4(r10) ++ addi r10,r10,8 ++ ++ .align 4 ++ /* Copies 4~7 bytes. */ ++L(tail4): ++ bf 29,L(tail2) ++ stw r4,0(r10) ++ bf 30,L(tail5) ++ sth r4,4(r10) ++ bflr 31 ++ stb r4,6(r10) ++ blr ++ ++ .align 4 ++ /* Copies 2~3 bytes. */ ++L(tail2): ++ bf 30,1f ++ sth r4,0(r10) ++ bflr 31 ++ stb r4,2(r10) ++ blr ++ ++ .align 4 ++L(tail5): ++ bflr 31 ++ stb r4,4(r10) ++ blr ++ ++ .align 4 ++1: bflr 31 ++ stb r4,0(r10) ++ blr ++ ++ /* Handles copies of 0~8 bytes. */ ++ .align 4 ++L(write_LE_8): ++ bne cr6,L(tail4) ++ ++ stw r4,0(r10) ++ stw r4,4(r10) ++ blr ++END_GEN_TB (memset,TB_TOCLESS) ++libc_hidden_builtin_def (memset) ++ ++/* Copied from bzero.S to prevent the linker from inserting a stub ++ between bzero and memset. */ ++ENTRY (__bzero) ++ CALL_MCOUNT 3 ++ mr r5,r4 ++ li r4,0 ++ b L(_memset) ++END (__bzero) ++#ifndef __bzero ++weak_alias (__bzero, bzero) ++#endif +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-16.patch b/SOURCES/glibc-rh1385004-16.patch new file mode 100644 index 0000000..2b3db4f --- /dev/null +++ b/SOURCES/glibc-rh1385004-16.patch @@ -0,0 +1,190 @@ +From 2ffa8b8660a7a17572ae5a398171c8be59985eb3 Mon Sep 17 00:00:00 2001 +From: "Gabriel F. T. Gomes" +Date: Mon, 25 Jan 2016 10:50:34 -0500 +Subject: [PATCH] powerpc: Zero pad using memset in strncpy/stpncpy + +Call __memset_power8 to pad, with zeros, the remaining bytes in the +dest string on __strncpy_power8 and __stpncpy_power8. This improves +performance when n is larger than the input string, giving ~30% gain for +larger strings without impacting much shorter strings. 
+ +(cherry picked from commit 72c11b353ede72931cc474c9071d143d9a05c0d7) +--- + ChangeLog | 5 ++ + sysdeps/powerpc/powerpc64/power8/strncpy.S | 123 +++++++++++++---------------- + 2 files changed, 61 insertions(+), 67 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index 5537fc6..8d0e296 100644 +diff --git a/sysdeps/powerpc/powerpc64/power8/strncpy.S b/sysdeps/powerpc/powerpc64/power8/strncpy.S +index 5fda953..80136cc 100644 +--- a/sysdeps/powerpc/powerpc64/power8/strncpy.S ++++ b/sysdeps/powerpc/powerpc64/power8/strncpy.S +@@ -24,6 +24,8 @@ + # define FUNC_NAME strncpy + #endif + ++#define FRAMESIZE (FRAME_MIN_SIZE+48) ++ + /* Implements the function + + char * [r3] strncpy (char *dest [r3], const char *src [r4], size_t n [r5]) +@@ -54,8 +56,7 @@ EALIGN (FUNC_NAME, 4, 0) + addi r10,r4,16 + rlwinm r9,r4,0,19,19 + +- /* Since it is a leaf function, save some non-volatile registers on the +- protected/red zone. */ ++ /* Save some non-volatile registers on the stack. */ + std r26,-48(r1) + std r27,-40(r1) + +@@ -69,6 +70,14 @@ EALIGN (FUNC_NAME, 4, 0) + std r30,-16(r1) + std r31,-8(r1) + ++ /* Update CFI. */ ++ cfi_offset(r26, -48) ++ cfi_offset(r27, -40) ++ cfi_offset(r28, -32) ++ cfi_offset(r29, -24) ++ cfi_offset(r30, -16) ++ cfi_offset(r31, -8) ++ + beq cr7,L(unaligned_lt_16) + rldicl r9,r4,0,61 + subfic r8,r9,8 +@@ -144,74 +153,58 @@ L(short_path_loop_end): + ld r31,-8(r1) + blr + +- /* This code pads the remainder dest with NULL bytes. The algorithm +- calculate the remanining size and issues a doubleword unrolled +- loops followed by a byte a byte set. */ ++ /* This code pads the remainder of dest with NULL bytes. The algorithm ++ calculates the remaining size and calls memset. */ + .align 4 + L(zero_pad_start): + mr r5,r10 + mr r9,r6 + L(zero_pad_start_1): +- srdi. r8,r5,r3 +- mr r10,r9 +-#ifdef USE_AS_STPNCPY +- mr r3,r9 ++ /* At this point: ++ - r5 holds the number of bytes that still have to be written to ++ dest. 
++ - r9 points to the position, in dest, where the first null byte ++ will be written. ++ The above statements are true both when control reaches this label ++ from a branch or when falling through the previous lines. */ ++#ifndef USE_AS_STPNCPY ++ mr r30,r3 /* Save the return value of strncpy. */ ++#endif ++ /* Prepare the call to memset. */ ++ mr r3,r9 /* Pointer to the area to be zero-filled. */ ++ li r4,0 /* Byte to be written (zero). */ ++ ++ /* We delayed the creation of the stack frame, as well as the saving of ++ the link register, because only at this point, we are sure that ++ doing so is actually needed. */ ++ ++ /* Save the link register. */ ++ mflr r0 ++ std r0,16(r1) ++ cfi_offset(lr, 16) ++ ++ /* Create the stack frame. */ ++ stdu r1,-FRAMESIZE(r1) ++ cfi_adjust_cfa_offset(FRAMESIZE) ++ ++ bl __memset_power8 ++ nop ++ ++ /* Restore the stack frame. */ ++ addi r1,r1,FRAMESIZE ++ cfi_adjust_cfa_offset(-FRAMESIZE) ++ /* Restore the link register. */ ++ ld r0,16(r1) ++ mtlr r0 ++ ++#ifndef USE_AS_STPNCPY ++ mr r3,r30 /* Restore the return value of strncpy, i.e.: ++ dest. For stpncpy, the return value is the ++ same as return value of memset. */ + #endif +- beq- cr0,L(zero_pad_loop_b_start) +- cmpldi cr7,r8,1 +- li cr7,0 +- std r7,0(r9) +- beq cr7,L(zero_pad_loop_b_prepare) +- addic. r8,r8,-2 +- addi r10,r9,r16 +- std r7,8(r9) +- beq cr0,L(zero_pad_loop_dw_2) +- std r7,16(r9) +- li r9,0 +- b L(zero_pad_loop_dw_1) +- +- .align 4 +-L(zero_pad_loop_dw): +- addi r10,r10,16 +- std r9,-8(r10) +- beq cr0,L(zero_pad_loop_dw_2) +- std r9,0(r10) +-L(zero_pad_loop_dw_1): +- cmpldi cr7,r8,1 +- std r9,0(r10) +- addic. r8,r8,-2 +- bne cr7,L(zero_pad_loop_dw) +- addi r10,r10,8 +-L(zero_pad_loop_dw_2): +- rldicl r5,r5,0,61 +-L(zero_pad_loop_b_start): +- cmpdi cr7,r5,0 +- addi r5,r5,-1 +- addi r9,r10,-1 +- add r10,r10,5 +- subf r10,r9,r10 +- li r8,0 +- beq- cr7,L(short_path_loop_end) +- +- /* Write remaining 1-8 bytes. 
*/ +- .align 4 +- addi r9,r9,1 +- mtocrf 0x1,r10 +- bf 29,4f +- stw r8,0(r9) +- addi r9,r9,4 +- +- .align 4 +-4: bf 30,2f +- sth r8,0(r9) +- addi r9,r9,2 +- +- .align 4 +-2: bf 31,1f +- stb r8,0(r9) + +- /* Restore non-volatile registers. */ +-1: ld r26,-48(r1) ++ /* Restore non-volatile registers and return. */ ++ ld r26,-48(r1) + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) +@@ -407,10 +400,6 @@ L(short_path_prepare_2_3): + mr r4,r28 + mr r9,r29 + b L(short_path_2) +-L(zero_pad_loop_b_prepare): +- addi r10,r9,8 +- rldicl r5,r5,0,61 +- b L(zero_pad_loop_b_start) + L(zero_pad_start_prepare_1): + mr r5,r6 + mr r9,r8 +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-17.patch b/SOURCES/glibc-rh1385004-17.patch new file mode 100644 index 0000000..6146094 --- /dev/null +++ b/SOURCES/glibc-rh1385004-17.patch @@ -0,0 +1,123 @@ +From e883cafe35f52b3e511dbaf85052f8dddd395a2e Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Mon, 30 May 2016 18:00:57 -0300 +Subject: [PATCH] powerpc: Fix --disable-multi-arch build on POWER8 + +Add missing symbols of stpncpy and strcasestr when multi-arch is +disabled. +Fix memset call from strncpy/stpncpy when multi-arch is disabled. 
+ +(cherry picked from commit c24480ce3b5fed848243fc9642932ef2fa670109) +--- + ChangeLog | 15 +++++++++++++++ + sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S | 3 +++ + sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S | 3 +++ + sysdeps/powerpc/powerpc64/power8/stpncpy.S | 4 ++++ + sysdeps/powerpc/powerpc64/power8/strcasestr.S | 7 +++++-- + sysdeps/powerpc/powerpc64/power8/strncpy.S | 16 ++++++++++++---- + 6 files changed, 42 insertions(+), 6 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index 8d0e296..c01d1a0 100644 +diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S b/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S +index d5d835d..d9babb5 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S ++++ b/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S +@@ -36,4 +36,7 @@ + TRACEBACK(__stpncpy_power8) \ + END_2(__stpncpy_power8) + ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ + #include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S +index ed906a4..f86a0f0 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S ++++ b/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S +@@ -37,4 +37,7 @@ + #undef libc_hidden_builtin_def + #define libc_hidden_builtin_def(name) + ++/* memset is used to pad the end of the string. 
*/ ++#define MEMSET __memset_power8 ++ + #include +diff --git a/sysdeps/powerpc/powerpc64/power8/stpncpy.S b/sysdeps/powerpc/powerpc64/power8/stpncpy.S +index 76a1466..e66bd0a 100644 +--- a/sysdeps/powerpc/powerpc64/power8/stpncpy.S ++++ b/sysdeps/powerpc/powerpc64/power8/stpncpy.S +@@ -18,3 +18,7 @@ + + #define USE_AS_STPNCPY + #include ++ ++weak_alias (__stpncpy, stpncpy) ++libc_hidden_def (__stpncpy) ++libc_hidden_builtin_def (stpncpy) +diff --git a/sysdeps/powerpc/powerpc64/power8/strcasestr.S b/sysdeps/powerpc/powerpc64/power8/strcasestr.S +index 24b2b76..60015ae 100644 +--- a/sysdeps/powerpc/powerpc64/power8/strcasestr.S ++++ b/sysdeps/powerpc/powerpc64/power8/strcasestr.S +@@ -81,7 +81,7 @@ + /* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +-EALIGN (strcasestr, 4, 0) ++EALIGN (__strcasestr, 4, 0) + CALL_MCOUNT 2 + mflr r0 /* Load link register LR to r0. */ + std r31, -8(r1) /* Save callers register r31. */ +@@ -527,5 +527,8 @@ L(end): + cfi_restore(r31) + mtlr r0 /* Branch to link register. */ + blr +-END (strcasestr) ++END (__strcasestr) ++ ++weak_alias (__strcasestr, strcasestr) ++libc_hidden_def (__strcasestr) + libc_hidden_builtin_def (strcasestr) +diff --git a/sysdeps/powerpc/powerpc64/power8/strncpy.S b/sysdeps/powerpc/powerpc64/power8/strncpy.S +index 80136cc..05c7d8a 100644 +--- a/sysdeps/powerpc/powerpc64/power8/strncpy.S ++++ b/sysdeps/powerpc/powerpc64/power8/strncpy.S +@@ -24,6 +24,16 @@ + # define FUNC_NAME strncpy + #endif + ++#ifndef MEMSET ++/* For builds without IFUNC support, local calls should be made to internal ++ GLIBC symbol (created by libc_hidden_builtin_def). 
*/ ++# ifdef SHARED ++# define MEMSET __GI_memset ++# else ++# define MEMSET memset ++# endif ++#endif ++ + #define FRAMESIZE (FRAME_MIN_SIZE+48) + + /* Implements the function +@@ -187,7 +197,7 @@ L(zero_pad_start_1): + stdu r1,-FRAMESIZE(r1) + cfi_adjust_cfa_offset(FRAMESIZE) + +- bl __memset_power8 ++ bl MEMSET + nop + + /* Restore the stack frame. */ +@@ -406,8 +416,6 @@ L(zero_pad_start_prepare_1): + b L(zero_pad_start_1) + END (FUNC_NAME) + +-#ifdef USE_AS_STPNCPY +-libc_hidden_def (__stpncpy) +-#else ++#ifndef USE_AS_STPNCPY + libc_hidden_builtin_def (strncpy) + #endif +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-18.patch b/SOURCES/glibc-rh1385004-18.patch new file mode 100644 index 0000000..12aacca --- /dev/null +++ b/SOURCES/glibc-rh1385004-18.patch @@ -0,0 +1,804 @@ +From 7dd60718b327b3eb6112ec3900750007b0259189 Mon Sep 17 00:00:00 2001 +From: raji +Date: Tue, 14 Jun 2016 14:51:16 +0530 +Subject: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 + +This implementation utilizes vectors to improve performance +compared to the current byte-by-byte implementation for POWER7. +The performance improvement is up to 4x. This patch is tested +on powerpc64 and powerpc64le. 
+ +(cherry picked from commit c8376f3e07602aaef9cb843bb73cb5f2b860634a) + +Conflicts: + sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S + sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c +--- + ChangeLog | 22 + + sysdeps/powerpc/powerpc64/multiarch/Makefile | 4 +- + .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 6 + + .../powerpc64/multiarch/strcasecmp-power7.S | 20 +- + .../powerpc64/multiarch/strcasecmp-power8.S | 28 ++ + .../powerpc/powerpc64/multiarch/strcasecmp-ppc64.c | 21 + + sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c | 32 +- + .../powerpc/powerpc64/multiarch/strncase-power8.S | 28 ++ + .../powerpc/powerpc64/multiarch/strncase-ppc64.c | 21 + + sysdeps/powerpc/powerpc64/multiarch/strncase.c | 25 +- + sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 446 +++++++++++++++++++++ + sysdeps/powerpc/powerpc64/power8/strncase.S | 20 + + 12 files changed, 622 insertions(+), 51 deletions(-) + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c + create mode 100644 sysdeps/powerpc/powerpc64/power8/strcasecmp.S + create mode 100644 sysdeps/powerpc/powerpc64/power8/strncase.S + +diff --git a/ChangeLog b/ChangeLog +index c01d1a0..9385bd0 100644 +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 9ee9bc2..e3ac285 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -21,6 +21,8 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ + memrchr-power7 memrchr-ppc64 rawmemchr-power7 \ + stpcpy-power8 stpcpy-power7 stpcpy-ppc64 \ ++ strcasecmp-ppc64 strcasecmp-power8 \ ++ strncase-ppc64 strncase-power8 \ + 
strcasestr-power8 strcasestr-ppc64 \ + strcat-power8 strcat-power7 strcat-ppc64 \ + strcmp-power8 strcmp-power7 strcmp-ppc64 \ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index 228891f..aabd7bc 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -204,6 +204,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c. */ + IFUNC_IMPL (i, name, strcasecmp, + IFUNC_IMPL_ADD (array, i, strcasecmp, ++ hwcap2 & PPC_FEATURE2_ARCH_2_07, ++ __strcasecmp_power8) ++ IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc)) +@@ -219,6 +222,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c. */ + IFUNC_IMPL (i, name, strncasecmp, + IFUNC_IMPL_ADD (array, i, strncasecmp, ++ hwcap2 & PPC_FEATURE2_ARCH_2_07, ++ __strncasecmp_power8) ++ IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S +index 56eed9a..99cd7bd 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S +@@ -1,5 +1,5 @@ +-/* Optimized strcasecmp implementation foOWER7. +- Copyright (C) 2013-2014 Free Software Foundation, Inc. ++/* Optimized strcasecmp implementation for POWER7. ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or +@@ -18,21 +18,7 @@ + + #include + +-#undef ENTRY +-#define ENTRY(name) \ +- .section ".text"; \ +- ENTRY_2(__strcasecmp_power7) \ +- .align ALIGNARG(2); \ +- BODY_LABEL(__strcasecmp_power7): \ +- cfi_startproc; \ +- LOCALENTRY(__strcasecmp_power7) +- +-#undef END +-#define END(name) \ +- cfi_endproc; \ +- TRACEBACK(__strcasecmp_power7) \ +- END_2(__strcasecmp_power7) +- ++#define __strcasecmp __strcasecmp_power7 + #undef weak_alias + #define weak_alias(name, alias) + +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S +new file mode 100644 +index 0000000..492047a +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S +@@ -0,0 +1,28 @@ ++/* Optimized strcasecmp implementation for POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#define __strcasecmp __strcasecmp_power8 ++#undef weak_alias ++#define weak_alias(name, alias) ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c +new file mode 100644 +index 0000000..6318b4a +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c +@@ -0,0 +1,21 @@ ++/* Multiarch strcasecmp for PPC64. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define strcasecmp __strcasecmp_ppc ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c +index 979e9f1..5ec6885 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c +@@ -1,5 +1,5 @@ +-/* Multiple versions of strcasecmp. +- Copyright (C) 2013-2014 Free Software Foundation, Inc. ++/* Multiple versions of strcasecmp ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -16,25 +16,21 @@ + License along with the GNU C Library; if not, see + . 
*/ + +-#if IS_IN (libc) +-# include +-# define strcasecmp __strcasecmp_ppc +-extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; +-extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; +-#endif ++#include ++#include ++#include "init-arch.h" + +-#include +-#undef strcasecmp ++extern __typeof (__strcasecmp) __libc_strcasecmp; + +-#if IS_IN (libc) +-# include +-# include "init-arch.h" ++extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; ++extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; ++extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden; + +-extern __typeof (__strcasecmp) __libc_strcasecmp; + libc_ifunc (__libc_strcasecmp, +- (hwcap & PPC_FEATURE_HAS_VSX) +- ? __strcasecmp_power7 +- : __strcasecmp_ppc); ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __strcasecmp_power8: ++ (hwcap & PPC_FEATURE_HAS_VSX) ++ ? __strcasecmp_power7 ++ : __strcasecmp_ppc); + + weak_alias (__libc_strcasecmp, strcasecmp) +-#endif +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S +new file mode 100644 +index 0000000..01a63b5 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S +@@ -0,0 +1,28 @@ ++/* Optimized strncasecmp implementation for POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#define __strncasecmp __strncasecmp_power8 ++#undef weak_alias ++#define weak_alias(name, alias) ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c +new file mode 100644 +index 0000000..c245d77 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c +@@ -0,0 +1,21 @@ ++/* Multiarch strncasecmp for PPC64. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define strncasecmp __strncasecmp_ppc ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c +index 4339f3a..5bfaf65 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c +@@ -16,26 +16,21 @@ + License along with the GNU C Library; if not, see + . 
*/ + +-#if IS_IN (libc) +-# include +-# define strncasecmp __strncasecmp_ppc +-extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; +-extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; +-#endif ++#include ++#include ++#include "init-arch.h" + +-#include +-#undef strncasecmp ++extern __typeof (__strncasecmp) __libc_strncasecmp; + +-#if IS_IN (libc) +-# include +-# include "init-arch.h" ++extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; ++extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; ++extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden; + +-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle +- ifunc symbol properly. */ +-extern __typeof (__strncasecmp) __libc_strncasecmp; + libc_ifunc (__libc_strncasecmp, ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __strncasecmp_power8: + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_power7 + : __strncasecmp_ppc); ++ + weak_alias (__libc_strncasecmp, strncasecmp) +-#endif +diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S +new file mode 100644 +index 0000000..63f6217 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S +@@ -0,0 +1,446 @@ ++/* Optimized strcasecmp implementation for PowerPC64. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */ ++ ++#ifndef USE_AS_STRNCASECMP ++# define __STRCASECMP __strcasecmp ++# define STRCASECMP strcasecmp ++#else ++# define __STRCASECMP __strncasecmp ++# define STRCASECMP strncasecmp ++#endif ++/* Convert 16 bytes to lowercase and compare */ ++#define TOLOWER() \ ++ vaddubm v8, v4, v1; \ ++ vaddubm v7, v4, v3; \ ++ vcmpgtub v8, v8, v2; \ ++ vsel v4, v7, v4, v8; \ ++ vaddubm v8, v5, v1; \ ++ vaddubm v7, v5, v3; \ ++ vcmpgtub v8, v8, v2; \ ++ vsel v5, v7, v5, v8; \ ++ vcmpequb. v7, v5, v4; ++ ++/* Get 16 bytes for unaligned case. */ ++#ifdef __LITTLE_ENDIAN__ ++#define GET16BYTES(reg1, reg2, reg3) \ ++ lvx reg1, 0, reg2; \ ++ vcmpequb. v8, v0, reg1; \ ++ beq cr6, 1f; \ ++ vspltisb v9, 0; \ ++ b 2f; \ ++ .align 4; \ ++1: \ ++ addi r6, reg2, 16; \ ++ lvx v9, 0, r6; \ ++2: \ ++ vperm reg1, v9, reg1, reg3; ++#else ++#define GET16BYTES(reg1, reg2, reg3) \ ++ lvx reg1, 0, reg2; \ ++ vcmpequb. v8, v0, reg1; \ ++ beq cr6, 1f; \ ++ vspltisb v9, 0; \ ++ b 2f; \ ++ .align 4; \ ++1: \ ++ addi r6, reg2, 16; \ ++ lvx v9, 0, r6; \ ++2: \ ++ vperm reg1, reg1, v9, reg3; ++#endif ++ ++/* Check null in v4, v5 and convert to lower. */ ++#define CHECKNULLANDCONVERT() \ ++ vcmpequb. v7, v0, v5; \ ++ beq cr6, 3f; \ ++ vcmpequb. 
v7, v0, v4; \ ++ beq cr6, 3f; \ ++ b L(null_found); \ ++ .align 4; \ ++3: \ ++ TOLOWER() ++ ++#ifdef _ARCH_PWR8 ++# define VCLZD_V8_v7 vclzd v8, v7; ++# define MFVRD_R3_V1 mfvrd r3, v1; ++# define VSUBUDM_V9_V8 vsubudm v9, v9, v8; ++# define VPOPCNTD_V8_V8 vpopcntd v8, v8; ++# define VADDUQM_V7_V8 vadduqm v9, v7, v8; ++#else ++# define VCLZD_V8_v7 .long 0x11003fc2 ++# define MFVRD_R3_V1 .long 0x7c230067 ++# define VSUBUDM_V9_V8 .long 0x112944c0 ++# define VPOPCNTD_V8_V8 .long 0x110047c3 ++# define VADDUQM_V7_V8 .long 0x11274100 ++#endif ++ ++ .machine power7 ++ ++ENTRY (__STRCASECMP) ++#ifdef USE_AS_STRNCASECMP ++ CALL_MCOUNT 3 ++#else ++ CALL_MCOUNT 2 ++#endif ++#define rRTN r3 /* Return value */ ++#define rSTR1 r10 /* 1st string */ ++#define rSTR2 r4 /* 2nd string */ ++#define rCHAR1 r6 /* Byte read from 1st string */ ++#define rCHAR2 r7 /* Byte read from 2nd string */ ++#define rADDR1 r8 /* Address of tolower(rCHAR1) */ ++#define rADDR2 r12 /* Address of tolower(rCHAR2) */ ++#define rLWR1 r8 /* Word tolower(rCHAR1) */ ++#define rLWR2 r12 /* Word tolower(rCHAR2) */ ++#define rTMP r9 ++#define rLOC r11 /* Default locale address */ ++ ++ cmpd cr7, rRTN, rSTR2 ++ ++ /* Get locale address. */ ++ ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) ++ add rLOC, rTMP, __libc_tsd_LOCALE@tls ++ ld rLOC, 0(rLOC) ++ ++ mr rSTR1, rRTN ++ li rRTN, 0 ++ beqlr cr7 ++#ifdef USE_AS_STRNCASECMP ++ cmpdi cr7, r5, 0 ++ beq cr7, L(retnull) ++ cmpdi cr7, r5, 16 ++ blt cr7, L(bytebybyte) ++#endif ++ vspltisb v0, 0 ++ vspltisb v8, -1 ++ /* Check for null in initial characters. ++ Check max of 16 char depending on the alignment. ++ If null is present, proceed byte by byte. */ ++ lvx v4, 0, rSTR1 ++#ifdef __LITTLE_ENDIAN__ ++ lvsr v10, 0, rSTR1 /* Compute mask. */ ++ vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */ ++#else ++ lvsl v10, 0, rSTR1 ++ vperm v9, v4, v8, v10 ++#endif ++ vcmpequb. v9, v0, v9 /* Check for null bytes. 
*/ ++ bne cr6, L(bytebybyte) ++ lvx v5, 0, rSTR2 ++ /* Calculate alignment. */ ++#ifdef __LITTLE_ENDIAN__ ++ lvsr v6, 0, rSTR2 ++ vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */ ++#else ++ lvsl v6, 0, rSTR2 ++ vperm v9, v5, v8, v6 ++#endif ++ vcmpequb. v9, v0, v9 /* Check for null bytes. */ ++ bne cr6, L(bytebybyte) ++ /* Check if locale has non ascii characters. */ ++ ld rTMP, 0(rLOC) ++ addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES ++ lwz rTMP, 0(r6) ++ cmpdi cr7, rTMP, 1 ++ beq cr7, L(bytebybyte) ++ ++ /* Load vector registers with values used for TOLOWER. */ ++ /* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte. */ ++ vspltisb v3, 2 ++ vspltisb v9, 4 ++ vsl v3, v3, v9 ++ vaddubm v1, v3, v3 ++ vnor v1, v1, v1 ++ vspltisb v2, 7 ++ vsububm v2, v3, v2 ++ ++ andi. rADDR1, rSTR1, 0xF ++ beq cr0, L(align) ++ addi r6, rSTR1, 16 ++ lvx v9, 0, r6 ++ /* Compute 16 bytes from previous two loads. */ ++#ifdef __LITTLE_ENDIAN__ ++ vperm v4, v9, v4, v10 ++#else ++ vperm v4, v4, v9, v10 ++#endif ++L(align): ++ andi. rADDR2, rSTR2, 0xF ++ beq cr0, L(align1) ++ addi r6, rSTR2, 16 ++ lvx v9, 0, r6 ++ /* Compute 16 bytes from previous two loads. */ ++#ifdef __LITTLE_ENDIAN__ ++ vperm v5, v9, v5, v6 ++#else ++ vperm v5, v5, v9, v6 ++#endif ++L(align1): ++ CHECKNULLANDCONVERT() ++ blt cr6, L(match) ++ b L(different) ++ .align 4 ++L(match): ++ clrldi r6, rSTR1, 60 ++ subfic r7, r6, 16 ++#ifdef USE_AS_STRNCASECMP ++ sub r5, r5, r7 ++#endif ++ add rSTR1, rSTR1, r7 ++ add rSTR2, rSTR2, r7 ++ andi. rADDR2, rSTR2, 0xF ++ addi rSTR1, rSTR1, -16 ++ addi rSTR2, rSTR2, -16 ++ beq cr0, L(aligned) ++#ifdef __LITTLE_ENDIAN__ ++ lvsr v6, 0, rSTR2 ++#else ++ lvsl v6, 0, rSTR2 ++#endif ++ /* There are 2 loops depending on the input alignment. ++ Each loop gets 16 bytes from s1 and s2, check for null, ++ convert to lowercase and compare. Loop till difference ++ or null occurs. 
*/ ++L(s1_align): ++ addi rSTR1, rSTR1, 16 ++ addi rSTR2, rSTR2, 16 ++#ifdef USE_AS_STRNCASECMP ++ cmpdi cr7, r5, 16 ++ blt cr7, L(bytebybyte) ++ addi r5, r5, -16 ++#endif ++ lvx v4, 0, rSTR1 ++ GET16BYTES(v5, rSTR2, v6) ++ CHECKNULLANDCONVERT() ++ blt cr6, L(s1_align) ++ b L(different) ++ .align 4 ++L(aligned): ++ addi rSTR1, rSTR1, 16 ++ addi rSTR2, rSTR2, 16 ++#ifdef USE_AS_STRNCASECMP ++ cmpdi cr7, r5, 16 ++ blt cr7, L(bytebybyte) ++ addi r5, r5, -16 ++#endif ++ lvx v4, 0, rSTR1 ++ lvx v5, 0, rSTR2 ++ CHECKNULLANDCONVERT() ++ blt cr6, L(aligned) ++ ++ /* Calculate and return the difference. */ ++L(different): ++ vaddubm v1, v3, v3 ++ vcmpequb v7, v0, v7 ++#ifdef __LITTLE_ENDIAN__ ++ /* Count trailing zero. */ ++ vspltisb v8, -1 ++ VADDUQM_V7_V8 ++ vandc v8, v9, v7 ++ VPOPCNTD_V8_V8 ++ vspltb v6, v8, 15 ++ vcmpequb. v6, v6, v1 ++ blt cr6, L(shift8) ++#else ++ /* Count leading zero. */ ++ VCLZD_V8_v7 ++ vspltb v6, v8, 7 ++ vcmpequb. v6, v6, v1 ++ blt cr6, L(shift8) ++ vsro v8, v8, v1 ++#endif ++ b L(skipsum) ++ .align 4 ++L(shift8): ++ vsumsws v8, v8, v0 ++L(skipsum): ++#ifdef __LITTLE_ENDIAN__ ++ /* Shift registers based on leading zero count. */ ++ vsro v6, v5, v8 ++ vsro v7, v4, v8 ++ /* Merge and move to GPR. */ ++ vmrglb v6, v6, v7 ++ vslo v1, v6, v1 ++ MFVRD_R3_V1 ++ /* Place the characters that are different in first position. */ ++ sldi rSTR2, rRTN, 56 ++ srdi rSTR2, rSTR2, 56 ++ sldi rSTR1, rRTN, 48 ++ srdi rSTR1, rSTR1, 56 ++#else ++ vslo v6, v5, v8 ++ vslo v7, v4, v8 ++ vmrghb v1, v6, v7 ++ MFVRD_R3_V1 ++ srdi rSTR2, rRTN, 48 ++ sldi rSTR2, rSTR2, 56 ++ srdi rSTR2, rSTR2, 56 ++ srdi rSTR1, rRTN, 56 ++#endif ++ subf rRTN, rSTR1, rSTR2 ++ extsw rRTN, rRTN ++ blr ++ ++ .align 4 ++ /* OK. We've hit the end of the string. We need to be careful that ++ we don't compare two strings as different because of junk beyond ++ the end of the strings... */ ++L(null_found): ++ vaddubm v10, v3, v3 ++#ifdef __LITTLE_ENDIAN__ ++ /* Count trailing zero. 
*/ ++ vspltisb v8, -1 ++ VADDUQM_V7_V8 ++ vandc v8, v9, v7 ++ VPOPCNTD_V8_V8 ++ vspltb v6, v8, 15 ++ vcmpequb. v6, v6, v10 ++ blt cr6, L(shift_8) ++#else ++ /* Count leading zero. */ ++ VCLZD_V8_v7 ++ vspltb v6, v8, 7 ++ vcmpequb. v6, v6, v10 ++ blt cr6, L(shift_8) ++ vsro v8, v8, v10 ++#endif ++ b L(skipsum1) ++ .align 4 ++L(shift_8): ++ vsumsws v8, v8, v0 ++L(skipsum1): ++ /* Calculate shift count based on count of zero. */ ++ vspltisb v10, 7 ++ vslb v10, v10, v10 ++ vsldoi v9, v0, v10, 1 ++ VSUBUDM_V9_V8 ++ vspltisb v8, 8 ++ vsldoi v8, v0, v8, 1 ++ VSUBUDM_V9_V8 ++ /* Shift and remove junk after null character. */ ++#ifdef __LITTLE_ENDIAN__ ++ vslo v5, v5, v9 ++ vslo v4, v4, v9 ++#else ++ vsro v5, v5, v9 ++ vsro v4, v4, v9 ++#endif ++ /* Convert and compare 16 bytes. */ ++ TOLOWER() ++ blt cr6, L(retnull) ++ b L(different) ++ .align 4 ++L(retnull): ++ li rRTN, 0 ++ blr ++ .align 4 ++L(bytebybyte): ++ /* Unrolling loop for POWER: loads are done with 'lbz' plus ++ offset and string descriptors are only updated in the end ++ of loop unrolling. */ ++ ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) ++ lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ ++ lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ ++#ifdef USE_AS_STRNCASECMP ++ rldicl rTMP, r5, 62, 2 ++ cmpdi cr7, rTMP, 0 ++ beq cr7, L(lessthan4) ++ mtctr rTMP ++#endif ++L(loop): ++ cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ ++ sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ ++ sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ ++ lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ ++ lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ ++ cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? 
*/ ++ crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ ++ beq cr1, L(done) ++ lbz rCHAR1, 1(rSTR1) ++ lbz rCHAR2, 1(rSTR2) ++ cmpdi rCHAR1, 0 ++ sldi rADDR1, rCHAR1, 2 ++ sldi rADDR2, rCHAR2, 2 ++ lwzx rLWR1, rLOC, rADDR1 ++ lwzx rLWR2, rLOC, rADDR2 ++ cmpw cr1, rLWR1, rLWR2 ++ crorc 4*cr1+eq,eq,4*cr1+eq ++ beq cr1, L(done) ++ lbz rCHAR1, 2(rSTR1) ++ lbz rCHAR2, 2(rSTR2) ++ cmpdi rCHAR1, 0 ++ sldi rADDR1, rCHAR1, 2 ++ sldi rADDR2, rCHAR2, 2 ++ lwzx rLWR1, rLOC, rADDR1 ++ lwzx rLWR2, rLOC, rADDR2 ++ cmpw cr1, rLWR1, rLWR2 ++ crorc 4*cr1+eq,eq,4*cr1+eq ++ beq cr1, L(done) ++ lbz rCHAR1, 3(rSTR1) ++ lbz rCHAR2, 3(rSTR2) ++ cmpdi rCHAR1, 0 ++ /* Increment both string descriptors */ ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++ sldi rADDR1, rCHAR1, 2 ++ sldi rADDR2, rCHAR2, 2 ++ lwzx rLWR1, rLOC, rADDR1 ++ lwzx rLWR2, rLOC, rADDR2 ++ cmpw cr1, rLWR1, rLWR2 ++ crorc 4*cr1+eq,eq,4*cr1+eq ++ beq cr1, L(done) ++ lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ ++ lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ ++#ifdef USE_AS_STRNCASECMP ++ bdnz L(loop) ++#else ++ b L(loop) ++#endif ++#ifdef USE_AS_STRNCASECMP ++L(lessthan4): ++ clrldi r5, r5, 62 ++ cmpdi cr7, r5, 0 ++ beq cr7, L(retnull) ++ mtctr r5 ++L(loop1): ++ cmpdi rCHAR1, 0 ++ sldi rADDR1, rCHAR1, 2 ++ sldi rADDR2, rCHAR2, 2 ++ lwzx rLWR1, rLOC, rADDR1 ++ lwzx rLWR2, rLOC, rADDR2 ++ cmpw cr1, rLWR1, rLWR2 ++ crorc 4*cr1+eq,eq,4*cr1+eq ++ beq cr1, L(done) ++ addi rSTR1, rSTR1, 1 ++ addi rSTR2, rSTR2, 1 ++ lbz rCHAR1, 0(rSTR1) ++ lbz rCHAR2, 0(rSTR2) ++ bdnz L(loop1) ++#endif ++L(done): ++ subf r0, rLWR2, rLWR1 ++ extsw rRTN, r0 ++ blr ++END (__STRCASECMP) ++ ++weak_alias (__STRCASECMP, STRCASECMP) ++libc_hidden_builtin_def (__STRCASECMP) +diff --git a/sysdeps/powerpc/powerpc64/power8/strncase.S b/sysdeps/powerpc/powerpc64/power8/strncase.S +new file mode 100644 +index 0000000..7ce2ed0 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/strncase.S +@@ -0,0 +1,20 @@ ++/* Optimized strncasecmp implementation 
for POWER8. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define USE_AS_STRNCASECMP 1 ++#include +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-19.patch b/SOURCES/glibc-rh1385004-19.patch new file mode 100644 index 0000000..be5da29 --- /dev/null +++ b/SOURCES/glibc-rh1385004-19.patch @@ -0,0 +1,59 @@ +From 2fd1041a8d9684978546886d58fdf8add8c8d9f7 Mon Sep 17 00:00:00 2001 +From: Rajalakshmi Srinivasaraghavan +Date: Tue, 5 Jul 2016 21:20:41 +0530 +Subject: [PATCH] powerpc: Fix return code of strcasecmp for unaligned inputs + +If the input values are unaligned and if there are null characters in the +memory before the starting address of the input values, strcasecmp +gives incorrect return code. Fixed it by adding mask the bits that +are not part of the string. 
+ +(cherry picked from commit 30e4cc5413f72c2c728a544389da0c48500d9904) +--- + ChangeLog | 6 ++++++ + sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 17 ++++++++++++++--- + 2 files changed, 20 insertions(+), 3 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index 9385bd0..af5f694 100644 +diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S +index 63f6217..c83dc52 100644 +--- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S ++++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S +@@ -40,11 +40,20 @@ + vsel v5, v7, v5, v8; \ + vcmpequb. v7, v5, v4; + +-/* Get 16 bytes for unaligned case. */ ++/* ++ * Get 16 bytes for unaligned case. ++ * reg1: Vector to hold next 16 bytes. ++ * reg2: Address to read from. ++ * reg3: Permute control vector. ++ * v8: Tmp vector used to mask unwanted bytes. ++ * v9: Tmp vector,0 when null is found on first 16 bytes ++ */ + #ifdef __LITTLE_ENDIAN__ + #define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ +- vcmpequb. v8, v0, reg1; \ ++ vspltisb v8, -1; \ ++ vperm v8, v8, reg1, reg3; \ ++ vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ +@@ -57,7 +66,9 @@ + #else + #define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ +- vcmpequb. v8, v0, reg1; \ ++ vspltisb v8, -1; \ ++ vperm v8, reg1, v8, reg3; \ ++ vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-2.patch b/SOURCES/glibc-rh1385004-2.patch new file mode 100644 index 0000000..09fed87 --- /dev/null +++ b/SOURCES/glibc-rh1385004-2.patch @@ -0,0 +1,73 @@ +From c86e89a09d187da3aca1ee7ff9bfee4957fe70ff Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Tue, 28 Jul 2015 15:34:25 -0300 +Subject: [PATCH] powerpc: Fix strstr/power7 build + +This patch fixes the strstr build with --disable-multi-arch option. +The optimization calls the __strstr_ppc symbol, which always build +for multiarch config but not if it is disable. 
This patch fixes it +by adding the default C implementation object with the expected +symbol name. + + * sysdeps/powerpc/powerpc64/power7/Makefile [$(subdir) = string] + (sysdep_routines): Add strstr-ppc64. + * sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c: New file. + +(cherry picked from commit 357bb400f1b25e48e265fd55b5996328d2a8c142) +--- + ChangeLog | 6 ++++++ + sysdeps/powerpc/powerpc64/power7/Makefile | 1 + + sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c | 27 +++++++++++++++++++++++++ + 3 files changed, 34 insertions(+) + create mode 100644 sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c + +diff --git a/ChangeLog b/ChangeLog +index 8e98192..d70df5c 100644 +diff --git a/sysdeps/powerpc/powerpc64/power7/Makefile b/sysdeps/powerpc/powerpc64/power7/Makefile +index 40aacfa..89a2296 100644 +--- a/sysdeps/powerpc/powerpc64/power7/Makefile ++++ b/sysdeps/powerpc/powerpc64/power7/Makefile +@@ -5,6 +5,7 @@ CFLAGS-rtld.c += -mno-vsx + endif + + ifeq ($(subdir),string) ++sysdep_routines += strstr-ppc64 + CFLAGS-strncase.c += -funroll-loops + CFLAGS-strncase_l.c += -funroll-loops + endif +diff --git a/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c b/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c +new file mode 100644 +index 0000000..bbab92d +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c +@@ -0,0 +1,27 @@ ++/* Optimized strstr implementation for PowerPC64/POWER7. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#define STRSTR __strstr_ppc ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(__name) ++ ++extern __typeof (strstr) __strstr_ppc attribute_hidden; ++ ++#include +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-20.patch b/SOURCES/glibc-rh1385004-20.patch new file mode 100644 index 0000000..38931a1 --- /dev/null +++ b/SOURCES/glibc-rh1385004-20.patch @@ -0,0 +1,453 @@ +From fdd895c45c52241f786cf4e29ccc45c240b1acb5 Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Mon, 27 Jun 2016 16:03:10 -0300 +Subject: [PATCH] powerpc: Add a POWER8-optimized version of expf() + +This implementation is based on the one already used at +sysdeps/x86_64/fpu/e_expf.S. + +This implementation improves the performance by ~14% on average in synthetic +benchmarks at the cost of decreasing accuracy to 1 ULP. 
+ +(cherry picked from commit 35da2541c382d1d4b7c9a15049a3cd1c7a6863a3) +--- + ChangeLog | 11 + + sysdeps/powerpc/fpu/libm-test-ulps | 2 + + sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile | 3 +- + .../powerpc64/fpu/multiarch/e_expf-power8.S | 26 ++ + .../powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c | 24 ++ + sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c | 31 +++ + sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S | 303 +++++++++++++++++++++ + 7 files changed, 399 insertions(+), 1 deletion(-) + create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S + create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c + create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c + create mode 100644 sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S + +diff --git a/ChangeLog b/ChangeLog +index af5f694..6cb2578 100644 +diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile +index 0e3eac7..331763e 100644 +--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile +@@ -24,7 +24,8 @@ libm-sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ + s_modff-power5+ s_modff-ppc64 e_hypot-ppc64 \ + e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \ + s_isnan-power8 s_isinf-power8 s_finite-power8 \ +- s_llrint-power8 s_llround-power8 ++ s_llrint-power8 s_llround-power8 \ ++ e_expf-power8 e_expf-ppc64 + + CFLAGS-s_logbf-power7.c = -mcpu=power7 + CFLAGS-s_logbl-power7.c = -mcpu=power7 +diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S +new file mode 100644 +index 0000000..02eff24 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S +@@ -0,0 +1,26 @@ ++/* __ieee754_expf() POWER8 version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#undef strong_alias ++#define strong_alias(a, b) ++ ++#define __ieee754_expf __ieee754_expf_power8 ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c +new file mode 100644 +index 0000000..40f9e3a +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c +@@ -0,0 +1,24 @@ ++/* __ieee_expf() PowerPC64 version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#undef strong_alias ++#define strong_alias(a, b) ++ ++#define __ieee754_expf __ieee754_expf_ppc64 ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c +new file mode 100644 +index 0000000..1d9a8c6 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c +@@ -0,0 +1,31 @@ ++/* Multiple versions of ieee754_expf. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include "init-arch.h" ++ ++extern __typeof (__ieee754_expf) __ieee754_expf_ppc64 attribute_hidden; ++extern __typeof (__ieee754_expf) __ieee754_expf_power8 attribute_hidden; ++ ++libc_ifunc (__ieee754_expf, ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __ieee754_expf_power8 ++ : __ieee754_expf_ppc64); ++ ++strong_alias (__ieee754_expf, __expf_finite) +diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S b/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S +new file mode 100644 +index 0000000..a5e68bb +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S +@@ -0,0 +1,303 @@ ++/* Optimized expf(). PowerPC64/POWER8 version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++/* Short algorithm description: ++ * ++ * Let K = 64 (table size). ++ * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)) ++ * where: ++ * x = m*log(2)/K + y, y in [0.0..log(2)/K] ++ * m = n*K + j, m,n,j - signed integer, j in [0..K-1] ++ * values of 2^(j/K) are tabulated as T[j]. ++ * ++ * P(y) is a minimax polynomial approximation of expf(y)-1 ++ * on small interval [0.0..log(2)/K]. ++ * ++ * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as ++ * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y ++ * ++ * Special cases: ++ * expf(NaN) = NaN ++ * expf(+INF) = +INF ++ * expf(-INF) = 0 ++ * expf(x) = 1 for subnormals ++ * for finite argument, only expf(0)=1 is exact ++ * expf(x) overflows if x>88.7228317260742190 ++ * expf(x) underflows if x<-103.972076416015620 ++ */ ++ ++#define C1 0x42ad496b /* Single precision 125*log(2). */ ++#define C2 0x31800000 /* Single precision 2^(-28). */ ++#define SP_INF 0x7f800000 /* Single precision Inf. */ ++#define SP_EXP_BIAS 0x1fc0 /* Single precision exponent bias. 
*/ ++ ++#define DATA_OFFSET r9 ++ ++/* Implements the function ++ ++ float [fp1] expf (float [fp1] x) */ ++ ++ .machine power8 ++EALIGN(__ieee754_expf, 4, 0) ++ addis DATA_OFFSET,r2,.Lanchor@toc@ha ++ addi DATA_OFFSET,DATA_OFFSET,.Lanchor@toc@l ++ ++ xscvdpspn v0,v1 ++ mfvsrd r8,v0 /* r8 = x */ ++ lfd fp2,(.KLN2-.Lanchor)(DATA_OFFSET) ++ lfd fp3,(.P2-.Lanchor)(DATA_OFFSET) ++ rldicl r3,r8,32,33 /* r3 = |x| */ ++ lis r4,C1@ha /* r4 = 125*log(2) */ ++ ori r4,r4,C1@l ++ cmpw r3,r4 ++ lfd fp5,(.P3-.Lanchor)(DATA_OFFSET) ++ lfd fp4,(.RS-.Lanchor)(DATA_OFFSET) ++ fmadd fp2,fp1,fp2,fp4 /* fp2 = x * K/log(2) + (2^23 + 2^22) */ ++ bge L(special_paths) /* |x| >= 125*log(2) ? */ ++ ++ lis r4,C2@ha ++ ori r4,r4,C2@l ++ cmpw r3,r4 ++ blt L(small_args) /* |x| < 2^(-28) ? */ ++ ++ /* Main path: here if 2^(-28) <= |x| < 125*log(2) */ ++ frsp fp6,fp2 ++ xscvdpsp v2,v2 ++ mfvsrd r8,v2 ++ mr r3,r8 /* r3 = m */ ++ rldicl r8,r8,32,58 /* r8 = j */ ++ lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET) ++ fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */ ++ srdi r3,r3,32 ++ clrrwi r3,r3,6 /* r3 = n */ ++ lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET) ++ fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */ ++ fmul fp2,fp0,fp0 /* fp2 = z = y^2 */ ++ lfd fp4,(.P1-.Lanchor)(DATA_OFFSET) ++ lfd fp6,(.P0-.Lanchor)(DATA_OFFSET) ++ lis r4,SP_EXP_BIAS@ha ++ ori r4,r4,SP_EXP_BIAS@l ++ add r3,r3,r4 ++ rldic r3,r3,49,1 /* r3 = 2^n */ ++ fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */ ++ fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */ ++ mtvsrd v1,r3 ++ xscvspdp v1,v1 ++ fmul fp4,fp4,fp2 /* fp4 = (P3 * z + P1)*z */ ++ fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */ ++ sldi r8,r8,3 /* Access doublewords from T[j]. */ ++ addi r6,DATA_OFFSET,(.Ttable-.Lanchor) ++ lfdx fp3,r6,r8 ++ fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + P(y)) */ ++ fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + P(y)) */ ++ frsp fp1,fp1 ++ blr ++ ++ .align 4 ++/* x is either underflow, overflow, infinite or NaN. 
*/ ++L(special_paths): ++ srdi r8,r8,32 ++ rlwinm r8,r8,3,29,29 /* r8 = 0, if x positive. ++ r8 = 4, otherwise. */ ++ addi r6,DATA_OFFSET,(.SPRANGE-.Lanchor) ++ lwzx r4,r6,r8 /* r4 = .SPRANGE[signbit(x)] */ ++ cmpw r3,r4 ++ /* |x| <= .SPRANGE[signbit(x)] */ ++ ble L(near_under_or_overflow) ++ ++ lis r4,SP_INF@ha ++ ori r4,r4,SP_INF@l ++ cmpw r3,r4 ++ bge L(arg_inf_or_nan) /* |x| >= Infinite ? */ ++ ++ addi r6,DATA_OFFSET,(.SPLARGE_SMALL-.Lanchor) ++ lfsx fp1,r6,r8 ++ fmuls fp1,fp1,fp1 ++ blr ++ ++ ++ .align 4 ++L(small_args): ++ /* expf(x) = 1.0, where |x| < |2^(-28)| */ ++ lfs fp2,(.SPone-.Lanchor)(DATA_OFFSET) ++ fadds fp1,fp1,fp2 ++ blr ++ ++ ++ .align 4 ++L(arg_inf_or_nan:) ++ bne L(arg_nan) ++ ++ /* expf(+INF) = +INF ++ expf(-INF) = 0 */ ++ addi r6,DATA_OFFSET,(.INF_ZERO-.Lanchor) ++ lfsx fp1,r6,r8 ++ blr ++ ++ ++ .align 4 ++L(arg_nan): ++ /* expf(NaN) = NaN */ ++ fadd fp1,fp1,fp1 ++ frsp fp1,fp1 ++ blr ++ ++ .align 4 ++L(near_under_or_overflow): ++ frsp fp6,fp2 ++ xscvdpsp v2,v2 ++ mfvsrd r8,v2 ++ mr r3,r8 /* r3 = m */ ++ rldicl r8,r8,32,58 /* r8 = j */ ++ lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET) ++ fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */ ++ srdi r3,r3,32 ++ clrrwi r3,r3,6 /* r3 = n */ ++ lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET) ++ fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */ ++ fmul fp2,fp0,fp0 /* fp2 = z = y^2 */ ++ lfd fp4,(.P1-.Lanchor)(DATA_OFFSET) ++ lfd fp6,(.P0-.Lanchor)(DATA_OFFSET) ++ ld r4,(.DP_EXP_BIAS-.Lanchor)(DATA_OFFSET) ++ add r3,r3,r4 ++ rldic r3,r3,46,1 /* r3 = 2^n */ ++ fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */ ++ fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */ ++ mtvsrd v1,r3 ++ fmul fp4,fp4,fp2 /* fp4 = (P3*z + P1)*z */ ++ fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */ ++ sldi r8,r8,3 /* Access doublewords from T[j]. 
*/ ++ addi r6,DATA_OFFSET,(.Ttable-.Lanchor) ++ lfdx fp3,r6,r8 ++ fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + P(y)) */ ++ fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + P(y)) */ ++ frsp fp1,fp1 ++ blr ++END(__ieee754_expf) ++ ++ .section .rodata, "a",@progbits ++.Lanchor: ++ .balign 8 ++/* Table T[j] = 2^(j/K). Double precision. */ ++.Ttable: ++ .8byte 0x3ff0000000000000 ++ .8byte 0x3ff02c9a3e778061 ++ .8byte 0x3ff059b0d3158574 ++ .8byte 0x3ff0874518759bc8 ++ .8byte 0x3ff0b5586cf9890f ++ .8byte 0x3ff0e3ec32d3d1a2 ++ .8byte 0x3ff11301d0125b51 ++ .8byte 0x3ff1429aaea92de0 ++ .8byte 0x3ff172b83c7d517b ++ .8byte 0x3ff1a35beb6fcb75 ++ .8byte 0x3ff1d4873168b9aa ++ .8byte 0x3ff2063b88628cd6 ++ .8byte 0x3ff2387a6e756238 ++ .8byte 0x3ff26b4565e27cdd ++ .8byte 0x3ff29e9df51fdee1 ++ .8byte 0x3ff2d285a6e4030b ++ .8byte 0x3ff306fe0a31b715 ++ .8byte 0x3ff33c08b26416ff ++ .8byte 0x3ff371a7373aa9cb ++ .8byte 0x3ff3a7db34e59ff7 ++ .8byte 0x3ff3dea64c123422 ++ .8byte 0x3ff4160a21f72e2a ++ .8byte 0x3ff44e086061892d ++ .8byte 0x3ff486a2b5c13cd0 ++ .8byte 0x3ff4bfdad5362a27 ++ .8byte 0x3ff4f9b2769d2ca7 ++ .8byte 0x3ff5342b569d4f82 ++ .8byte 0x3ff56f4736b527da ++ .8byte 0x3ff5ab07dd485429 ++ .8byte 0x3ff5e76f15ad2148 ++ .8byte 0x3ff6247eb03a5585 ++ .8byte 0x3ff6623882552225 ++ .8byte 0x3ff6a09e667f3bcd ++ .8byte 0x3ff6dfb23c651a2f ++ .8byte 0x3ff71f75e8ec5f74 ++ .8byte 0x3ff75feb564267c9 ++ .8byte 0x3ff7a11473eb0187 ++ .8byte 0x3ff7e2f336cf4e62 ++ .8byte 0x3ff82589994cce13 ++ .8byte 0x3ff868d99b4492ed ++ .8byte 0x3ff8ace5422aa0db ++ .8byte 0x3ff8f1ae99157736 ++ .8byte 0x3ff93737b0cdc5e5 ++ .8byte 0x3ff97d829fde4e50 ++ .8byte 0x3ff9c49182a3f090 ++ .8byte 0x3ffa0c667b5de565 ++ .8byte 0x3ffa5503b23e255d ++ .8byte 0x3ffa9e6b5579fdbf ++ .8byte 0x3ffae89f995ad3ad ++ .8byte 0x3ffb33a2b84f15fb ++ .8byte 0x3ffb7f76f2fb5e47 ++ .8byte 0x3ffbcc1e904bc1d2 ++ .8byte 0x3ffc199bdd85529c ++ .8byte 0x3ffc67f12e57d14b ++ .8byte 0x3ffcb720dcef9069 ++ .8byte 0x3ffd072d4a07897c ++ .8byte 
0x3ffd5818dcfba487 ++ 
.8byte 0x3ffda9e603db3285 ++ .8byte 0x3ffdfc97337b9b5f ++ .8byte 0x3ffe502ee78b3ff6 ++ .8byte 0x3ffea4afa2a490da ++ .8byte 0x3ffefa1bee615a27 ++ .8byte 0x3fff50765b6e4540 ++ .8byte 0x3fffa7c1819e90d8 ++ ++.KLN2: ++ .8byte 0x40571547652b82fe /* Double precision K/log(2). */ ++ ++/* Double precision polynomial coefficients. */ ++.P0: ++ .8byte 0x3fefffffffffe7c6 ++.P1: ++ .8byte 0x3fe00000008d6118 ++.P2: ++ .8byte 0x3fc55550da752d4f ++.P3: ++ .8byte 0x3fa56420eb78fa85 ++ ++.RS: ++ .8byte 0x4168000000000000 /* Double precision 2^23 + 2^22. */ ++.NLN2K: ++ .8byte 0xbf862e42fefa39ef /* Double precision -log(2)/K. */ ++.DP_EXP_BIAS: ++ .8byte 0x000000000000ffc0 /* Double precision exponent bias. */ ++ ++ .balign 4 ++.SPone: ++ .4byte 0x3f800000 /* Single precision 1.0. */ ++.SP_RS: ++ .4byte 0x4b400000 /* Single precision 2^23 + 2^22. */ ++ ++.SPRANGE: /* Single precision overflow/underflow bounds. */ ++ .4byte 0x42b17217 /* if x>this bound, then result overflows. */ ++ .4byte 0x42cff1b4 /* if x +Date: Tue, 28 Jun 2016 21:59:40 +1000 +Subject: [PATCH] powerpc: Add a POWER8-optimized version of sinf() + +This uses the implementation of sinf() in sysdeps/x86_64/fpu/s_sinf.S +as inspiration. 
+ +(cherry picked from commit aa95fc13f5b02044eadc3af3d9e1c025f2e1edda) +--- + ChangeLog | 10 + + sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile | 3 +- + .../powerpc64/fpu/multiarch/s_sinf-power8.S | 26 ++ + .../powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c | 26 ++ + sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c | 31 ++ + sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S | 519 +++++++++++++++++++++ + 6 files changed, 614 insertions(+), 1 deletion(-) + create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S + create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c + create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c + create mode 100644 sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S + +diff --git a/ChangeLog b/ChangeLog +index 6cb2578..6d6aab3 100644 +diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile +index 331763e..add1fb8 100644 +--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile +@@ -25,7 +25,8 @@ libm-sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ + e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \ + s_isnan-power8 s_isinf-power8 s_finite-power8 \ + s_llrint-power8 s_llround-power8 \ +- e_expf-power8 e_expf-ppc64 ++ e_expf-power8 e_expf-ppc64 \ ++ s_sinf-ppc64 s_sinf-power8 + + CFLAGS-s_logbf-power7.c = -mcpu=power7 + CFLAGS-s_logbl-power7.c = -mcpu=power7 +diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S +new file mode 100644 +index 0000000..579019c +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S +@@ -0,0 +1,26 @@ ++/* sinf(). PowerPC64/POWER8 version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#undef weak_alias ++#define weak_alias(a, b) ++ ++#define __sinf __sinf_power8 ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c +new file mode 100644 +index 0000000..eaf83fa +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c +@@ -0,0 +1,26 @@ ++/* sinf(). PowerPC64 default version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#undef weak_alias ++#define weak_alias(a, b) ++ ++#define __sinf __sinf_ppc64 ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c +new file mode 100644 +index 0000000..4269d58 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c +@@ -0,0 +1,31 @@ ++/* Multiple versions of sinf. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include "init-arch.h" ++ ++extern __typeof (__sinf) __sinf_ppc64 attribute_hidden; ++extern __typeof (__sinf) __sinf_power8 attribute_hidden; ++ ++libc_ifunc (__sinf, ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __sinf_power8 ++ : __sinf_ppc64); ++ ++weak_alias (__sinf, sinf) +diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S +new file mode 100644 +index 0000000..3b8f5af +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S +@@ -0,0 +1,519 @@ ++/* Optimized sinf(). PowerPC64/POWER8 version. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#define _ERRNO_H 1 ++#include <bits/errno.h> ++ ++#define FRAMESIZE (FRAME_MIN_SIZE+16) ++ ++#define FLOAT_EXPONENT_SHIFT 23 ++#define FLOAT_EXPONENT_BIAS 127 ++#define INTEGER_BITS 3 ++ ++#define PI_4 0x3f490fdb /* PI/4 */ ++#define NINEPI_4 0x40e231d6 /* 9 * PI/4 */ ++#define TWO_PN5 0x3d000000 /* 2^-5 */ ++#define TWO_PN27 0x32000000 /* 2^-27 */ ++#define INFINITY 0x7f800000 ++#define TWO_P23 0x4b000000 /* 2^23 */ ++#define FX_FRACTION_1_28 0x9249250 /* 0x100000000 / 28 + 1 */ ++ ++ /* Implements the function ++ ++ float [fp1] sinf (float [fp1] x) */ ++ ++ .machine power8 ++EALIGN(__sinf, 4, 0) ++ addis r9,r2,L(anchor)@toc@ha ++ addi r9,r9,L(anchor)@toc@l ++ ++ lis r4,PI_4@h ++ ori r4,r4,PI_4@l ++ ++ xscvdpspn v0,v1 ++ mfvsrd r8,v0 ++ rldicl r3,r8,32,33 /* Remove sign bit. */ ++ ++ cmpw r3,r4 ++ bge L(greater_or_equal_pio4) ++ ++ lis r4,TWO_PN5@h ++ ori r4,r4,TWO_PN5@l ++ ++ cmpw r3,r4 ++ blt L(less_2pn5) ++ ++ /* Chebyshev polynomial of the form: ++ * x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). 
*/ ++ ++ lfd fp9,(L(S0)-L(anchor))(r9) ++ lfd fp10,(L(S1)-L(anchor))(r9) ++ lfd fp11,(L(S2)-L(anchor))(r9) ++ lfd fp12,(L(S3)-L(anchor))(r9) ++ lfd fp13,(L(S4)-L(anchor))(r9) ++ ++ fmul fp2,fp1,fp1 /* x^2 */ ++ fmul fp3,fp2,fp1 /* x^3 */ ++ ++ fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ ++ fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ ++ fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ ++ fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ ++ fmadd fp1,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ ++ frsp fp1,fp1 /* Round to single precision. */ ++ ++ blr ++ ++ .balign 16 ++L(greater_or_equal_pio4): ++ lis r4,NINEPI_4@h ++ ori r4,r4,NINEPI_4@l ++ cmpw r3,r4 ++ bge L(greater_or_equal_9pio4) ++ ++ /* Calculate quotient of |x|/(PI/4). */ ++ lfd fp2,(L(invpio4)-L(anchor))(r9) ++ fabs fp1,fp1 /* |x| */ ++ fmul fp2,fp1,fp2 /* |x|/(PI/4) */ ++ fctiduz fp2,fp2 ++ mfvsrd r3,v2 /* n = trunc(|x|/(PI/4)) */ ++ ++ /* Now use that quotient to find |x| mod (PI/2). */ ++ addi r7,r3,1 ++ rldicr r5,r7,2,60 /* ((n+1) >> 1) << 3 */ ++ addi r6,r9,(L(pio2_table)-L(anchor)) ++ lfdx fp4,r5,r6 ++ fsub fp1,fp1,fp4 ++ ++ .balign 16 ++L(reduced): ++ /* Now we are in the range -PI/4 to PI/4. */ ++ ++ /* Work out if we are in a positive or negative primary interval. */ ++ rldicl r4,r7,62,63 /* ((n+1) >> 2) & 1 */ ++ ++ /* We are operating on |x|, so we need to add back the original ++ sign. */ ++ rldicl r8,r8,33,63 /* (x >> 31) & 1, ie the sign bit. */ ++ xor r4,r4,r8 /* 0 if result should be positive, ++ 1 if negative. */ ++ ++ /* Load a 1.0 or -1.0. */ ++ addi r5,r9,(L(ones)-L(anchor)) ++ sldi r4,r4,3 ++ lfdx fp0,r4,r5 ++ ++ /* Are we in the primary interval of sin or cos? */ ++ andi. r4,r7,0x2 ++ bne L(cos) ++ ++ /* Chebyshev polynomial of the form: ++ x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). 
*/ ++ ++ lfd fp9,(L(S0)-L(anchor))(r9) ++ lfd fp10,(L(S1)-L(anchor))(r9) ++ lfd fp11,(L(S2)-L(anchor))(r9) ++ lfd fp12,(L(S3)-L(anchor))(r9) ++ lfd fp13,(L(S4)-L(anchor))(r9) ++ ++ fmul fp2,fp1,fp1 /* x^2 */ ++ fmul fp3,fp2,fp1 /* x^3 */ ++ ++ fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ ++ fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ ++ fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ ++ fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ ++ fmadd fp4,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ ++ fmul fp4,fp4,fp0 /* Add in the sign. */ ++ frsp fp1,fp4 /* Round to single precision. */ ++ ++ blr ++ ++ .balign 16 ++L(cos): ++ /* Chebyshev polynomial of the form: ++ 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */ ++ ++ lfd fp9,(L(C0)-L(anchor))(r9) ++ lfd fp10,(L(C1)-L(anchor))(r9) ++ lfd fp11,(L(C2)-L(anchor))(r9) ++ lfd fp12,(L(C3)-L(anchor))(r9) ++ lfd fp13,(L(C4)-L(anchor))(r9) ++ ++ fmul fp2,fp1,fp1 /* x^2 */ ++ lfd fp3,(L(DPone)-L(anchor))(r9) ++ ++ fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ ++ fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ ++ fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ ++ fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ ++ fmadd fp4,fp2,fp4,fp3 /* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ ++ fmul fp4,fp4,fp0 /* Add in the sign. */ ++ frsp fp1,fp4 /* Round to single precision. */ ++ ++ blr ++ ++ .balign 16 ++L(greater_or_equal_9pio4): ++ lis r4,INFINITY@h ++ ori r4,r4,INFINITY@l ++ cmpw r3,r4 ++ bge L(inf_or_nan) ++ ++ lis r4,TWO_P23@h ++ ori r4,r4,TWO_P23@l ++ cmpw r3,r4 ++ bge L(greater_or_equal_2p23) ++ ++ fabs fp1,fp1 /* |x| */ ++ ++ /* Calculate quotient of |x|/(PI/4). */ ++ lfd fp2,(L(invpio4)-L(anchor))(r9) ++ ++ lfd fp3,(L(DPone)-L(anchor))(r9) ++ lfd fp4,(L(DPhalf)-L(anchor))(r9) ++ fmul fp2,fp1,fp2 /* |x|/(PI/4) */ ++ friz fp2,fp2 /* n = floor(|x|/(PI/4)) */ ++ ++ /* Calculate (n + 1) / 2. 
*/ ++ fadd fp2,fp2,fp3 /* n + 1 */ ++ fmul fp3,fp2,fp4 /* (n + 1) / 2 */ ++ friz fp3,fp3 ++ ++ lfd fp4,(L(pio2hi)-L(anchor))(r9) ++ lfd fp5,(L(pio2lo)-L(anchor))(r9) ++ ++ fmul fp6,fp4,fp3 ++ fadd fp6,fp6,fp1 ++ fmadd fp1,fp5,fp3,fp6 ++ ++ fctiduz fp2,fp2 ++ mfvsrd r7,v2 /* n + 1 */ ++ ++ b L(reduced) ++ ++ .balign 16 ++L(inf_or_nan): ++ bne L(skip_errno_setting) /* Is a NAN? */ ++ ++ /* We delayed the creation of the stack frame, as well as the saving of ++ the link register, because only at this point, we are sure that ++ doing so is actually needed. */ ++ ++ stfd fp1,-8(r1) ++ ++ /* Save the link register. */ ++ mflr r0 ++ std r0,16(r1) ++ cfi_offset(lr, 16) ++ ++ /* Create the stack frame. */ ++ stdu r1,-FRAMESIZE(r1) ++ cfi_adjust_cfa_offset(FRAMESIZE) ++ ++ bl JUMPTARGET(__errno_location) ++ nop ++ ++ /* Restore the stack frame. */ ++ addi r1,r1,FRAMESIZE ++ cfi_adjust_cfa_offset(-FRAMESIZE) ++ /* Restore the link register. */ ++ ld r0,16(r1) ++ mtlr r0 ++ ++ lfd fp1,-8(r1) ++ ++ /* errno = EDOM */ ++ li r4,EDOM ++ stw r4,0(r3) ++ ++L(skip_errno_setting): ++ fsub fp1,fp1,fp1 /* x - x */ ++ blr ++ ++ .balign 16 ++L(greater_or_equal_2p23): ++ fabs fp1,fp1 ++ ++ srwi r4,r3,FLOAT_EXPONENT_SHIFT ++ subi r4,r4,FLOAT_EXPONENT_BIAS ++ ++ /* We reduce the input modulo pi/4, so we need 3 bits of integer ++ to determine where in 2*pi we are. Index into our array ++ accordingly. */ ++ addi r4,r4,INTEGER_BITS ++ ++ /* To avoid an expensive divide, for the range we care about (0 - 127) ++ we can transform x/28 into: ++ ++ x/28 = (x * ((0x100000000 / 28) + 1)) >> 32 ++ ++ mulhwu returns the top 32 bits of the 64 bit result, doing the ++ shift for us in the same instruction. The top 32 bits are undefined, ++ so we have to mask them. */ ++ ++ lis r6,FX_FRACTION_1_28@h ++ ori r6,r6,FX_FRACTION_1_28@l ++ mulhwu r5,r4,r6 ++ clrldi r5,r5,32 ++ ++ /* Get our pointer into the invpio4_table array. 
*/ ++ sldi r4,r5,3 ++ addi r6,r9,(L(invpio4_table)-L(anchor)) ++ add r4,r4,r6 ++ ++ lfd fp2,0(r4) ++ lfd fp3,8(r4) ++ lfd fp4,16(r4) ++ lfd fp5,24(r4) ++ ++ fmul fp6,fp2,fp1 ++ fmul fp7,fp3,fp1 ++ fmul fp8,fp4,fp1 ++ fmul fp9,fp5,fp1 ++ ++ /* Mask off larger integer bits in highest double word that we don't ++ care about to avoid losing precision when combining with smaller ++ values. */ ++ fctiduz fp10,fp6 ++ mfvsrd r7,v10 ++ rldicr r7,r7,0,(63-INTEGER_BITS) ++ mtvsrd v10,r7 ++ fcfidu fp10,fp10 /* Integer bits. */ ++ ++ fsub fp6,fp6,fp10 /* highest -= integer bits */ ++ ++ /* Work out the integer component, rounded down. Use the top two ++ limbs for this. */ ++ fadd fp10,fp6,fp7 /* highest + higher */ ++ ++ fctiduz fp10,fp10 ++ mfvsrd r7,v10 ++ andi. r0,r7,1 ++ fcfidu fp10,fp10 ++ ++ /* Subtract integer component from highest limb. */ ++ fsub fp12,fp6,fp10 ++ ++ beq L(even_integer) ++ ++ /* Our integer component is odd, so we are in the -PI/4 to 0 primary ++ region. We need to shift our result down by PI/4, and to do this ++ in the mod (4/PI) space we simply subtract 1. */ ++ lfd fp11,(L(DPone)-L(anchor))(r9) ++ fsub fp12,fp12,fp11 ++ ++ /* Now add up all the limbs in order. */ ++ fadd fp12,fp12,fp7 ++ fadd fp12,fp12,fp8 ++ fadd fp12,fp12,fp9 ++ ++ /* And finally multiply by pi/4. */ ++ lfd fp13,(L(pio4)-L(anchor))(r9) ++ fmul fp1,fp12,fp13 ++ ++ addi r7,r7,1 ++ b L(reduced) ++ ++L(even_integer): ++ lfd fp11,(L(DPone)-L(anchor))(r9) ++ ++ /* Now add up all the limbs in order. */ ++ fadd fp12,fp12,fp7 ++ fadd fp12,r12,fp8 ++ fadd fp12,r12,fp9 ++ ++ /* We need to check if the addition of all the limbs resulted in us ++ overflowing 1.0. */ ++ fcmpu 0,fp12,fp11 ++ bgt L(greater_than_one) ++ ++ /* And finally multiply by pi/4. */ ++ lfd fp13,(L(pio4)-L(anchor))(r9) ++ fmul fp1,fp12,fp13 ++ ++ addi r7,r7,1 ++ b L(reduced) ++ ++L(greater_than_one): ++ /* We did overflow 1.0 when adding up all the limbs. Add 1.0 to our ++ integer, and subtract 1.0 from our result. 
Since that makes the ++ integer component odd, we need to subtract another 1.0 as ++ explained above. */ ++ addi r7,r7,1 ++ ++ lfd fp11,(L(DPtwo)-L(anchor))(r9) ++ fsub fp12,fp12,fp11 ++ ++ /* And finally multiply by pi/4. */ ++ lfd fp13,(L(pio4)-L(anchor))(r9) ++ fmul fp1,fp12,fp13 ++ ++ addi r7,r7,1 ++ b L(reduced) ++ ++ .balign 16 ++L(less_2pn5): ++ lis r4,TWO_PN27@h ++ ori r4,r4,TWO_PN27@l ++ ++ cmpw r3,r4 ++ blt L(less_2pn27) ++ ++ /* A simpler Chebyshev approximation is close enough for this range: ++ x+x^3*(SS0+x^2*SS1). */ ++ ++ lfd fp10,(L(SS0)-L(anchor))(r9) ++ lfd fp11,(L(SS1)-L(anchor))(r9) ++ ++ fmul fp2,fp1,fp1 /* x^2 */ ++ fmul fp3,fp2,fp1 /* x^3 */ ++ ++ fmadd fp4,fp2,fp11,fp10 /* SS0+x^2*SS1 */ ++ fmadd fp1,fp3,fp4,fp1 /* x+x^3*(SS0+x^2*SS1) */ ++ ++ frsp fp1,fp1 /* Round to single precision. */ ++ ++ blr ++ ++ .balign 16 ++L(less_2pn27): ++ cmpwi r3,0 ++ beq L(zero) ++ ++ /* Handle some special cases: ++ ++ sinf(subnormal) raises inexact/underflow ++ sinf(min_normalized) raises inexact/underflow ++ sinf(normalized) raises inexact. */ ++ ++ lfd fp2,(L(small)-L(anchor))(r9) ++ ++ fmul fp2,fp1,fp2 /* x * small */ ++ fsub fp1,fp1,fp2 /* x - x * small */ ++ ++ frsp fp1,fp1 ++ ++ blr ++ ++ .balign 16 ++L(zero): ++ blr ++ ++END (__sinf) ++ ++ .section .rodata, "a" ++ ++ .balign 8 ++ ++L(anchor): ++ ++ /* Chebyshev constants for sin, range -PI/4 - PI/4. */ ++L(S0): .8byte 0xbfc5555555551cd9 ++L(S1): .8byte 0x3f81111110c2688b ++L(S2): .8byte 0xbf2a019f8b4bd1f9 ++L(S3): .8byte 0x3ec71d7264e6b5b4 ++L(S4): .8byte 0xbe5a947e1674b58a ++ ++ /* Chebyshev constants for sin, range 2^-27 - 2^-5. */ ++L(SS0): .8byte 0xbfc555555543d49d ++L(SS1): .8byte 0x3f8110f475cec8c5 ++ ++ /* Chebyshev constants for cos, range -PI/4 - PI/4. 
*/ ++L(C0): .8byte 0xbfdffffffffe98ae ++L(C1): .8byte 0x3fa55555545c50c7 ++L(C2): .8byte 0xbf56c16b348b6874 ++L(C3): .8byte 0x3efa00eb9ac43cc0 ++L(C4): .8byte 0xbe923c97dd8844d7 ++ ++L(invpio2): ++ .8byte 0x3fe45f306dc9c883 /* 2/PI */ ++ ++L(invpio4): ++ .8byte 0x3ff45f306dc9c883 /* 4/PI */ ++ ++L(invpio4_table): ++ .8byte 0x0000000000000000 ++ .8byte 0x3ff45f306c000000 ++ .8byte 0x3e3c9c882a000000 ++ .8byte 0x3c54fe13a8000000 ++ .8byte 0x3aaf47d4d0000000 ++ .8byte 0x38fbb81b6c000000 ++ .8byte 0x3714acc9e0000000 ++ .8byte 0x3560e4107c000000 ++ .8byte 0x33bca2c756000000 ++ .8byte 0x31fbd778ac000000 ++ .8byte 0x300b7246e0000000 ++ .8byte 0x2e5d2126e8000000 ++ .8byte 0x2c97003248000000 ++ .8byte 0x2ad77504e8000000 ++ .8byte 0x290921cfe0000000 ++ .8byte 0x274deb1cb0000000 ++ .8byte 0x25829a73e0000000 ++ .8byte 0x23fd1046be000000 ++ .8byte 0x2224baed10000000 ++ .8byte 0x20709d338e000000 ++ .8byte 0x1e535a2f80000000 ++ .8byte 0x1cef904e64000000 ++ .8byte 0x1b0d639830000000 ++ .8byte 0x1964ce7d24000000 ++ .8byte 0x17b908bf16000000 ++ ++L(pio4): ++ .8byte 0x3fe921fb54442d18 /* PI/4 */ ++ ++/* PI/2 as a sum of two doubles. We only use 32 bits of the upper limb ++ to avoid losing significant bits when multiplying with up to ++ (2^22)/(pi/2). 
*/ ++L(pio2hi): ++ .8byte 0xbff921fb54400000 ++ ++L(pio2lo): ++ .8byte 0xbdd0b4611a626332 ++ ++L(pio2_table): ++ .8byte 0 ++ .8byte 0x3ff921fb54442d18 /* 1 * PI/2 */ ++ .8byte 0x400921fb54442d18 /* 2 * PI/2 */ ++ .8byte 0x4012d97c7f3321d2 /* 3 * PI/2 */ ++ .8byte 0x401921fb54442d18 /* 4 * PI/2 */ ++ .8byte 0x401f6a7a2955385e /* 5 * PI/2 */ ++ .8byte 0x4022d97c7f3321d2 /* 6 * PI/2 */ ++ .8byte 0x4025fdbbe9bba775 /* 7 * PI/2 */ ++ .8byte 0x402921fb54442d18 /* 8 * PI/2 */ ++ .8byte 0x402c463abeccb2bb /* 9 * PI/2 */ ++ .8byte 0x402f6a7a2955385e /* 10 * PI/2 */ ++ ++L(small): ++ .8byte 0x3cd0000000000000 /* 2^-50 */ ++ ++L(ones): ++ .8byte 0x3ff0000000000000 /* +1.0 */ ++ .8byte 0xbff0000000000000 /* -1.0 */ ++ ++L(DPhalf): ++ .8byte 0x3fe0000000000000 /* 0.5 */ ++ ++L(DPone): ++ .8byte 0x3ff0000000000000 /* 1.0 */ ++ ++L(DPtwo): ++ .8byte 0x4000000000000000 /* 2.0 */ ++ ++weak_alias(__sinf, sinf) +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-22.patch b/SOURCES/glibc-rh1385004-22.patch new file mode 100644 index 0000000..3723bd0 --- /dev/null +++ b/SOURCES/glibc-rh1385004-22.patch @@ -0,0 +1,411 @@ +From 1e5a0d609f20a613e1e989802bbe479f61bed1ca Mon Sep 17 00:00:00 2001 +From: Rajalakshmi Srinivasaraghavan +Date: Thu, 1 Dec 2016 11:35:43 +0530 +Subject: [PATCH] powerpc: strcmp optimization for power9 + +Vectorized loops are used for strings > 32B when compared +to power8 optimization. + +Tested on power9 ppc64le simulator. 
+ +(cherry picked from commit 80ab6401a9bb566de940cc6a5fb7a6af650f17b9) + +Conflicts: + sysdeps/powerpc/powerpc64/multiarch/strcmp.c +--- + ChangeLog | 11 + + sysdeps/powerpc/powerpc64/multiarch/Makefile | 2 +- + .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 3 + + .../powerpc/powerpc64/multiarch/strcmp-power9.S | 40 +++ + sysdeps/powerpc/powerpc64/multiarch/strcmp.c | 13 +- + sysdeps/powerpc/powerpc64/power9/strcmp.S | 278 +++++++++++++++++++++ + 6 files changed, 341 insertions(+), 6 deletions(-) + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S + create mode 100644 sysdeps/powerpc/powerpc64/power9/strcmp.S + +diff --git a/ChangeLog b/ChangeLog +index 6d6aab3..57152b8 100644 +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index e3ac285..2c83c22 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -16,7 +16,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + strncase-ppc64 strncase-power8 \ + strcasestr-power8 strcasestr-ppc64 \ + strcat-power8 strcat-power7 strcat-ppc64 \ +- strcmp-power8 strcmp-power7 strcmp-ppc64 \ ++ strcmp-power9 strcmp-power8 strcmp-power7 strcmp-ppc64 \ + strcpy-power8 strcpy-power7 strcpy-ppc64 \ + strcspn-power8 strcspn-ppc64 \ + stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index aabd7bc..404a226 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -311,6 +311,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c. 
*/ + IFUNC_IMPL (i, name, strcmp, + IFUNC_IMPL_ADD (array, i, strcmp, ++ hwcap2 & PPC_FEATURE2_ARCH_3_00, ++ __strcmp_power9) ++ IFUNC_IMPL_ADD (array, i, strcmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcmp_power8) + IFUNC_IMPL_ADD (array, i, strcmp, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S +new file mode 100644 +index 0000000..0a09e5b +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S +@@ -0,0 +1,40 @@ ++/* Optimized strcmp implementation for POWER9/PPC64. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#undef EALIGN ++#define EALIGN(name, alignt, words) \ ++ .section ".text"; \ ++ ENTRY_2(__strcmp_power9) \ ++ .align ALIGNARG(alignt); \ ++ EALIGN_W_##words; \ ++ BODY_LABEL(__strcmp_power9): \ ++ cfi_startproc; \ ++ LOCALENTRY(__strcmp_power9) ++ ++#undef END ++#define END(name) \ ++ cfi_endproc; \ ++ TRACEBACK(__strcmp_power9) \ ++ END_2(__strcmp_power9) ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c +index b45ba1f..7345f5a 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c +@@ -24,11 +24,14 @@ + extern __typeof (strcmp) __strcmp_ppc attribute_hidden; + extern __typeof (strcmp) __strcmp_power7 attribute_hidden; + extern __typeof (strcmp) __strcmp_power8 attribute_hidden; ++extern __typeof (strcmp) __strcmp_power9 attribute_hidden; + + libc_ifunc (strcmp, +- (hwcap2 & PPC_FEATURE2_ARCH_2_07) +- ? __strcmp_power8 : +- (hwcap & PPC_FEATURE_HAS_VSX) +- ? __strcmp_power7 +- : __strcmp_ppc); ++ (hwcap2 & PPC_FEATURE2_ARCH_3_00) ++ ? __strcmp_power9 : ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __strcmp_power8 : ++ (hwcap & PPC_FEATURE_HAS_VSX) ++ ? __strcmp_power7 ++ : __strcmp_ppc); + #endif +diff --git a/sysdeps/powerpc/powerpc64/power9/strcmp.S b/sysdeps/powerpc/powerpc64/power9/strcmp.S +new file mode 100644 +index 0000000..754d508 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power9/strcmp.S +@@ -0,0 +1,278 @@ ++/* Optimized strcmp implementation for PowerPC64/POWER9. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++#ifdef __LITTLE_ENDIAN__ ++#include ++ ++/* Implements the function ++ ++ int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) ++ ++ The implementation uses unaligned doubleword access for first 32 bytes ++ as in POWER8 patch and uses vectorised loops after that. */ ++ ++/* TODO: Change this to actual instructions when minimum binutils is upgraded ++ to 2.27. Macros are defined below for these newer instructions in order ++ to maintain compatibility. */ ++# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) ++ ++# define VEXTUBRX(t,a,b) .long (0x1000070d \ ++ | ((t)<<(32-11)) \ ++ | ((a)<<(32-16)) \ ++ | ((b)<<(32-21)) ) ++ ++# define VCMPNEZB(t,a,b) .long (0x10000507 \ ++ | ((t)<<(32-11)) \ ++ | ((a)<<(32-16)) \ ++ | ((b)<<(32-21)) ) ++ ++/* Get 16 bytes for unaligned case. ++ reg1: Vector to hold next 16 bytes. ++ reg2: Address to read from. ++ reg3: Permute control vector. */ ++# define GET16BYTES(reg1, reg2, reg3) \ ++ lvx reg1, 0, reg2; \ ++ vperm v8, v2, reg1, reg3; \ ++ vcmpequb. v8, v0, v8; \ ++ beq cr6, 1f; \ ++ vspltisb v9, 0; \ ++ b 2f; \ ++ .align 4; \ ++1: \ ++ addi r6, reg2, 16; \ ++ lvx v9, 0, r6; \ ++2: \ ++ vperm reg1, v9, reg1, reg3; ++ ++/* TODO: change this to .machine power9 when the minimum required binutils ++ allows it. */ ++ ++ .machine power7 ++EALIGN (strcmp, 4, 0) ++ li r0, 0 ++ ++ /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using ++ the code: ++ ++ (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) ++ ++ with PAGE_SIZE being 4096 and ITER_SIZE begin 32. 
*/ ++ ++ rldicl r7, r3, 0, 52 ++ rldicl r9, r4, 0, 52 ++ cmpldi cr7, r7, 4096-32 ++ bgt cr7, L(pagecross_check) ++ cmpldi cr5, r9, 4096-32 ++ bgt cr5, L(pagecross_check) ++ ++ /* For short strings up to 32 bytes, load both s1 and s2 using ++ unaligned dwords and compare. */ ++ ld r8, 0(r3) ++ ld r10, 0(r4) ++ cmpb r12, r8, r0 ++ cmpb r11, r8, r10 ++ orc. r9, r12, r11 ++ bne cr0, L(different_nocmpb) ++ ++ ld r8, 8(r3) ++ ld r10, 8(r4) ++ cmpb r12, r8, r0 ++ cmpb r11, r8, r10 ++ orc. r9, r12, r11 ++ bne cr0, L(different_nocmpb) ++ ++ ld r8, 16(r3) ++ ld r10, 16(r4) ++ cmpb r12, r8, r0 ++ cmpb r11, r8, r10 ++ orc. r9, r12, r11 ++ bne cr0, L(different_nocmpb) ++ ++ ld r8, 24(r3) ++ ld r10, 24(r4) ++ cmpb r12, r8, r0 ++ cmpb r11, r8, r10 ++ orc. r9, r12, r11 ++ bne cr0, L(different_nocmpb) ++ ++ addi r7, r3, 32 ++ addi r4, r4, 32 ++ ++L(align): ++ /* Now it has checked for first 32 bytes. */ ++ vspltisb v0, 0 ++ vspltisb v2, -1 ++ lvsr v6, 0, r4 /* Compute mask. */ ++ or r5, r4, r7 ++ andi. r5, r5, 0xF ++ beq cr0, L(aligned) ++ andi. r5, r7, 0xF ++ beq cr0, L(s1_align) ++ lvsr v10, 0, r7 /* Compute mask. */ ++ ++ /* Both s1 and s2 are unaligned. */ ++ GET16BYTES(v4, r7, v10) ++ GET16BYTES(v5, r4, v6) ++ VCMPNEZB(v7, v5, v4) ++ beq cr6, L(match) ++ b L(different) ++ ++ /* Align s1 to qw and adjust s2 address. */ ++ .align 4 ++L(match): ++ clrldi r6, r7, 60 ++ subfic r5, r6, 16 ++ add r7, r7, r5 ++ add r4, r4, r5 ++ andi. r5, r4, 0xF ++ beq cr0, L(aligned) ++ lvsr v6, 0, r4 ++ /* There are 2 loops depending on the input alignment. ++ Each loop gets 16 bytes from s1 and s2 and compares. ++ Loop until a mismatch or null occurs. 
*/ ++L(s1_align): ++ lvx v4, r7, r0 ++ GET16BYTES(v5, r4, v6) ++ VCMPNEZB(v7, v5, v4) ++ addi r7, r7, 16 ++ addi r4, r4, 16 ++ bne cr6, L(different) ++ ++ lvx v4, r7, r0 ++ GET16BYTES(v5, r4, v6) ++ VCMPNEZB(v7, v5, v4) ++ addi r7, r7, 16 ++ addi r4, r4, 16 ++ bne cr6, L(different) ++ ++ lvx v4, r7, r0 ++ GET16BYTES(v5, r4, v6) ++ VCMPNEZB(v7, v5, v4) ++ addi r7, r7, 16 ++ addi r4, r4, 16 ++ bne cr6, L(different) ++ ++ lvx v4, r7, r0 ++ GET16BYTES(v5, r4, v6) ++ VCMPNEZB(v7, v5, v4) ++ addi r7, r7, 16 ++ addi r4, r4, 16 ++ beq cr6, L(s1_align) ++ b L(different) ++ ++ .align 4 ++L(aligned): ++ lvx v4, 0, r7 ++ lvx v5, 0, r4 ++ VCMPNEZB(v7, v5, v4) ++ addi r7, r7, 16 ++ addi r4, r4, 16 ++ bne cr6, L(different) ++ ++ lvx v4, 0, r7 ++ lvx v5, 0, r4 ++ VCMPNEZB(v7, v5, v4) ++ addi r7, r7, 16 ++ addi r4, r4, 16 ++ bne cr6, L(different) ++ ++ lvx v4, 0, r7 ++ lvx v5, 0, r4 ++ VCMPNEZB(v7, v5, v4) ++ addi r7, r7, 16 ++ addi r4, r4, 16 ++ bne cr6, L(different) ++ ++ lvx v4, 0, r7 ++ lvx v5, 0, r4 ++ VCMPNEZB(v7, v5, v4) ++ addi r7, r7, 16 ++ addi r4, r4, 16 ++ beq cr6, L(aligned) ++ ++ /* Calculate and return the difference. */ ++L(different): ++ VCTZLSBB(r6, v7) ++ VEXTUBRX(r5, r6, v4) ++ VEXTUBRX(r4, r6, v5) ++ subf r3, r4, r5 ++ extsw r3, r3 ++ blr ++ ++ .align 4 ++L(different_nocmpb): ++ neg r3, r9 ++ and r9, r9, r3 ++ cntlzd r9, r9 ++ subfic r9, r9, 63 ++ srd r3, r8, r9 ++ srd r10, r10, r9 ++ rldicl r10, r10, 0, 56 ++ rldicl r3, r3, 0, 56 ++ subf r3, r10, r3 ++ extsw r3, r3 ++ blr ++ ++ .align 4 ++L(pagecross_check): ++ subfic r9, r9, 4096 ++ subfic r7, r7, 4096 ++ cmpld cr7, r7, r9 ++ bge cr7, L(pagecross) ++ mr r7, r9 ++ ++ /* If unaligned 16 bytes reads across a 4K page boundary, it uses ++ a simple byte a byte comparison until the page alignment for s1 ++ is reached. */ ++L(pagecross): ++ add r7, r3, r7 ++ subf r9, r3, r7 ++ mtctr r9 ++ ++ .align 4 ++L(pagecross_loop): ++ /* Loads a byte from s1 and s2, compare if *s1 is equal to *s2 ++ and if *s1 is '\0'. 
*/ ++ lbz r9, 0(r3) ++ lbz r10, 0(r4) ++ addi r3, r3, 1 ++ addi r4, r4, 1 ++ cmplw cr7, r9, r10 ++ cmpdi cr5, r9, r0 ++ bne cr7, L(pagecross_ne) ++ beq cr5, L(pagecross_nullfound) ++ bdnz L(pagecross_loop) ++ b L(align) ++ ++ .align 4 ++L(pagecross_ne): ++ extsw r3, r9 ++ mr r9, r10 ++L(pagecross_retdiff): ++ subf r9, r9, r3 ++ extsw r3, r9 ++ blr ++ ++ .align 4 ++L(pagecross_nullfound): ++ li r3, 0 ++ b L(pagecross_retdiff) ++END (strcmp) ++libc_hidden_builtin_def (strcmp) ++#else ++#include ++#endif +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-23.patch b/SOURCES/glibc-rh1385004-23.patch new file mode 100644 index 0000000..9c10e69 --- /dev/null +++ b/SOURCES/glibc-rh1385004-23.patch @@ -0,0 +1,514 @@ +From fabf4e24731762be7ed1fded89b536fe7150fe13 Mon Sep 17 00:00:00 2001 +From: Rajalakshmi Srinivasaraghavan +Date: Tue, 13 Dec 2016 10:53:42 +0530 +Subject: [PATCH] powerpc: strncmp optimization for power9 + +Vectorized loops are used for strings > 32B when compared +to power8 optimization. + +Tested on power9 ppc64le simulator. 
+ +(cherry picked from commit d89060d60307c84995177a6fba2ed80c96f6b914) + +Conflicts: + sysdeps/powerpc/powerpc64/multiarch/strncmp.c +--- + ChangeLog | 11 + + sysdeps/powerpc/powerpc64/multiarch/Makefile | 3 +- + .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 2 + + .../powerpc/powerpc64/multiarch/strncmp-power9.S | 40 +++ + sysdeps/powerpc/powerpc64/multiarch/strncmp.c | 17 +- + sysdeps/powerpc/powerpc64/power9/strncmp.S | 375 +++++++++++++++++++++ + 6 files changed, 440 insertions(+), 8 deletions(-) + create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S + create mode 100644 sysdeps/powerpc/powerpc64/power9/strncmp.S + +diff --git a/ChangeLog b/ChangeLog +index 57152b8..0446268 100644 +diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile +index 2c83c22..2997b9d 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile ++++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile +@@ -8,7 +8,8 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + rawmemchr-ppc64 strlen-power7 strlen-ppc64 strnlen-power7 \ + strnlen-ppc64 strcasecmp-power7 strcasecmp_l-power7 \ + strncase-power7 strncase_l-power7 \ +- strncmp-power8 strncmp-power7 strncmp-power4 strncmp-ppc64 \ ++ strncmp-power9 strncmp-power8 strncmp-power7 \ ++ strncmp-power4 strncmp-ppc64 \ + strchr-power7 strchr-ppc64 \ + strchrnul-power7 strchrnul-ppc64 wcschr-power7 \ + wcschr-power6 wcschr-ppc64 wcsrchr-power7 wcsrchr-power6 \ +diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +index 404a226..a140583 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +@@ -110,6 +110,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. 
*/ + IFUNC_IMPL (i, name, strncmp, ++ IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00, ++ __strncmp_power9) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncmp_power8) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_HAS_VSX, +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S +new file mode 100644 +index 0000000..2f8d0c4 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S +@@ -0,0 +1,40 @@ ++/* Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#undef EALIGN ++#define EALIGN(name,alignt,words) \ ++ .section ".text"; \ ++ ENTRY_2(__strncmp_power9) \ ++ .align ALIGNARG(alignt); \ ++ EALIGN_W_##words; \ ++ BODY_LABEL(__strncmp_power9): \ ++ cfi_startproc; \ ++ LOCALENTRY(__strncmp_power9) ++ ++#undef END ++#define END(name) \ ++ cfi_endproc; \ ++ TRACEBACK(__strncmp_power9) \ ++ END_2(__strncmp_power9) ++ ++ ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(name) ++ ++#include +diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +index 9b6a659..3859cbc 100644 +--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c ++++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +@@ -26,15 +26,18 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden; + extern __typeof (strncmp) __strncmp_power4 attribute_hidden; + extern __typeof (strncmp) __strncmp_power7 attribute_hidden; + extern __typeof (strncmp) __strncmp_power8 attribute_hidden; ++extern __typeof (strncmp) __strncmp_power9 attribute_hidden; + + /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ + libc_ifunc (strncmp, +- (hwcap2 & PPC_FEATURE2_ARCH_2_07) +- ? __strncmp_power8 : +- (hwcap & PPC_FEATURE_HAS_VSX) +- ? __strncmp_power7 : +- (hwcap & PPC_FEATURE_POWER4) +- ? __strncmp_power4 +- : __strncmp_ppc); ++ (hwcap2 & PPC_FEATURE2_ARCH_3_00) ++ ? __strncmp_power9 : ++ (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ ? __strncmp_power8 : ++ (hwcap & PPC_FEATURE_HAS_VSX) ++ ? __strncmp_power7 : ++ (hwcap & PPC_FEATURE_POWER4) ++ ? __strncmp_power4 ++ : __strncmp_ppc); + #endif +diff --git a/sysdeps/powerpc/powerpc64/power9/strncmp.S b/sysdeps/powerpc/powerpc64/power9/strncmp.S +new file mode 100644 +index 0000000..3f2fa75 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power9/strncmp.S +@@ -0,0 +1,375 @@ ++/* Optimized strncmp implementation for PowerPC64/POWER9. ++ Copyright (C) 2016 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++#ifdef __LITTLE_ENDIAN__ ++#include ++ ++/* Implements the function ++ ++ int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) ++ ++ The implementation uses unaligned doubleword access to avoid specialized ++ code paths depending of data alignment for first 32 bytes and uses ++ vectorised loops after that. */ ++ ++/* TODO: Change this to actual instructions when minimum binutils is upgraded ++ to 2.27. Macros are defined below for these newer instructions in order ++ to maintain compatibility. */ ++# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) ++ ++# define VEXTUBRX(t,a,b) .long (0x1000070d \ ++ | ((t)<<(32-11)) \ ++ | ((a)<<(32-16)) \ ++ | ((b)<<(32-21)) ) ++ ++# define VCMPNEZB(t,a,b) .long (0x10000507 \ ++ | ((t)<<(32-11)) \ ++ | ((a)<<(32-16)) \ ++ | ((b)<<(32-21)) ) ++ ++/* Get 16 bytes for unaligned case. ++ reg1: Vector to hold next 16 bytes. ++ reg2: Address to read from. ++ reg3: Permute control vector. */ ++# define GET16BYTES(reg1, reg2, reg3) \ ++ lvx reg1, 0, reg2; \ ++ vperm v8, v2, reg1, reg3; \ ++ vcmpequb. 
v8, v0, v8; \ ++ beq cr6, 1f; \ ++ vspltisb v9, 0; \ ++ b 2f; \ ++ .align 4; \ ++1: \ ++ cmplw cr6, r5, r11; \ ++ ble cr6, 2f; \ ++ addi r6, reg2, 16; \ ++ lvx v9, 0, r6; \ ++2: \ ++ vperm reg1, v9, reg1, reg3; ++ ++/* TODO: change this to .machine power9 when minimum binutils ++ is upgraded to 2.27. */ ++ .machine power7 ++EALIGN (strncmp, 4, 0) ++ /* Check if size is 0. */ ++ cmpdi cr0, r5, 0 ++ beq cr0, L(ret0) ++ li r0, 0 ++ ++ /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using ++ the code: ++ ++ (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) ++ ++ with PAGE_SIZE being 4096 and ITER_SIZE begin 32. */ ++ rldicl r8, r3, 0, 52 ++ cmpldi cr7, r8, 4096-32 ++ bgt cr7, L(pagecross) ++ rldicl r9, r4, 0, 52 ++ cmpldi cr7, r9, 4096-32 ++ bgt cr7, L(pagecross) ++ ++ /* For short strings up to 32 bytes, load both s1 and s2 using ++ unaligned dwords and compare. */ ++ ++ ld r7, 0(r3) ++ ld r9, 0(r4) ++ li r8, 0 ++ cmpb r8, r7, r8 ++ cmpb r6, r7, r9 ++ orc. r8, r8, r6 ++ bne cr0, L(different1) ++ ++ /* If the strings compared are equal, but size is less or equal ++ to 8, return 0. */ ++ cmpldi cr7, r5, 8 ++ li r9, 0 ++ ble cr7, L(ret1) ++ addi r5, r5, -8 ++ ++ ld r7, 8(r3) ++ ld r9, 8(r4) ++ cmpb r8, r7, r8 ++ cmpb r6, r7, r9 ++ orc. r8, r8, r6 ++ bne cr0, L(different1) ++ cmpldi cr7, r5, 8 ++ mr r9, r8 ++ ble cr7, L(ret1) ++ /* Update pointers and size. */ ++ addi r5, r5, -8 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ ++ ld r7, 0(r3) ++ ld r9, 0(r4) ++ li r8, 0 ++ cmpb r8, r7, r8 ++ cmpb r6, r7, r9 ++ orc. r8, r8, r6 ++ bne cr0, L(different1) ++ cmpldi cr7, r5, 8 ++ li r9, 0 ++ ble cr7, L(ret1) ++ addi r5, r5, -8 ++ ++ ld r7, 8(r3) ++ ld r9, 8(r4) ++ cmpb r8, r7, r8 ++ cmpb r6, r7, r9 ++ orc. r8, r8, r6 ++ bne cr0, L(different1) ++ cmpldi cr7, r5, 8 ++ mr r9, r8 ++ ble cr7, L(ret1) ++ ++ /* Update pointers and size. 
*/ ++ addi r5, r5, -8 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++L(align): ++ /* Now it has checked for first 32 bytes, align source1 to doubleword ++ and adjust source2 address. */ ++ vspltisb v0, 0 ++ vspltisb v2, -1 ++ or r6, r4, r3 ++ andi. r6, r6, 0xF ++ beq cr0, L(aligned) ++ lvsr v6, 0, r4 /* Compute mask. */ ++ clrldi r6, r4, 60 ++ subfic r11, r6, 16 ++ andi. r6, r3, 0xF ++ beq cr0, L(s1_align) ++ /* Both s1 and s2 are unaligned. */ ++ GET16BYTES(v5, r4, v6) ++ lvsr v10, 0, r3 /* Compute mask. */ ++ clrldi r6, r3, 60 ++ subfic r11, r6, 16 ++ GET16BYTES(v4, r3, v10) ++ VCMPNEZB(v7, v5, v4) ++ beq cr6, L(match) ++ b L(different) ++ ++ /* Align s1 to qw and adjust s2 address. */ ++ .align 4 ++L(match): ++ cmpldi cr7, r5, 16 ++ ble cr7, L(ret0) ++ subf r5, r11, r5 ++ add r3, r3, r11 ++ add r4, r4, r11 ++ andi. r11, r4, 0xF ++ beq cr0, L(aligned) ++ lvsr v6, 0, r4 ++ clrldi r6, r4, 60 ++ subfic r11, r6, 16 ++ /* There are 2 loops depending on the input alignment. ++ Each loop gets 16 bytes from s1 and s2, checks for null ++ and compares them. Loops until a mismatch or null occurs. 
*/ ++L(s1_align): ++ lvx v4, 0, r3 ++ GET16BYTES(v5, r4, v6) ++ VCMPNEZB(v7, v5, v4) ++ bne cr6, L(different) ++ cmpldi cr7, r5, 16 ++ ble cr7, L(ret0) ++ addi r5, r5, -16 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ ++ lvx v4, 0, r3 ++ GET16BYTES(v5, r4, v6) ++ VCMPNEZB(v7, v5, v4) ++ bne cr6, L(different) ++ cmpldi cr7, r5, 16 ++ ble cr7, L(ret0) ++ addi r5, r5, -16 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ ++ lvx v4, 0, r3 ++ GET16BYTES(v5, r4, v6) ++ VCMPNEZB(v7, v5, v4) ++ bne cr6, L(different) ++ cmpldi cr7, r5, 16 ++ ble cr7, L(ret0) ++ addi r5, r5, -16 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ ++ lvx v4, 0, r3 ++ GET16BYTES(v5, r4, v6) ++ VCMPNEZB(v7, v5, v4) ++ bne cr6, L(different) ++ cmpldi cr7, r5, 16 ++ ble cr7, L(ret0) ++ addi r5, r5, -16 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ b L(s1_align) ++ .align 4 ++L(aligned): ++ lvx v4, 0, r3 ++ lvx v5, 0, r4 ++ VCMPNEZB(v7, v5, v4) ++ bne cr6, L(different) ++ cmpldi cr7, r5, 16 ++ ble cr7, L(ret0) ++ addi r5, r5, -16 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ ++ lvx v4, 0, r3 ++ lvx v5, 0, r4 ++ VCMPNEZB(v7, v5, v4) ++ bne cr6, L(different) ++ cmpldi cr7, r5, 16 ++ ble cr7, L(ret0) ++ addi r5, r5, -16 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ ++ lvx v4, 0, r3 ++ lvx v5, 0, r4 ++ VCMPNEZB(v7, v5, v4) ++ bne cr6, L(different) ++ cmpldi cr7, r5, 16 ++ ble cr7, L(ret0) ++ addi r5, r5, -16 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ ++ lvx v4, 0, r3 ++ lvx v5, 0, r4 ++ VCMPNEZB(v7, v5, v4) ++ bne cr6, L(different) ++ cmpldi cr7, r5, 16 ++ ble cr7, L(ret0) ++ addi r5, r5, -16 ++ addi r3, r3, 16 ++ addi r4, r4, 16 ++ b L(aligned) ++ /* Calculate and return the difference. 
*/ ++L(different): ++ VCTZLSBB(r6, v7) ++ cmplw cr7, r5, r6 ++ ble cr7, L(ret0) ++ VEXTUBRX(r5, r6, v4) ++ VEXTUBRX(r4, r6, v5) ++ subf r3, r4, r5 ++ extsw r3, r3 ++ blr ++ ++ .align 4 ++L(ret0): ++ li r9, 0 ++L(ret1): ++ mr r3, r9 ++ blr ++ ++ /* The code now checks if r8 and r5 are different by issuing a ++ cmpb and shifts the result based on its output: ++ ++ leadzero = (__builtin_ffsl (z1) - 1); ++ leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero; ++ r1 = (r1 >> leadzero) & 0xFFUL; ++ r2 = (r2 >> leadzero) & 0xFFUL; ++ return r1 - r2; */ ++ ++ .align 4 ++L(different1): ++ neg r11, r8 ++ sldi r5, r5, 3 ++ and r8, r11, r8 ++ addi r5, r5, -8 ++ cntlzd r8, r8 ++ subfic r8, r8, 63 ++ extsw r8, r8 ++ cmpld cr7, r8, r5 ++ ble cr7, L(different2) ++ mr r8, r5 ++L(different2): ++ extsw r8, r8 ++ srd r7, r7, r8 ++ srd r9, r9, r8 ++ rldicl r3, r7, 0, 56 ++ rldicl r9, r9, 0, 56 ++ subf r9, r9, 3 ++ extsw r9, r9 ++ mr r3, r9 ++ blr ++ ++ /* If unaligned 16 bytes reads across a 4K page boundary, it uses ++ a simple byte a byte comparison until the page alignment for s1 ++ is reached. 
*/ ++ .align 4 ++L(pagecross): ++ lbz r7, 0(r3) ++ lbz r9, 0(r4) ++ subfic r8, r8,4095 ++ cmplw cr7, r9, r7 ++ bne cr7, L(byte_ne_3) ++ cmpdi cr7, r9, 0 ++ beq cr7, L(byte_ne_0) ++ addi r5, r5, -1 ++ subf r7, r8, r5 ++ subf r9, r7, r5 ++ addi r9, r9, 1 ++ mtctr r9 ++ b L(pagecross_loop1) ++ ++ .align 4 ++L(pagecross_loop0): ++ beq cr7, L(ret0) ++ lbz r9, 0(r3) ++ lbz r8, 0(r4) ++ addi r5, r5, -1 ++ cmplw cr7, r9, r8 ++ cmpdi cr5, r9, 0 ++ bne cr7, L(byte_ne_2) ++ beq cr5, L(byte_ne_0) ++L(pagecross_loop1): ++ cmpdi cr7, r5, 0 ++ addi r3, r3, 1 ++ addi r4, r4, 1 ++ bdnz L(pagecross_loop0) ++ cmpdi cr7, r7, 0 ++ li r9, 0 ++ bne+ cr7, L(align) ++ b L(ret1) ++ ++ .align 4 ++L(byte_ne_0): ++ li r7, 0 ++L(byte_ne_1): ++ subf r9, r9, r7 ++ extsw r9, r9 ++ b L(ret1) ++ ++ .align 4 ++L(byte_ne_2): ++ extsw r7, r9 ++ mr r9, r8 ++ b L(byte_ne_1) ++L(byte_ne_3): ++ extsw r7, r7 ++ b L(byte_ne_1) ++END(strncmp) ++libc_hidden_builtin_def(strncmp) ++#else ++#include ++#endif +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-24.patch b/SOURCES/glibc-rh1385004-24.patch new file mode 100644 index 0000000..5705519 --- /dev/null +++ b/SOURCES/glibc-rh1385004-24.patch @@ -0,0 +1,72 @@ +Update powerpc64 ULPs. + +The last upstream ULP update before the branch, in + +commit c6922934363f44b88250567f52036d8e9972c255 +Author: Andreas Schwab +Date: Sat Mar 3 14:20:24 2012 +0100 + + Update powerpc libm ULPs + +did not specify 1-ULP tolerance for all tests because some of them +returned the exact result. Current upstream uses a different way for +specifying ULPs, after this commit: + +commit e6b6a85705be16373fb3f78ef998edc62150499c +Author: Joseph Myers +Date: Wed Mar 5 15:02:38 2014 +0000 + + Don't include individual test ulps in libm-test-ulps. + +This implicitly increased the test tolerance for most (all?) math +functions to 1 ULP because at least one subtest was off by 1 ULP. 
+ +Index: b/sysdeps/powerpc/fpu/libm-test-ulps +=================================================================== +--- a/sysdeps/powerpc/fpu/libm-test-ulps ++++ b/sysdeps/powerpc/fpu/libm-test-ulps +@@ -2396,6 +2396,9 @@ ldouble: 2 + Test "sin_downward (8) == 0.9893582466233817778081235982452886721164": + ildouble: 1 + ldouble: 1 ++Test "sin_downward (9) == 0.4121184852417565697562725663524351793439": ++float: 1 ++ifloat: 1 + + # sin_tonearest + Test "sin_tonearest (1) == 0.8414709848078965066525023216302989996226": +@@ -2438,10 +2441,15 @@ float: 1 + ifloat: 1 + ildouble: 2 + ldouble: 2 ++Test "sin_upward (10) == -0.5440211108893698134047476618513772816836": ++float: 1 ++ifloat: 1 + Test "sin_upward (2) == 0.9092974268256816953960198659117448427023": + float: 2 + ifloat: 2 + Test "sin_upward (3) == 0.1411200080598672221007448028081102798469": ++float: 1 ++ifloat: 1 + ildouble: 1 + ldouble: 1 + Test "sin_upward (4) == -0.7568024953079282513726390945118290941359": +@@ -2449,9 +2457,18 @@ float: 1 + ifloat: 1 + ildouble: 1 + ldouble: 1 ++Test "sin_upward (5) == -0.9589242746631384688931544061559939733525": ++float: 1 ++ifloat: 1 + Test "sin_upward (6) == -0.2794154981989258728115554466118947596280": + ildouble: 1 + ldouble: 1 ++Test "sin_upward (7) == 0.6569865987187890903969990915936351779369": ++float: 1 ++ifloat: 1 ++Test "sin_upward (8) == 0.9893582466233817778081235982452886721164": ++float: 1 ++ifloat: 1 + Test "sin_upward (9) == 0.4121184852417565697562725663524351793439": + float: 1 + ifloat: 1 diff --git a/SOURCES/glibc-rh1385004-3.patch b/SOURCES/glibc-rh1385004-3.patch new file mode 100644 index 0000000..5e24b02 --- /dev/null +++ b/SOURCES/glibc-rh1385004-3.patch @@ -0,0 +1,36 @@ +From 7235e7a509ef1f93cb895f6cdd2bf27f497a3d0a Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Fri, 31 Jul 2015 10:48:20 -0300 +Subject: [PATCH] powerpc: Fix PPC64/POWER7 conform tests + +When building with --disable-multi-arch the strstr POWER7 +optimization create 
and uses symbols that conflict with what the conform +tests expect. + + * sysdeps/powerpc/powerpc64/power7/strstr.S (strstr): Use __strnlen + for static build. + +(cherry picked from commit 6f714aa4ad5af2745ae2d185821d20ce8fabc2c5) +--- + ChangeLog | 7 +++++++ + sysdeps/powerpc/powerpc64/power7/strstr.S | 2 +- + 3 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index d70df5c..cf95a84 100644 +diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S +index 8dca31c..bfb0c49 100644 +--- a/sysdeps/powerpc/powerpc64/power7/strstr.S ++++ b/sysdeps/powerpc/powerpc64/power7/strstr.S +@@ -39,7 +39,7 @@ + # ifdef SHARED + # define STRNLEN __GI_strnlen + # else +-# define STRNLEN strnlen ++# define STRNLEN __strnlen + # endif + #endif + +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-4.patch b/SOURCES/glibc-rh1385004-4.patch new file mode 100644 index 0000000..e09853f --- /dev/null +++ b/SOURCES/glibc-rh1385004-4.patch @@ -0,0 +1,113 @@ +From 561857f53a543684862c2b6d2308bc13affa2a18 Mon Sep 17 00:00:00 2001 +From: Rajalakshmi Srinivasaraghavan +Date: Tue, 18 Aug 2015 22:40:56 +0530 +Subject: [PATCH] powerpc: Handle worstcase behavior in strstr() for POWER7 + +Instead of checking needle length, constant 'n' number of comparisons +is checked to fall back to default implementation. This patch is tested +on powerpc64 and powerpc64le. + +2015-08-25 Rajalakshmi Srinivasaraghavan + + * sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case. 
+ +(cherry picked from commit fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f) +--- + ChangeLog | 4 ++++ + sysdeps/powerpc/powerpc64/power7/strstr.S | 22 +++++++++++++++------- + 2 files changed, 19 insertions(+), 7 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index cf95a84..5cbd6d6 100644 +diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S +index bfb0c49..fb3c810 100644 +--- a/sysdeps/powerpc/powerpc64/power7/strstr.S ++++ b/sysdeps/powerpc/powerpc64/power7/strstr.S +@@ -23,6 +23,8 @@ + /* The performance gain is obtained using aligned memory access, load + * doubleword and usage of cmpb instruction for quicker comparison. */ + ++#define ITERATIONS 64 ++ + #ifndef STRLEN + /* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +@@ -62,6 +64,8 @@ EALIGN (strstr, 4, 0) + cfi_offset(r30, -16) + std r29, -24(r1) /* Save callers register r29. */ + cfi_offset(r29, -24) ++ std r28, -32(r1) /* Save callers register r28. */ ++ cfi_offset(r28, -32) + std r0, 16(r1) /* Store the link register. */ + cfi_offset(lr, 16) + stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ +@@ -69,7 +73,6 @@ EALIGN (strstr, 4, 0) + + dcbt 0, r3 + dcbt 0, r4 +- + cmpdi cr7, r3, 0 + beq cr7, L(retnull) + cmpdi cr7, r4, 0 +@@ -84,10 +87,6 @@ EALIGN (strstr, 4, 0) + cmpdi cr7, r3, 0 /* If search str is null. */ + beq cr7, L(ret_r3) + +- /* Call __strstr_ppc if needle len > 2048 */ +- cmpdi cr7, r3, 2048 +- bgt cr7, L(default) +- + mr r31, r3 + mr r4, r3 + mr r3, r29 +@@ -105,7 +104,8 @@ EALIGN (strstr, 4, 0) + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) +- ++ /* Reg r28 is used to count the number of iterations. */ ++ li r28, 0 + rldicl r8, r3, 0, 52 /* Page cross check. 
*/ + cmpldi cr7, r8, 4096-16 + bgt cr7, L(bytebybyte) +@@ -324,6 +324,10 @@ L(return4): + .align 4 + L(begin): + mr r3, r8 ++ /* When our iterations exceed ITERATIONS,fall back to default. */ ++ addi r28, r28, 1 ++ cmpdi cr7, r28, ITERATIONS ++ beq cr7, L(default) + lbz r4, 0(r30) + bl STRCHR + nop +@@ -423,6 +427,10 @@ L(nextbyte): + cmpdi cr7, r9, -1 + beq cr7, L(end) + addi r3, r4, 1 ++ /* When our iterations exceed ITERATIONS,fall back to default. */ ++ addi r28, r28, 1 ++ cmpdi cr7, r28, ITERATIONS ++ beq cr7, L(default) + lbz r4, 0(r30) + bl STRCHR + nop +@@ -490,7 +498,6 @@ L(retnull): + + .align 4 + L(default): +- mr r3, r29 + mr r4, r30 + bl __strstr_ppc + nop +@@ -500,6 +507,7 @@ L(end): + addi r1, r1, FRAMESIZE /* Restore stack pointer. */ + cfi_adjust_cfa_offset(-FRAMESIZE) + ld r0, 16(r1) /* Restore the saved link register. */ ++ ld r28, -32(r1) /* Restore callers save register r28. */ + ld r29, -24(r1) /* Restore callers save register r29. */ + ld r30, -16(r1) /* Restore callers save register r30. */ + ld r31, -8(r1) /* Restore callers save register r31. */ +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-5.patch b/SOURCES/glibc-rh1385004-5.patch new file mode 100644 index 0000000..87a8978 --- /dev/null +++ b/SOURCES/glibc-rh1385004-5.patch @@ -0,0 +1,69 @@ +From de9b90366c27db80777d5099e4b22298f0b61a29 Mon Sep 17 00:00:00 2001 +From: Carlos Eduardo Seo +Date: Thu, 13 Aug 2015 14:33:06 -0300 +Subject: [PATCH] powerpc: Add missing hwcap strings. + +Some features in hwcap.h do not have matching string descriptors +to be displayed when LD_SHOW_AUXV=1. This patch fixes the problem. + +2015-08-13 Carlos Eduardo Seo + + * sysdeps/powerpc/dl-procinfo.c: + (_dl_powerpc_cap_flags): Added missing strings for some + hwcap features. + * sysdeps/powerpc/dl-procinfo.h: Updated hwcap bit count. 
+ +(cherry picked from commit 94ec7e007f4845de284d4f7569721b225ba77572) +--- + ChangeLog | 7 +++++++ + sysdeps/powerpc/dl-procinfo.c | 6 +++--- + sysdeps/powerpc/dl-procinfo.h | 4 ++-- + 3 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index 5cbd6d6..d9d3659 100644 +diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c +index 3a8cc41..6eda5d9 100644 +--- a/sysdeps/powerpc/dl-procinfo.c ++++ b/sysdeps/powerpc/dl-procinfo.c +@@ -45,11 +45,11 @@ + #if !defined PROCINFO_DECL && defined SHARED + ._dl_powerpc_cap_flags + #else +-PROCINFO_CLASS const char _dl_powerpc_cap_flags[57][10] ++PROCINFO_CLASS const char _dl_powerpc_cap_flags[60][10] + #endif + #ifndef PROCINFO_DECL + = { +- "vsx", ++ "ppcle", "true_le", "archpmu", "vsx", + "arch_2_06", "power6x", "dfp", "pa6t", + "arch_2_05", "ic_snoop", "smt", "booke", + "cellbe", "power5+", "power5", "power4", +@@ -62,7 +62,7 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[57][10] + "", "", "", "", + "", "", "", "", + "", "", "", "", +- "", "", "tar", "isel", ++ "", "vcrypto", "tar", "isel", + "ebb", "dscr", "htm", "arch_2_07", + } + #endif +diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h +index cf167b0..36873cf 100644 +--- a/sysdeps/powerpc/dl-procinfo.h ++++ b/sysdeps/powerpc/dl-procinfo.h +@@ -22,8 +22,8 @@ + #include + #include /* This defines the PPC_FEATURE[2]_* macros. */ + +-/* There are 25 bits used, but they are bits 7..31. */ +-#define _DL_HWCAP_FIRST 7 ++/* There are 28 bits used, but they are bits 4..31. */ ++#define _DL_HWCAP_FIRST 4 + + /* The total number of available bits (including those prior to + _DL_HWCAP_FIRST). Some of these bits might not be used. 
*/ +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-6.patch b/SOURCES/glibc-rh1385004-6.patch new file mode 100644 index 0000000..c3bc7ec --- /dev/null +++ b/SOURCES/glibc-rh1385004-6.patch @@ -0,0 +1,29 @@ +From db22400947e1c82153e5270d23fed53fc1e3a659 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Mon, 23 Jun 2014 09:38:47 -0500 +Subject: [PATCH] PowerPC: sync hwcap.h capabilities + +Linux commit dd58a092c4202f2bd490adab7285b3ff77f8e467 added the +PPC_FEATURE2_VEC_CRYPTO auvx capability to indicate whether to +hardware supports vector crypto hardware instructions. This patch +adds its definition to powerpc hwcap bits. +--- + ChangeLog | 5 +++++ + sysdeps/powerpc/bits/hwcap.h | 2 ++ + 2 files changed, 7 insertions(+) + +diff --git a/ChangeLog b/ChangeLog +index 426af5d..b4f687f 100644 +diff --git a/sysdeps/powerpc/bits/hwcap.h b/sysdeps/powerpc/bits/hwcap.h +index 7daec91..e894bd4 100644 +--- a/sysdeps/powerpc/bits/hwcap.h ++++ b/sysdeps/powerpc/bits/hwcap.h +@@ -62,3 +62,5 @@ + #define PPC_FEATURE2_HAS_EBB 0x10000000 /* Event Base Branching */ + #define PPC_FEATURE2_HAS_ISEL 0x08000000 /* Integer Select */ + #define PPC_FEATURE2_HAS_TAR 0x04000000 /* Target Address Register */ ++#define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000 /* Target supports vector ++ instruction. */ +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-7.patch b/SOURCES/glibc-rh1385004-7.patch new file mode 100644 index 0000000..5168cac --- /dev/null +++ b/SOURCES/glibc-rh1385004-7.patch @@ -0,0 +1,52 @@ +From 6edf51a1d5d2f070998553f99f3a6dc90c5c2969 Mon Sep 17 00:00:00 2001 +From: Carlos Eduardo Seo +Date: Wed, 19 Aug 2015 01:42:55 -0300 +Subject: [PATCH] powerpc: Sync hwcap.h with kernel + +Linux commit b4b56f9ecab40f3b4ef53e130c9f6663be491894 introduced +a new HWCAP2 bit to indicate that the kernel now aborts a memory +transaction when a syscall is made. This patch adds that bit to +sysdeps/powerpc/bits/hwcap.h. 
+ +2015-08-26 Carlos Eduardo Seo + + * sysdeps/powerpc/bits/hwcap.h: Add PPC_FEATURE2_HTM_NOSC. + * sysdeps/powerpc/dl-procinfo.c: + (_dl_powerpc_cap_flags): Added descriptor for this hwcap + feature so it shows when LD_SHOW_AUXV=1. + +(cherry picked from commit 3c13f28c8eac1e5a883d1b3801314430a094fc99) +--- + ChangeLog | 7 +++++++ + sysdeps/powerpc/bits/hwcap.h | 2 ++ + sysdeps/powerpc/dl-procinfo.c | 2 +- + 3 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/ChangeLog b/ChangeLog +index d9d3659..079da2a 100644 +diff --git a/sysdeps/powerpc/bits/hwcap.h b/sysdeps/powerpc/bits/hwcap.h +index f8c48cd..12554ca 100644 +--- a/sysdeps/powerpc/bits/hwcap.h ++++ b/sysdeps/powerpc/bits/hwcap.h +@@ -64,3 +64,5 @@ + #define PPC_FEATURE2_HAS_TAR 0x04000000 /* Target Address Register */ + #define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000 /* Target supports vector + instruction. */ ++#define PPC_FEATURE2_HTM_NOSC 0x01000000 /* Kernel aborts transaction ++ when a syscall is made. */ +diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c +index 6eda5d9..770c1f3 100644 +--- a/sysdeps/powerpc/dl-procinfo.c ++++ b/sysdeps/powerpc/dl-procinfo.c +@@ -62,7 +62,7 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[60][10] + "", "", "", "", + "", "", "", "", + "", "", "", "", +- "", "vcrypto", "tar", "isel", ++ "htm-nosc", "vcrypto", "tar", "isel", + "ebb", "dscr", "htm", "arch_2_07", + } + #endif +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-8.patch b/SOURCES/glibc-rh1385004-8.patch new file mode 100644 index 0000000..007c643 --- /dev/null +++ b/SOURCES/glibc-rh1385004-8.patch @@ -0,0 +1,137 @@ +From c2b54e66e194405a1ff062bb442ede9a8c4b913a Mon Sep 17 00:00:00 2001 +From: Carlos Eduardo Seo +Date: Wed, 4 Nov 2015 19:30:49 -0200 +Subject: [PATCH] powerpc: Add basic support for POWER9 sans hwcap. + +This patch adds the minimum changes for supporting the POWER9 processor. 
+ +(cherry picked from commit b1f19b8ef1003f202424ca222003a18b880bf914) +--- + ChangeLog | 13 +++++++++++++ + sysdeps/powerpc/dl-procinfo.c | 3 ++- + sysdeps/powerpc/dl-procinfo.h | 6 +++++- + sysdeps/powerpc/powerpc32/power9/Implies | 2 ++ + sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies | 1 + + sysdeps/powerpc/powerpc32/power9/multiarch/Implies | 1 + + sysdeps/powerpc/powerpc64/power9/Implies | 2 ++ + sysdeps/powerpc/powerpc64/power9/fpu/Implies | 2 ++ + sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies | 1 + + sysdeps/powerpc/powerpc64/power9/multiarch/Implies | 1 + + 10 files changed, 30 insertions(+), 2 deletions(-) + create mode 100644 sysdeps/powerpc/powerpc32/power9/Implies + create mode 100644 sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies + create mode 100644 sysdeps/powerpc/powerpc32/power9/multiarch/Implies + create mode 100644 sysdeps/powerpc/powerpc64/power9/Implies + create mode 100644 sysdeps/powerpc/powerpc64/power9/fpu/Implies + create mode 100644 sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies + create mode 100644 sysdeps/powerpc/powerpc64/power9/multiarch/Implies + +diff --git a/ChangeLog b/ChangeLog +index 079da2a..cecd77c 100644 +diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c +index 770c1f3..a8df5b8 100644 +--- a/sysdeps/powerpc/dl-procinfo.c ++++ b/sysdeps/powerpc/dl-procinfo.c +@@ -75,7 +75,7 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[60][10] + #if !defined PROCINFO_DECL && defined SHARED + ._dl_powerpc_platforms + #else +-PROCINFO_CLASS const char _dl_powerpc_platforms[14][12] ++PROCINFO_CLASS const char _dl_powerpc_platforms[15][12] + #endif + #ifndef PROCINFO_DECL + = { +@@ -93,6 +93,7 @@ PROCINFO_CLASS const char _dl_powerpc_platforms[14][12] + [PPC_PLATFORM_PPC464] = "ppc464", + [PPC_PLATFORM_PPC476] = "ppc476", + [PPC_PLATFORM_POWER8] = "power8", ++ [PPC_PLATFORM_POWER9] = "power9" + } + #endif + #if !defined SHARED || defined PROCINFO_DECL +diff --git 
a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h +index 36873cf..407149b 100644 +--- a/sysdeps/powerpc/dl-procinfo.h ++++ b/sysdeps/powerpc/dl-procinfo.h +@@ -40,7 +40,7 @@ + #define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \ + + PPC_FEATURE_HAS_DFP) + +-#define _DL_PLATFORMS_COUNT 14 ++#define _DL_PLATFORMS_COUNT 15 + + #define _DL_FIRST_PLATFORM 32 + /* Mask to filter out platforms. */ +@@ -62,6 +62,7 @@ + #define PPC_PLATFORM_PPC464 11 + #define PPC_PLATFORM_PPC476 12 + #define PPC_PLATFORM_POWER8 13 ++#define PPC_PLATFORM_POWER9 14 + + static inline const char * + __attribute__ ((unused)) +@@ -125,6 +126,9 @@ _dl_string_platform (const char *str) + case '8': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER8; + break; ++ case '9': ++ ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER9; ++ break; + default: + return -1; + } +diff --git a/sysdeps/powerpc/powerpc32/power9/Implies b/sysdeps/powerpc/powerpc32/power9/Implies +new file mode 100644 +index 0000000..066dea2 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc32/power9/Implies +@@ -0,0 +1,2 @@ ++powerpc/powerpc32/power8/fpu ++powerpc/powerpc32/power8 +diff --git a/sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies +new file mode 100644 +index 0000000..4393b56 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc32/power8/fpu/multiarch +diff --git a/sysdeps/powerpc/powerpc32/power9/multiarch/Implies b/sysdeps/powerpc/powerpc32/power9/multiarch/Implies +new file mode 100644 +index 0000000..4393b56 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc32/power9/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc32/power8/fpu/multiarch +diff --git a/sysdeps/powerpc/powerpc64/power9/Implies b/sysdeps/powerpc/powerpc64/power9/Implies +new file mode 100644 +index 0000000..fad2505 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power9/Implies +@@ -0,0 +1,2 @@ ++powerpc/powerpc64/power8/fpu 
++powerpc/powerpc64/power8 +diff --git a/sysdeps/powerpc/powerpc64/power9/fpu/Implies b/sysdeps/powerpc/powerpc64/power9/fpu/Implies +new file mode 100644 +index 0000000..fad2505 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power9/fpu/Implies +@@ -0,0 +1,2 @@ ++powerpc/powerpc64/power8/fpu ++powerpc/powerpc64/power8 +diff --git a/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies +new file mode 100644 +index 0000000..f11e1bd +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc64/power8/fpu/multiarch +diff --git a/sysdeps/powerpc/powerpc64/power9/multiarch/Implies b/sysdeps/powerpc/powerpc64/power9/multiarch/Implies +new file mode 100644 +index 0000000..dd6bca4 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/power9/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc64/power8/multiarch +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1385004-9.patch b/SOURCES/glibc-rh1385004-9.patch new file mode 100644 index 0000000..058286c --- /dev/null +++ b/SOURCES/glibc-rh1385004-9.patch @@ -0,0 +1,44 @@ +From 2f308f69b40f960729d4358afb1b1effb218b6cb Mon Sep 17 00:00:00 2001 +From: Carlos Eduardo Seo +Date: Tue, 5 Jan 2016 15:13:18 -0200 +Subject: [PATCH] powerpc: Add hwcap2 bits for POWER9. + +Added hwcap2 bit masks for Power ISA 3.0 and VSX IEEE binary float 128-bit +features. + +(cherry picked from commit d2de9ef7ad35341fd6f098f7e84a1128f2027d0c) +--- + ChangeLog | 7 +++++++ + sysdeps/powerpc/bits/hwcap.h | 3 +++ + sysdeps/powerpc/dl-procinfo.c | 2 +- + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/ChangeLog b/ChangeLog +index cecd77c..565da33 100644 +diff --git a/sysdeps/powerpc/bits/hwcap.h b/sysdeps/powerpc/bits/hwcap.h +index 12554ca..55b37a4 100644 +--- a/sysdeps/powerpc/bits/hwcap.h ++++ b/sysdeps/powerpc/bits/hwcap.h +@@ -66,3 +66,6 @@ + instruction. 
*/ + #define PPC_FEATURE2_HTM_NOSC 0x01000000 /* Kernel aborts transaction + when a syscall is made. */ ++#define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.0 */ ++#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float ++ 128-bit */ +diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c +index a8df5b8..1bbeb89 100644 +--- a/sysdeps/powerpc/dl-procinfo.c ++++ b/sysdeps/powerpc/dl-procinfo.c +@@ -61,7 +61,7 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[60][10] + "", "", "", "", + "", "", "", "", + "", "", "", "", +- "", "", "", "", ++ "", "", "ieee128", "arch_3_00", + "htm-nosc", "vcrypto", "tar", "isel", + "ebb", "dscr", "htm", "arch_2_07", + } +-- +2.1.0 + diff --git a/SOURCES/glibc-rh1387874.patch b/SOURCES/glibc-rh1387874.patch new file mode 100644 index 0000000..02bbef8 --- /dev/null +++ b/SOURCES/glibc-rh1387874.patch @@ -0,0 +1,23 @@ +commit c6fe55cf6089fc5cf1cea15fc7e1c9a8b90d9fda +Author: Andreas Jaeger +Date: Fri Jan 11 11:53:13 2013 +0100 + + Add MSG_FASTOPEN + + [BZ #15003] + * sysdeps/unix/sysv/linux/bits/socket.h (MSG_FASTOPEN): New + value. Sync with Linux 3.7. + +diff --git a/sysdeps/unix/sysv/linux/bits/socket.h b/sysdeps/unix/sysv/linux/bits/socket.h +index 25b115e..eadd7d9 100644 +--- a/sysdeps/unix/sysv/linux/bits/socket.h ++++ b/sysdeps/unix/sysv/linux/bits/socket.h +@@ -207,6 +207,8 @@ enum + #define MSG_MORE MSG_MORE + MSG_WAITFORONE = 0x10000, /* Wait for at least one packet to return.*/ + #define MSG_WAITFORONE MSG_WAITFORONE ++ MSG_FASTOPEN = 0x20000000, /* Send data in TCP SYN. */ ++#define MSG_FASTOPEN MSG_FASTOPEN + + MSG_CMSG_CLOEXEC = 0x40000000 /* Set close_on_exit for file + descriptor received through diff --git a/SOURCES/glibc-rh1392540.patch b/SOURCES/glibc-rh1392540.patch new file mode 100644 index 0000000..870c48d --- /dev/null +++ b/SOURCES/glibc-rh1392540.patch @@ -0,0 +1,16 @@ +Add "sss" to the automount database. 
The sss service ordering is +based on passwd (and others; sss comes after files) and netgroup (sss +comes after nisplus). + +Index: b/releng/nsswitch.conf +=================================================================== +--- a/releng/nsswitch.conf ++++ b/releng/nsswitch.conf +@@ -59,6 +59,6 @@ netgroup: nisplus sss + + publickey: nisplus + +-automount: files nisplus ++automount: files nisplus sss + aliases: files nisplus + diff --git a/SOURCES/glibc-rh1398244.patch b/SOURCES/glibc-rh1398244.patch new file mode 100644 index 0000000..0ed2864 --- /dev/null +++ b/SOURCES/glibc-rh1398244.patch @@ -0,0 +1,17 @@ +2016-10-28 Tulio Magno Quites Machado Filho + + [BZ #20728] + * sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S: Replace a + branch to _exit() by a function call. + +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S +@@ -112,7 +112,7 @@ + #ifdef SHARED + b JUMPTARGET(__GI__exit) + #else +- b JUMPTARGET(_exit) ++ bl JUMPTARGET(_exit) + /* We won't ever get here but provide a nop so that the linker + will insert a toc adjusting stub if necessary. 
*/ + nop diff --git a/SOURCES/glibc-rh1404435.patch b/SOURCES/glibc-rh1404435.patch new file mode 100644 index 0000000..f05f9c7 --- /dev/null +++ b/SOURCES/glibc-rh1404435.patch @@ -0,0 +1,37 @@ +Remove the "power8" AT_PLATFORM directory + +Index: b/releng/glibc_post_upgrade.c +=================================================================== +--- a/releng/glibc_post_upgrade.c ++++ b/releng/glibc_post_upgrade.c +@@ -73,14 +73,23 @@ main (void) + int i, j, fd; + off_t base; + ssize_t ret; ++ const char *remove_dirs[] = { + #ifdef __i386__ +- const char *remove_dirs[] = { "/lib/tls", "/lib/i686", "/lib/tls/i486", "/lib/tls/i586", "/lib/tls/i686" }; +-#else +-#ifndef LIBTLS +-#define LIBTLS "/lib/tls" +-#endif +- const char *remove_dirs[] = { LIBTLS }; +-#endif ++ "/lib/tls", "/lib/i686", "/lib/tls/i486", "/lib/tls/i586", "/lib/tls/i686" ++#else /* !__i386__ */ ++# ifndef LIBTLS ++# define LIBTLS "/lib/tls" ++# endif ++# if defined (__powerpc64__) ++ /* Covers both big endian and little endian. */ ++ "/lib64/power8", ++# elif defined (__powerpc__) ++ /* Covers 32-bit only (because of the previous conditional). */ ++ "/lib/power8", ++# endif ++ LIBTLS ++#endif /* !__i386__ */ ++ }; + for (j = 0; j < sizeof (remove_dirs) / sizeof (remove_dirs[0]); ++j) + { + size_t rmlen = strlen (remove_dirs[j]); diff --git a/SOURCES/glibc-rh1409611.patch b/SOURCES/glibc-rh1409611.patch new file mode 100644 index 0000000..f2e22d5 --- /dev/null +++ b/SOURCES/glibc-rh1409611.patch @@ -0,0 +1,3262 @@ +Partial backports of: +===================== + +commit c5d5d574cbfa96d0f6c1db24d1e072c472627e41 +Author: Ondřej Bílka +Date: Thu Oct 17 16:03:24 2013 +0200 + + Format floating routines. + +commit da08f647d58d674db08cdb3e61c8826c89470e2e +Author: Siddhesh Poyarekar +Date: Fri Dec 21 09:15:10 2012 +0530 + + Move more constants into static variables + + Code cleanup. 
+ +commit f93a8d15699ee699282465dc1e03e033f3fabb52 +Author: Siddhesh Poyarekar +Date: Wed Jan 16 16:06:48 2013 +0530 + + Consolidate constant defines into mpa.h + +commit caa99d06e7f1403887294442af520b0f8c6f3de0 +Author: Siddhesh Poyarekar +Date: Fri Jan 18 11:18:13 2013 +0530 + + Simplify calculation of 2^-m in __mpexp + +commit 107a5bf085f5c4ef8c28266a34d476724cfc3475 +Author: Joseph Myers +Date: Tue Nov 18 15:40:56 2014 +0000 + + Fix libm mpone, mptwo namespace (bug 17616). + +To provide __mptwo for __inv. + +Full backports of the following: +================================ + +commit 44e0d4c20ce5bf3825897e5d4b7caae94016214d +Author: Siddhesh Poyarekar +Date: Wed Jan 2 11:44:13 2013 +0530 + + Split mantissa calculation loop and add branch prediction + +commit f8af25d218202ff2f5d167b8e44e4b79f91d147f +Author: Siddhesh Poyarekar +Date: Fri Jan 4 15:09:33 2013 +0530 + + Remove commented declarations + +commit a9e48ab40e230c7fe34e4892bec8af4f3f975a20 +Author: Siddhesh Poyarekar +Date: Fri Jan 4 15:42:09 2013 +0530 + + Clean up comment for MP_NO + +commit fffb407f4668b40b3df1eb8ee3ae3bc64ee79e20 +Author: Siddhesh Poyarekar +Date: Fri Jan 4 22:52:12 2013 +0530 + + Remove unused __cr and __cpymn + +commit 950c99ca9094e7dc6394e90395f51e12093393aa +Author: Siddhesh Poyarekar +Date: Wed Jan 9 19:07:15 2013 +0530 + + Update comments in mpa.c + + Fixed comment style and clearer wording in some cases. + +commit 1066a53440d2744566e97c59bcd0d422186b3e90 +Author: Siddhesh Poyarekar +Date: Mon Jan 14 21:31:25 2013 +0530 + + Fix code formatting in mpa.c + + This includes the overridden mpa.c in power4. + +commit 2a91b5735ac1bc65ce5c2a3646d75ba7208e26e9 +Author: Siddhesh Poyarekar +Date: Mon Jan 14 21:36:58 2013 +0530 + + Minor tweak to mp multiplication + + Add a local variable to remove extra copies to/from memory in the Z + array. 
+ +commit 45f058844c33f670475bd02f266942746bcb332b +Author: Siddhesh Poyarekar +Date: Tue Feb 26 21:28:16 2013 +0530 + + Another tweak to the multiplication algorithm + + Reduce the formula to calculate mantissa so that we reduce the net + number of multiplications performed. + +commit bab8a695ee79a5a6e9b2b699938952b006fcbbec +Author: Siddhesh Poyarekar +Date: Thu Feb 21 14:29:18 2013 +0530 + + Fix whitespace differences between generic and powerpc mpa.c + + +commit 2d0e0f29f85036d1189231cb7c1f19f27ba14a89 +Author: Siddhesh Poyarekar +Date: Fri Feb 15 23:56:20 2013 +0530 + + Fix determination of lower precision in __mul + +commit 909279a5cfa938c989c9b01c8f48a0276291ec45 +Author: Siddhesh Poyarekar +Date: Wed Feb 13 14:16:23 2013 +0530 + + Optimized mp multiplication + + Don't bother multiplying zeroes since that only wastes cycles. + +commit bdf028142eb77d6ae59500db875068fa5d7b059d +Author: Siddhesh Poyarekar +Date: Wed Feb 13 13:55:29 2013 +0530 + + Clean up add_magnitudes and sub_magnitudes + +commit d6752ccd696c71d23cd3df8fb9cc60b61c32e65a +Author: Siddhesh Poyarekar +Date: Thu Feb 14 10:31:09 2013 +0530 + + New __sqr function as a faster special case of __mul + +commit 4709fe7602b56e9f6ee1ab6afb4067409a784f29 +Author: Siddhesh Poyarekar +Date: Sat Feb 16 00:09:29 2013 +0530 + + Use intermediate variable to compute exponent in __mul + +commit 20cd7fb3ae63795ae7c9a464abf5ed19b364ade0 +Author: Siddhesh Poyarekar +Date: Wed Feb 20 18:56:20 2013 +0530 + + Copy comment about inner loop from powerpc mpa.c to the default one + +commit e69804d14e43f14c3c65dc570afdbfb822c9838b +Author: Siddhesh Poyarekar +Date: Mon Feb 25 16:43:02 2013 +0530 + + Use long wherever possible in mpa.c + + Using long throughout like powerpc does is beneficial since it reduces + the need to switch to 32-bit instructions. It gives a very minor + performance improvement. 
+ +commit 82a9811d29c00161c7c8ea7f70e2cc30988e192e +Author: Siddhesh Poyarekar +Date: Thu Mar 7 12:23:29 2013 +0530 + + Use generic mpa.c code for everything except __mul and __sqr + +commit 6f2e90e78f151bab153c2b38492505ae2012db06 +Author: Siddhesh Poyarekar +Date: Tue Mar 26 19:28:50 2013 +0530 + + Make mantissa type of mp_no configurable + + The mantissa of mp_no is intended to take only integral values. This + is a relatively good choice for powerpc due to its 4 fpus, but not for + other architectures, which suffer due to this choice. This change + makes the default mantissa a long integer and allows powerpc to + override it. Additionally, some operations have been optimized for + integer manipulation, resulting in a significant improvement in + performance. + +commit 5739f705eed5cf58e7b439e5983542e06d7fc2da +Author: Siddhesh Poyarekar +Date: Tue Mar 26 20:24:04 2013 +0530 + + Use integral constants + + The compiler is smart enough to convert those into double for powerpc, + but if we put them as doubles, it adds overhead by performing those + operations in floating point mode. + +commit 89f3b6e18c6e7833438789746fcfc2e7189f7cac +Author: Joseph Myers +Date: Thu May 21 23:05:45 2015 +0000 + + Fix sysdeps/ieee754/dbl-64/mpa.c for -Wuninitialized. + + If you remove the "override CFLAGS += -Wno-uninitialized" in + math/Makefile, one of the errors you get is: + + ../sysdeps/ieee754/dbl-64/mpa.c: In function '__mp_dbl.part.0': + ../sysdeps/ieee754/dbl-64/mpa.c:183:5: error: 'c' may be used uninitialized in this function [-Werror=maybe-uninitialized] + c *= X[0]; + + The problem is that the p < 5 case initializes c if p is 1, 2, 3 or 4 + but not otherwise, and in fact p is positive for all calls to this + function so the uninitialized case can't actually occur. This patch + replaces the "if (p == 4)" last case with a comment so the compiler + can see that all paths do initialize c. + + Tested for x86_64. 
+ + * sysdeps/ieee754/dbl-64/mpa.c (norm): Remove if condition on + (p == 4) case. + +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa.c ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.c +@@ -1,7 +1,7 @@ + /* + * IBM Accurate Mathematical Library + * written by International Business Machines Corp. +- * Copyright (C) 2001, 2011 Free Software Foundation ++ * Copyright (C) 2001-2017 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by +@@ -22,9 +22,7 @@ + /* FUNCTIONS: */ + /* mcr */ + /* acr */ +-/* cr */ + /* cpy */ +-/* cpymn */ + /* norm */ + /* denorm */ + /* mp_dbl */ +@@ -44,479 +42,868 @@ + + #include "endian.h" + #include "mpa.h" +-#include "mpa2.h" +-#include /* For MIN() */ ++#include ++#include + + #ifndef SECTION + # define SECTION + #endif + ++#ifndef NO__CONST ++/* TODO: With only a partial backport of the constant cleanup from ++ upstream we limit the definition of these two constants to ++ this file. */ ++static const mp_no __mpone = { 1, { 1.0, 1.0 } }; ++static const mp_no __mptwo = { 1, { 1.0, 2.0 } }; ++#endif ++ + #ifndef NO___ACR +-/* mcr() compares the sizes of the mantissas of two multiple precision */ +-/* numbers. Mantissas are compared regardless of the signs of the */ +-/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */ +-/* disregarded. */ ++/* Compare mantissa of two multiple precision numbers regardless of the sign ++ and exponent of the numbers. 
*/ + static int +-mcr(const mp_no *x, const mp_no *y, int p) { +- int i; +- for (i=1; i<=p; i++) { +- if (X[i] == Y[i]) continue; +- else if (X[i] > Y[i]) return 1; +- else return -1; } ++mcr (const mp_no *x, const mp_no *y, int p) ++{ ++ long i; ++ long p2 = p; ++ for (i = 1; i <= p2; i++) ++ { ++ if (X[i] == Y[i]) ++ continue; ++ else if (X[i] > Y[i]) ++ return 1; ++ else ++ return -1; ++ } + return 0; + } + +- +-/* acr() compares the absolute values of two multiple precision numbers */ ++/* Compare the absolute values of two multiple precision numbers. */ + int +-__acr(const mp_no *x, const mp_no *y, int p) { +- int i; +- +- if (X[0] == ZERO) { +- if (Y[0] == ZERO) i= 0; +- else i=-1; +- } +- else if (Y[0] == ZERO) i= 1; +- else { +- if (EX > EY) i= 1; +- else if (EX < EY) i=-1; +- else i= mcr(x,y,p); +- } +- +- return i; +-} +-#endif +- ++__acr (const mp_no *x, const mp_no *y, int p) ++{ ++ long i; + +-#if 0 +-/* cr() compares the values of two multiple precision numbers */ +-static int __cr(const mp_no *x, const mp_no *y, int p) { +- int i; +- +- if (X[0] > Y[0]) i= 1; +- else if (X[0] < Y[0]) i=-1; +- else if (X[0] < ZERO ) i= __acr(y,x,p); +- else i= __acr(x,y,p); ++ if (X[0] == 0) ++ { ++ if (Y[0] == 0) ++ i = 0; ++ else ++ i = -1; ++ } ++ else if (Y[0] == 0) ++ i = 1; ++ else ++ { ++ if (EX > EY) ++ i = 1; ++ else if (EX < EY) ++ i = -1; ++ else ++ i = mcr (x, y, p); ++ } + + return i; + } + #endif + +- + #ifndef NO___CPY +-/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */ +-void __cpy(const mp_no *x, mp_no *y, int p) { ++/* Copy multiple precision number X into Y. They could be the same ++ number. */ ++void ++__cpy (const mp_no *x, mp_no *y, int p) ++{ ++ long i; ++ + EY = EX; +- for (int i=0; i <= p; i++) Y[i] = X[i]; ++ for (i = 0; i <= p; i++) ++ Y[i] = X[i]; + } + #endif + ++#ifndef NO___MP_DBL ++/* Convert a multiple precision number *X into a double precision ++ number *Y, normalized case (|x| >= 2**(-1022))). 
X has precision ++ P, which is positive. */ ++static void ++norm (const mp_no *x, double *y, int p) ++{ ++# define R RADIXI ++ long i; ++ double c; ++ mantissa_t a, u, v, z[5]; ++ if (p < 5) ++ { ++ if (p == 1) ++ c = X[1]; ++ else if (p == 2) ++ c = X[1] + R * X[2]; ++ else if (p == 3) ++ c = X[1] + R * (X[2] + R * X[3]); ++ else /* p == 4. */ ++ c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]); ++ } ++ else ++ { ++ for (a = 1, z[1] = X[1]; z[1] < TWO23; ) ++ { ++ a *= 2; ++ z[1] *= 2; ++ } + +-#if 0 +-/* Copy a multiple precision number x of precision m into a */ +-/* multiple precision number y of precision n. In case n>m, */ +-/* the digits of y beyond the m'th are set to zero. In case */ +-/* n= 2**(-1022))) */ +-static void norm(const mp_no *x, double *y, int p) +-{ +- #define R radixi.d +- int i; +-#if 0 +- int k; +-#endif +- double a,c,u,v,z[5]; +- if (p<5) { +- if (p==1) c = X[1]; +- else if (p==2) c = X[1] + R* X[2]; +- else if (p==3) c = X[1] + R*(X[2] + R* X[3]); +- else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]); +- } +- else { +- for (a=ONE, z[1]=X[1]; z[1] < TWO23; ) +- {a *= TWO; z[1] *= TWO; } +- +- for (i=2; i<5; i++) { +- z[i] = X[i]*a; +- u = (z[i] + CUTTER)-CUTTER; +- if (u > z[i]) u -= RADIX; +- z[i] -= u; +- z[i-1] += u*RADIXI; +- } +- +- u = (z[3] + TWO71) - TWO71; +- if (u > z[3]) u -= TWO19; +- v = z[3]-u; +- +- if (v == TWO18) { +- if (z[4] == ZERO) { +- for (i=5; i <= p; i++) { +- if (X[i] == ZERO) continue; +- else {z[3] += ONE; break; } ++ if (v == TWO18) ++ { ++ if (z[4] == 0) ++ { ++ for (i = 5; i <= p; i++) ++ { ++ if (X[i] == 0) ++ continue; ++ else ++ { ++ z[3] += 1; ++ break; ++ } ++ } ++ } ++ else ++ z[3] += 1; + } +- } +- else z[3] += ONE; +- } + +- c = (z[1] + R *(z[2] + R * z[3]))/a; +- } ++ c = (z[1] + R * (z[2] + R * z[3])) / a; ++ } + + c *= X[0]; + +- for (i=1; iEX; i--) c *= RADIXI; ++ for (i = 1; i < EX; i++) ++ c *= RADIX; ++ for (i = 1; i > EX; i--) ++ c *= RADIXI; + + *y = c; +-#undef R ++# undef R + } + 
+-/* Convert a multiple precision number *x into a double precision */ +-/* number *y, denormalized case (|x| < 2**(-1022))) */ +-static void denorm(const mp_no *x, double *y, int p) ++/* Convert a multiple precision number *X into a double precision ++ number *Y, Denormal case (|x| < 2**(-1022))). */ ++static void ++denorm (const mp_no *x, double *y, int p) + { +- int i,k; +- double c,u,z[5]; +-#if 0 +- double a,v; +-#endif ++ long i, k; ++ long p2 = p; ++ double c; ++ mantissa_t u, z[5]; ++ ++# define R RADIXI ++ if (EX < -44 || (EX == -44 && X[1] < TWO5)) ++ { ++ *y = 0; ++ return; ++ } + +-#define R radixi.d +- if (EX<-44 || (EX==-44 && X[1] z[3]) u -= TWO5; +- +- if (u==z[3]) { +- for (i=k+1; i <= p; i++) { +- if (X[i] == ZERO) continue; +- else {z[3] += ONE; break; } +- } +- } +- +- c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10); +- +- *y = c*TWOM1032; +-#undef R +-} +- +-/* Convert a multiple precision number *x into a double precision number *y. */ +-/* The result is correctly rounded to the nearest/even. 
*x is left unchanged */ +- +-void __mp_dbl(const mp_no *x, double *y, int p) { +-#if 0 +- int i,k; +- double a,c,u,v,z[5]; +-#endif ++ if (p2 == 1) ++ { ++ if (EX == -42) ++ { ++ z[1] = X[1] + TWO10; ++ z[2] = 0; ++ z[3] = 0; ++ k = 3; ++ } ++ else if (EX == -43) ++ { ++ z[1] = TWO10; ++ z[2] = X[1]; ++ z[3] = 0; ++ k = 2; ++ } ++ else ++ { ++ z[1] = TWO10; ++ z[2] = 0; ++ z[3] = X[1]; ++ k = 1; ++ } ++ } ++ else if (p2 == 2) ++ { ++ if (EX == -42) ++ { ++ z[1] = X[1] + TWO10; ++ z[2] = X[2]; ++ z[3] = 0; ++ k = 3; ++ } ++ else if (EX == -43) ++ { ++ z[1] = TWO10; ++ z[2] = X[1]; ++ z[3] = X[2]; ++ k = 2; ++ } ++ else ++ { ++ z[1] = TWO10; ++ z[2] = 0; ++ z[3] = X[1]; ++ k = 1; ++ } ++ } ++ else ++ { ++ if (EX == -42) ++ { ++ z[1] = X[1] + TWO10; ++ z[2] = X[2]; ++ k = 3; ++ } ++ else if (EX == -43) ++ { ++ z[1] = TWO10; ++ z[2] = X[1]; ++ k = 2; ++ } ++ else ++ { ++ z[1] = TWO10; ++ z[2] = 0; ++ k = 1; ++ } ++ z[3] = X[k]; ++ } + +- if (X[0] == ZERO) {*y = ZERO; return; } ++ u = ALIGN_DOWN_TO (z[3], TWO5); + +- if (EX> -42) norm(x,y,p); +- else if (EX==-42 && X[1]>=TWO10) norm(x,y,p); +- else denorm(x,y,p); ++ if (u == z[3]) ++ { ++ for (i = k + 1; i <= p2; i++) ++ { ++ if (X[i] == 0) ++ continue; ++ else ++ { ++ z[3] += 1; ++ break; ++ } ++ } ++ } ++ ++ c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10); ++ ++ *y = c * TWOM1032; ++# undef R + } +-#endif + ++/* Convert multiple precision number *X into double precision number *Y. The ++ result is correctly rounded to the nearest/even. */ ++void ++__mp_dbl (const mp_no *x, double *y, int p) ++{ ++ if (X[0] == 0) ++ { ++ *y = 0; ++ return; ++ } + +-/* dbl_mp() converts a double precision number x into a multiple precision */ +-/* number *y. If the precision p is too small the result is truncated. x is */ +-/* left unchanged. */ ++ if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10))) ++ norm (x, y, p); ++ else ++ denorm (x, y, p); ++} ++#endif + ++/* Get the multiple precision equivalent of X into *Y. 
If the precision is too ++ small, the result is truncated. */ + void + SECTION +-__dbl_mp(double x, mp_no *y, int p) { ++__dbl_mp (double x, mp_no *y, int p) ++{ ++ long i, n; ++ long p2 = p; + +- int i,n; +- double u; ++ /* Sign. */ ++ if (x == 0) ++ { ++ Y[0] = 0; ++ return; ++ } ++ else if (x > 0) ++ Y[0] = 1; ++ else ++ { ++ Y[0] = -1; ++ x = -x; ++ } + +- /* Sign */ +- if (x == ZERO) {Y[0] = ZERO; return; } +- else if (x > ZERO) Y[0] = ONE; +- else {Y[0] = MONE; x=-x; } +- +- /* Exponent */ +- for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI; +- for ( ; x < ONE; EY -= ONE) x *= RADIX; +- +- /* Digits */ +- n=MIN(p,4); +- for (i=1; i<=n; i++) { +- u = (x + TWO52) - TWO52; +- if (u>x) u -= ONE; +- Y[i] = u; x -= u; x *= RADIX; } +- for ( ; i<=p; i++) Y[i] = ZERO; ++ /* Exponent. */ ++ for (EY = 1; x >= RADIX; EY += 1) ++ x *= RADIXI; ++ for (; x < 1; EY -= 1) ++ x *= RADIX; ++ ++ /* Digits. */ ++ n = MIN (p2, 4); ++ for (i = 1; i <= n; i++) ++ { ++ INTEGER_OF (x, Y[i]); ++ x *= RADIX; ++ } ++ for (; i <= p2; i++) ++ Y[i] = 0; + } + +- +-/* add_magnitudes() adds the magnitudes of *x & *y assuming that */ +-/* abs(*x) >= abs(*y) > 0. */ +-/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */ +-/* No guard digit is used. The result equals the exact sum, truncated. */ +-/* *x & *y are left unchanged. */ +- ++/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The ++ sign of the sum *Z is not changed. X and Y may overlap but not X and Z or ++ Y and Z. No guard digit is used. The result equals the exact sum, ++ truncated. 
*/ + static void + SECTION +-add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { +- +- int i,j,k; ++add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) ++{ ++ long i, j, k; ++ long p2 = p; ++ mantissa_t zk; + + EZ = EX; + +- i=p; j=p+ EY - EX; k=p+1; ++ i = p2; ++ j = p2 + EY - EX; ++ k = p2 + 1; ++ ++ if (__glibc_unlikely (j < 1)) ++ { ++ __cpy (x, z, p); ++ return; ++ } + +- if (j<1) +- {__cpy(x,z,p); return; } +- else Z[k] = ZERO; +- +- for (; j>0; i--,j--) { +- Z[k] += X[i] + Y[j]; +- if (Z[k] >= RADIX) { +- Z[k] -= RADIX; +- Z[--k] = ONE; } +- else +- Z[--k] = ZERO; +- } +- +- for (; i>0; i--) { +- Z[k] += X[i]; +- if (Z[k] >= RADIX) { +- Z[k] -= RADIX; +- Z[--k] = ONE; } +- else +- Z[--k] = ZERO; +- } +- +- if (Z[1] == ZERO) { +- for (i=1; i<=p; i++) Z[i] = Z[i+1]; } +- else EZ += ONE; +-} ++ zk = 0; + ++ for (; j > 0; i--, j--) ++ { ++ zk += X[i] + Y[j]; ++ if (zk >= RADIX) ++ { ++ Z[k--] = zk - RADIX; ++ zk = 1; ++ } ++ else ++ { ++ Z[k--] = zk; ++ zk = 0; ++ } ++ } + +-/* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */ +-/* abs(*x) > abs(*y) > 0. */ +-/* The sign of the difference *z is undefined. x&y may overlap but not x&z */ +-/* or y&z. One guard digit is used. The error is less than one ulp. */ +-/* *x & *y are left unchanged. */ ++ for (; i > 0; i--) ++ { ++ zk += X[i]; ++ if (zk >= RADIX) ++ { ++ Z[k--] = zk - RADIX; ++ zk = 1; ++ } ++ else ++ { ++ Z[k--] = zk; ++ zk = 0; ++ } ++ } + ++ if (zk == 0) ++ { ++ for (i = 1; i <= p2; i++) ++ Z[i] = Z[i + 1]; ++ } ++ else ++ { ++ Z[1] = zk; ++ EZ += 1; ++ } ++} ++ ++/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0. ++ The sign of the difference *Z is not changed. X and Y may overlap but not X ++ and Z or Y and Z. One guard digit is used. The error is less than one ++ ULP. 
*/ + static void + SECTION +-sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { +- +- int i,j,k; ++sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) ++{ ++ long i, j, k; ++ long p2 = p; ++ mantissa_t zk; + + EZ = EX; ++ i = p2; ++ j = p2 + EY - EX; ++ k = p2; ++ ++ /* Y is too small compared to X, copy X over to the result. */ ++ if (__glibc_unlikely (j < 1)) ++ { ++ __cpy (x, z, p); ++ return; ++ } ++ ++ /* The relevant least significant digit in Y is non-zero, so we factor it in ++ to enhance accuracy. */ ++ if (j < p2 && Y[j + 1] > 0) ++ { ++ Z[k + 1] = RADIX - Y[j + 1]; ++ zk = -1; ++ } ++ else ++ zk = Z[k + 1] = 0; + +- if (EX == EY) { +- i=j=k=p; +- Z[k] = Z[k+1] = ZERO; } +- else { +- j= EX - EY; +- if (j > p) {__cpy(x,z,p); return; } +- else { +- i=p; j=p+1-j; k=p; +- if (Y[j] > ZERO) { +- Z[k+1] = RADIX - Y[j--]; +- Z[k] = MONE; } +- else { +- Z[k+1] = ZERO; +- Z[k] = ZERO; j--;} +- } +- } +- +- for (; j>0; i--,j--) { +- Z[k] += (X[i] - Y[j]); +- if (Z[k] < ZERO) { +- Z[k] += RADIX; +- Z[--k] = MONE; } +- else +- Z[--k] = ZERO; +- } +- +- for (; i>0; i--) { +- Z[k] += X[i]; +- if (Z[k] < ZERO) { +- Z[k] += RADIX; +- Z[--k] = MONE; } +- else +- Z[--k] = ZERO; +- } ++ /* Subtract and borrow. */ ++ for (; j > 0; i--, j--) ++ { ++ zk += (X[i] - Y[j]); ++ if (zk < 0) ++ { ++ Z[k--] = zk + RADIX; ++ zk = -1; ++ } ++ else ++ { ++ Z[k--] = zk; ++ zk = 0; ++ } ++ } + +- for (i=1; Z[i] == ZERO; i++) ; ++ /* We're done with digits from Y, so it's just digits in X. */ ++ for (; i > 0; i--) ++ { ++ zk += X[i]; ++ if (zk < 0) ++ { ++ Z[k--] = zk + RADIX; ++ zk = -1; ++ } ++ else ++ { ++ Z[k--] = zk; ++ zk = 0; ++ } ++ } ++ ++ /* Normalize. */ ++ for (i = 1; Z[i] == 0; i++) ++ ; + EZ = EZ - i + 1; +- for (k=1; i <= p+1; ) ++ for (k = 1; i <= p2 + 1; ) + Z[k++] = Z[i++]; +- for (; k <= p; ) +- Z[k++] = ZERO; ++ for (; k <= p2; ) ++ Z[k++] = 0; + } + +- +-/* Add two multiple precision numbers. Set *z = *x + *y. 
x&y may overlap */ +-/* but not x&z or y&z. One guard digit is used. The error is less than */ +-/* one ulp. *x & *y are left unchanged. */ +- ++/* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X ++ and Z or Y and Z. One guard digit is used. The error is less than one ++ ULP. */ + void + SECTION +-__add(const mp_no *x, const mp_no *y, mp_no *z, int p) { +- ++__add (const mp_no *x, const mp_no *y, mp_no *z, int p) ++{ + int n; + +- if (X[0] == ZERO) {__cpy(y,z,p); return; } +- else if (Y[0] == ZERO) {__cpy(x,z,p); return; } ++ if (X[0] == 0) ++ { ++ __cpy (y, z, p); ++ return; ++ } ++ else if (Y[0] == 0) ++ { ++ __cpy (x, z, p); ++ return; ++ } + +- if (X[0] == Y[0]) { +- if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } +- else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; } +- } +- else { +- if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } +- else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; } +- else Z[0] = ZERO; +- } ++ if (X[0] == Y[0]) ++ { ++ if (__acr (x, y, p) > 0) ++ { ++ add_magnitudes (x, y, z, p); ++ Z[0] = X[0]; ++ } ++ else ++ { ++ add_magnitudes (y, x, z, p); ++ Z[0] = Y[0]; ++ } ++ } ++ else ++ { ++ if ((n = __acr (x, y, p)) == 1) ++ { ++ sub_magnitudes (x, y, z, p); ++ Z[0] = X[0]; ++ } ++ else if (n == -1) ++ { ++ sub_magnitudes (y, x, z, p); ++ Z[0] = Y[0]; ++ } ++ else ++ Z[0] = 0; ++ } + } + ++/* Subtract *Y from *X and return the result in *Z. X and Y may overlap but ++ not X and Z or Y and Z. One guard digit is used. The error is less than ++ one ULP. */ ++void ++SECTION ++__sub (const mp_no *x, const mp_no *y, mp_no *z, int p) ++{ ++ int n; ++ ++ if (X[0] == 0) ++ { ++ __cpy (y, z, p); ++ Z[0] = -Z[0]; ++ return; ++ } ++ else if (Y[0] == 0) ++ { ++ __cpy (x, z, p); ++ return; ++ } + +-/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */ +-/* overlap but not x&z or y&z. One guard digit is used. The error is */ +-/* less than one ulp. *x & *y are left unchanged. 
*/ ++ if (X[0] != Y[0]) ++ { ++ if (__acr (x, y, p) > 0) ++ { ++ add_magnitudes (x, y, z, p); ++ Z[0] = X[0]; ++ } ++ else ++ { ++ add_magnitudes (y, x, z, p); ++ Z[0] = -Y[0]; ++ } ++ } ++ else ++ { ++ if ((n = __acr (x, y, p)) == 1) ++ { ++ sub_magnitudes (x, y, z, p); ++ Z[0] = X[0]; ++ } ++ else if (n == -1) ++ { ++ sub_magnitudes (y, x, z, p); ++ Z[0] = -Y[0]; ++ } ++ else ++ Z[0] = 0; ++ } ++} + ++#ifndef NO__MUL ++/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X ++ and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P ++ digits. In case P > 3 the error is bounded by 1.001 ULP. */ + void + SECTION +-__sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { ++__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) ++{ ++ long i, j, k, ip, ip2; ++ long p2 = p; ++ mantissa_store_t zk; ++ const mp_no *a; ++ mantissa_store_t *diag; ++ ++ /* Is z=0? */ ++ if (__glibc_unlikely (X[0] * Y[0] == 0)) ++ { ++ Z[0] = 0; ++ return; ++ } + +- int n; ++ /* We need not iterate through all X's and Y's since it's pointless to ++ multiply zeroes. Here, both are zero... */ ++ for (ip2 = p2; ip2 > 0; ip2--) ++ if (X[ip2] != 0 || Y[ip2] != 0) ++ break; ++ ++ a = X[ip2] != 0 ? y : x; ++ ++ /* ... and here, at least one of them is still zero. */ ++ for (ip = ip2; ip > 0; ip--) ++ if (a->d[ip] != 0) ++ break; ++ ++ /* The product looks like this for p = 3 (as an example): ++ ++ ++ a1 a2 a3 ++ x b1 b2 b3 ++ ----------------------------- ++ a1*b3 a2*b3 a3*b3 ++ a1*b2 a2*b2 a3*b2 ++ a1*b1 a2*b1 a3*b1 ++ ++ So our K needs to ideally be P*2, but we're limiting ourselves to P + 3 ++ for P >= 3. We compute the above digits in two parts; the last P-1 ++ digits and then the first P digits. The last P-1 digits are a sum of ++ products of the input digits from P to P-k where K is 0 for the least ++ significant digit and increases as we go towards the left. The product ++ term is of the form X[k]*X[P-k] as can be seen in the above example. 
++ ++ The first P digits are also a sum of products with the same product term, ++ except that the sum is from 1 to k. This is also evident from the above ++ example. ++ ++ Another thing that becomes evident is that only the most significant ++ ip+ip2 digits of the result are non-zero, where ip and ip2 are the ++ 'internal precision' of the input numbers, i.e. digits after ip and ip2 ++ are all 0. */ ++ ++ k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3; ++ ++ while (k > ip + ip2 + 1) ++ Z[k--] = 0; ++ ++ zk = 0; ++ ++ /* Precompute sums of diagonal elements so that we can directly use them ++ later. See the next comment to know we why need them. */ ++ diag = alloca (k * sizeof (mantissa_store_t)); ++ mantissa_store_t d = 0; ++ for (i = 1; i <= ip; i++) ++ { ++ d += X[i] * (mantissa_store_t) Y[i]; ++ diag[i] = d; ++ } ++ while (i < k) ++ diag[i++] = d; + +- if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; } +- else if (Y[0] == ZERO) {__cpy(x,z,p); return; } ++ while (k > p2) ++ { ++ long lim = k / 2; + +- if (X[0] != Y[0]) { +- if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } +- else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; } +- } +- else { +- if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } +- else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; } +- else Z[0] = ZERO; +- } +-} ++ if (k % 2 == 0) ++ /* We want to add this only once, but since we subtract it in the sum ++ of products above, we add twice. */ ++ zk += 2 * X[lim] * (mantissa_store_t) Y[lim]; + ++ for (i = k - p2, j = p2; i < j; i++, j--) ++ zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]); + +-/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */ +-/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */ +-/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */ +-/* *x & *y are left unchanged. 
*/ ++ zk -= diag[k - 1]; + +-void +-SECTION +-__mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { ++ DIV_RADIX (zk, Z[k]); ++ k--; ++ } + +- int i, i1, i2, j, k, k2; +- double u; ++ /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i ++ goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the ++ number of multiplications, we halve the range and if k is an even number, ++ add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute ++ X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j]. ++ ++ This reduction tells us that we're summing two things, the first term ++ through the half range and the negative of the sum of the product of all ++ terms of X and Y in the full range. i.e. ++ ++ SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in ++ a single loop so that it completes in O(n) time and can hence be directly ++ used in the loop below. */ ++ while (k > 1) ++ { ++ long lim = k / 2; ++ ++ if (k % 2 == 0) ++ /* We want to add this only once, but since we subtract it in the sum ++ of products above, we add twice. */ ++ zk += 2 * X[lim] * (mantissa_store_t) Y[lim]; + +- /* Is z=0? */ +- if (X[0]*Y[0]==ZERO) +- { Z[0]=ZERO; return; } +- +- /* Multiply, add and carry */ +- k2 = (p<3) ? p+p : p+3; +- Z[k2]=ZERO; +- for (k=k2; k>1; ) { +- if (k > p) {i1=k-p; i2=p+1; } +- else {i1=1; i2=k; } +- for (i=i1,j=i2-1; i Z[k]) u -= RADIX; +- Z[k] -= u; +- Z[--k] = u*RADIXI; +- } +- +- /* Is there a carry beyond the most significant digit? */ +- if (Z[1] == ZERO) { +- for (i=1; i<=p; i++) Z[i]=Z[i+1]; +- EZ = EX + EY - 1; } +- else +- EZ = EX + EY; ++ for (i = 1, j = k - 1; i < j; i++, j--) ++ zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]); ++ ++ zk -= diag[k - 1]; ++ ++ DIV_RADIX (zk, Z[k]); ++ k--; ++ } ++ Z[k] = zk; ++ ++ /* Get the exponent sum into an intermediate variable. 
This is a subtle ++ optimization, where given enough registers, all operations on the exponent ++ happen in registers and the result is written out only once into EZ. */ ++ int e = EX + EY; ++ ++ /* Is there a carry beyond the most significant digit? */ ++ if (__glibc_unlikely (Z[1] == 0)) ++ { ++ for (i = 1; i <= p2; i++) ++ Z[i] = Z[i + 1]; ++ e--; ++ } + ++ EZ = e; + Z[0] = X[0] * Y[0]; + } ++#endif ++ ++#ifndef NO__SQR ++/* Square *X and store result in *Y. X and Y may not overlap. For P in ++ [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the ++ error is bounded by 1.001 ULP. This is a faster special case of ++ multiplication. */ ++void ++SECTION ++__sqr (const mp_no *x, mp_no *y, int p) ++{ ++ long i, j, k, ip; ++ mantissa_store_t yk; + ++ /* Is z=0? */ ++ if (__glibc_unlikely (X[0] == 0)) ++ { ++ Y[0] = 0; ++ return; ++ } + +-/* Invert a multiple precision number. Set *y = 1 / *x. */ +-/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */ +-/* 2.001*r**(1-p) for p>3. */ +-/* *x=0 is not permissible. *x is left unchanged. */ ++ /* We need not iterate through all X's since it's pointless to ++ multiply zeroes. */ ++ for (ip = p; ip > 0; ip--) ++ if (X[ip] != 0) ++ break; + +-static +-SECTION +-void __inv(const mp_no *x, mp_no *y, int p) { +- int i; +-#if 0 +- int l; ++ k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; ++ ++ while (k > 2 * ip + 1) ++ Y[k--] = 0; ++ ++ yk = 0; ++ ++ while (k > p) ++ { ++ mantissa_store_t yk2 = 0; ++ long lim = k / 2; ++ ++ if (k % 2 == 0) ++ yk += X[lim] * (mantissa_store_t) X[lim]; ++ ++ /* In __mul, this loop (and the one within the next while loop) run ++ between a range to calculate the mantissa as follows: ++ ++ Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] ++ + X[n] * Y[k] ++ ++ For X == Y, we can get away with summing halfway and doubling the ++ result. For cases where the range size is even, the mid-point needs ++ to be added separately (above). 
*/ ++ for (i = k - p, j = p; i < j; i++, j--) ++ yk2 += X[i] * (mantissa_store_t) X[j]; ++ ++ yk += 2 * yk2; ++ ++ DIV_RADIX (yk, Y[k]); ++ k--; ++ } ++ ++ while (k > 1) ++ { ++ mantissa_store_t yk2 = 0; ++ long lim = k / 2; ++ ++ if (k % 2 == 0) ++ yk += X[lim] * (mantissa_store_t) X[lim]; ++ ++ /* Likewise for this loop. */ ++ for (i = 1, j = k - 1; i < j; i++, j--) ++ yk2 += X[i] * (mantissa_store_t) X[j]; ++ ++ yk += 2 * yk2; ++ ++ DIV_RADIX (yk, Y[k]); ++ k--; ++ } ++ Y[k] = yk; ++ ++ /* Squares are always positive. */ ++ Y[0] = 1; ++ ++ /* Get the exponent sum into an intermediate variable. This is a subtle ++ optimization, where given enough registers, all operations on the exponent ++ happen in registers and the result is written out only once into EZ. */ ++ int e = EX * 2; ++ ++ /* Is there a carry beyond the most significant digit? */ ++ if (__glibc_unlikely (Y[1] == 0)) ++ { ++ for (i = 1; i <= p; i++) ++ Y[i] = Y[i + 1]; ++ e--; ++ } ++ ++ EY = e; ++} + #endif ++ ++/* Invert *X and store in *Y. Relative error bound: ++ - For P = 2: 1.001 * R ^ (1 - P) ++ - For P = 3: 1.063 * R ^ (1 - P) ++ - For P > 3: 2.001 * R ^ (1 - P) ++ ++ *X = 0 is not permissible. */ ++static void ++SECTION ++__inv (const mp_no *x, mp_no *y, int p) ++{ ++ long i; + double t; +- mp_no z,w; +- static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3, +- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4}; +- const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, +- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, +- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, +- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; +- +- __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p); +- t=ONE/t; __dbl_mp(t,y,p); EY -= EX; +- +- for (i=0; i 3: 3.001 * R ^ (1 - P) + +-/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */ +-/* are left unchanged. x&y may overlap but not x&z or y&z. */ +-/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */ +-/* and 3.001*r**(1-p) for p>3. 
*y=0 is not permissible. */ +- ++ *X = 0 is not permissible. */ + void + SECTION +-__dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) { +- ++__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p) ++{ + mp_no w; + +- if (X[0] == ZERO) Z[0] = ZERO; +- else {__inv(y,&w,p); __mul(x,&w,z,p);} ++ if (X[0] == 0) ++ Z[0] = 0; ++ else ++ { ++ __inv (y, &w, p); ++ __mul (x, &w, z, p); ++ } + } +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa.h ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.h +@@ -1,7 +1,7 @@ + /* + * IBM Accurate Mathematical Library + * Written by International Business Machines Corp. +- * Copyright (C) 2001, 2011 Free Software Foundation, Inc. ++ * Copyright (C) 2001-2017 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by +@@ -23,36 +23,58 @@ + /* FUNCTIONS: */ + /* mcr */ + /* acr */ +-/* cr */ + /* cpy */ +-/* cpymn */ + /* mp_dbl */ + /* dbl_mp */ + /* add */ + /* sub */ + /* mul */ +-/* inv */ + /* dvd */ + /* */ + /* Arithmetic functions for multiple precision numbers. */ + /* Common types and definition */ + /************************************************************************/ + ++#include + +-typedef struct {/* This structure holds the details of a multi-precision */ +- int e; /* floating point number, x: d[0] holds its sign (-1,0 or 1) */ +- double d[40]; /* e holds its exponent (...,-2,-1,0,1,2,...) and */ +-} mp_no; /* d[1]...d[p] hold its mantissa digits. The value of x is, */ +- /* x = d[1]*r**(e-1) + d[2]*r**(e-2) + ... + d[p]*r**(e-p). */ +- /* Here r = 2**24, 0 <= d[i] < r and 1 <= p <= 32. */ +- /* p is a global variable. A multi-precision number is */ +- /* always normalized. Namely, d[1] > 0. 
An exception is */ +- /* a zero which is characterized by d[0] = 0. The terms */ +- /* d[p+1], d[p+2], ... of a none zero number have no */ +- /* significance and so are the terms e, d[1],d[2],... */ +- /* of a zero. */ ++/* The mp_no structure holds the details of a multi-precision floating point ++ number. + +-typedef union { int i[2]; double d; } number; ++ - The radix of the number (R) is 2 ^ 24. ++ ++ - E: The exponent of the number. ++ ++ - D[0]: The sign (-1, 1) or 0 if the value is 0. In the latter case, the ++ values of the remaining members of the structure are ignored. ++ ++ - D[1] - D[p]: The mantissa of the number where: ++ ++ 0 <= D[i] < R and ++ P is the precision of the number and 1 <= p <= 32 ++ ++ D[p+1] ... D[39] have no significance. ++ ++ - The value of the number is: ++ ++ D[1] * R ^ (E - 1) + D[2] * R ^ (E - 2) ... D[p] * R ^ (E - p) ++ ++ */ ++typedef struct ++{ ++ int e; ++ mantissa_t d[40]; ++} mp_no; ++ ++typedef union ++{ ++ int i[2]; ++ double d; ++} number; ++ ++/* TODO: With only a partial backport of the constant cleanup we don't ++ define __mpone or __mptwo here for other code to use. */ ++/* extern const mp_no __mpone; ++extern const mp_no __mptwo; */ + + #define X x->d + #define Y y->d +@@ -63,21 +85,73 @@ typedef union { int i[2]; double d; } nu + + #define ABS(x) ((x) < 0 ? 
-(x) : (x)) + +-int __acr(const mp_no *, const mp_no *, int); +-// int __cr(const mp_no *, const mp_no *, int); +-void __cpy(const mp_no *, mp_no *, int); +-// void __cpymn(const mp_no *, int, mp_no *, int); +-void __mp_dbl(const mp_no *, double *, int); +-void __dbl_mp(double, mp_no *, int); +-void __add(const mp_no *, const mp_no *, mp_no *, int); +-void __sub(const mp_no *, const mp_no *, mp_no *, int); +-void __mul(const mp_no *, const mp_no *, mp_no *, int); +-// void __inv(const mp_no *, mp_no *, int); +-void __dvd(const mp_no *, const mp_no *, mp_no *, int); ++#ifndef RADIXI ++# define RADIXI 0x1.0p-24 /* 2^-24 */ ++#endif ++ ++#ifndef TWO52 ++# define TWO52 0x1.0p52 /* 2^52 */ ++#endif ++ ++#define TWO5 TWOPOW (5) /* 2^5 */ ++#define TWO8 TWOPOW (8) /* 2^52 */ ++#define TWO10 TWOPOW (10) /* 2^10 */ ++#define TWO18 TWOPOW (18) /* 2^18 */ ++#define TWO19 TWOPOW (19) /* 2^19 */ ++#define TWO23 TWOPOW (23) /* 2^23 */ ++ ++#define TWO57 0x1.0p57 /* 2^57 */ ++#define TWO71 0x1.0p71 /* 2^71 */ ++#define TWOM1032 0x1.0p-1032 /* 2^-1032 */ ++#define TWOM1022 0x1.0p-1022 /* 2^-1022 */ ++ ++#define HALF 0x1.0p-1 /* 1/2 */ ++#define MHALF -0x1.0p-1 /* -1/2 */ ++#define HALFRAD 0x1.0p23 /* 2^23 */ ++ ++int __acr (const mp_no *, const mp_no *, int); ++void __cpy (const mp_no *, mp_no *, int); ++void __mp_dbl (const mp_no *, double *, int); ++void __dbl_mp (double, mp_no *, int); ++void __add (const mp_no *, const mp_no *, mp_no *, int); ++void __sub (const mp_no *, const mp_no *, mp_no *, int); ++void __mul (const mp_no *, const mp_no *, mp_no *, int); ++void __sqr (const mp_no *, mp_no *, int); ++void __dvd (const mp_no *, const mp_no *, mp_no *, int); + + extern void __mpatan (mp_no *, mp_no *, int); + extern void __mpatan2 (mp_no *, mp_no *, mp_no *, int); + extern void __mpsqrt (mp_no *, mp_no *, int); +-extern void __mpexp (mp_no *, mp_no *__y, int); ++extern void __mpexp (mp_no *, mp_no *, int); + extern void __c32 (mp_no *, mp_no *, mp_no *, int); + extern int 
__mpranred (double, mp_no *, int); ++ ++/* Given a power POW, build a multiprecision number 2^POW. */ ++static inline void ++__pow_mp (int pow, mp_no *y, int p) ++{ ++ int i, rem; ++ ++ /* The exponent is E such that E is a factor of 2^24. The remainder (of the ++ form 2^x) goes entirely into the first digit of the mantissa as it is ++ always less than 2^24. */ ++ EY = pow / 24; ++ rem = pow - EY * 24; ++ EY++; ++ ++ /* If the remainder is negative, it means that POW was negative since ++ |EY * 24| <= |pow|. Adjust so that REM is positive and still less than ++ 24 because of which, the mantissa digit is less than 2^24. */ ++ if (rem < 0) ++ { ++ EY--; ++ rem += 24; ++ } ++ /* The sign of any 2^x is always positive. */ ++ Y[0] = 1; ++ Y[1] = 1 << rem; ++ ++ /* Everything else is 0. */ ++ for (i = 2; i <= p; i++) ++ Y[i] = 0; ++} +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpexp.c ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.c +@@ -69,13 +69,13 @@ __mpexp(mp_no *x, mp_no *y, int p) { + for (i=0; iEX; i--) a *= RADIX; + b = X[1]*RADIXI; m2 = 24*EX; +- for (; b0; i--,n--) { if (m1np[i][p]+m2>0) break; } + } + +@@ -84,8 +84,8 @@ __mpexp(mp_no *x, mp_no *y, int p) { + __mul(x,&mpt1,&mps,p); + + /* Evaluate the polynomial. Put result in mpt2 */ +- mpone.e=1; mpone.d[0]=ONE; mpone.d[1]=ONE; +- mpk.e = 1; mpk.d[0] = ONE; mpk.d[1]=__mpexp_nn[n].d; ++ mpone.e=1; mpone.d[0]=1; mpone.d[1]=1; ++ mpk.e = 1; mpk.d[0] = 1; mpk.d[1]=__mpexp_nn[n].d; + __dvd(&mps,&mpk,&mpt1,p); + __add(&mpone,&mpt1,&mpak,p); + for (k=n-1; k>1; k--) { +@@ -99,9 +99,9 @@ __mpexp(mp_no *x, mp_no *y, int p) { + + /* Raise polynomial value to the power of 2**m. Put result in y */ + for (k=0,j=0; k. 
+ */ +-/************************************************************************/ +-/* MODULE_NAME: mpa.c */ +-/* */ +-/* FUNCTIONS: */ +-/* mcr */ +-/* acr */ +-/* cr */ +-/* cpy */ +-/* cpymn */ +-/* norm */ +-/* denorm */ +-/* mp_dbl */ +-/* dbl_mp */ +-/* add_magnitudes */ +-/* sub_magnitudes */ +-/* add */ +-/* sub */ +-/* mul */ +-/* inv */ +-/* dvd */ +-/* */ +-/* Arithmetic functions for multiple precision numbers. */ +-/* Relative errors are bounded */ +-/************************************************************************/ +- +- +-#include "endian.h" +-#include "mpa.h" +-#include "mpa2.h" +-#include /* For MIN() */ +-/* mcr() compares the sizes of the mantissas of two multiple precision */ +-/* numbers. Mantissas are compared regardless of the signs of the */ +-/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */ +-/* disregarded. */ +-static int mcr(const mp_no *x, const mp_no *y, int p) { +- long i; +- long p2 = p; +- for (i=1; i<=p2; i++) { +- if (X[i] == Y[i]) continue; +- else if (X[i] > Y[i]) return 1; +- else return -1; } +- return 0; +-} +- +- +- +-/* acr() compares the absolute values of two multiple precision numbers */ +-int __acr(const mp_no *x, const mp_no *y, int p) { +- long i; +- +- if (X[0] == ZERO) { +- if (Y[0] == ZERO) i= 0; +- else i=-1; +- } +- else if (Y[0] == ZERO) i= 1; +- else { +- if (EX > EY) i= 1; +- else if (EX < EY) i=-1; +- else i= mcr(x,y,p); +- } +- +- return i; +-} +- +- +-/* cr90 compares the values of two multiple precision numbers */ +-int __cr(const mp_no *x, const mp_no *y, int p) { +- int i; +- +- if (X[0] > Y[0]) i= 1; +- else if (X[0] < Y[0]) i=-1; +- else if (X[0] < ZERO ) i= __acr(y,x,p); +- else i= __acr(x,y,p); +- +- return i; +-} +- +- +-/* Copy a multiple precision number. Set *y=*x. x=y is permissible. 
*/ +-void __cpy(const mp_no *x, mp_no *y, int p) { +- long i; +- +- EY = EX; +- for (i=0; i <= p; i++) Y[i] = X[i]; +- +- return; +-} +- +- +-/* Copy a multiple precision number x of precision m into a */ +-/* multiple precision number y of precision n. In case n>m, */ +-/* the digits of y beyond the m'th are set to zero. In case */ +-/* n= 2**(-1022))) */ +-static void norm(const mp_no *x, double *y, int p) +-{ +- #define R radixi.d +- long i; +-#if 0 +- int k; +-#endif +- double a,c,u,v,z[5]; +- if (p<5) { +- if (p==1) c = X[1]; +- else if (p==2) c = X[1] + R* X[2]; +- else if (p==3) c = X[1] + R*(X[2] + R* X[3]); +- else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]); +- } +- else { +- for (a=ONE, z[1]=X[1]; z[1] < TWO23; ) +- {a *= TWO; z[1] *= TWO; } +- +- for (i=2; i<5; i++) { +- z[i] = X[i]*a; +- u = (z[i] + CUTTER)-CUTTER; +- if (u > z[i]) u -= RADIX; +- z[i] -= u; +- z[i-1] += u*RADIXI; +- } +- +- u = (z[3] + TWO71) - TWO71; +- if (u > z[3]) u -= TWO19; +- v = z[3]-u; +- +- if (v == TWO18) { +- if (z[4] == ZERO) { +- for (i=5; i <= p; i++) { +- if (X[i] == ZERO) continue; +- else {z[3] += ONE; break; } +- } +- } +- else z[3] += ONE; +- } +- +- c = (z[1] + R *(z[2] + R * z[3]))/a; +- } + +- c *= X[0]; +- +- for (i=1; iEX; i--) c *= RADIXI; +- +- *y = c; +- return; +-#undef R +-} +- +-/* Convert a multiple precision number *x into a double precision */ +-/* number *y, denormalized case (|x| < 2**(-1022))) */ +-static void denorm(const mp_no *x, double *y, int p) ++/* Define __mul and __sqr and use the rest from generic code. */ ++#define NO__MUL ++#define NO__SQR ++ ++#include ++ ++/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X ++ and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P ++ digits. In case P > 3 the error is bounded by 1.001 ULP. 
*/ ++void ++__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) + { +- long i,k; ++ long i, i1, i2, j, k, k2; + long p2 = p; +- double c,u,z[5]; +-#if 0 +- double a,v; +-#endif ++ double u, zk, zk2; + +-#define R radixi.d +- if (EX<-44 || (EX==-44 && X[1] z[3]) u -= TWO5; +- +- if (u==z[3]) { +- for (i=k+1; i <= p2; i++) { +- if (X[i] == ZERO) continue; +- else {z[3] += ONE; break; } ++ /* Is z=0? */ ++ if (__glibc_unlikely (X[0] * Y[0] == 0)) ++ { ++ Z[0] = 0; ++ return; + } +- } +- +- c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10); + +- *y = c*TWOM1032; +- return; +- +-#undef R +-} +- +-/* Convert a multiple precision number *x into a double precision number *y. */ +-/* The result is correctly rounded to the nearest/even. *x is left unchanged */ +- +-void __mp_dbl(const mp_no *x, double *y, int p) { +-#if 0 +- int i,k; +- double a,c,u,v,z[5]; ++ /* Multiply, add and carry */ ++ k2 = (p2 < 3) ? p2 + p2 : p2 + 3; ++ zk = Z[k2] = 0; ++ for (k = k2; k > 1;) ++ { ++ if (k > p2) ++ { ++ i1 = k - p2; ++ i2 = p2 + 1; ++ } ++ else ++ { ++ i1 = 1; ++ i2 = k; ++ } ++#if 1 ++ /* Rearrange this inner loop to allow the fmadd instructions to be ++ independent and execute in parallel on processors that have ++ dual symmetrical FP pipelines. */ ++ if (i1 < (i2 - 1)) ++ { ++ /* Make sure we have at least 2 iterations. */ ++ if (((i2 - i1) & 1L) == 1L) ++ { ++ /* Handle the odd iterations case. */ ++ zk2 = x->d[i2 - 1] * y->d[i1]; ++ } ++ else ++ zk2 = 0.0; ++ /* Do two multiply/adds per loop iteration, using independent ++ accumulators; zk and zk2. */ ++ for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) ++ { ++ zk += x->d[i] * y->d[j]; ++ zk2 += x->d[i + 1] * y->d[j - 1]; ++ } ++ zk += zk2; /* Final sum. */ ++ } ++ else ++ { ++ /* Special case when iterations is 1. */ ++ zk += x->d[i1] * y->d[i1]; ++ } ++#else ++ /* The original code. 
*/ ++ for (i = i1, j = i2 - 1; i < i2; i++, j--) ++ zk += X[i] * Y[j]; + #endif + +- if (X[0] == ZERO) {*y = ZERO; return; } +- +- if (EX> -42) norm(x,y,p); +- else if (EX==-42 && X[1]>=TWO10) norm(x,y,p); +- else denorm(x,y,p); +-} +- +- +-/* dbl_mp() converts a double precision number x into a multiple precision */ +-/* number *y. If the precision p is too small the result is truncated. x is */ +-/* left unchanged. */ +- +-void __dbl_mp(double x, mp_no *y, int p) { ++ u = (zk + CUTTER) - CUTTER; ++ if (u > zk) ++ u -= RADIX; ++ Z[k] = zk - u; ++ zk = u * RADIXI; ++ --k; ++ } ++ Z[k] = zk; + +- long i,n; +- long p2 = p; +- double u; ++ int e = EX + EY; ++ /* Is there a carry beyond the most significant digit? */ ++ if (Z[1] == 0) ++ { ++ for (i = 1; i <= p2; i++) ++ Z[i] = Z[i + 1]; ++ e--; ++ } + +- /* Sign */ +- if (x == ZERO) {Y[0] = ZERO; return; } +- else if (x > ZERO) Y[0] = ONE; +- else {Y[0] = MONE; x=-x; } +- +- /* Exponent */ +- for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI; +- for ( ; x < ONE; EY -= ONE) x *= RADIX; +- +- /* Digits */ +- n=MIN(p2,4); +- for (i=1; i<=n; i++) { +- u = (x + TWO52) - TWO52; +- if (u>x) u -= ONE; +- Y[i] = u; x -= u; x *= RADIX; } +- for ( ; i<=p2; i++) Y[i] = ZERO; +- return; ++ EZ = e; ++ Z[0] = X[0] * Y[0]; + } + ++/* Square *X and store result in *Y. X and Y may not overlap. For P in ++ [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the ++ error is bounded by 1.001 ULP. This is a faster special case of ++ multiplication. */ ++void ++__sqr (const mp_no *x, mp_no *y, int p) ++{ ++ long i, j, k, ip; ++ double u, yk; + +-/* add_magnitudes() adds the magnitudes of *x & *y assuming that */ +-/* abs(*x) >= abs(*y) > 0. */ +-/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */ +-/* No guard digit is used. The result equals the exact sum, truncated. */ +-/* *x & *y are left unchanged. */ ++ /* Is z=0? 
*/ ++ if (__glibc_unlikely (X[0] == 0)) ++ { ++ Y[0] = 0; ++ return; ++ } + +-static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { ++ /* We need not iterate through all X's since it's pointless to ++ multiply zeroes. */ ++ for (ip = p; ip > 0; ip--) ++ if (X[ip] != 0) ++ break; + +- long i,j,k; +- long p2 = p; ++ k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; + +- EZ = EX; ++ while (k > 2 * ip + 1) ++ Y[k--] = 0; + +- i=p2; j=p2+ EY - EX; k=p2+1; ++ yk = 0; + +- if (j<1) +- {__cpy(x,z,p); return; } +- else Z[k] = ZERO; +- +- for (; j>0; i--,j--) { +- Z[k] += X[i] + Y[j]; +- if (Z[k] >= RADIX) { +- Z[k] -= RADIX; +- Z[--k] = ONE; } +- else +- Z[--k] = ZERO; +- } +- +- for (; i>0; i--) { +- Z[k] += X[i]; +- if (Z[k] >= RADIX) { +- Z[k] -= RADIX; +- Z[--k] = ONE; } +- else +- Z[--k] = ZERO; +- } +- +- if (Z[1] == ZERO) { +- for (i=1; i<=p2; i++) Z[i] = Z[i+1]; } +- else EZ += ONE; +-} ++ while (k > p) ++ { ++ double yk2 = 0.0; ++ long lim = k / 2; + ++ if (k % 2 == 0) ++ { ++ yk += X[lim] * X[lim]; ++ lim--; ++ } + +-/* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */ +-/* abs(*x) > abs(*y) > 0. */ +-/* The sign of the difference *z is undefined. x&y may overlap but not x&z */ +-/* or y&z. One guard digit is used. The error is less than one ulp. */ +-/* *x & *y are left unchanged. */ ++ /* In __mul, this loop (and the one within the next while loop) run ++ between a range to calculate the mantissa as follows: + +-static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { ++ Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] ++ + X[n] * Y[k] + +- long i,j,k; +- long p2 = p; ++ For X == Y, we can get away with summing halfway and doubling the ++ result. For cases where the range size is even, the mid-point needs ++ to be added separately (above). 
*/ ++ for (i = k - p, j = p; i <= lim; i++, j--) ++ yk2 += X[i] * X[j]; + +- EZ = EX; ++ yk += 2.0 * yk2; + +- if (EX == EY) { +- i=j=k=p2; +- Z[k] = Z[k+1] = ZERO; } +- else { +- j= EX - EY; +- if (j > p2) {__cpy(x,z,p); return; } +- else { +- i=p2; j=p2+1-j; k=p2; +- if (Y[j] > ZERO) { +- Z[k+1] = RADIX - Y[j--]; +- Z[k] = MONE; } +- else { +- Z[k+1] = ZERO; +- Z[k] = ZERO; j--;} ++ u = (yk + CUTTER) - CUTTER; ++ if (u > yk) ++ u -= RADIX; ++ Y[k--] = yk - u; ++ yk = u * RADIXI; + } +- } +- +- for (; j>0; i--,j--) { +- Z[k] += (X[i] - Y[j]); +- if (Z[k] < ZERO) { +- Z[k] += RADIX; +- Z[--k] = MONE; } +- else +- Z[--k] = ZERO; +- } +- +- for (; i>0; i--) { +- Z[k] += X[i]; +- if (Z[k] < ZERO) { +- Z[k] += RADIX; +- Z[--k] = MONE; } +- else +- Z[--k] = ZERO; +- } +- +- for (i=1; Z[i] == ZERO; i++) ; +- EZ = EZ - i + 1; +- for (k=1; i <= p2+1; ) +- Z[k++] = Z[i++]; +- for (; k <= p2; ) +- Z[k++] = ZERO; +- +- return; +-} +- +- +-/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap */ +-/* but not x&z or y&z. One guard digit is used. The error is less than */ +-/* one ulp. *x & *y are left unchanged. */ +- +-void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) { +- +- int n; +- +- if (X[0] == ZERO) {__cpy(y,z,p); return; } +- else if (Y[0] == ZERO) {__cpy(x,z,p); return; } +- +- if (X[0] == Y[0]) { +- if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } +- else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; } +- } +- else { +- if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } +- else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; } +- else Z[0] = ZERO; +- } +- return; +-} +- +- +-/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */ +-/* overlap but not x&z or y&z. One guard digit is used. The error is */ +-/* less than one ulp. *x & *y are left unchanged. 
*/ +- +-void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { +- +- int n; +- +- if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; } +- else if (Y[0] == ZERO) {__cpy(x,z,p); return; } +- +- if (X[0] != Y[0]) { +- if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } +- else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; } +- } +- else { +- if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } +- else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; } +- else Z[0] = ZERO; +- } +- return; +-} +- +- +-/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */ +-/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */ +-/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */ +-/* *x & *y are left unchanged. */ + +-void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { +- +- long i, i1, i2, j, k, k2; +- long p2 = p; +- double u, zk, zk2; +- +- /* Is z=0? */ +- if (X[0]*Y[0]==ZERO) +- { Z[0]=ZERO; return; } +- +- /* Multiply, add and carry */ +- k2 = (p2<3) ? p2+p2 : p2+3; +- zk = Z[k2]=ZERO; +- for (k=k2; k>1; ) { +- if (k > p2) {i1=k-p2; i2=p2+1; } +- else {i1=1; i2=k; } +-#if 1 +- /* rearange this inner loop to allow the fmadd instructions to be +- independent and execute in parallel on processors that have +- dual symetrical FP pipelines. */ +- if (i1 < (i2-1)) ++ while (k > 1) + { +- /* make sure we have at least 2 iterations */ +- if (((i2 - i1) & 1L) == 1L) +- { +- /* Handle the odd iterations case. */ +- zk2 = x->d[i2-1]*y->d[i1]; +- } +- else +- zk2 = zero.d; +- /* Do two multiply/adds per loop iteration, using independent +- accumulators; zk and zk2. */ +- for (i=i1,j=i2-1; id[i]*y->d[j]; +- zk2 += x->d[i+1]*y->d[j-1]; ++ double yk2 = 0.0; ++ long lim = k / 2; ++ ++ if (k % 2 == 0) ++ { ++ yk += X[lim] * X[lim]; ++ lim--; + } +- zk += zk2; /* final sum. */ +- } +- else ++ ++ /* Likewise for this loop. 
*/ ++ for (i = 1, j = k - 1; i <= lim; i++, j--) ++ yk2 += X[i] * X[j]; ++ ++ yk += 2.0 * yk2; ++ ++ u = (yk + CUTTER) - CUTTER; ++ if (u > yk) ++ u -= RADIX; ++ Y[k--] = yk - u; ++ yk = u * RADIXI; ++ } ++ Y[k] = yk; ++ ++ /* Squares are always positive. */ ++ Y[0] = 1.0; ++ ++ int e = EX * 2; ++ /* Is there a carry beyond the most significant digit? */ ++ if (__glibc_unlikely (Y[1] == 0)) + { +- /* Special case when iterations is 1. */ +- zk += x->d[i1]*y->d[i1]; ++ for (i = 1; i <= p; i++) ++ Y[i] = Y[i + 1]; ++ e--; + } +-#else +- /* The orginal code. */ +- for (i=i1,j=i2-1; i zk) u -= RADIX; +- Z[k] = zk - u; +- zk = u*RADIXI; +- --k; +- } +- Z[k] = zk; +- +- /* Is there a carry beyond the most significant digit? */ +- if (Z[1] == ZERO) { +- for (i=1; i<=p2; i++) Z[i]=Z[i+1]; +- EZ = EX + EY - 1; } +- else +- EZ = EX + EY; +- +- Z[0] = X[0] * Y[0]; +- return; +-} +- +- +-/* Invert a multiple precision number. Set *y = 1 / *x. */ +-/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */ +-/* 2.001*r**(1-p) for p>3. */ +-/* *x=0 is not permissible. *x is left unchanged. */ +- +-void __inv(const mp_no *x, mp_no *y, int p) { +- long i; +-#if 0 +- int l; +-#endif +- double t; +- mp_no z,w; +- static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3, +- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4}; +- const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, +- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, +- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, +- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; +- +- __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p); +- t=ONE/t; __dbl_mp(t,y,p); EY -= EX; +- +- for (i=0; i3. *y=0 is not permissible. 
*/ +- +-void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) { +- +- mp_no w; +- +- if (X[0] == ZERO) Z[0] = ZERO; +- else {__inv(y,&w,p); __mul(x,&w,z,p);} +- return; ++ EY = e; + } +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/atnat.h ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat.h +@@ -138,8 +138,6 @@ + #endif + #endif + +-#define ZERO zero.d +-#define ONE one.d + #define A a.d + #define B b.d + #define C c.d +@@ -160,7 +158,5 @@ + #define U6 u6.d + #define U7 u7.d + #define U8 u8.d +-#define TWO8 two8.d +-#define TWO52 two52.d + + #endif +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat2.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/atnat2.h ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat2.h +@@ -174,11 +174,4 @@ + #endif + #endif + +-#define ZERO zero.d +-#define MZERO mzero.d +-#define ONE one.d +-#define TWO8 two8.d +-#define TWO52 two52.d +-#define TWOM1022 twom1022.d +- + #endif +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa2.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa2.h ++++ /dev/null +@@ -1,94 +0,0 @@ +- +-/* +- * IBM Accurate Mathematical Library +- * Written by International Business Machines Corp. +- * Copyright (C) 2001 Free Software Foundation, Inc. +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU Lesser General Public License as published by +- * the Free Software Foundation; either version 2.1 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +- * GNU Lesser General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public License +- * along with this program; if not, see . +- */ +- +-/**************************************************************************/ +-/* */ +-/* MODULE_NAME:mpa2.h */ +-/* */ +-/* */ +-/* variables prototype and definition according to type of processor */ +-/* types definition */ +-/**************************************************************************/ +- +-#ifndef MPA2_H +-#define MPA2_H +- +- +-#ifdef BIG_ENDI +-static const number +-/**/ radix = {{0x41700000, 0x00000000} }, /* 2**24 */ +-/**/ radixi = {{0x3e700000, 0x00000000} }, /* 2**-24 */ +-/**/ cutter = {{0x44b00000, 0x00000000} }, /* 2**76 */ +-/**/ zero = {{0x00000000, 0x00000000} }, /* 0 */ +-/**/ one = {{0x3ff00000, 0x00000000} }, /* 1 */ +-/**/ mone = {{0xbff00000, 0x00000000} }, /* -1 */ +-/**/ two = {{0x40000000, 0x00000000} }, /* 2 */ +-/**/ two5 = {{0x40400000, 0x00000000} }, /* 2**5 */ +-/**/ two10 = {{0x40900000, 0x00000000} }, /* 2**10 */ +-/**/ two18 = {{0x41100000, 0x00000000} }, /* 2**18 */ +-/**/ two19 = {{0x41200000, 0x00000000} }, /* 2**19 */ +-/**/ two23 = {{0x41600000, 0x00000000} }, /* 2**23 */ +-/**/ two52 = {{0x43300000, 0x00000000} }, /* 2**52 */ +-/**/ two57 = {{0x43800000, 0x00000000} }, /* 2**57 */ +-/**/ two71 = {{0x44600000, 0x00000000} }, /* 2**71 */ +-/**/ twom1032 = {{0x00000400, 0x00000000} }; /* 2**-1032 */ +- +-#else +-#ifdef LITTLE_ENDI +-static const number +-/**/ radix = {{0x00000000, 0x41700000} }, /* 2**24 */ +-/**/ radixi = {{0x00000000, 0x3e700000} }, /* 2**-24 */ +-/**/ cutter = {{0x00000000, 0x44b00000} }, /* 2**76 */ +-/**/ zero = {{0x00000000, 0x00000000} }, /* 0 */ +-/**/ one = {{0x00000000, 0x3ff00000} }, /* 1 */ +-/**/ mone = {{0x00000000, 0xbff00000} }, /* -1 */ +-/**/ two = {{0x00000000, 0x40000000} }, /* 2 */ +-/**/ two5 = {{0x00000000, 0x40400000} }, /* 2**5 */ +-/**/ two10 = {{0x00000000, 0x40900000} }, 
/* 2**10 */ +-/**/ two18 = {{0x00000000, 0x41100000} }, /* 2**18 */ +-/**/ two19 = {{0x00000000, 0x41200000} }, /* 2**19 */ +-/**/ two23 = {{0x00000000, 0x41600000} }, /* 2**23 */ +-/**/ two52 = {{0x00000000, 0x43300000} }, /* 2**52 */ +-/**/ two57 = {{0x00000000, 0x43800000} }, /* 2**57 */ +-/**/ two71 = {{0x00000000, 0x44600000} }, /* 2**71 */ +-/**/ twom1032 = {{0x00000000, 0x00000400} }; /* 2**-1032 */ +- +-#endif +-#endif +- +-#define RADIX radix.d +-#define RADIXI radixi.d +-#define CUTTER cutter.d +-#define ZERO zero.d +-#define ONE one.d +-#define MONE mone.d +-#define TWO two.d +-#define TWO5 two5.d +-#define TWO10 two10.d +-#define TWO18 two18.d +-#define TWO19 two19.d +-#define TWO23 two23.d +-#define TWO52 two52.d +-#define TWO57 two57.d +-#define TWO71 two71.d +-#define TWOM1032 twom1032.d +- +- +-#endif +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan.h ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.h +@@ -177,6 +177,3 @@ __atan_twonm1[33] = { + + #endif + #endif +- +-#define ONE __atan_one.d +-#define TWO __atan_two.d +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan2.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan2.c ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan2.c +@@ -49,18 +49,16 @@ void + SECTION + __mpatan2(mp_no *y, mp_no *x, mp_no *z, int p) { + +- static const double ZERO = 0.0, ONE = 1.0; +- + mp_no mpone = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, + 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, + 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; + mp_no mpt1,mpt2,mpt3; + + +- if (X[0] <= ZERO) { +- mpone.e = 1; mpone.d[0] = mpone.d[1] = ONE; ++ if (X[0] <= 0) { ++ mpone.e = 1; mpone.d[0] = mpone.d[1] = 1; + __dvd(x,y,&mpt1,p); __mul(&mpt1,&mpt1,&mpt2,p); +- if 
(mpt1.d[0] != ZERO) mpt1.d[0] = ONE; ++ if (mpt1.d[0] != 0) mpt1.d[0] = 1; + __add(&mpt2,&mpone,&mpt3,p); __mpsqrt(&mpt3,&mpt2,p); + __add(&mpt1,&mpt2,&mpt3,p); mpt3.d[0]=Y[0]; + __mpatan(&mpt3,&mpt1,p); __add(&mpt1,&mpt1,z,p); +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpexp.h ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.h +@@ -159,11 +159,4 @@ extern const number __mpexp_half attribu + #endif + #endif + +-#define RADIX __mpexp_radix.d +-#define RADIXI __mpexp_radixi.d +-#define ZERO __mpexp_zero.d +-#define ONE __mpexp_one.d +-#define TWO __mpexp_two.d +-#define HALF __mpexp_half.d +- + #endif +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpsqrt.h ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.h +@@ -51,7 +51,4 @@ extern const int __mpsqrt_mp[33] attribu + 4,4,4,4,4,4,4,4,4}; + #endif + +-#define ONE __mpsqrt_one.d +-#define HALFRAD __mpsqrt_halfrad.d +- + #endif +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mptan.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mptan.c ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mptan.c +@@ -47,8 +47,6 @@ void + SECTION + __mptan(double x, mp_no *mpy, int p) { + +- static const double MONE = -1.0; +- + int n; + mp_no mpw, mpc, mps; + +@@ -56,7 +54,7 @@ __mptan(double x, mp_no *mpy, int p) { + __c32(&mpw, &mpc, &mps, p); /* computing sin(x) and cos(x) */ + if (n) /* second or fourth quarter of unit circle */ + { __dvd(&mpc,&mps,mpy,p); +- mpy->d[0] *= MONE; ++ mpy->d[0] *= -1; + } /* tan is negative in this area */ + else __dvd(&mps,&mpc,mpy,p); + +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/ulog.h 
+=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/ulog.h ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/ulog.h +@@ -181,10 +181,6 @@ + #endif + #endif + +-#define ZERO zero.d +-#define ONE one.d +-#define HALF half.d +-#define MHALF mhalf.d + #define SQRT_2 sqrt_2.d + #define DEL_U delu.d + #define DEL_V delv.d +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/utan.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/utan.h ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/utan.h +@@ -270,10 +270,4 @@ + #endif + #endif + +- +-#define ZERO zero.d +-#define ONE one.d +-#define MONE mone.d +-#define TWO8 two8.d +- + #endif +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa-arch.h +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa-arch.h +@@ -0,0 +1,47 @@ ++/* Overridable constants and operations. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU Lesser General Public License as published by ++ the Free Software Foundation; either version 2.1 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public License ++ along with this program; if not, see . */ ++ ++#include ++ ++typedef long mantissa_t; ++typedef int64_t mantissa_store_t; ++ ++#define TWOPOW(i) (1L << i) ++ ++#define RADIX_EXP 24 ++#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */ ++ ++/* Divide D by RADIX and put the remainder in R. 
D must be a non-negative ++ integral value. */ ++#define DIV_RADIX(d, r) \ ++ ({ \ ++ r = d & (RADIX - 1); \ ++ d >>= RADIX_EXP; \ ++ }) ++ ++/* Put the integer component of a double X in R and retain the fraction in ++ X. This is used in extracting mantissa digits for MP_NO by using the ++ integer portion of the current value of the number as the current mantissa ++ digit and then scaling by RADIX to get the next mantissa digit in the same ++ manner. */ ++#define INTEGER_OF(x, i) \ ++ ({ \ ++ i = (mantissa_t) x; \ ++ x -= i; \ ++ }) ++ ++/* Align IN down to F. The code assumes that F is a power of two. */ ++#define ALIGN_DOWN_TO(in, f) ((in) & -(f)) +Index: glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa-arch.h +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa-arch.h +@@ -0,0 +1,56 @@ ++/* Overridable constants and operations. ++ Copyright (C) 2013-2017 Free Software Foundation, Inc. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU Lesser General Public License as published by ++ the Free Software Foundation; either version 2.1 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public License ++ along with this program; if not, see . */ ++ ++typedef double mantissa_t; ++typedef double mantissa_store_t; ++ ++#define TWOPOW(i) (0x1.0p##i) ++ ++#define RADIX TWOPOW (24) /* 2^24 */ ++#define CUTTER TWOPOW (76) /* 2^76 */ ++#define RADIXI 0x1.0p-24 /* 2^-24 */ ++#define TWO52 TWOPOW (52) /* 2^52 */ ++ ++/* Divide D by RADIX and put the remainder in R. 
*/ ++#define DIV_RADIX(d,r) \ ++ ({ \ ++ double u = ((d) + CUTTER) - CUTTER; \ ++ if (u > (d)) \ ++ u -= RADIX; \ ++ r = (d) - u; \ ++ (d) = u * RADIXI; \ ++ }) ++ ++/* Put the integer component of a double X in R and retain the fraction in ++ X. */ ++#define INTEGER_OF(x, r) \ ++ ({ \ ++ double u = ((x) + TWO52) - TWO52; \ ++ if (u > (x)) \ ++ u -= 1; \ ++ (r) = u; \ ++ (x) -= u; \ ++ }) ++ ++/* Align IN down to a multiple of F, where F is a power of two. */ ++#define ALIGN_DOWN_TO(in, f) \ ++ ({ \ ++ double factor = f * TWO52; \ ++ double u = (in + factor) - factor; \ ++ if (u > in) \ ++ u -= f; \ ++ u; \ ++ }) +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_atan2.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/e_atan2.c ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_atan2.c +@@ -98,15 +98,15 @@ __ieee754_atan2(double y,double x) { + /* y=+-0 */ + if (uy==0x00000000) { + if (dy==0x00000000) { +- if ((ux&0x80000000)==0x00000000) return ZERO; ++ if ((ux&0x80000000)==0x00000000) return 0; + else return opi.d; } } + else if (uy==0x80000000) { + if (dy==0x00000000) { +- if ((ux&0x80000000)==0x00000000) return MZERO; ++ if ((ux&0x80000000)==0x00000000) return -0.0; + else return mopi.d;} } + + /* x=+-0 */ +- if (x==ZERO) { ++ if (x==0) { + if ((uy&0x80000000)==0x00000000) return hpi.d; + else return mhpi.d; } + +@@ -118,8 +118,8 @@ __ieee754_atan2(double y,double x) { + else if (uy==0xfff00000) { + if (dy==0x00000000) return mqpi.d; } + else { +- if ((uy&0x80000000)==0x00000000) return ZERO; +- else return MZERO; } ++ if ((uy&0x80000000)==0x00000000) return 0; ++ else return -0.0; } + } + } + else if (ux==0xfff00000) { +@@ -141,14 +141,14 @@ __ieee754_atan2(double y,double x) { + if (dy==0x00000000) return mhpi.d; } + + /* either x/y or y/x is very close to zero */ +- ax = (x=ep) { return ((y>ZERO) ? hpi.d : mhpi.d); } ++ if (de>=ep) { return ((y>0) ? 
hpi.d : mhpi.d); } + else if (de<=em) { +- if (x>ZERO) { ++ if (x>0) { + if ((z=ay/ax)ZERO) ? opi.d : mopi.d); } } ++ else { return ((y>0) ? opi.d : mopi.d); } } + + /* if either x or y is extremely close to zero, scale abs(x), abs(y). */ + if (axZERO) { ++ if (x>0) { + + /* (i) x>0, abs(y)< abs(x): atan(ay/ax) */ + if (ay U03, 1)) { goto case_03; } + + +@@ -113,25 +113,25 @@ __ieee754_log(double x) { + w*(d17.d+w*(d18.d+w*(d19.d+w*d20.d)))))))); + EMULV(w,a,s2,ss2,t1,t2,t3,t4,t5) + ADD2(d10.d,dd10.d,s2,ss2,s3,ss3,t1,t2) +- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) + ADD2(d9.d,dd9.d,s2,ss2,s3,ss3,t1,t2) +- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) + ADD2(d8.d,dd8.d,s2,ss2,s3,ss3,t1,t2) +- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) + ADD2(d7.d,dd7.d,s2,ss2,s3,ss3,t1,t2) +- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) + ADD2(d6.d,dd6.d,s2,ss2,s3,ss3,t1,t2) +- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) + ADD2(d5.d,dd5.d,s2,ss2,s3,ss3,t1,t2) +- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) + ADD2(d4.d,dd4.d,s2,ss2,s3,ss3,t1,t2) +- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) + ADD2(d3.d,dd3.d,s2,ss2,s3,ss3,t1,t2) +- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) + ADD2(d2.d,dd2.d,s2,ss2,s3,ss3,t1,t2) +- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) +- MUL2(w,ZERO,s2,ss2,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8) +- ADD2(w,ZERO, s3,ss3, b, bb,t1,t2) ++ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(w,0,s2,ss2,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8) ++ ADD2(w,0, s3,ss3, b, bb,t1,t2) + + /* End stage II, case abs(x-1) < 
0.03 */ + if ((y=b+(bb+b*E4)) == b+(bb-b*E4)) return y; +@@ -155,7 +155,7 @@ __ieee754_log(double x) { + j = (num.i[HIGH_HALF] & 0x000fffff) >> 4; + + /* Compute w=(u-ui*vj)/(ui*vj) */ +- p0=(ONE+(i-75)*DEL_U)*(ONE+(j-180)*DEL_V); ++ p0=(1+(i-75)*DEL_U)*(1+(j-180)*DEL_V); + q=u-p0; r0=Iu[i].d*Iv[j].d; w=q*r0; + + /* Evaluate polynomial I */ +@@ -178,11 +178,11 @@ __ieee754_log(double x) { + + /* Improve the accuracy of r0 */ + EMULV(p0,r0,sa,sb,t1,t2,t3,t4,t5) +- t=r0*((ONE-sa)-sb); ++ t=r0*((1-sa)-sb); + EADD(r0,t,ra,rb) + + /* Compute w */ +- MUL2(q,ZERO,ra,rb,w,ww,t1,t2,t3,t4,t5,t6,t7,t8) ++ MUL2(q,0,ra,rb,w,ww,t1,t2,t3,t4,t5,t6,t7,t8) + + EADD(A,B0,a0,aa0) + +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_atan.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/s_atan.c ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_atan.c +@@ -82,7 +82,7 @@ double atan(double x) { + return x+x; + + /* Regular values of x, including denormals +-0 and +-INF */ +- u = (x= 1/2 */ + if ((y=t1+(yy-u3)) == t1+(yy+u3)) return __signArctan(x,y); + +- DIV2(ONE,ZERO,u,ZERO,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10) ++ DIV2(1,0,u,0,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10) + t1=w-hij[i][0].d; + EADD(t1,ww,z,zz) + s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+ + z*(hij[i][14].d+z* hij[i][15].d)))); +- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2) ++ ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2) + MUL2(z,zz,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) + ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2) + MUL2(z,zz,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) +@@ -173,18 +173,18 @@ double atan(double x) { + } + else { + if (u__atan_xm[m].d) break;} + } + mpone.e = mptwo.e = mptwoim1.e = 1; +- mpone.d[0] = mpone.d[1] = mptwo.d[0] = mptwoim1.d[0] = ONE; +- mptwo.d[1] = TWO; ++ mpone.d[0] = mpone.d[1] = mptwo.d[0] = mptwoim1.d[0] = 1; ++ mptwo.d[1] = 2; + + /* Reduce x m times */ + __mul(x,x,&mpsm,p); +@@ -92,7 +92,7 @@ 
__mpatan(mp_no *x, mp_no *y, int p) { + n=__atan_np[p]; mptwoim1.d[1] = __atan_twonm1[p].d; + __dvd(&mpsm,&mptwoim1,&mpt,p); + for (i=n-1; i>1; i--) { +- mptwoim1.d[1] -= TWO; ++ mptwoim1.d[1] -= 2; + __dvd(&mpsm,&mptwoim1,&mpt1,p); + __mul(&mpsm,&mpt,&mpt2,p); + __sub(&mpt1,&mpt2,&mpt,p); +Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpsqrt.c ++++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.c +@@ -62,8 +62,8 @@ __mpsqrt(mp_no *x, mp_no *y, int p) { + mp_no mpxn,mpz,mpu,mpt1,mpt2; + + /* Prepare multi-precision 1/2 and 3/2 */ +- mphalf.e =0; mphalf.d[0] =ONE; mphalf.d[1] =HALFRAD; +- mp3halfs.e=1; mp3halfs.d[0]=ONE; mp3halfs.d[1]=ONE; mp3halfs.d[2]=HALFRAD; ++ mphalf.e =0; mphalf.d[0] =1; mphalf.d[1] =HALFRAD; ++ mp3halfs.e=1; mp3halfs.d[0]=1; mp3halfs.d[1]=1; mp3halfs.d[2]=HALFRAD; + + ey=EX/2; __cpy(x,&mpxn,p); mpxn.e -= (ey+ey); + __mp_dbl(&mpxn,&dx,p); dy=fastiroot(dx); __dbl_mp(dy,&mpu,p); diff --git a/SOURCES/glibc-rh1413638-1.patch b/SOURCES/glibc-rh1413638-1.patch new file mode 100644 index 0000000..f5255d5 --- /dev/null +++ b/SOURCES/glibc-rh1413638-1.patch @@ -0,0 +1,16 @@ +commit cb756c6d686242acdc942e3d4276e399a69a6f02 +Author: Marcus Shawcroft +Date: Tue Dec 17 18:12:30 2013 +0000 + + Compile e_sqrt.c with -ffp-contract=off. 
+ +diff --git a/sysdeps/ieee754/dbl-64/Makefile b/sysdeps/ieee754/dbl-64/Makefile +index 1a7b311..35f545f 100644 +--- a/sysdeps/ieee754/dbl-64/Makefile ++++ b/sysdeps/ieee754/dbl-64/Makefile +@@ -1,4 +1,5 @@ + ifeq ($(subdir),math) + # branred depends on precise IEEE double rounding + CFLAGS-branred.c = $(config-cflags-nofma) ++CFLAGS-e_sqrt.c = $(config-cflags-nofma) + endif diff --git a/SOURCES/glibc-rh1413638-2.patch b/SOURCES/glibc-rh1413638-2.patch new file mode 100644 index 0000000..7343462 --- /dev/null +++ b/SOURCES/glibc-rh1413638-2.patch @@ -0,0 +1,19 @@ +commit d421868bb85d1459b1d2df520bb26f3e11aa195a +Author: Adhemerval Zanella +Date: Tue Mar 10 09:38:54 2015 -0400 + + powerpc: Fix incorrect results for pow when using FMA + + This patch adds no FMA generation for e_pow to avoid precision issues + for powerpc. This fixes BZ#18104. + +diff --git a/sysdeps/ieee754/dbl-64/Makefile b/sysdeps/ieee754/dbl-64/Makefile +index 35f545f..5557c75 100644 +--- a/sysdeps/ieee754/dbl-64/Makefile ++++ b/sysdeps/ieee754/dbl-64/Makefile +@@ -2,4 +2,5 @@ ifeq ($(subdir),math) + # branred depends on precise IEEE double rounding + CFLAGS-branred.c = $(config-cflags-nofma) + CFLAGS-e_sqrt.c = $(config-cflags-nofma) ++CFLAGS-e_pow.c = $(config-cflags-nofma) + endif diff --git a/SOURCES/glibc-rh1417205.patch b/SOURCES/glibc-rh1417205.patch new file mode 100644 index 0000000..ff3393b --- /dev/null +++ b/SOURCES/glibc-rh1417205.patch @@ -0,0 +1,46 @@ +commit 164fd39d05925717e75715929c7ced14a2c1505e +Author: Andreas Jaeger +Date: Fri May 3 20:51:27 2013 +0200 + + Sync with Linux 3.9 + + * sysdeps/gnu/netinet/tcp.h (TCP_TIMESTAMP): New value, from + Linux 3.9. + * sysdeps/unix/sysv/linux/bits/socket.h (PF_VSOCK, AF_VSOCK): + Add. + (PF_MAX): Adjust for VSOCK change. 
+ +Index: b/sysdeps/gnu/netinet/tcp.h +=================================================================== +--- a/sysdeps/gnu/netinet/tcp.h ++++ b/sysdeps/gnu/netinet/tcp.h +@@ -60,6 +60,7 @@ + #define TCP_QUEUE_SEQ 21 /* Set sequence number of repaired queue. */ + #define TCP_REPAIR_OPTIONS 22 /* Repair TCP connection options */ + #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ ++#define TCP_TIMESTAMP 24 /* TCP time stamp */ + + #ifdef __USE_MISC + # include +Index: b/sysdeps/unix/sysv/linux/bits/socket.h +=================================================================== +--- a/sysdeps/unix/sysv/linux/bits/socket.h ++++ b/sysdeps/unix/sysv/linux/bits/socket.h +@@ -80,7 +80,8 @@ typedef __socklen_t socklen_t; + #define PF_CAIF 37 /* CAIF sockets. */ + #define PF_ALG 38 /* Algorithm sockets. */ + #define PF_NFC 39 /* NFC sockets. */ +-#define PF_MAX 40 /* For now.. */ ++#define PF_VSOCK 40 /* vSockets. */ ++#define PF_MAX 41 /* For now.. */ + + /* Address families. */ + #define AF_UNSPEC PF_UNSPEC +@@ -124,6 +125,7 @@ typedef __socklen_t socklen_t; + #define AF_CAIF PF_CAIF + #define AF_ALG PF_ALG + #define AF_NFC PF_NFC ++#define AF_VSOCK PF_VSOCK + #define AF_MAX PF_MAX + + /* Socket level values. Others are defined in the appropriate headers. diff --git a/SOURCES/glibc-rh1418978-0.patch b/SOURCES/glibc-rh1418978-0.patch new file mode 100644 index 0000000..5811cff --- /dev/null +++ b/SOURCES/glibc-rh1418978-0.patch @@ -0,0 +1,108 @@ +This patch backports the makefile changes related to the support/ +framework. It is based on upstream commits +c23de0aacbeaa7a091609b35764bed931475a16d (support: Introduce new +subdirectory for test infrastructure) and +76dcbf42df83c970c13c786d287f1ec69e1b91eb (Expose linking against +libsupport as make dependency). + +The actual contents of the support/ subdirectory are kept in a separate +patch, so that it can be updated separately.
+ +Index: b/Makeconfig +=================================================================== +--- a/Makeconfig ++++ b/Makeconfig +@@ -384,6 +384,9 @@ LDFLAGS.so += $(hashstyle-LDFLAGS) + LDFLAGS-rtld += $(hashstyle-LDFLAGS) + endif + ++# Additional libraries to link into every test. ++link-extra-libs-tests = $(libsupport) ++ + # Command for linking PIE programs with the C library. + ifndef +link-pie + +link-pie = $(CC) -pie -Wl,-O1 -nostdlib -nostartfiles -o $@ \ +@@ -464,7 +467,7 @@ link-libc-before-gnulib = -Wl,-rpath-lin + link-libc = $(link-libc-before-gnulib) $(gnulib) + link-libc-tests = $(link-libc-before-gnulib) $(gnulib-tests) + # This is how to find at build-time things that will be installed there. +-rpath-dirs = math elf dlfcn nss nis rt resolv crypt ++rpath-dirs = math elf dlfcn nss nis rt resolv crypt support + rpath-link = \ + $(common-objdir):$(subst $(empty) ,:,$(patsubst ../$(subdir),.,$(rpath-dirs:%=$(common-objpfx)%))) + else +@@ -732,7 +735,7 @@ libio-include = -I$(..)libio + # List of non-library modules that we build. + built-modules = iconvprogs iconvdata ldconfig lddlibc4 libmemusage \ + libSegFault libpcprofile librpcsvc locale-programs \ +- memusagestat nonlib nscd extramodules libnldbl ++ memusagestat nonlib nscd extramodules libnldbl libsupport + + in-module = $(subst -,_,$(firstword $(libof-$(basename $(@F))) \ + $(libof-$( +Date: Sat Jan 28 19:13:34 2017 -0500 + + Bug 20116: Fix use after free in pthread_create() + + The commit documents the ownership rules around 'struct pthread' and + when a thread can read or write to the descriptor. With those ownership + rules in place it becomes obvious that pd->stopped_start should not be + touched in several of the paths during thread startup, particularly so + for detached threads. 
In the case of detached threads, between the time + the thread is created by the OS kernel and the creating thread checks + pd->stopped_start, the detached thread might have already exited and the + memory for pd unmapped. As a regression test we add a simple test which + exercises this exact case by quickly creating detached threads with + large enough stacks to ensure the thread stack cache is bypassed and the + stacks are unmapped. Before the fix the testcase segfaults, after the + fix it works correctly and completes without issue. + + For a detailed discussion see: + https://www.sourceware.org/ml/libc-alpha/2017-01/msg00505.html + +diff --git a/support/Makefile b/support/Makefile +new file mode 100644 +index 0000000..2ace559 +--- /dev/null ++++ b/support/Makefile +@@ -0,0 +1,128 @@ ++# Makefile for support library, used only at build and test time ++# Copyright (C) 2016-2017 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . 
++ ++subdir := support ++ ++include ../Makeconfig ++ ++extra-libs := libsupport ++extra-libs-others = $(extra-libs) ++extra-libs-noinstall := $(extra-libs) ++ ++libsupport-routines = \ ++ check \ ++ check_addrinfo \ ++ check_dns_packet \ ++ check_hostent \ ++ check_netent \ ++ delayed_exit \ ++ ignore_stderr \ ++ oom_error \ ++ resolv_test \ ++ set_fortify_handler \ ++ support_become_root \ ++ support_enter_network_namespace \ ++ support_format_address_family \ ++ support_format_addrinfo \ ++ support_format_dns_packet \ ++ support_format_herrno \ ++ support_format_hostent \ ++ support_format_netent \ ++ support_record_failure \ ++ support_run_diff \ ++ support_test_main \ ++ support_test_verify_impl \ ++ temp_file \ ++ write_message \ ++ xaccept \ ++ xasprintf \ ++ xbind \ ++ xcalloc \ ++ xconnect \ ++ xfclose \ ++ xfopen \ ++ xfork \ ++ xgetsockname \ ++ xlisten \ ++ xmalloc \ ++ xmemstream \ ++ xmmap \ ++ xmunmap \ ++ xpoll \ ++ xpthread_attr_destroy \ ++ xpthread_attr_init \ ++ xpthread_attr_setdetachstate \ ++ xpthread_attr_setstacksize \ ++ xpthread_barrier_destroy \ ++ xpthread_barrier_init \ ++ xpthread_barrier_wait \ ++ xpthread_cancel \ ++ xpthread_check_return \ ++ xpthread_cond_wait \ ++ xpthread_create \ ++ xpthread_detach \ ++ xpthread_join \ ++ xpthread_mutex_consistent \ ++ xpthread_mutex_destroy \ ++ xpthread_mutex_init \ ++ xpthread_mutex_lock \ ++ xpthread_mutex_unlock \ ++ xpthread_mutexattr_destroy \ ++ xpthread_mutexattr_init \ ++ xpthread_mutexattr_setprotocol \ ++ xpthread_mutexattr_setpshared \ ++ xpthread_mutexattr_setrobust \ ++ xpthread_mutexattr_settype \ ++ xpthread_once \ ++ xpthread_sigmask \ ++ xpthread_spin_lock \ ++ xpthread_spin_unlock \ ++ xrealloc \ ++ xrecvfrom \ ++ xsendto \ ++ xsetsockopt \ ++ xsocket \ ++ xstrdup \ ++ xwaitpid \ ++ xwrite \ ++ ++libsupport-static-only-routines := $(libsupport-routines) ++# Only build one variant of the library. 
++libsupport-inhibit-o := .os ++ifeq ($(build-shared),yes) ++libsupport-inhibit-o += .o ++endif ++ ++tests = \ ++ README-testing \ ++ tst-support-namespace \ ++ tst-support_record_failure \ ++ ++ifeq ($(run-built-tests),yes) ++tests-special = \ ++ $(objpfx)tst-support_record_failure-2.out ++ ++$(objpfx)tst-support_record_failure-2.out: tst-support_record_failure-2.sh \ ++ $(objpfx)tst-support_record_failure ++ $(SHELL) $< $(common-objpfx) '$(test-program-prefix-before-env)' \ ++ '$(run-program-env)' '$(test-program-prefix-after-env)' \ ++ > $@; \ ++ $(evaluate-test) ++endif ++ ++include ../Rules +diff --git a/support/README b/support/README +new file mode 100644 +index 0000000..476cfcd +--- /dev/null ++++ b/support/README +@@ -0,0 +1,29 @@ ++This subdirectory contains infrastructure which is not put into ++installed libraries, but may be linked into programs (installed or ++not) and tests. ++ ++# Error-checking wrappers ++ ++These wrappers test for error return codes and terminate the process on ++error. They are declared in these header files: ++ ++* support.h ++* xsignal.h ++* xthread.h ++ ++In general, new wrappers should be added to support.h if possible. ++However, support.h must remain fully compatible with C90 and therefore ++cannot include headers which use identifiers not reserved in C90. If ++the wrappers need additional types, additional headers such as ++signal.h need to be introduced. ++ ++# Test framework ++ ++The test framework provides a main program for tests, including a ++timeout for hanging tests. See README-testing.c for a minimal ++example, and test-driver.c for details on how to use it.
The following ++header files provide related declarations: ++ ++* check.h ++* temp_file.h ++* test-driver.h +diff --git a/support/README-testing.c b/support/README-testing.c +new file mode 100644 +index 0000000..9d289c3 +--- /dev/null ++++ b/support/README-testing.c +@@ -0,0 +1,19 @@ ++/* This file contains an example test case which shows minimal use of ++ the test framework. Additional testing hooks are described in ++ . */ ++ ++/* This function will be called from the test driver. */ ++static int ++do_test (void) ++{ ++ if (3 == 5) ++ /* Indicate failure. */ ++ return 1; ++ else ++ /* Indicate success. */ ++ return 0; ++} ++ ++/* This file references do_test above and contains the definition of ++ the main function. */ ++#include +diff --git a/support/check.c b/support/check.c +new file mode 100644 +index 0000000..592f2bc +--- /dev/null ++++ b/support/check.c +@@ -0,0 +1,57 @@ ++/* Support code for reporting test results. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++static void ++print_failure (const char *file, int line, const char *format, va_list ap) ++{ ++ printf ("error: %s:%d: ", file, line); ++ vprintf (format, ap); ++ puts (""); ++} ++ ++int ++support_print_failure_impl (const char *file, int line, ++ const char *format, ...) ++{ ++ support_record_failure (); ++ va_list ap; ++ va_start (ap, format); ++ print_failure (file, line, format, ap); ++ va_end (ap); ++ return 1; ++} ++ ++void ++support_exit_failure_impl (int status, const char *file, int line, ++ const char *format, ...) ++{ ++ if (status != EXIT_SUCCESS && status != EXIT_UNSUPPORTED) ++ support_record_failure (); ++ va_list ap; ++ va_start (ap, format); ++ print_failure (file, line, format, ap); ++ va_end (ap); ++ exit (status); ++} +diff --git a/support/check.h b/support/check.h +new file mode 100644 +index 0000000..1d244a3 +--- /dev/null ++++ b/support/check.h +@@ -0,0 +1,94 @@ ++/* Functionality for reporting test results. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SUPPORT_CHECK_H ++#define SUPPORT_CHECK_H ++ ++#include ++ ++__BEGIN_DECLS ++ ++/* Record a test failure, print the failure message to standard output ++ and return 1. */ ++#define FAIL_RET(...) 
\ ++ return support_print_failure_impl (__FILE__, __LINE__, __VA_ARGS__) ++ ++/* Print the failure message and terminate the process with STATUS. ++ Record the process as failed if STATUS is neither EXIT_SUCCESS ++ nor EXIT_UNSUPPORTED. */ ++#define FAIL_EXIT(status, ...) \ ++ support_exit_failure_impl (status, __FILE__, __LINE__, __VA_ARGS__) ++ ++/* Record a test failure, print the failure message and terminate with ++ exit status 1. */ ++#define FAIL_EXIT1(...) \ ++ support_exit_failure_impl (1, __FILE__, __LINE__, __VA_ARGS__) ++ ++/* Print failure message and terminate as an unsupported test (exit ++ status of 77). */ ++#define FAIL_UNSUPPORTED(...) \ ++ support_exit_failure_impl (77, __FILE__, __LINE__, __VA_ARGS__) ++ ++/* Record a test failure (but continue executing) if EXPR evaluates to ++ false. */ ++#define TEST_VERIFY(expr) \ ++ ({ \ ++ if (expr) \ ++ ; \ ++ else \ ++ support_test_verify_impl (-1, __FILE__, __LINE__, #expr); \ ++ }) ++ ++/* Record a test failure and exit if EXPR evaluates to false. */ ++#define TEST_VERIFY_EXIT(expr) \ ++ ({ \ ++ if (expr) \ ++ ; \ ++ else \ ++ support_test_verify_impl (1, __FILE__, __LINE__, #expr); \ ++ }) ++ ++int support_print_failure_impl (const char *file, int line, ++ const char *format, ...) ++ __attribute__ ((nonnull (1), format (printf, 3, 4))); ++void support_exit_failure_impl (int exit_status, ++ const char *file, int line, ++ const char *format, ...) ++ __attribute__ ((noreturn, nonnull (2), format (printf, 4, 5))); ++void support_test_verify_impl (int status, const char *file, int line, ++ const char *expr); ++ ++/* Record a test failure. This function returns and does not ++ terminate the process. The failure counter is stored in a shared ++ memory mapping, so that failures reported in child processes are ++ visible to the parent process and test driver. This function ++ depends on initialization by an ELF constructor, so it can only be ++ invoked after the test driver has run.
Note that this function ++ does not support reporting failures from a DSO. */ ++void support_record_failure (void); ++ ++/* Internal function called by the test driver. */ ++int support_report_failure (int status) ++ __attribute__ ((weak, warn_unused_result)); ++ ++/* Internal function used to test the failure recording framework. */ ++void support_record_failure_reset (void); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_CHECK_H */ +diff --git a/support/check_addrinfo.c b/support/check_addrinfo.c +new file mode 100644 +index 0000000..55895ac +--- /dev/null ++++ b/support/check_addrinfo.c +@@ -0,0 +1,42 @@ ++/* Compare struct addrinfo values against a formatted string. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++void ++check_addrinfo (const char *query_description, struct addrinfo *ai, int ret, ++ const char *expected) ++{ ++ char *formatted = support_format_addrinfo (ai, ret); ++ if (strcmp (formatted, expected) != 0) ++ { ++ support_record_failure (); ++ printf ("error: addrinfo comparison failure\n"); ++ if (query_description != NULL) ++ printf ("query: %s\n", query_description); ++ support_run_diff ("expected", expected, ++ "actual", formatted); ++ } ++ free (formatted); ++} +diff --git a/support/check_dns_packet.c b/support/check_dns_packet.c +new file mode 100644 +index 0000000..d2a31be +--- /dev/null ++++ b/support/check_dns_packet.c +@@ -0,0 +1,42 @@ ++/* Check that a DNS packet buffer has the expected contents. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++void ++check_dns_packet (const char *query_description, ++ const unsigned char *buffer, size_t length, ++ const char *expected) ++{ ++ char *formatted = support_format_dns_packet (buffer, length); ++ if (strcmp (formatted, expected) != 0) ++ { ++ support_record_failure (); ++ printf ("error: packet comparison failure\n"); ++ if (query_description != NULL) ++ printf ("query: %s\n", query_description); ++ support_run_diff ("expected", expected, "actual", formatted); ++ } ++ free (formatted); ++} +diff --git a/support/check_hostent.c b/support/check_hostent.c +new file mode 100644 +index 0000000..890d672 +--- /dev/null ++++ b/support/check_hostent.c +@@ -0,0 +1,42 @@ ++/* Compare struct hostent values against a formatted string. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++void ++check_hostent (const char *query_description, struct hostent *h, ++ const char *expected) ++{ ++ char *formatted = support_format_hostent (h); ++ if (strcmp (formatted, expected) != 0) ++ { ++ support_record_failure (); ++ printf ("error: hostent comparison failure\n"); ++ if (query_description != NULL) ++ printf ("query: %s\n", query_description); ++ support_run_diff ("expected", expected, ++ "actual", formatted); ++ } ++ free (formatted); ++} +diff --git a/support/check_netent.c b/support/check_netent.c +new file mode 100644 +index 0000000..daa3083 +--- /dev/null ++++ b/support/check_netent.c +@@ -0,0 +1,42 @@ ++/* Compare struct netent values against a formatted string. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++void ++check_netent (const char *query_description, struct netent *e, ++ const char *expected) ++{ ++ char *formatted = support_format_netent (e); ++ if (strcmp (formatted, expected) != 0) ++ { ++ support_record_failure (); ++ printf ("error: netent comparison failure\n"); ++ if (query_description != NULL) ++ printf ("query: %s\n", query_description); ++ support_run_diff ("expected", expected, ++ "actual", formatted); ++ } ++ free (formatted); ++} +diff --git a/support/check_nss.h b/support/check_nss.h +new file mode 100644 +index 0000000..2893f2c +--- /dev/null ++++ b/support/check_nss.h +@@ -0,0 +1,42 @@ ++/* Test verification functions for NSS- and DNS-related data. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SUPPORT_CHECK_NSS_H ++#define SUPPORT_CHECK_NSS_H ++ ++#include ++#include ++ ++__BEGIN_DECLS ++ ++/* Compare the data structures against the expected values (which have ++ to be formatted according to the support_format_* functions in ++ ). If there is a difference, a delayed test ++ failure is recorded, and a diff is written to standard output. 
*/ ++void check_addrinfo (const char *query_description, ++ struct addrinfo *, int ret, const char *expected); ++void check_dns_packet (const char *query_description, ++ const unsigned char *, size_t, const char *expected); ++void check_hostent (const char *query_description, ++ struct hostent *, const char *expected); ++void check_netent (const char *query_description, ++ struct netent *, const char *expected); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_CHECK_NSS_H */ +diff --git a/support/delayed_exit.c b/support/delayed_exit.c +new file mode 100644 +index 0000000..67442f9 +--- /dev/null ++++ b/support/delayed_exit.c +@@ -0,0 +1,55 @@ ++/* Time-triggered process termination. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static void * ++delayed_exit_thread (void *seconds_as_ptr) ++{ ++ int seconds = (uintptr_t) seconds_as_ptr; ++ struct timespec delay = { seconds, 0 }; ++ struct timespec remaining = { 0 }; ++ if (nanosleep (&delay, &remaining) != 0) ++ FAIL_EXIT1 ("nanosleep: %m"); ++ /* Exit the process successfully. */ ++ exit (0); ++ return NULL; ++} ++ ++void ++delayed_exit (int seconds) ++{ ++ /* Create the new thread with all signals blocked.
*/ ++ sigset_t all_blocked; ++ sigfillset (&all_blocked); ++ sigset_t old_set; ++ xpthread_sigmask (SIG_SETMASK, &all_blocked, &old_set); ++ /* Create a detached thread. */ ++ pthread_t thr = xpthread_create ++ (NULL, delayed_exit_thread, (void *) (uintptr_t) seconds); ++ xpthread_detach (thr); ++ /* Restore the original signal mask. */ ++ xpthread_sigmask (SIG_SETMASK, &old_set, NULL); ++} +diff --git a/support/format_nss.h b/support/format_nss.h +new file mode 100644 +index 0000000..fb4597c +--- /dev/null ++++ b/support/format_nss.h +@@ -0,0 +1,41 @@ ++/* String formatting functions for NSS- and DNS-related data. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SUPPORT_FORMAT_NSS_H ++#define SUPPORT_FORMAT_NSS_H ++ ++#include ++#include ++ ++__BEGIN_DECLS ++ ++/* The following functions format their arguments as human-readable ++ strings (which can span multiple lines). The caller must free the ++ returned buffer. For NULL pointers or failure status arguments, ++ error variables such as h_errno and errno are included in the ++ result. 
*/ ++char *support_format_address_family (int); ++char *support_format_addrinfo (struct addrinfo *, int ret); ++char *support_format_dns_packet (const unsigned char *buffer, size_t length); ++char *support_format_herrno (int); ++char *support_format_hostent (struct hostent *); ++char *support_format_netent (struct netent *); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_FORMAT_NSS_H */ +diff --git a/support/ignore_stderr.c b/support/ignore_stderr.c +new file mode 100644 +index 0000000..7b77a2c +--- /dev/null ++++ b/support/ignore_stderr.c +@@ -0,0 +1,38 @@ ++/* Avoid all the buffer overflow messages on stderr. ++ Copyright (C) 2015-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++void ++ignore_stderr (void) ++{ ++ int fd = open (_PATH_DEVNULL, O_WRONLY); ++ if (fd == -1) ++ close (STDERR_FILENO); ++ else ++ { ++ dup2 (fd, STDERR_FILENO); ++ close (fd); ++ } ++ setenv ("LIBC_FATAL_STDERR_", "1", 1); ++} +diff --git a/support/namespace.h b/support/namespace.h +new file mode 100644 +index 0000000..6bc82d6 +--- /dev/null ++++ b/support/namespace.h +@@ -0,0 +1,53 @@ ++/* Entering namespaces for test case isolation. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SUPPORT_NAMESPACE_H ++#define SUPPORT_NAMESPACE_H ++ ++#include ++#include ++ ++__BEGIN_DECLS ++ ++/* Attempts to become root (or acquire root-like privileges), possibly ++ with the help of user namespaces. Return true if (restricted) root ++ privileges could be attained in some way. Print diagnostics to ++ standard output. ++ ++ Note that this function generally has to be called before a process ++ becomes multi-threaded, otherwise it may fail with insufficient ++ privileges on systems which would support this operation for ++ single-threaded processes. */ ++bool support_become_root (void); ++ ++/* Enter a network namespace (and a UTS namespace if possible) and ++ configure the loopback interface. Return true if a network ++ namespace could be created. Print diagnostics to standard output. ++ If a network namespace could be created, but networking in it could ++ not be configured, terminate the process. It is recommended to ++ call support_become_root before this function so that the process ++ has sufficient privileges. */ ++bool support_enter_network_namespace (void); ++ ++/* Return true if support_enter_network_namespace managed to enter a ++ UTS namespace. 
*/ ++bool support_in_uts_namespace (void); ++ ++__END_DECLS ++ ++#endif +diff --git a/support/oom_error.c b/support/oom_error.c +new file mode 100644 +index 0000000..7816978 +--- /dev/null ++++ b/support/oom_error.c +@@ -0,0 +1,29 @@ ++/* Reporting out-of-memory errors. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++ ++void ++oom_error (const char *function, size_t size) ++{ ++ printf ("%s: unable to allocate %zu bytes: %m\n", function, size); ++ exit (1); ++} +diff --git a/support/resolv_test.c b/support/resolv_test.c +new file mode 100644 +index 0000000..2d0ea3c +--- /dev/null ++++ b/support/resolv_test.c +@@ -0,0 +1,1150 @@ ++/* DNS test framework and libresolv redirection. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Response builder. */ ++ ++enum ++ { ++ max_response_length = 65536 ++ }; ++ ++/* List of pointers to be freed. The hash table implementation ++ (struct hsearch_data) does not provide a way to deallocate all ++ objects, so this approach is used to avoid memory leaks. */ ++struct to_be_freed ++{ ++ struct to_be_freed *next; ++ void *ptr; ++}; ++ ++struct resolv_response_builder ++{ ++ const unsigned char *query_buffer; ++ size_t query_length; ++ ++ size_t offset; /* Bytes written so far in buffer. */ ++ ns_sect section; /* Current section in the DNS packet. */ ++ unsigned int truncate_bytes; /* Bytes to remove at end of response. */ ++ bool drop; /* Discard generated response. */ ++ bool close; /* Close TCP client connection. */ ++ ++ /* Offset of the two-byte RDATA length field in the currently ++ written RDATA sub-structure. 0 if no RDATA is being written. */ ++ size_t current_rdata_offset; ++ ++ /* Hash table for locating targets for label compression. */ ++ struct hsearch_data compression_offsets; ++ /* List of pointers which need to be freed. Used for domain names ++ involved in label compression. */ ++ struct to_be_freed *to_be_freed; ++ ++ /* Must be last. Not zeroed for performance reasons. */ ++ unsigned char buffer[max_response_length]; ++}; ++ ++/* Response builder. */ ++ ++/* Add a pointer to the list of pointers to be freed when B is ++ deallocated. 
*/ ++static void ++response_push_pointer_to_free (struct resolv_response_builder *b, void *ptr) ++{ ++ if (ptr == NULL) ++ return; ++ struct to_be_freed *e = xmalloc (sizeof (*e)); ++ *e = (struct to_be_freed) {b->to_be_freed, ptr}; ++ b->to_be_freed = e; ++} ++ ++void ++resolv_response_init (struct resolv_response_builder *b, ++ struct resolv_response_flags flags) ++{ ++ if (b->offset > 0) ++ FAIL_EXIT1 ("response_init: called at offset %zu", b->offset); ++ if (b->query_length < 12) ++ FAIL_EXIT1 ("response_init called for a query of size %zu", ++ b->query_length); ++ if (flags.rcode > 15) ++ FAIL_EXIT1 ("response_init: invalid RCODE %u", flags.rcode); ++ ++ /* Copy the transaction ID. */ ++ b->buffer[0] = b->query_buffer[0]; ++ b->buffer[1] = b->query_buffer[1]; ++ ++ /* Initialize the flags. */ ++ b->buffer[2] = 0x80; /* Mark as response. */ ++ b->buffer[2] |= b->query_buffer[2] & 0x01; /* Copy the RD bit. */ ++ if (flags.tc) ++ b->buffer[2] |= 0x02; ++ b->buffer[3] = 0x80 | flags.rcode; /* Always set RA. */ ++ ++ /* Fill in the initial section count values. */ ++ b->buffer[4] = flags.qdcount >> 8; ++ b->buffer[5] = flags.qdcount; ++ b->buffer[6] = flags.ancount >> 8; ++ b->buffer[7] = flags.ancount; ++ b->buffer[8] = flags.nscount >> 8; ++ b->buffer[9] = flags.nscount; ++ b->buffer[10] = flags.adcount >> 8; ++ b->buffer[11] = flags.adcount; ++ ++ b->offset = 12; ++} ++ ++void ++resolv_response_section (struct resolv_response_builder *b, ns_sect section) ++{ ++ if (b->offset == 0) ++ FAIL_EXIT1 ("resolv_response_section: response_init not called before"); ++ if (section < b->section) ++ FAIL_EXIT1 ("resolv_response_section: cannot go back to previous section"); ++ b->section = section; ++} ++ ++/* Add a single byte to B. 
*/ ++static inline void ++response_add_byte (struct resolv_response_builder *b, unsigned char ch) ++{ ++ if (b->offset == max_response_length) ++ FAIL_EXIT1 ("DNS response exceeds 64 KiB limit"); ++ b->buffer[b->offset] = ch; ++ ++b->offset; ++} ++ ++/* Add a 16-bit word VAL to B, in big-endian format. */ ++static void ++response_add_16 (struct resolv_response_builder *b, uint16_t val) ++{ ++ response_add_byte (b, val >> 8); ++ response_add_byte (b, val); ++} ++ ++/* Increment the per-section record counter in the packet header. */ ++static void ++response_count_increment (struct resolv_response_builder *b) ++{ ++ unsigned int offset = b->section; ++ offset = 4 + 2 * offset; ++ ++b->buffer[offset + 1]; ++ if (b->buffer[offset + 1] == 0) ++ { ++ /* Carry. */ ++ ++b->buffer[offset]; ++ if (b->buffer[offset] == 0) ++ /* Overflow. */ ++ FAIL_EXIT1 ("too many records in section"); ++ } ++} ++ ++void ++resolv_response_add_question (struct resolv_response_builder *b, ++ const char *name, uint16_t class, uint16_t type) ++{ ++ if (b->offset == 0) ++ FAIL_EXIT1 ("resolv_response_add_question: " ++ "resolv_response_init not called"); ++ if (b->section != ns_s_qd) ++ FAIL_EXIT1 ("resolv_response_add_question: " ++ "must be called in the question section"); ++ ++ resolv_response_add_name (b, name); ++ response_add_16 (b, type); ++ response_add_16 (b, class); ++ ++ response_count_increment (b); ++} ++ ++void ++resolv_response_add_name (struct resolv_response_builder *b, ++ const char *const origname) ++{ ++ /* Normalized name. */ ++ char *name; ++ /* Normalized name with case preserved. */ ++ char *name_case; ++ { ++ size_t namelen = strlen (origname); ++ /* Remove trailing dots. FIXME: Handle trailing quoted dots. */ ++ while (namelen > 0 && origname[namelen - 1] == '.') ++ --namelen; ++ name = xmalloc (namelen + 1); ++ name_case = xmalloc (namelen + 1); ++ /* Copy and convert to lowercase. FIXME: This needs to normalize ++ escaping as well.
*/ ++ for (size_t i = 0; i < namelen; ++i) ++ { ++ char ch = origname[i]; ++ name_case[i] = ch; ++ if ('A' <= ch && ch <= 'Z') ++ ch = ch - 'A' + 'a'; ++ name[i] = ch; ++ } ++ name[namelen] = 0; ++ name_case[namelen] = 0; ++ } ++ char *name_start = name; ++ char *name_case_start = name_case; ++ ++ bool compression = false; ++ while (*name) ++ { ++ /* Search for a previous name we can reference. */ ++ ENTRY new_entry = ++ { ++ .key = name, ++ .data = (void *) (uintptr_t) b->offset, ++ }; ++ ++ /* If the label can be a compression target because it is at a ++ reachable offset, add it to the hash table. */ ++ ACTION action; ++ if (b->offset < (1 << 12)) ++ action = ENTER; ++ else ++ action = FIND; ++ ++ /* Search for known compression offsets in the hash table. */ ++ ENTRY *e; ++ if (hsearch_r (new_entry, action, &e, &b->compression_offsets) == 0) ++ { ++ if (action == FIND && errno == ESRCH) ++ /* Fall through. */ ++ e = NULL; ++ else ++ FAIL_EXIT1 ("hsearch_r failure in name compression: %m"); ++ } ++ ++ /* The name is known. Reference the previous location. */ ++ if (e != NULL && e->data != new_entry.data) ++ { ++ size_t old_offset = (uintptr_t) e->data; ++ response_add_byte (b, 0xC0 | (old_offset >> 8)); ++ response_add_byte (b, old_offset); ++ compression = true; ++ break; ++ } ++ ++ /* The name does not exist yet. Write one label. First, add ++ room for the label length. */ ++ size_t buffer_label_offset = b->offset; ++ response_add_byte (b, 0); ++ ++ /* Copy the label. */ ++ while (true) ++ { ++ char ch = *name_case; ++ if (ch == '\0') ++ break; ++ ++name; ++ ++name_case; ++ if (ch == '.') ++ break; ++ /* FIXME: Handle escaping. */ ++ response_add_byte (b, ch); ++ } ++ ++ /* Patch in the label length. 
*/ ++ size_t label_length = b->offset - buffer_label_offset - 1; ++ if (label_length == 0) ++ FAIL_EXIT1 ("empty label in name compression: %s", origname); ++ if (label_length > 63) ++ FAIL_EXIT1 ("label too long in name compression: %s", origname); ++ b->buffer[buffer_label_offset] = label_length; ++ ++ /* Continue with the tail of the name and the next label. */ ++ } ++ ++ if (compression) ++ { ++ /* If we found an immediate match for the name, we have not put ++ it into the hash table, and can free it immediately. */ ++ if (name == name_start) ++ free (name_start); ++ else ++ response_push_pointer_to_free (b, name_start); ++ } ++ else ++ { ++ /* Terminate the sequence of labels. With compression, this is ++ implicit in the compression reference. */ ++ response_add_byte (b, 0); ++ response_push_pointer_to_free (b, name_start); ++ } ++ ++ free (name_case_start); ++} ++ ++void ++resolv_response_open_record (struct resolv_response_builder *b, ++ const char *name, ++ uint16_t class, uint16_t type, uint32_t ttl) ++{ ++ if (b->section == ns_s_qd) ++ FAIL_EXIT1 ("resolv_response_open_record called in question section"); ++ if (b->current_rdata_offset != 0) ++ FAIL_EXIT1 ("resolv_response_open_record called with open record"); ++ ++ resolv_response_add_name (b, name); ++ response_add_16 (b, type); ++ response_add_16 (b, class); ++ response_add_16 (b, ttl >> 16); ++ response_add_16 (b, ttl); ++ ++ b->current_rdata_offset = b->offset; ++ /* Add room for the RDATA length. 
*/ ++ response_add_16 (b, 0); ++} ++ ++ ++void ++resolv_response_close_record (struct resolv_response_builder *b) ++{ ++ size_t rdata_offset = b->current_rdata_offset; ++ if (rdata_offset == 0) ++ FAIL_EXIT1 ("response_close_record called without open record"); ++ size_t rdata_length = b->offset - rdata_offset - 2; ++ if (rdata_length > 65535) ++ FAIL_EXIT1 ("RDATA length %zu exceeds limit", rdata_length); ++ b->buffer[rdata_offset] = rdata_length >> 8; ++ b->buffer[rdata_offset + 1] = rdata_length; ++ response_count_increment (b); ++ b->current_rdata_offset = 0; ++} ++ ++void ++resolv_response_add_data (struct resolv_response_builder *b, ++ const void *data, size_t length) ++{ ++ size_t remaining = max_response_length - b->offset; ++ if (remaining < length) ++ FAIL_EXIT1 ("resolv_response_add_data: not enough room for %zu bytes", ++ length); ++ memcpy (b->buffer + b->offset, data, length); ++ b->offset += length; ++} ++ ++void ++resolv_response_drop (struct resolv_response_builder *b) ++{ ++ b->drop = true; ++} ++ ++void ++resolv_response_close (struct resolv_response_builder *b) ++{ ++ b->close = true; ++} ++ ++void ++resolv_response_truncate_data (struct resolv_response_builder *b, size_t count) ++{ ++ if (count > 65535) ++ FAIL_EXIT1 ("resolv_response_truncate_data: argument too large: %zu", ++ count); ++ b->truncate_bytes = count; ++} ++ ++ ++size_t ++resolv_response_length (const struct resolv_response_builder *b) ++{ ++ return b->offset; ++} ++ ++unsigned char * ++resolv_response_buffer (const struct resolv_response_builder *b) ++{ ++ unsigned char *result = xmalloc (b->offset); ++ memcpy (result, b->buffer, b->offset); ++ return result; ++} ++ ++static struct resolv_response_builder * ++response_builder_allocate ++ (const unsigned char *query_buffer, size_t query_length) ++{ ++ struct resolv_response_builder *b = xmalloc (sizeof (*b)); ++ memset (b, 0, offsetof (struct resolv_response_builder, buffer)); ++ b->query_buffer = query_buffer; ++ b->query_length 
= query_length; ++ TEST_VERIFY_EXIT (hcreate_r (10000, &b->compression_offsets) != 0); ++ return b; ++} ++ ++static void ++response_builder_free (struct resolv_response_builder *b) ++{ ++ struct to_be_freed *current = b->to_be_freed; ++ while (current != NULL) ++ { ++ struct to_be_freed *next = current->next; ++ free (current->ptr); ++ free (current); ++ current = next; ++ } ++ hdestroy_r (&b->compression_offsets); ++ free (b); ++} ++ ++/* DNS query processing. */ ++ ++/* Data extracted from the question section of a DNS packet. */ ++struct query_info ++{ ++ char qname[MAXDNAME]; ++ uint16_t qclass; ++ uint16_t qtype; ++}; ++ ++/* Update *INFO from the specified DNS packet. */ ++static void ++parse_query (struct query_info *info, ++ const unsigned char *buffer, size_t length) ++{ ++ if (length < 12) ++ FAIL_EXIT1 ("malformed DNS query: too short: %zu bytes", length); ++ ++ int ret = dn_expand (buffer, buffer + length, buffer + 12, ++ info->qname, sizeof (info->qname)); ++ if (ret < 0) ++ FAIL_EXIT1 ("malformed DNS query: cannot uncompress QNAME"); ++ ++ /* Obtain QTYPE and QCLASS. */ ++ size_t remaining = length - (12 + ret); ++ struct ++ { ++ uint16_t qtype; ++ uint16_t qclass; ++ } qtype_qclass; ++ if (remaining < sizeof (qtype_qclass)) ++ FAIL_EXIT1 ("malformed DNS query: " ++ "query lacks QCLASS/QTYPE, QNAME: %s", info->qname); ++ memcpy (&qtype_qclass, buffer + 12 + ret, sizeof (qtype_qclass)); ++ info->qclass = ntohs (qtype_qclass.qclass); ++ info->qtype = ntohs (qtype_qclass.qtype); ++} ++ ++ ++/* Main testing framework. */ ++ ++/* Per-server information. One struct is allocated for each test ++ server. */ ++struct resolv_test_server ++{ ++ /* Local address of the server. UDP and TCP use the same port. */ ++ struct sockaddr_in address; ++ ++ /* File descriptor of the UDP server, or -1 if this server is ++ disabled. */ ++ int socket_udp; ++ ++ /* File descriptor of the TCP server, or -1 if this server is ++ disabled. 
*/ ++ int socket_tcp; ++ ++ /* Counter of the number of responses processed so far. */ ++ size_t response_number; ++ ++ /* Thread handles for the server threads (if not disabled in the ++ configuration). */ ++ pthread_t thread_udp; ++ pthread_t thread_tcp; ++}; ++ ++/* Main struct for keeping track of libresolv redirection and ++ testing. */ ++struct resolv_test ++{ ++ /* After initialization, any access to the struct must be performed ++ while this lock is acquired. */ ++ pthread_mutex_t lock; ++ ++ /* Data for each test server. */ ++ struct resolv_test_server servers[resolv_max_test_servers]; ++ ++ /* Used if config.single_thread_udp is true. */ ++ pthread_t thread_udp_single; ++ ++ struct resolv_redirect_config config; ++ bool termination_requested; ++}; ++ ++/* Function implementing a server thread. */ ++typedef void (*thread_callback) (struct resolv_test *, int server_index); ++ ++/* Storage for thread-specific data, for passing to the ++ thread_callback function. */ ++struct thread_closure ++{ ++ struct resolv_test *obj; /* Current test object. */ ++ thread_callback callback; /* Function to call. */ ++ int server_index; /* Index of the implemented server. */ ++}; ++ ++/* Wrap response_callback as a function which can be passed to ++ pthread_create. */ ++static void * ++thread_callback_wrapper (void *arg) ++{ ++ struct thread_closure *closure = arg; ++ closure->callback (closure->obj, closure->server_index); ++ free (closure); ++ return NULL; ++} ++ ++/* Start a server thread for the specified SERVER_INDEX, implemented ++ by CALLBACK. */ ++static pthread_t ++start_server_thread (struct resolv_test *obj, int server_index, ++ thread_callback callback) ++{ ++ struct thread_closure *closure = xmalloc (sizeof (*closure)); ++ *closure = (struct thread_closure) ++ { ++ .obj = obj, ++ .callback = callback, ++ .server_index = server_index, ++ }; ++ return xpthread_create (NULL, thread_callback_wrapper, closure); ++} ++ ++/* Process one UDP query. 
Return false if a termination request has ++ been detected. */ ++static bool ++server_thread_udp_process_one (struct resolv_test *obj, int server_index) ++{ ++ unsigned char query[512]; ++ struct sockaddr_storage peer; ++ socklen_t peerlen = sizeof (peer); ++ size_t length = xrecvfrom (obj->servers[server_index].socket_udp, ++ query, sizeof (query), 0, ++ (struct sockaddr *) &peer, &peerlen); ++ /* Check for termination. */ ++ { ++ bool termination_requested; ++ xpthread_mutex_lock (&obj->lock); ++ termination_requested = obj->termination_requested; ++ xpthread_mutex_unlock (&obj->lock); ++ if (termination_requested) ++ return false; ++ } ++ ++ ++ struct query_info qinfo; ++ parse_query (&qinfo, query, length); ++ if (test_verbose > 0) ++ { ++ if (test_verbose > 1) ++ printf ("info: UDP server %d: incoming query:" ++ " %zd bytes, %s/%u/%u, tnxid=0x%02x%02x\n", ++ server_index, length, qinfo.qname, qinfo.qclass, qinfo.qtype, ++ query[0], query[1]); ++ else ++ printf ("info: UDP server %d: incoming query:" ++ " %zd bytes, %s/%u/%u\n", ++ server_index, length, qinfo.qname, qinfo.qclass, qinfo.qtype); ++ } ++ ++ struct resolv_response_context ctx = ++ { ++ .query_buffer = query, ++ .query_length = length, ++ .server_index = server_index, ++ .tcp = false, ++ }; ++ struct resolv_response_builder *b = response_builder_allocate (query, length); ++ obj->config.response_callback ++ (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype); ++ ++ if (b->drop) ++ { ++ if (test_verbose) ++ printf ("info: UDP server %d: dropping response to %s/%u/%u\n", ++ server_index, qinfo.qname, qinfo.qclass, qinfo.qtype); ++ } ++ else ++ { ++ if (test_verbose) ++ { ++ if (b->offset >= 12) ++ printf ("info: UDP server %d: sending response:" ++ " %zu bytes, RCODE %d (for %s/%u/%u)\n", ++ server_index, b->offset, b->buffer[3] & 0x0f, ++ qinfo.qname, qinfo.qclass, qinfo.qtype); ++ else ++ printf ("info: UDP server %d: sending response: %zu bytes" ++ " (for %s/%u/%u)\n", ++ server_index, b->offset, ++
qinfo.qname, qinfo.qclass, qinfo.qtype); ++ if (b->truncate_bytes > 0) ++ printf ("info: truncated by %u bytes\n", b->truncate_bytes); ++ } ++ size_t to_send = b->offset; ++ if (to_send < b->truncate_bytes) ++ to_send = 0; ++ else ++ to_send -= b->truncate_bytes; ++ ++ /* Ignore most errors here because the other end may have closed ++ the socket. */ ++ if (sendto (obj->servers[server_index].socket_udp, ++ b->buffer, to_send, 0, ++ (struct sockaddr *) &peer, peerlen) < 0) ++ TEST_VERIFY_EXIT (errno != EBADF); ++ } ++ response_builder_free (b); ++ return true; ++} ++ ++/* UDP thread_callback function. Variant for one thread per ++ server. */ ++static void ++server_thread_udp (struct resolv_test *obj, int server_index) ++{ ++ while (server_thread_udp_process_one (obj, server_index)) ++ ; ++} ++ ++/* Single-threaded UDP processing function, for the single_thread_udp ++ case. */ ++static void * ++server_thread_udp_single (void *closure) ++{ ++ struct resolv_test *obj = closure; ++ ++ struct pollfd fds[resolv_max_test_servers]; ++ for (int server_index = 0; server_index < resolv_max_test_servers; ++ ++server_index) ++ if (obj->config.servers[server_index].disable_udp) ++ fds[server_index] = (struct pollfd) {.fd = -1}; ++ else ++ { ++ fds[server_index] = (struct pollfd) ++ { ++ .fd = obj->servers[server_index].socket_udp, ++ .events = POLLIN ++ }; ++ ++ /* Make the socket non-blocking. 
*/ ++ int flags = fcntl (obj->servers[server_index].socket_udp, F_GETFL, 0); ++ if (flags < 0) ++ FAIL_EXIT1 ("fcntl (F_GETFL): %m"); ++ flags |= O_NONBLOCK; ++ if (fcntl (obj->servers[server_index].socket_udp, F_SETFL, flags) < 0) ++ FAIL_EXIT1 ("fcntl (F_SETFL): %m"); ++ } ++ ++ while (true) ++ { ++ xpoll (fds, resolv_max_test_servers, -1); ++ for (int server_index = 0; server_index < resolv_max_test_servers; ++ ++server_index) ++ if (fds[server_index].revents != 0) ++ { ++ if (!server_thread_udp_process_one (obj, server_index)) ++ goto out; ++ fds[server_index].revents = 0; ++ } ++ } ++ ++ out: ++ return NULL; ++} ++ ++/* Start the single UDP handler thread (for the single_thread_udp ++ case). */ ++static void ++start_server_thread_udp_single (struct resolv_test *obj) ++{ ++ obj->thread_udp_single ++ = xpthread_create (NULL, server_thread_udp_single, obj); ++} ++ ++/* Data describing a TCP client connect. */ ++struct tcp_thread_closure ++{ ++ struct resolv_test *obj; ++ int server_index; ++ int client_socket; ++}; ++ ++/* Read a complete DNS query packet. If EOF_OK, an immediate ++ end-of-file condition is acceptable. */ ++static bool ++read_fully (int fd, void *buf, size_t len, bool eof_ok) ++{ ++ const void *const end = buf + len; ++ while (buf < end) ++ { ++ ssize_t ret = read (fd, buf, end - buf); ++ if (ret == 0) ++ { ++ if (!eof_ok) ++ { ++ support_record_failure (); ++ printf ("error: unexpected EOF on TCP connection\n"); ++ } ++ return false; ++ } ++ else if (ret < 0) ++ { ++ if (!eof_ok || errno != ECONNRESET) ++ { ++ support_record_failure (); ++ printf ("error: TCP read: %m\n"); ++ } ++ return false; ++ } ++ buf += ret; ++ eof_ok = false; ++ } ++ return true; ++} ++ ++/* Write an array of iovecs. Terminate the process on failure. */ ++static void ++writev_fully (int fd, struct iovec *buffers, size_t count) ++{ ++ while (count > 0) ++ { ++ /* Skip zero-length write requests. 
*/ ++ if (buffers->iov_len == 0) ++ { ++ ++buffers; ++ --count; ++ continue; ++ } ++ /* Try to rewrite the remaining buffers. */ ++ ssize_t ret = writev (fd, buffers, count); ++ if (ret < 0) ++ FAIL_EXIT1 ("writev: %m"); ++ if (ret == 0) ++ FAIL_EXIT1 ("writev: invalid return value zero"); ++ /* Find the buffers that were successfully written. */ ++ while (ret > 0) ++ { ++ if (count == 0) ++ FAIL_EXIT1 ("internal writev consistency failure"); ++ /* Current buffer was partially written. */ ++ if (buffers->iov_len > (size_t) ret) ++ { ++ buffers->iov_base += ret; ++ buffers->iov_len -= ret; ++ ret = 0; ++ } ++ else ++ { ++ ret -= buffers->iov_len; ++ buffers->iov_len = 0; ++ ++buffers; ++ --count; ++ } ++ } ++ } ++} ++ ++/* Thread callback for handling a single established TCP connection to ++ a client. */ ++static void * ++server_thread_tcp_client (void *arg) ++{ ++ struct tcp_thread_closure *closure = arg; ++ ++ while (true) ++ { ++ /* Read packet length. */ ++ uint16_t query_length; ++ if (!read_fully (closure->client_socket, ++ &query_length, sizeof (query_length), true)) ++ break; ++ query_length = ntohs (query_length); ++ ++ /* Read the packet.
*/ ++ unsigned char *query_buffer = xmalloc (query_length); ++ read_fully (closure->client_socket, query_buffer, query_length, false); ++ ++ struct query_info qinfo; ++ parse_query (&qinfo, query_buffer, query_length); ++ if (test_verbose > 0) ++ { ++ if (test_verbose > 1) ++ printf ("info: UDP server %d: incoming query:" ++ " %d bytes, %s/%u/%u, tnxid=0x%02x%02x\n", ++ closure->server_index, query_length, ++ qinfo.qname, qinfo.qclass, qinfo.qtype, ++ query_buffer[0], query_buffer[1]); ++ else ++ printf ("info: TCP server %d: incoming query:" ++ " %u bytes, %s/%u/%u\n", ++ closure->server_index, query_length, ++ qinfo.qname, qinfo.qclass, qinfo.qtype); ++ } ++ ++ struct resolv_response_context ctx = ++ { ++ .query_buffer = query_buffer, ++ .query_length = query_length, ++ .server_index = closure->server_index, ++ .tcp = true, ++ }; ++ struct resolv_response_builder *b = response_builder_allocate ++ (query_buffer, query_length); ++ closure->obj->config.response_callback ++ (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype); ++ ++ if (b->drop) ++ { ++ if (test_verbose) ++ printf ("info: TCP server %d: dropping response to %s/%u/%u\n", ++ closure->server_index, ++ qinfo.qname, qinfo.qclass, qinfo.qtype); ++ } ++ else ++ { ++ if (test_verbose) ++ printf ("info: TCP server %d: sending response: %zu bytes" ++ " (for %s/%u/%u)\n", ++ closure->server_index, b->offset, ++ qinfo.qname, qinfo.qclass, qinfo.qtype); ++ uint16_t length = htons (b->offset); ++ size_t to_send = b->offset; ++ if (to_send < b->truncate_bytes) ++ to_send = 0; ++ else ++ to_send -= b->truncate_bytes; ++ struct iovec buffers[2] = ++ { ++ {&length, sizeof (length)}, ++ {b->buffer, to_send} ++ }; ++ writev_fully (closure->client_socket, buffers, 2); ++ } ++ bool close_flag = b->close; ++ response_builder_free (b); ++ free (query_buffer); ++ if (close_flag) ++ break; ++ } ++ ++ close (closure->client_socket); ++ free (closure); ++ return NULL; ++} ++ ++/* thread_callback for the TCP case. 
Accept connections and create a ++ new thread for each client. */ ++static void ++server_thread_tcp (struct resolv_test *obj, int server_index) ++{ ++ while (true) ++ { ++ /* Get the client connection. */ ++ int client_socket = xaccept ++ (obj->servers[server_index].socket_tcp, NULL, NULL); ++ ++ /* Check for termination. */ ++ xpthread_mutex_lock (&obj->lock); ++ if (obj->termination_requested) ++ { ++ xpthread_mutex_unlock (&obj->lock); ++ close (client_socket); ++ break; ++ } ++ xpthread_mutex_unlock (&obj->lock); ++ ++ /* Spawn a new thread for handling this connection. */ ++ struct tcp_thread_closure *closure = xmalloc (sizeof (*closure)); ++ *closure = (struct tcp_thread_closure) ++ { ++ .obj = obj, ++ .server_index = server_index, ++ .client_socket = client_socket, ++ }; ++ ++ pthread_t thr ++ = xpthread_create (NULL, server_thread_tcp_client, closure); ++ /* TODO: We should keep track of this thread so that we can ++ block in resolv_test_end until it has exited. */ ++ xpthread_detach (thr); ++ } ++} ++ ++/* Create UDP and TCP server sockets. */ ++static void ++make_server_sockets (struct resolv_test_server *server) ++{ ++ while (true) ++ { ++ server->socket_udp = xsocket (AF_INET, SOCK_DGRAM, IPPROTO_UDP); ++ server->socket_tcp = xsocket (AF_INET, SOCK_STREAM, IPPROTO_TCP); ++ ++ /* Pick the address for the UDP socket. */ ++ server->address = (struct sockaddr_in) ++ { ++ .sin_family = AF_INET, ++ .sin_addr = {.s_addr = htonl (INADDR_LOOPBACK)} ++ }; ++ xbind (server->socket_udp, ++ (struct sockaddr *)&server->address, sizeof (server->address)); ++ ++ /* Retrieve the address. */ ++ socklen_t addrlen = sizeof (server->address); ++ xgetsockname (server->socket_udp, ++ (struct sockaddr *)&server->address, &addrlen); ++ ++ /* Bind the TCP socket to the same address.
*/ ++ { ++ int on = 1; ++ xsetsockopt (server->socket_tcp, SOL_SOCKET, SO_REUSEADDR, ++ &on, sizeof (on)); ++ } ++ if (bind (server->socket_tcp, ++ (struct sockaddr *)&server->address, ++ sizeof (server->address)) != 0) ++ { ++ /* Port collision. The UDP bind succeeded, but the TCP BIND ++ failed. We assume here that the kernel will pick the ++ next local UDP address randomly. */ ++ if (errno == EADDRINUSE) ++ { ++ close (server->socket_udp); ++ close (server->socket_tcp); ++ continue; ++ } ++ FAIL_EXIT1 ("TCP bind: %m"); ++ } ++ xlisten (server->socket_tcp, 5); ++ break; ++ } ++} ++ ++/* One-time initialization of NSS. */ ++static void ++resolv_redirect_once (void) ++{ ++ /* Only use nss_dns. */ ++ __nss_configure_lookup ("hosts", "dns"); ++ __nss_configure_lookup ("networks", "dns"); ++ /* Enter a network namespace for isolation and firewall state ++ cleanup. The tests will still work if these steps fail, but they ++ may be less reliable. */ ++ support_become_root (); ++ support_enter_network_namespace (); ++} ++pthread_once_t resolv_redirect_once_var = PTHREAD_ONCE_INIT; ++ ++void ++resolv_test_init (void) ++{ ++ /* Perform one-time initialization of NSS. */ ++ xpthread_once (&resolv_redirect_once_var, resolv_redirect_once); ++} ++ ++/* Copy the search path from CONFIG.search to the _res object. 
*/ ++static void ++set_search_path (struct resolv_redirect_config config) ++{ ++ memset (_res.defdname, 0, sizeof (_res.defdname)); ++ memset (_res.dnsrch, 0, sizeof (_res.dnsrch)); ++ ++ char *current = _res.defdname; ++ char *end = current + sizeof (_res.defdname); ++ ++ for (unsigned int i = 0; ++ i < sizeof (config.search) / sizeof (config.search[0]); ++i) ++ { ++ if (config.search[i] == NULL) ++ continue; ++ ++ size_t length = strlen (config.search[i]) + 1; ++ size_t remaining = end - current; ++ TEST_VERIFY_EXIT (length <= remaining); ++ memcpy (current, config.search[i], length); ++ _res.dnsrch[i] = current; ++ current += length; ++ } ++} ++ ++struct resolv_test * ++resolv_test_start (struct resolv_redirect_config config) ++{ ++ /* Apply configuration defaults. */ ++ if (config.nscount == 0) ++ config.nscount = resolv_max_test_servers; ++ ++ struct resolv_test *obj = xmalloc (sizeof (*obj)); ++ *obj = (struct resolv_test) { ++ .config = config, ++ .lock = PTHREAD_MUTEX_INITIALIZER, ++ }; ++ ++ resolv_test_init (); ++ ++ /* Create all the servers, to reserve the necessary ports. */ ++ for (int server_index = 0; server_index < config.nscount; ++server_index) ++ make_server_sockets (obj->servers + server_index); ++ ++ /* Start server threads. Disable the server ports, as ++ requested. 
*/ ++ for (int server_index = 0; server_index < config.nscount; ++server_index) ++ { ++ struct resolv_test_server *server = obj->servers + server_index; ++ if (config.servers[server_index].disable_udp) ++ { ++ close (server->socket_udp); ++ server->socket_udp = -1; ++ } ++ else if (!config.single_thread_udp) ++ server->thread_udp = start_server_thread (obj, server_index, ++ server_thread_udp); ++ if (config.servers[server_index].disable_tcp) ++ { ++ close (server->socket_tcp); ++ server->socket_tcp = -1; ++ } ++ else ++ server->thread_tcp = start_server_thread (obj, server_index, ++ server_thread_tcp); ++ } ++ if (config.single_thread_udp) ++ start_server_thread_udp_single (obj); ++ ++ int timeout = 1; ++ ++ /* Initialize libresolv. */ ++ TEST_VERIFY_EXIT (res_init () == 0); ++ ++ /* Disable IPv6 name server addresses. The code below only ++ overrides the IPv4 addresses. */ ++ __res_iclose (&_res, true); ++ _res._u._ext.nscount = 0; ++ ++ /* Redirect queries to the server socket. */ ++ if (test_verbose) ++ { ++ printf ("info: old timeout value: %d\n", _res.retrans); ++ printf ("info: old retry attempt value: %d\n", _res.retry); ++ printf ("info: old _res.options: 0x%lx\n", _res.options); ++ printf ("info: old _res.nscount value: %d\n", _res.nscount); ++ printf ("info: old _res.ndots value: %d\n", _res.ndots); ++ } ++ _res.retrans = timeout; ++ _res.retry = 4; ++ _res.nscount = config.nscount; ++ _res.options = RES_INIT | RES_RECURSE | RES_DEFNAMES | RES_DNSRCH; ++ _res.ndots = 1; ++ if (test_verbose) ++ { ++ printf ("info: new timeout value: %d\n", _res.retrans); ++ printf ("info: new retry attempt value: %d\n", _res.retry); ++ printf ("info: new _res.options: 0x%lx\n", _res.options); ++ printf ("info: new _res.nscount value: %d\n", _res.nscount); ++ printf ("info: new _res.ndots value: %d\n", _res.ndots); ++ } ++ for (int server_index = 0; server_index < config.nscount; ++server_index) ++ { ++ _res.nsaddr_list[server_index] = obj->servers[server_index].address; ++ 
if (test_verbose) ++ { ++ char buf[256]; ++ TEST_VERIFY_EXIT ++ (inet_ntop (AF_INET, &obj->servers[server_index].address.sin_addr, ++ buf, sizeof (buf)) != NULL); ++ printf ("info: server %d: %s/%u\n", ++ server_index, buf, ++ htons (obj->servers[server_index].address.sin_port)); ++ } ++ } ++ ++ set_search_path (config); ++ ++ return obj; ++} ++ ++void ++resolv_test_end (struct resolv_test *obj) ++{ ++ res_close (); ++ ++ xpthread_mutex_lock (&obj->lock); ++ obj->termination_requested = true; ++ xpthread_mutex_unlock (&obj->lock); ++ ++ /* Send trigger packets to unblock the server threads. */ ++ for (int server_index = 0; server_index < obj->config.nscount; ++ ++server_index) ++ { ++ if (!obj->config.servers[server_index].disable_udp) ++ { ++ int sock = xsocket (AF_INET, SOCK_DGRAM, IPPROTO_UDP); ++ xsendto (sock, "", 1, 0, ++ (struct sockaddr *) &obj->servers[server_index].address, ++ sizeof (obj->servers[server_index].address)); ++ close (sock); ++ } ++ if (!obj->config.servers[server_index].disable_tcp) ++ { ++ int sock = xsocket (AF_INET, SOCK_STREAM, IPPROTO_TCP); ++ xconnect (sock, ++ (struct sockaddr *) &obj->servers[server_index].address, ++ sizeof (obj->servers[server_index].address)); ++ close (sock); ++ } ++ } ++ ++ if (obj->config.single_thread_udp) ++ xpthread_join (obj->thread_udp_single); ++ ++ /* Wait for the server threads to terminate. 
*/ ++ for (int server_index = 0; server_index < obj->config.nscount; ++ ++server_index) ++ { ++ if (!obj->config.servers[server_index].disable_udp) ++ { ++ if (!obj->config.single_thread_udp) ++ xpthread_join (obj->servers[server_index].thread_udp); ++ close (obj->servers[server_index].socket_udp); ++ } ++ if (!obj->config.servers[server_index].disable_tcp) ++ { ++ xpthread_join (obj->servers[server_index].thread_tcp); ++ close (obj->servers[server_index].socket_tcp); ++ } ++ } ++ ++ free (obj); ++} +diff --git a/support/resolv_test.h b/support/resolv_test.h +new file mode 100644 +index 0000000..7a9f1f7 +--- /dev/null ++++ b/support/resolv_test.h +@@ -0,0 +1,169 @@ ++/* DNS test framework and libresolv redirection. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SUPPORT_RESOLV_TEST_H ++#define SUPPORT_RESOLV_TEST_H ++ ++#include ++#include ++#include ++ ++__BEGIN_DECLS ++ ++/* This struct provides context information when the response callback ++ specified in struct resolv_redirect_config is invoked. */ ++struct resolv_response_context ++{ ++ const unsigned char *query_buffer; ++ size_t query_length; ++ int server_index; ++ bool tcp; ++}; ++ ++/* This opaque struct is used to construct responses from within the ++ response callback function. 
*/ ++struct resolv_response_builder; ++ ++/* This opaque struct collects information about the resolver testing ++ currently in progress. */ ++struct resolv_test; ++ ++enum ++ { ++ /* Maximum number of test servers supported by the framework. */ ++ resolv_max_test_servers = 3, ++ }; ++ ++/* Configuration settings specific to individual test servers. */ ++struct resolv_redirect_server_config ++{ ++ bool disable_tcp; /* If true, no TCP server is listening. */ ++ bool disable_udp; /* If true, no UDP server is listening. */ ++}; ++ ++/* Instructions for setting up the libresolv redirection. */ ++struct resolv_redirect_config ++{ ++ /* The response_callback function is called for every incoming DNS ++ packet, over UDP or TCP. It must be specified, the other ++ configuration settings are optional. */ ++ void (*response_callback) (const struct resolv_response_context *, ++ struct resolv_response_builder *, ++ const char *qname, ++ uint16_t qclass, uint16_t qtype); ++ ++ /* Per-server configuration. */ ++ struct resolv_redirect_server_config servers[resolv_max_test_servers]; ++ ++ /* Search path entries. The first entry serves as the default ++ domain name as well. */ ++ const char *search[7]; ++ ++ /* Number of servers to activate in resolv. 0 means the default, ++ resolv_max_test_servers. */ ++ int nscount; ++ ++ /* If true, use a single thread to process all UDP queries. This ++ may result in more predictable ordering of queries and ++ responses. */ ++ bool single_thread_udp; ++}; ++ ++/* Configure NSS to use nss_dns only for applicable databases, and try ++ to put the process into a network namespace for better isolation. ++ This may have to be called before resolv_test_start, before the ++ process creates any threads. Otherwise, initialization is ++ performed by resolv_test_start implicitly. */ ++void resolv_test_init (void); ++ ++/* Initiate resolver testing. This updates the _res variable as ++ needed.
As a side effect, NSS is reconfigured to use nss_dns only ++ for applicable databases, and the process may enter a network ++ namespace for better isolation. */ ++struct resolv_test *resolv_test_start (struct resolv_redirect_config); ++ ++/* Call this function at the end of resolver testing, to free ++ resources and report pending errors (if any). */ ++void resolv_test_end (struct resolv_test *); ++ ++/* The remaining facilities in this file are used for constructing ++ response packets from the response_callback function. */ ++ ++/* Special settings for constructing responses from the callback. */ ++struct resolv_response_flags ++{ ++ /* 4-bit response code to incorporate into the response. */ ++ unsigned char rcode; ++ ++ /* If true, the TC (truncation) flag will be set. */ ++ bool tc; ++ ++ /* Initial section count values. Can be used to artificially ++ increase the counts, for malformed packet testing. */ ++ unsigned short qdcount; ++ unsigned short ancount; ++ unsigned short nscount; ++ unsigned short adcount; ++}; ++ ++/* Begin a new response with the requested flags. Must be called ++ first. */ ++void resolv_response_init (struct resolv_response_builder *, ++ struct resolv_response_flags); ++ ++/* Switches to the section in the response packet. Only forward ++ movement is supported. */ ++void resolv_response_section (struct resolv_response_builder *, ns_sect); ++ ++/* Add a question record to the question section. */ ++void resolv_response_add_question (struct resolv_response_builder *, ++ const char *name, uint16_t class, ++ uint16_t type); ++/* Starts a new resource record with the specified owner name, class, ++ type, and TTL. Data is supplied with resolv_response_add_data or ++ resolv_response_add_name. */ ++void resolv_response_open_record (struct resolv_response_builder *, ++ const char *name, uint16_t class, ++ uint16_t type, uint32_t ttl); ++ ++/* Add unstructured bytes to the RDATA part of a resource record.
*/ ++void resolv_response_add_data (struct resolv_response_builder *, ++ const void *, size_t); ++ ++/* Add a compressed domain name to the RDATA part of a resource ++ record. */ ++void resolv_response_add_name (struct resolv_response_builder *, ++ const char *name); ++ ++/* Mark the end of the constructed record. Must be called last. */ ++void resolv_response_close_record (struct resolv_response_builder *); ++ ++/* Drop this query packet (that is, do not send a response, not even ++ an empty packet). */ ++void resolv_response_drop (struct resolv_response_builder *); ++ ++/* In TCP mode, close the connection after this packet (if a response ++ is sent). */ ++void resolv_response_close (struct resolv_response_builder *); ++ ++/* The size of the response packet built so far. */ ++size_t resolv_response_length (const struct resolv_response_builder *); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_RESOLV_TEST_H */ +diff --git a/support/run_diff.h b/support/run_diff.h +new file mode 100644 +index 0000000..f65b5dd +--- /dev/null ++++ b/support/run_diff.h +@@ -0,0 +1,31 @@ ++/* Invoke the system diff tool to compare two strings. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#ifndef SUPPORT_RUN_DIFF_H ++#define SUPPORT_RUN_DIFF_H ++ ++/* Compare the two NUL-terminated strings LEFT and RIGHT using the ++ diff tool. Label the sides of the diff with LEFT_LABEL and ++ RIGHT_LABEL, respectively. ++ ++ This function assumes that LEFT and RIGHT are different ++ strings. */ ++void support_run_diff (const char *left_label, const char *left, ++ const char *right_label, const char *right); ++ ++#endif /* SUPPORT_RUN_DIFF_H */ +diff --git a/support/set_fortify_handler.c b/support/set_fortify_handler.c +new file mode 100644 +index 0000000..f434a80 +--- /dev/null ++++ b/support/set_fortify_handler.c +@@ -0,0 +1,34 @@ ++/* Set signal handler for use in fortify tests. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++ ++void ++set_fortify_handler (void (*handler) (int sig)) ++{ ++ struct sigaction sa; ++ ++ sa.sa_handler = handler; ++ sa.sa_flags = 0; ++ sigemptyset (&sa.sa_mask); ++ ++ sigaction (SIGABRT, &sa, NULL); ++ ignore_stderr (); ++} +diff --git a/support/support.h b/support/support.h +new file mode 100644 +index 0000000..7292e2a +--- /dev/null ++++ b/support/support.h +@@ -0,0 +1,59 @@ ++/* Common extra functions. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* This header file should only contain definitions compatible with ++ C90. (Using __attribute__ is fine because <sys/cdefs.h> provides a ++ fallback.) */ ++ ++#ifndef SUPPORT_H ++#define SUPPORT_H ++ ++#include ++#include ++ ++__BEGIN_DECLS ++ ++/* Write a message to standard output. Can be used in signal ++ handlers. */ ++void write_message (const char *message) __attribute__ ((nonnull (1))); ++ ++/* Avoid all the buffer overflow messages on stderr. */ ++void ignore_stderr (void); ++ ++/* Set fortification error handler. Used when tests want to verify that bad ++ code is caught by the library. */ ++void set_fortify_handler (void (*handler) (int sig)); ++ ++/* Report an out-of-memory error for the allocation of SIZE bytes in ++ FUNCTION, terminating the process. */ ++void oom_error (const char *function, size_t size) ++ __attribute__ ((nonnull (1))); ++ ++/* Error-checking wrapper functions which terminate the process on ++ error. */ ++ ++void *xmalloc (size_t) __attribute__ ((malloc)); ++void *xcalloc (size_t n, size_t s) __attribute__ ((malloc)); ++void *xrealloc (void *p, size_t n); ++char *xasprintf (const char *format, ...)
++ __attribute__ ((format (printf, 1, 2), malloc)); ++char *xstrdup (const char *); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_H */ +diff --git a/support/support_become_root.c b/support/support_become_root.c +new file mode 100644 +index 0000000..3fa0bd4 +--- /dev/null ++++ b/support/support_become_root.c +@@ -0,0 +1,40 @@ ++/* Acquire root privileges. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++bool ++support_become_root (void) ++{ ++#ifdef CLONE_NEWUSER ++ if (unshare (CLONE_NEWUSER | CLONE_NEWNS) == 0) ++ /* Even if we do not have UID zero, we have extended privileges at ++ this point. */ ++ return true; ++#endif ++ if (setuid (0) != 0) ++ { ++ printf ("warning: could not become root outside namespace (%m)\n"); ++ return false; ++ } ++ return true; ++} +diff --git a/support/support_enter_network_namespace.c b/support/support_enter_network_namespace.c +new file mode 100644 +index 0000000..d2e78fe +--- /dev/null ++++ b/support/support_enter_network_namespace.c +@@ -0,0 +1,74 @@ ++/* Enter a network namespace. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static bool in_uts_namespace; ++ ++bool ++support_enter_network_namespace (void) ++{ ++#ifdef CLONE_NEWUTS ++ if (unshare (CLONE_NEWUTS) == 0) ++ in_uts_namespace = true; ++ else ++ printf ("warning: unshare (CLONE_NEWUTS) failed: %m\n"); ++#endif ++ ++#ifdef CLONE_NEWNET ++ if (unshare (CLONE_NEWNET) == 0) ++ { ++ /* Bring up the loopback interface. */ ++ int fd = xsocket (AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0); ++ struct ifreq req; ++ strcpy (req.ifr_name, "lo"); ++ TEST_VERIFY_EXIT (ioctl (fd, SIOCGIFFLAGS, &req) == 0); ++ bool already_up = req.ifr_flags & IFF_UP; ++ if (already_up) ++ /* This means that we likely have not achieved isolation from ++ the parent namespace. 
*/ ++ printf ("warning: loopback interface already exists" ++ " in new network namespace\n"); ++ else ++ { ++ req.ifr_flags |= IFF_UP | IFF_RUNNING; ++ TEST_VERIFY_EXIT (ioctl (fd, SIOCSIFFLAGS, &req) == 0); ++ } ++ close (fd); ++ ++ return !already_up; ++ } ++#endif ++ printf ("warning: could not enter network namespace\n"); ++ return false; ++} ++ ++bool ++support_in_uts_namespace (void) ++{ ++ return in_uts_namespace; ++} +diff --git a/support/support_format_address_family.c b/support/support_format_address_family.c +new file mode 100644 +index 0000000..5d42c42 +--- /dev/null ++++ b/support/support_format_address_family.c +@@ -0,0 +1,35 @@ ++/* Convert an address family to a string. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++ ++char * ++support_format_address_family (int family) ++{ ++ switch (family) ++ { ++ case AF_INET: ++ return xstrdup ("INET"); ++ case AF_INET6: ++ return xstrdup ("INET6"); ++ default: ++ return xasprintf ("", family); ++ } ++} +diff --git a/support/support_format_addrinfo.c b/support/support_format_addrinfo.c +new file mode 100644 +index 0000000..262e0df +--- /dev/null ++++ b/support/support_format_addrinfo.c +@@ -0,0 +1,202 @@ ++/* Convert struct addrinfo values to a string. 
++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static size_t ++socket_address_length (int family) ++{ ++ switch (family) ++ { ++ case AF_INET: ++ return sizeof (struct sockaddr_in); ++ case AF_INET6: ++ return sizeof (struct sockaddr_in6); ++ default: ++ return -1; ++ } ++} ++ ++static void ++format_ai_flags (FILE *out, struct addrinfo *ai, int flag, const char *name, ++ int * flags_printed) ++{ ++ if ((ai->ai_flags & flag) != 0) ++ fprintf (out, " %s", name); ++ *flags_printed |= flag; ++} ++ ++static void ++format_ai_one (FILE *out, struct addrinfo *ai, int *flags) ++{ ++ /* ai_flags */ ++ if (ai->ai_flags != *flags) ++ { ++ fprintf (out, "flags:"); ++ int flags_printed = 0; ++#define FLAG(flag) format_ai_flags (out, ai, flag, #flag, &flags_printed) ++ FLAG (AI_PASSIVE); ++ FLAG (AI_CANONNAME); ++ FLAG (AI_NUMERICHOST); ++ FLAG (AI_V4MAPPED); ++ FLAG (AI_ALL); ++ FLAG (AI_ADDRCONFIG); ++ FLAG (AI_IDN); ++ FLAG (AI_CANONIDN); ++ FLAG (AI_IDN_ALLOW_UNASSIGNED); ++ FLAG (AI_IDN_USE_STD3_ASCII_RULES); ++ FLAG (AI_NUMERICSERV); ++#undef FLAG ++ int remaining = ai->ai_flags & ~flags_printed; ++ if (remaining != 0) ++ fprintf (out, " %08x", remaining); ++ fprintf (out, "\n"); ++ *flags = 
ai->ai_flags; ++ } ++ ++ { ++ char type_buf[32]; ++ const char *type_str; ++ char proto_buf[32]; ++ const char *proto_str; ++ ++ /* ai_socktype */ ++ switch (ai->ai_socktype) ++ { ++ case SOCK_RAW: ++ type_str = "RAW"; ++ break; ++ case SOCK_DGRAM: ++ type_str = "DGRAM"; ++ break; ++ case SOCK_STREAM: ++ type_str = "STREAM"; ++ break; ++ default: ++ snprintf (type_buf, sizeof (type_buf), "%d", ai->ai_socktype); ++ type_str = type_buf; ++ } ++ ++ /* ai_protocol */ ++ switch (ai->ai_protocol) ++ { ++ case IPPROTO_IP: ++ proto_str = "IP"; ++ break; ++ case IPPROTO_UDP: ++ proto_str = "UDP"; ++ break; ++ case IPPROTO_TCP: ++ proto_str = "TCP"; ++ break; ++ default: ++ snprintf (proto_buf, sizeof (proto_buf), "%d", ai->ai_protocol); ++ proto_str = proto_buf; ++ } ++ fprintf (out, "address: %s/%s", type_str, proto_str); ++ } ++ ++ /* ai_addrlen */ ++ if (ai->ai_addrlen != socket_address_length (ai->ai_family)) ++ { ++ char *family = support_format_address_family (ai->ai_family); ++ fprintf (out, "error: invalid address length %d for %s\n", ++ ai->ai_addrlen, family); ++ free (family); ++ } ++ ++ /* ai_addr */ ++ { ++ char buf[128]; ++ uint16_t port; ++ const char *ret; ++ switch (ai->ai_family) ++ { ++ case AF_INET: ++ { ++ struct sockaddr_in *sin = (struct sockaddr_in *) ai->ai_addr; ++ ret = inet_ntop (AF_INET, &sin->sin_addr, buf, sizeof (buf)); ++ port = sin->sin_port; ++ } ++ break; ++ case AF_INET6: ++ { ++ struct sockaddr_in6 *sin = (struct sockaddr_in6 *) ai->ai_addr; ++ ret = inet_ntop (AF_INET6, &sin->sin6_addr, buf, sizeof (buf)); ++ port = sin->sin6_port; ++ } ++ break; ++ default: ++ errno = EAFNOSUPPORT; ++ ret = NULL; ++ } ++ if (ret == NULL) ++ fprintf (out, "error: inet_top failed: %m\n"); ++ else ++ fprintf (out, " %s %u\n", buf, ntohs (port)); ++ } ++ ++ /* ai_canonname */ ++ if (ai->ai_canonname != NULL) ++ fprintf (out, "canonname: %s\n", ai->ai_canonname); ++} ++ ++/* Format all the addresses in one address family. 
*/ ++static void ++format_ai_family (FILE *out, struct addrinfo *ai, int family, int *flags) ++{ ++ while (ai) ++ { ++ if (ai->ai_family == family) ++ format_ai_one (out, ai, flags); ++ ai = ai->ai_next; ++ } ++} ++ ++char * ++support_format_addrinfo (struct addrinfo *ai, int ret) ++{ ++ int errno_copy = errno; ++ ++ struct xmemstream mem; ++ xopen_memstream (&mem); ++ if (ret != 0) ++ { ++ fprintf (mem.out, "error: %s\n", gai_strerror (ret)); ++ if (ret == EAI_SYSTEM) ++ { ++ errno = errno_copy; ++ fprintf (mem.out, "error: %m\n"); ++ } ++ } ++ else ++ { ++ int flags = 0; ++ format_ai_family (mem.out, ai, AF_INET, &flags); ++ format_ai_family (mem.out, ai, AF_INET6, &flags); ++ } ++ ++ xfclose_memstream (&mem); ++ return mem.buffer; ++} +diff --git a/support/support_format_dns_packet.c b/support/support_format_dns_packet.c +new file mode 100644 +index 0000000..21fe7e5 +--- /dev/null ++++ b/support/support_format_dns_packet.c +@@ -0,0 +1,215 @@ ++/* Convert a DNS packet to a human-readable representation. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct in_buffer ++{ ++ const unsigned char *data; ++ size_t size; ++}; ++ ++static inline bool ++extract_8 (struct in_buffer *in, unsigned char *value) ++{ ++ if (in->size == 0) ++ return false; ++ *value = in->data[0]; ++ ++in->data; ++ --in->size; ++ return true; ++} ++ ++static inline bool ++extract_16 (struct in_buffer *in, unsigned short *value) ++{ ++ if (in->size < 2) ++ return false; ++ *value = (in->data[0] << 8) | in->data[1]; ++ in->data += 2; ++ in->size -= 2; ++ return true; ++} ++ ++static inline bool ++extract_32 (struct in_buffer *in, unsigned *value) ++{ ++ if (in->size < 4) ++ return false; ++ unsigned a = in->data[0]; ++ unsigned b = in->data[1]; ++ unsigned c = in->data[2]; ++ unsigned d = in->data[3]; ++ *value = (a << 24) | (b << 16) | (c << 8) | d; ++ in->data += 4; ++ in->size -= 4; ++ return true; ++} ++ ++static inline bool ++extract_bytes (struct in_buffer *in, size_t length, struct in_buffer *value) ++{ ++ if (in->size < length) ++ return false; ++ *value = (struct in_buffer) {in->data, length}; ++ in->data += length; ++ in->size -= length; ++ return true; ++} ++ ++struct dname ++{ ++ char name[MAXDNAME + 1]; ++}; ++ ++static bool ++extract_name (struct in_buffer full, struct in_buffer *in, struct dname *value) ++{ ++ const unsigned char *full_end = full.data + full.size; ++ /* Sanity checks; these indicate buffer misuse. 
*/ ++ TEST_VERIFY_EXIT ++ (!(in->data < full.data || in->data > full_end ++ || in->size > (size_t) (full_end - in->data))); ++ int ret = dn_expand (full.data, full_end, in->data, ++ value->name, sizeof (value->name)); ++ if (ret < 0) ++ return false; ++ in->data += ret; ++ in->size -= ret; ++ return true; ++} ++ ++char * ++support_format_dns_packet (const unsigned char *buffer, size_t length) ++{ ++ struct in_buffer full = { buffer, length }; ++ struct in_buffer in = full; ++ struct xmemstream mem; ++ xopen_memstream (&mem); ++ ++ unsigned short txnid; ++ unsigned short flags; ++ unsigned short qdcount; ++ unsigned short ancount; ++ unsigned short nscount; ++ unsigned short adcount; ++ if (!(extract_16 (&in, &txnid) ++ && extract_16 (&in, &flags) ++ && extract_16 (&in, &qdcount) ++ && extract_16 (&in, &ancount) ++ && extract_16 (&in, &nscount) ++ && extract_16 (&in, &adcount))) ++ { ++ fprintf (mem.out, "error: could not parse DNS header\n"); ++ goto out; ++ } ++ if (qdcount != 1) ++ { ++ fprintf (mem.out, "error: question count is %d, not 1\n", qdcount); ++ goto out; ++ } ++ struct dname qname; ++ if (!extract_name (full, &in, &qname)) ++ { ++ fprintf (mem.out, "error: malformed QNAME\n"); ++ goto out; ++ } ++ unsigned short qtype; ++ unsigned short qclass; ++ if (!(extract_16 (&in, &qtype) ++ && extract_16 (&in, &qclass))) ++ { ++ fprintf (mem.out, "error: malformed question\n"); ++ goto out; ++ } ++ if (qtype != T_A && qtype != T_AAAA && qtype != T_PTR) ++ { ++ fprintf (mem.out, "error: unsupported QTYPE %d\n", qtype); ++ goto out; ++ } ++ ++ fprintf (mem.out, "name: %s\n", qname.name); ++ ++ for (int i = 0; i < ancount; ++i) ++ { ++ struct dname rname; ++ if (!extract_name (full, &in, &rname)) ++ { ++ fprintf (mem.out, "error: malformed record name\n"); ++ goto out; ++ } ++ unsigned short rtype; ++ unsigned short rclass; ++ unsigned ttl; ++ unsigned short rdlen; ++ struct in_buffer rdata; ++ if (!(extract_16 (&in, &rtype) ++ && extract_16 (&in, &rclass) ++ && 
extract_32 (&in, &ttl) ++ && extract_16 (&in, &rdlen) ++ && extract_bytes (&in, rdlen, &rdata))) ++ { ++ fprintf (mem.out, "error: malformed record header\n"); ++ goto out; ++ } ++ /* Skip non-matching record types. */ ++ if (rtype != qtype || rclass != qclass) ++ continue; ++ switch (rtype) ++ { ++ case T_A: ++ if (rdlen == 4) ++ fprintf (mem.out, "address: %d.%d.%d.%d\n", ++ rdata.data[0], ++ rdata.data[1], ++ rdata.data[2], ++ rdata.data[3]); ++ else ++ fprintf (mem.out, "error: A record of size %d: %s\n", rdlen, rname.name); ++ break; ++ case T_AAAA: ++ { ++ char buf[100]; ++ if (inet_ntop (AF_INET6, rdata.data, buf, sizeof (buf)) == NULL) ++ fprintf (mem.out, "error: AAAA record decoding failed: %m\n"); ++ else ++ fprintf (mem.out, "address: %s\n", buf); ++ } ++ break; ++ case T_CNAME: ++ case T_PTR: ++ { ++ struct dname name; ++ if (extract_name (full, &in, &name)) ++ fprintf (mem.out, "name: %s\n", name.name); ++ else ++ fprintf (mem.out, "error: malformed CNAME/PTR record\n"); ++ } ++ } ++ } ++ ++ out: ++ xfclose_memstream (&mem); ++ return mem.buffer; ++} +diff --git a/support/support_format_herrno.c b/support/support_format_herrno.c +new file mode 100644 +index 0000000..493d6ae +--- /dev/null ++++ b/support/support_format_herrno.c +@@ -0,0 +1,45 @@ ++/* Convert a h_errno error code to a string. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++ ++char * ++support_format_herrno (int code) ++{ ++ const char *errstr; ++ switch (code) ++ { ++ case HOST_NOT_FOUND: ++ errstr = "HOST_NOT_FOUND"; ++ break; ++ case NO_ADDRESS: ++ errstr = "NO_ADDRESS"; ++ break; ++ case NO_RECOVERY: ++ errstr = "NO_RECOVERY"; ++ break; ++ case TRY_AGAIN: ++ errstr = "TRY_AGAIN"; ++ break; ++ default: ++ return xasprintf ("\n", code); ++ } ++ return xstrdup (errstr); ++} +diff --git a/support/support_format_hostent.c b/support/support_format_hostent.c +new file mode 100644 +index 0000000..5b5f260 +--- /dev/null ++++ b/support/support_format_hostent.c +@@ -0,0 +1,75 @@ ++/* Convert a struct hostent object to a string. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++static int ++address_length (int family) ++{ ++ switch (family) ++ { ++ case AF_INET: ++ return 4; ++ case AF_INET6: ++ return 16; ++ } ++ return -1; ++} ++ ++char * ++support_format_hostent (struct hostent *h) ++{ ++ if (h == NULL) ++ { ++ char *value = support_format_herrno (h_errno); ++ char *result = xasprintf ("error: %s\n", value); ++ free (value); ++ return result; ++ } ++ ++ struct xmemstream mem; ++ xopen_memstream (&mem); ++ ++ fprintf (mem.out, "name: %s\n", h->h_name); ++ for (char **alias = h->h_aliases; *alias != NULL; ++alias) ++ fprintf (mem.out, "alias: %s\n", *alias); ++ for (unsigned i = 0; h->h_addr_list[i] != NULL; ++i) ++ { ++ char buf[128]; ++ if (inet_ntop (h->h_addrtype, h->h_addr_list[i], ++ buf, sizeof (buf)) == NULL) ++ fprintf (mem.out, "error: inet_ntop failed: %m\n"); ++ else ++ fprintf (mem.out, "address: %s\n", buf); ++ } ++ if (h->h_length != address_length (h->h_addrtype)) ++ { ++ char *family = support_format_address_family (h->h_addrtype); ++ fprintf (mem.out, "error: invalid address length %d for %s\n", ++ h->h_length, family); ++ free (family); ++ } ++ ++ xfclose_memstream (&mem); ++ return mem.buffer; ++} +diff --git a/support/support_format_netent.c b/support/support_format_netent.c +new file mode 100644 +index 0000000..020f572 +--- /dev/null ++++ b/support/support_format_netent.c +@@ -0,0 +1,52 @@ ++/* Convert a struct netent object to a string. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++char * ++support_format_netent (struct netent *e) ++{ ++ if (e == NULL) ++ { ++ char *value = support_format_herrno (h_errno); ++ char *result = xasprintf ("error: %s\n", value); ++ free (value); ++ return result; ++ } ++ ++ struct xmemstream mem; ++ xopen_memstream (&mem); ++ ++ if (e->n_name != NULL) ++ fprintf (mem.out, "name: %s\n", e->n_name); ++ for (char **ap = e->n_aliases; *ap != NULL; ++ap) ++ fprintf (mem.out, "alias: %s\n", *ap); ++ if (e->n_addrtype != AF_INET) ++ fprintf (mem.out, "addrtype: %d\n", e->n_addrtype); ++ /* On alpha, e->n_net is an unsigned long. */ ++ unsigned int n_net = e->n_net; ++ fprintf (mem.out, "net: 0x%08x\n", n_net); ++ ++ xfclose_memstream (&mem); ++ return mem.buffer; ++} +diff --git a/support/support_record_failure.c b/support/support_record_failure.c +new file mode 100644 +index 0000000..684055c +--- /dev/null ++++ b/support/support_record_failure.c +@@ -0,0 +1,106 @@ ++/* Global test failure counter. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* This structure keeps track of test failures. The counter is ++ incremented on each failure. The failed member is set to true if a ++ failure is detected, so that even if the counter wraps around to ++ zero, the failure of a test can be detected. ++ ++ The init constructor function below puts *state on a shared ++ annonymous mapping, so that failure reports from subprocesses ++ propagate to the parent process. */ ++struct test_failures ++{ ++ unsigned int counter; ++ unsigned int failed; ++}; ++static struct test_failures *state; ++ ++static __attribute__ ((constructor)) void ++init (void) ++{ ++ void *ptr = mmap (NULL, sizeof (*state), PROT_READ | PROT_WRITE, ++ MAP_ANONYMOUS | MAP_SHARED, -1, 0); ++ if (ptr == MAP_FAILED) ++ { ++ printf ("error: could not map %zu bytes: %m\n", sizeof (*state)); ++ exit (1); ++ } ++ /* Zero-initialization of the struct is sufficient. */ ++ state = ptr; ++} ++ ++void ++support_record_failure (void) ++{ ++ if (state == NULL) ++ { ++ write_message ++ ("error: support_record_failure called without initialization\n"); ++ _exit (1); ++ } ++ /* Relaxed MO is sufficient because we are only interested in the ++ values themselves, in isolation. 
*/ ++ __atomic_store_n (&state->failed, 1, __ATOMIC_RELEASE); ++ __atomic_add_fetch (&state->counter, 1, __ATOMIC_RELEASE); ++} ++ ++int ++support_report_failure (int status) ++{ ++ if (state == NULL) ++ { ++ write_message ++ ("error: support_report_failure called without initialization\n"); ++ return 1; ++ } ++ ++ /* Relaxed MO is sufficient because acquire test result reporting ++ assumes that exiting from the main thread happens before the ++ error reporting via support_record_failure, which requires some ++ form of external synchronization. */ ++ bool failed = __atomic_load_n (&state->failed, __ATOMIC_RELAXED); ++ if (failed) ++ printf ("error: %u test failures\n", ++ __atomic_load_n (&state->counter, __ATOMIC_RELAXED)); ++ ++ if ((status == 0 || status == EXIT_UNSUPPORTED) && failed) ++ /* If we have a recorded failure, it overrides a non-failure ++ report from the test function. */ ++ status = 1; ++ return status; ++} ++ ++void ++support_record_failure_reset (void) ++{ ++ /* Only used for testing the test framework, with external ++ synchronization, but use release MO for consistency. */ ++ __atomic_store_n (&state->failed, 0, __ATOMIC_RELAXED); ++ __atomic_add_fetch (&state->counter, 0, __ATOMIC_RELAXED); ++} +diff --git a/support/support_run_diff.c b/support/support_run_diff.c +new file mode 100644 +index 0000000..3085037 +--- /dev/null ++++ b/support/support_run_diff.c +@@ -0,0 +1,76 @@ ++/* Invoke the system diff tool to compare two strings. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static char * ++write_to_temp_file (const char *prefix, const char *str) ++{ ++ char *template = xasprintf ("run_diff-%s", prefix); ++ char *name = NULL; ++ int fd = create_temp_file (template, &name); ++ TEST_VERIFY_EXIT (fd >= 0); ++ free (template); ++ xwrite (fd, str, strlen (str)); ++ TEST_VERIFY_EXIT (close (fd) == 0); ++ return name; ++} ++ ++void ++support_run_diff (const char *left_label, const char *left, ++ const char *right_label, const char *right) ++{ ++ /* Ensure that the diff command output is ordered properly with ++ standard output. */ ++ TEST_VERIFY_EXIT (fflush (stdout) == 0); ++ ++ char *left_path = write_to_temp_file ("left-diff", left); ++ char *right_path = write_to_temp_file ("right-diff", right); ++ ++ pid_t pid = xfork (); ++ if (pid == 0) ++ { ++ execlp ("diff", "diff", "-u", ++ "--label", left_label, "--label", right_label, ++ "--", left_path, right_path, ++ NULL); ++ _exit (17); ++ } ++ else ++ { ++ int status; ++ xwaitpid (pid, &status, 0); ++ if (!WIFEXITED (status) || WEXITSTATUS (status) != 1) ++ printf ("warning: could not run diff, exit status: %d\n" ++ "*** %s ***\n%s\n" ++ "*** %s ***\n%s\n", ++ status, left_label, left, right_label, right); ++ } ++ ++ free (right_path); ++ free (left_path); ++} +diff --git a/support/support_test_main.c b/support/support_test_main.c +new file mode 100644 +index 0000000..914d64f +--- /dev/null ++++ b/support/support_test_main.c +@@ -0,0 +1,423 @@ ++/* Main worker function for the test driver. 
++ Copyright (C) 1998-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static const struct option default_options[] = ++{ ++ TEST_DEFAULT_OPTIONS ++ { NULL, 0, NULL, 0 } ++}; ++ ++/* Show people how to run the program. 
*/ ++static void ++usage (const struct option *options) ++{ ++ size_t i; ++ ++ printf ("Usage: %s [options]\n" ++ "\n" ++ "Environment Variables:\n" ++ " TIMEOUTFACTOR An integer used to scale the timeout\n" ++ " TMPDIR Where to place temporary files\n" ++ " TEST_COREDUMPS Do not disable coredumps if set\n" ++ "\n", ++ program_invocation_short_name); ++ printf ("Options:\n"); ++ for (i = 0; options[i].name; ++i) ++ { ++ int indent; ++ ++ indent = printf (" --%s", options[i].name); ++ if (options[i].has_arg == required_argument) ++ indent += printf (" "); ++ printf ("%*s", 25 - indent, ""); ++ switch (options[i].val) ++ { ++ case 'v': ++ printf ("Increase the output verbosity"); ++ break; ++ case OPT_DIRECT: ++ printf ("Run the test directly (instead of forking & monitoring)"); ++ break; ++ case OPT_TESTDIR: ++ printf ("Override the TMPDIR env var"); ++ break; ++ } ++ printf ("\n"); ++ } ++} ++ ++/* The PID of the test process. */ ++static pid_t test_pid; ++ ++/* The cleanup handler passed to test_main. */ ++static void (*cleanup_function) (void); ++ ++/* Timeout handler. We kill the child and exit with an error. */ ++static void ++__attribute__ ((noreturn)) ++signal_handler (int sig) ++{ ++ int killed; ++ int status; ++ ++ assert (test_pid > 1); ++ /* Kill the whole process group. */ ++ kill (-test_pid, SIGKILL); ++ /* In case setpgid failed in the child, kill it individually too. */ ++ kill (test_pid, SIGKILL); ++ ++ /* Wait for it to terminate. */ ++ int i; ++ for (i = 0; i < 5; ++i) ++ { ++ killed = waitpid (test_pid, &status, WNOHANG|WUNTRACED); ++ if (killed != 0) ++ break; ++ ++ /* Delay, give the system time to process the kill. If the ++ nanosleep() call return prematurely, all the better. We ++ won't restart it since this probably means the child process ++ finally died. 
*/ ++ struct timespec ts; ++ ts.tv_sec = 0; ++ ts.tv_nsec = 100000000; ++ nanosleep (&ts, NULL); ++ } ++ if (killed != 0 && killed != test_pid) ++ { ++ printf ("Failed to kill test process: %m\n"); ++ exit (1); ++ } ++ ++ if (cleanup_function != NULL) ++ cleanup_function (); ++ ++ if (sig == SIGINT) ++ { ++ signal (sig, SIG_DFL); ++ raise (sig); ++ } ++ ++ if (killed == 0 || (WIFSIGNALED (status) && WTERMSIG (status) == SIGKILL)) ++ puts ("Timed out: killed the child process"); ++ else if (WIFSTOPPED (status)) ++ printf ("Timed out: the child process was %s\n", ++ strsignal (WSTOPSIG (status))); ++ else if (WIFSIGNALED (status)) ++ printf ("Timed out: the child process got signal %s\n", ++ strsignal (WTERMSIG (status))); ++ else ++ printf ("Timed out: killed the child process but it exited %d\n", ++ WEXITSTATUS (status)); ++ ++ /* Exit with an error. */ ++ exit (1); ++} ++ ++/* Run test_function or test_function_argv. */ ++static int ++run_test_function (int argc, char **argv, const struct test_config *config) ++{ ++ if (config->test_function != NULL) ++ return config->test_function (); ++ else if (config->test_function_argv != NULL) ++ return config->test_function_argv (argc, argv); ++ else ++ { ++ printf ("error: no test function defined\n"); ++ exit (1); ++ } ++} ++ ++static bool test_main_called; ++ ++const char *test_dir = NULL; ++unsigned int test_verbose = 0; ++ ++/* If test failure reporting has been linked in, it may contribute ++ additional test failures. 
*/ ++static int ++adjust_exit_status (int status) ++{ ++ if (support_report_failure != NULL) ++ return support_report_failure (status); ++ return status; ++} ++ ++int ++support_test_main (int argc, char **argv, const struct test_config *config) ++{ ++ if (test_main_called) ++ { ++ printf ("error: test_main called for a second time\n"); ++ exit (1); ++ } ++ test_main_called = true; ++ const struct option *options; ++ if (config->options != NULL) ++ options = config->options; ++ else ++ options = default_options; ++ ++ cleanup_function = config->cleanup_function; ++ ++ int direct = 0; /* Directly call the test function? */ ++ int status; ++ int opt; ++ unsigned int timeoutfactor = 1; ++ pid_t termpid; ++ ++ if (!config->no_mallopt) ++ { ++ /* Make uses of freed and uninitialized memory known. Do not ++ pull in a definition for mallopt if it has not been defined ++ already. */ ++ extern __typeof__ (mallopt) mallopt __attribute__ ((weak)); ++ if (mallopt != NULL) ++ mallopt (M_PERTURB, 42); ++ } ++ ++ while ((opt = getopt_long (argc, argv, "+", options, NULL)) != -1) ++ switch (opt) ++ { ++ case '?': ++ usage (options); ++ exit (1); ++ case 'v': ++ ++test_verbose; ++ break; ++ case OPT_DIRECT: ++ direct = 1; ++ break; ++ case OPT_TESTDIR: ++ test_dir = optarg; ++ break; ++ default: ++ if (config->cmdline_function != NULL) ++ config->cmdline_function (opt); ++ } ++ ++ /* If set, read the test TIMEOUTFACTOR value from the environment. ++ This value is used to scale the default test timeout values. */ ++ char *envstr_timeoutfactor = getenv ("TIMEOUTFACTOR"); ++ if (envstr_timeoutfactor != NULL) ++ { ++ char *envstr_conv = envstr_timeoutfactor; ++ unsigned long int env_fact; ++ ++ env_fact = strtoul (envstr_timeoutfactor, &envstr_conv, 0); ++ if (*envstr_conv == '\0' && envstr_conv != envstr_timeoutfactor) ++ timeoutfactor = MAX (env_fact, 1); ++ } ++ ++ /* Set TMPDIR to specified test directory. 
*/ ++ if (test_dir != NULL) ++ { ++ setenv ("TMPDIR", test_dir, 1); ++ ++ if (chdir (test_dir) < 0) ++ { ++ printf ("chdir: %m\n"); ++ exit (1); ++ } ++ } ++ else ++ { ++ test_dir = getenv ("TMPDIR"); ++ if (test_dir == NULL || test_dir[0] == '\0') ++ test_dir = "/tmp"; ++ } ++ if (support_set_test_dir != NULL) ++ support_set_test_dir (test_dir); ++ ++ int timeout = config->timeout; ++ if (timeout == 0) ++ timeout = DEFAULT_TIMEOUT; ++ ++ /* Make sure we see all message, even those on stdout. */ ++ setvbuf (stdout, NULL, _IONBF, 0); ++ ++ /* Make sure temporary files are deleted. */ ++ if (support_delete_temp_files != NULL) ++ atexit (support_delete_temp_files); ++ ++ /* Correct for the possible parameters. */ ++ argv[optind - 1] = argv[0]; ++ argv += optind - 1; ++ argc -= optind - 1; ++ ++ /* Call the initializing function, if one is available. */ ++ if (config->prepare_function != NULL) ++ config->prepare_function (argc, argv); ++ ++ const char *envstr_direct = getenv ("TEST_DIRECT"); ++ if (envstr_direct != NULL) ++ { ++ FILE *f = fopen (envstr_direct, "w"); ++ if (f == NULL) ++ { ++ printf ("cannot open TEST_DIRECT output file '%s': %m\n", ++ envstr_direct); ++ exit (1); ++ } ++ ++ fprintf (f, "timeout=%u\ntimeoutfactor=%u\n", ++ config->timeout, timeoutfactor); ++ if (config->expected_status != 0) ++ fprintf (f, "exit=%u\n", config->expected_status); ++ if (config->expected_signal != 0) ++ fprintf (f, "signal=%s\n", strsignal (config->expected_signal)); ++ ++ if (support_print_temp_files != NULL) ++ support_print_temp_files (f); ++ ++ fclose (f); ++ direct = 1; ++ } ++ ++ bool disable_coredumps; ++ { ++ const char *coredumps = getenv ("TEST_COREDUMPS"); ++ disable_coredumps = coredumps == NULL || coredumps[0] == '\0'; ++ } ++ ++ /* If we are not expected to fork run the function immediately. 
*/ ++ if (direct) ++ return adjust_exit_status (run_test_function (argc, argv, config)); ++ ++ /* Set up the test environment: ++ - prevent core dumps ++ - set up the timer ++ - fork and execute the function. */ ++ ++ test_pid = fork (); ++ if (test_pid == 0) ++ { ++ /* This is the child. */ ++ if (disable_coredumps) ++ { ++ /* Try to avoid dumping core. This is necessary because we ++ run the test from the source tree, and the coredumps ++ would end up there (and not in the build tree). */ ++ struct rlimit core_limit; ++ core_limit.rlim_cur = 0; ++ core_limit.rlim_max = 0; ++ setrlimit (RLIMIT_CORE, &core_limit); ++ } ++ ++ /* We put the test process in its own pgrp so that if it bogusly ++ generates any job control signals, they won't hit the whole build. */ ++ if (setpgid (0, 0) != 0) ++ printf ("Failed to set the process group ID: %m\n"); ++ ++ /* Execute the test function and exit with the return value. */ ++ exit (run_test_function (argc, argv, config)); ++ } ++ else if (test_pid < 0) ++ { ++ printf ("Cannot fork test program: %m\n"); ++ exit (1); ++ } ++ ++ /* Set timeout. */ ++ signal (SIGALRM, signal_handler); ++ alarm (timeout * timeoutfactor); ++ ++ /* Make sure we clean up if the wrapper gets interrupted. */ ++ signal (SIGINT, signal_handler); ++ ++ /* Wait for the regular termination. */ ++ termpid = TEMP_FAILURE_RETRY (waitpid (test_pid, &status, 0)); ++ if (termpid == -1) ++ { ++ printf ("Waiting for test program failed: %m\n"); ++ exit (1); ++ } ++ if (termpid != test_pid) ++ { ++ printf ("Oops, wrong test program terminated: expected %ld, got %ld\n", ++ (long int) test_pid, (long int) termpid); ++ exit (1); ++ } ++ ++ /* Process terminated normally without timeout etc. */ ++ if (WIFEXITED (status)) ++ { ++ if (config->expected_status == 0) ++ { ++ if (config->expected_signal == 0) ++ /* Exit with the return value of the test. 
*/ ++ return adjust_exit_status (WEXITSTATUS (status)); ++ else ++ { ++ printf ("Expected signal '%s' from child, got none\n", ++ strsignal (config->expected_signal)); ++ exit (1); ++ } ++ } ++ else ++ { ++ /* Non-zero exit status is expected */ ++ if (WEXITSTATUS (status) != config->expected_status) ++ { ++ printf ("Expected status %d, got %d\n", ++ config->expected_status, WEXITSTATUS (status)); ++ exit (1); ++ } ++ } ++ return adjust_exit_status (0); ++ } ++ /* Process was killed by timer or other signal. */ ++ else ++ { ++ if (config->expected_signal == 0) ++ { ++ printf ("Didn't expect signal from child: got `%s'\n", ++ strsignal (WTERMSIG (status))); ++ exit (1); ++ } ++ else if (WTERMSIG (status) != config->expected_signal) ++ { ++ printf ("Incorrect signal from child: got `%s', need `%s'\n", ++ strsignal (WTERMSIG (status)), ++ strsignal (config->expected_signal)); ++ exit (1); ++ } ++ ++ return adjust_exit_status (0); ++ } ++} +diff --git a/support/support_test_verify_impl.c b/support/support_test_verify_impl.c +new file mode 100644 +index 0000000..5bae38f +--- /dev/null ++++ b/support/support_test_verify_impl.c +@@ -0,0 +1,33 @@ ++/* Implementation of the TEST_VERIFY and TEST_VERIFY_EXIT macros. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++ ++void ++support_test_verify_impl (int status, const char *file, int line, ++ const char *expr) ++{ ++ support_record_failure (); ++ printf ("error: %s:%d: not true: %s\n", file, line, expr); ++ if (status >= 0) ++ exit (status); ++ ++} +diff --git a/support/temp_file-internal.h b/support/temp_file-internal.h +new file mode 100644 +index 0000000..fb6cceb +--- /dev/null ++++ b/support/temp_file-internal.h +@@ -0,0 +1,31 @@ ++/* Internal weak declarations for temporary file handling. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SUPPORT_TEMP_FILE_INTERNAL_H ++#define SUPPORT_TEMP_FILE_INTERNAL_H ++ ++/* These functions are called by the test driver if they are ++ defined. Tests should not call them directly. */ ++ ++#include ++ ++void support_set_test_dir (const char *name) __attribute__ ((weak)); ++void support_delete_temp_files (void) __attribute__ ((weak)); ++void support_print_temp_files (FILE *) __attribute__ ((weak)); ++ ++#endif /* SUPPORT_TEMP_FILE_INTERNAL_H */ +diff --git a/support/temp_file.c b/support/temp_file.c +new file mode 100644 +index 0000000..f06647a +--- /dev/null ++++ b/support/temp_file.c +@@ -0,0 +1,125 @@ ++/* Temporary file handling for tests. 
++ Copyright (C) 1998-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This is required to get an mkstemp which can create large files on ++ some 32-bit platforms. */ ++#define _FILE_OFFSET_BITS 64 ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* List of temporary files. */ ++static struct temp_name_list ++{ ++ struct qelem q; ++ char *name; ++} *temp_name_list; ++ ++/* Location of the temporary files. Set by the test skeleton via ++ support_set_test_dir. The string is not freed. 
*/ ++static const char *test_dir = _PATH_TMP; ++ ++void ++add_temp_file (const char *name) ++{ ++ struct temp_name_list *newp ++ = (struct temp_name_list *) xcalloc (sizeof (*newp), 1); ++ char *newname = strdup (name); ++ if (newname != NULL) ++ { ++ newp->name = newname; ++ if (temp_name_list == NULL) ++ temp_name_list = (struct temp_name_list *) &newp->q; ++ else ++ insque (newp, temp_name_list); ++ } ++ else ++ free (newp); ++} ++ ++int ++create_temp_file (const char *base, char **filename) ++{ ++ char *fname; ++ int fd; ++ ++ fname = (char *) xmalloc (strlen (test_dir) + 1 + strlen (base) ++ + sizeof ("XXXXXX")); ++ strcpy (stpcpy (stpcpy (stpcpy (fname, test_dir), "/"), base), "XXXXXX"); ++ ++ fd = mkstemp (fname); ++ if (fd == -1) ++ { ++ printf ("cannot open temporary file '%s': %m\n", fname); ++ free (fname); ++ return -1; ++ } ++ ++ add_temp_file (fname); ++ if (filename != NULL) ++ *filename = fname; ++ else ++ free (fname); ++ ++ return fd; ++} ++ ++/* Helper functions called by the test skeleton follow. */ ++ ++void ++support_set_test_dir (const char *path) ++{ ++ test_dir = path; ++} ++ ++void ++support_delete_temp_files (void) ++{ ++ while (temp_name_list != NULL) ++ { ++ remove (temp_name_list->name); ++ free (temp_name_list->name); ++ ++ struct temp_name_list *next ++ = (struct temp_name_list *) temp_name_list->q.q_forw; ++ free (temp_name_list); ++ temp_name_list = next; ++ } ++} ++ ++void ++support_print_temp_files (FILE *f) ++{ ++ if (temp_name_list != NULL) ++ { ++ struct temp_name_list *n; ++ fprintf (f, "temp_files=(\n"); ++ for (n = temp_name_list; ++ n != NULL; ++ n = (struct temp_name_list *) n->q.q_forw) ++ fprintf (f, " '%s'\n", n->name); ++ fprintf (f, ")\n"); ++ } ++} +diff --git a/support/temp_file.h b/support/temp_file.h +new file mode 100644 +index 0000000..6fed8df +--- /dev/null ++++ b/support/temp_file.h +@@ -0,0 +1,37 @@ ++/* Declarations for temporary file handling. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SUPPORT_TEMP_FILE_H ++#define SUPPORT_TEMP_FILE_H ++ ++#include ++ ++__BEGIN_DECLS ++ ++/* Schedule a temporary file for deletion on exit. */ ++void add_temp_file (const char *name); ++ ++/* Create a temporary file. Return the opened file descriptor on ++ success, or -1 on failure. Write the file name to *FILENAME if ++ FILENAME is not NULL. In this case, the caller is expected to free ++ *FILENAME. */ ++int create_temp_file (const char *base, char **filename); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_TEMP_FILE_H */ +diff --git a/support/test-driver.c b/support/test-driver.c +new file mode 100644 +index 0000000..482066d +--- /dev/null ++++ b/support/test-driver.c +@@ -0,0 +1,156 @@ ++/* Main function for test programs. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This file should be included from test cases. It will define a ++ main function which provides the test wrapper. ++ ++ It assumes that the test case defines a function ++ ++ int do_test (void); ++ ++ and arranges for that function being called under the test wrapper. ++ The do_test function should return 0 to indicate a passing test, 1 ++ to indicate a failing test, or 77 to indicate an unsupported test. ++ Other result values could be used to indicate a failing test, but ++ the result of the expression is passed to exit and exit only ++ returns the lower 8 bits of its input. A non-zero return with some ++ values could cause a test to incorrectly be considered passing when ++ it really failed. For this reason, the function should always ++ return 0 (EXIT_SUCCESS), 1 (EXIT_FAILURE), or 77 ++ (EXIT_UNSUPPORTED). ++ ++ The test function may print out diagnostic or warning messages as well ++ as messages about failures. These messages should be printed to stdout ++ and not stderr so that the output is properly ordered with respect to ++ the rest of the glibc testsuite run output. ++ ++ Several preprocessor macros can be defined before including this ++ file. ++ ++ The name of the do_test function can be changed with the ++ TEST_FUNCTION macro. It must expand to the desired function name. ++ ++ If the test case needs access to command line parameters, it must ++ define the TEST_FUNCTION_ARGV macro with the name of the test ++ function. 
It must have the following type: ++ ++ int TEST_FUNCTION_ARGV (int argc, char **argv); ++ ++ This overrides the do_test default function and is incompatible ++ with the TEST_FUNCTION macro. ++ ++ If PREPARE is defined, it must expand to the name of a function of ++ the type ++ ++ void PREPARE (int argc, char **); ++ ++ This function will be called early, after parsing the command line, ++ but before running the test, in the parent process which acts as ++ the test supervisor. ++ ++ If CLEANUP_HANDLER is defined, it must expand to the name of a ++ function of the type ++ ++ void CLEANUP_HANDLER (void); ++ ++ This function will be called from the timeout (SIGALRM) signal ++ handler. ++ ++ If EXPECTED_SIGNAL is defined, it must expand to a constant which ++ denotes the expected signal number. ++ ++ If EXPECTED_STATUS is defined, it must expand to the expected exit ++ status. ++ ++ If TIMEOUT is defined, it must be a positive constant. It overrides ++ the default test timeout and is measured in seconds. ++ ++ If TEST_NO_MALLOPT is defined, the test wrapper will not call ++ mallopt. ++ ++ Custom command line handling can be implemented by defining the ++ CMDLINE_OPTIONS macro (after including the <getopt.h> header; this ++ requires _GNU_SOURCE to be defined). This macro must expand to a ++ comma-separated list of braced initializers for struct option ++ from <getopt.h>, with a trailing comma. CMDLINE_PROCESS can be ++ defined as the name of a function which is called to process these ++ options. 
The function is passed the option character/number and ++ has this type: ++ ++ void CMDLINE_PROCESS (int); ++*/ ++ ++#include ++ ++#include ++ ++int ++main (int argc, char **argv) ++{ ++ struct test_config test_config; ++ memset (&test_config, 0, sizeof (test_config)); ++ ++#ifdef PREPARE ++ test_config.prepare_function = (PREPARE); ++#endif ++ ++#if defined (TEST_FUNCTION) && defined (TEST_FUNCTON_ARGV) ++# error TEST_FUNCTION and TEST_FUNCTION_ARGV cannot be defined at the same time ++#endif ++#if defined (TEST_FUNCTION) ++ test_config.test_function = TEST_FUNCTION; ++#elif defined (TEST_FUNCTION_ARGV) ++ test_config.test_function_argv = TEST_FUNCTION_ARGV; ++#else ++ test_config.test_function = do_test; ++#endif ++ ++#ifdef CLEANUP_HANDLER ++ test_config.cleanup_function = CLEANUP_HANDLER; ++#endif ++ ++#ifdef EXPECTED_SIGNAL ++ test_config.expected_signal = (EXPECTED_SIGNAL); ++#endif ++ ++#ifdef EXPECTED_STATUS ++ test_config.expected_status = (EXPECTED_STATUS); ++#endif ++ ++#ifdef TEST_NO_MALLOPT ++ test_config.no_mallopt = 1; ++#endif ++ ++#ifdef TIMEOUT ++ test_config.timeout = TIMEOUT; ++#endif ++ ++#ifdef CMDLINE_OPTIONS ++ struct option options[] = ++ { ++ CMDLINE_OPTIONS ++ TEST_DEFAULT_OPTIONS ++ }; ++ test_config.options = &options; ++#endif ++#ifdef CMDLINE_PROCESS ++ test_config.cmdline_function = CMDLINE_PROCESS; ++#endif ++ ++ return support_test_main (argc, argv, &test_config); ++} +diff --git a/support/test-driver.h b/support/test-driver.h +new file mode 100644 +index 0000000..af1971a +--- /dev/null ++++ b/support/test-driver.h +@@ -0,0 +1,74 @@ ++/* Interfaces for the test driver. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SUPPORT_TEST_DRIVER_H ++#define SUPPORT_TEST_DRIVER_H ++ ++#include ++ ++__BEGIN_DECLS ++ ++struct test_config ++{ ++ void (*prepare_function) (int argc, char **argv); ++ int (*test_function) (void); ++ int (*test_function_argv) (int argc, char **argv); ++ void (*cleanup_function) (void); ++ void (*cmdline_function) (int); ++ const void *options; /* Custom options if not NULL. */ ++ int timeout; /* Test timeout in seconds. */ ++ int expected_status; /* Expected exit status. */ ++ int expected_signal; /* If non-zero, expect termination by signal. */ ++ char no_mallopt; /* Boolean flag to disable mallopt. */ ++}; ++ ++enum ++ { ++ /* Test exit status which indicates that the feature is ++ unsupported. */ ++ EXIT_UNSUPPORTED = 77, ++ ++ /* Default timeout is twenty seconds. Tests should normally ++ complete faster than this, but if they don't, that's abnormal ++ (a bug) anyways. */ ++ DEFAULT_TIMEOUT = 20, ++ ++ /* Used for command line argument parsing. */ ++ OPT_DIRECT = 1000, ++ OPT_TESTDIR, ++ }; ++ ++/* Options provided by the test driver. */ ++#define TEST_DEFAULT_OPTIONS \ ++ { "verbose", no_argument, NULL, 'v' }, \ ++ { "direct", no_argument, NULL, OPT_DIRECT }, \ ++ { "test-dir", required_argument, NULL, OPT_TESTDIR }, \ ++ ++/* The directory the test should use for temporary files. */ ++extern const char *test_dir; ++ ++/* The number of --verbose arguments specified during program ++ invocation. This variable can be used to control the verbosity of ++ tests. 
*/ ++extern unsigned int test_verbose; ++ ++int support_test_main (int argc, char **argv, const struct test_config *); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_TEST_DRIVER_H */ +diff --git a/support/tst-support-namespace.c b/support/tst-support-namespace.c +new file mode 100644 +index 0000000..a50b074 +--- /dev/null ++++ b/support/tst-support-namespace.c +@@ -0,0 +1,34 @@ ++/* Test entering namespaces. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ if (support_become_root ()) ++ printf ("info: acquired root-like privileges\n"); ++ if (support_enter_network_namespace ()) ++ printf ("info: entered network namespace\n"); ++ if (support_in_uts_namespace ()) ++ printf ("info: also entered UTS namespace\n"); ++ return 0; ++} ++ ++#include +diff --git a/support/tst-support_record_failure-2.sh b/support/tst-support_record_failure-2.sh +new file mode 100644 +index 0000000..1751377 +--- /dev/null ++++ b/support/tst-support_record_failure-2.sh +@@ -0,0 +1,69 @@ ++#!/bin/sh ++# Test failure recording (with and without --direct). ++# Copyright (C) 2016-2017 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. 
++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . */ ++ ++set -e ++ ++common_objpfx=$1; shift ++test_program_prefix_before_env=$1; shift ++run_program_env=$1; shift ++test_program_prefix_after_env=$1; shift ++ ++run_test () { ++ expected_status="$1" ++ expected_output="$2" ++ shift 2 ++ args="${common_objpfx}support/tst-support_record_failure $*" ++ echo "running: $args" ++ set +e ++ output="$(${test_program_prefix_before_env} \ ++ ${run_program} ${test_program_prefix_after_env} $args)" ++ status=$? 
++ set -e ++ echo " exit status: $status" ++ if test "$output" != "$expected_output" ; then ++ echo "error: unexpected ouput: $output" ++ exit 1 ++ fi ++ if test "$status" -ne "$expected_status" ; then ++ echo "error: exit status $expected_status expected" ++ exit 1 ++ fi ++} ++ ++different_status () { ++ direct="$1" ++ run_test 1 "error: 1 test failures" $direct --status=0 ++ run_test 1 "error: 1 test failures" $direct --status=1 ++ run_test 2 "error: 1 test failures" $direct --status=2 ++ run_test 1 "error: 1 test failures" $direct --status=77 ++ run_test 2 "error: tst-support_record_failure.c:108: not true: false ++error: 1 test failures" $direct --test-verify ++ run_test 2 "error: tst-support_record_failure.c:108: not true: false ++info: execution passed failed TEST_VERIFY ++error: 1 test failures" $direct --test-verify --verbose ++} ++ ++different_status ++different_status --direct ++ ++run_test 1 "error: tst-support_record_failure.c:115: not true: false ++error: 1 test failures" --test-verify-exit ++# --direct does not print the summary error message if exit is called. ++run_test 1 "error: tst-support_record_failure.c:115: not true: false" \ ++ --direct --test-verify-exit +diff --git a/support/tst-support_record_failure.c b/support/tst-support_record_failure.c +new file mode 100644 +index 0000000..62d8e1f +--- /dev/null ++++ b/support/tst-support_record_failure.c +@@ -0,0 +1,152 @@ ++/* Test support_record_failure state sharing. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++static int exit_status_with_failure = -1; ++static bool test_verify; ++static bool test_verify_exit; ++enum ++ { ++ OPT_STATUS = 10001, ++ OPT_TEST_VERIFY, ++ OPT_TEST_VERIFY_EXIT, ++ }; ++#define CMDLINE_OPTIONS \ ++ { "status", required_argument, NULL, OPT_STATUS }, \ ++ { "test-verify", no_argument, NULL, OPT_TEST_VERIFY }, \ ++ { "test-verify-exit", no_argument, NULL, OPT_TEST_VERIFY_EXIT }, ++static void ++cmdline_process (int c) ++{ ++ switch (c) ++ { ++ case OPT_STATUS: ++ exit_status_with_failure = atoi (optarg); ++ break; ++ case OPT_TEST_VERIFY: ++ test_verify = true; ++ break; ++ case OPT_TEST_VERIFY_EXIT: ++ test_verify_exit = true; ++ break; ++ } ++} ++#define CMDLINE_PROCESS cmdline_process ++ ++static void ++check_failure_reporting (int phase, int zero, int unsupported) ++{ ++ int status = support_report_failure (0); ++ if (status != zero) ++ { ++ printf ("real-error (phase %d): support_report_failure (0) == %d\n", ++ phase, status); ++ exit (1); ++ } ++ status = support_report_failure (1); ++ if (status != 1) ++ { ++ printf ("real-error (phase %d): support_report_failure (1) == %d\n", ++ phase, status); ++ exit (1); ++ } ++ status = support_report_failure (2); ++ if (status != 2) ++ { ++ printf ("real-error (phase %d): support_report_failure (2) == %d\n", ++ phase, status); ++ exit (1); ++ } ++ status = support_report_failure (EXIT_UNSUPPORTED); ++ if (status != unsupported) ++ { ++ printf ("real-error (phase %d): " ++ "support_report_failure (EXIT_UNSUPPORTED) == 
%d\n", ++ phase, status); ++ exit (1); ++ } ++} ++ ++static int ++do_test (void) ++{ ++ if (exit_status_with_failure >= 0) ++ { ++ /* External invocation with requested error status. Used by ++ tst-support_record_failure-2.sh. */ ++ support_record_failure (); ++ return exit_status_with_failure; ++ } ++ TEST_VERIFY (true); ++ TEST_VERIFY_EXIT (true); ++ if (test_verify) ++ { ++ TEST_VERIFY (false); ++ if (test_verbose) ++ printf ("info: execution passed failed TEST_VERIFY\n"); ++ return 2; /* Expected exit status. */ ++ } ++ if (test_verify_exit) ++ { ++ TEST_VERIFY_EXIT (false); ++ return 3; /* Not reached. Expected exit status is 1. */ ++ } ++ ++ printf ("info: This test tests the test framework.\n" ++ "info: It reports some expected errors on stdout.\n"); ++ ++ /* Check that the status is passed through unchanged. */ ++ check_failure_reporting (1, 0, EXIT_UNSUPPORTED); ++ ++ /* Check state propagation from a subprocess. */ ++ pid_t pid = xfork (); ++ if (pid == 0) ++ { ++ support_record_failure (); ++ _exit (0); ++ } ++ int status; ++ xwaitpid (pid, &status, 0); ++ if (status != 0) ++ { ++ printf ("real-error: incorrect status from subprocess: %d\n", status); ++ return 1; ++ } ++ check_failure_reporting (2, 1, 1); ++ ++ /* Also test directly in the parent process. */ ++ support_record_failure_reset (); ++ check_failure_reporting (3, 0, EXIT_UNSUPPORTED); ++ support_record_failure (); ++ check_failure_reporting (4, 1, 1); ++ ++ /* We need to mask the failure above. */ ++ support_record_failure_reset (); ++ return 0; ++} ++ ++#include +diff --git a/support/write_message.c b/support/write_message.c +new file mode 100644 +index 0000000..f03ed93 +--- /dev/null ++++ b/support/write_message.c +@@ -0,0 +1,29 @@ ++/* Write a message to standard output. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++ ++void ++write_message (const char *message) ++{ ++ ssize_t unused __attribute__ ((unused)); ++ unused = write (STDOUT_FILENO, message, strlen (message)); ++} +diff --git a/support/xaccept.c b/support/xaccept.c +new file mode 100644 +index 0000000..7b25af3 +--- /dev/null ++++ b/support/xaccept.c +@@ -0,0 +1,32 @@ ++/* accept with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++ ++int ++xaccept (int fd, struct sockaddr *sa, socklen_t *salen) ++{ ++ int clientfd = accept (fd, sa, salen); ++ if (clientfd < 0) ++ FAIL_EXIT1 ("accept (%d): %m", fd); ++ return clientfd; ++} +diff --git a/support/xasprintf.c b/support/xasprintf.c +new file mode 100644 +index 0000000..5157680 +--- /dev/null ++++ b/support/xasprintf.c +@@ -0,0 +1,36 @@ ++/* Error-checking wrapper for asprintf. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++char * ++xasprintf (const char *format, ...) ++{ ++ va_list ap; ++ va_start (ap, format); ++ char *result; ++ if (vasprintf (&result, format, ap) < 0) ++ FAIL_EXIT1 ("asprintf: %m"); ++ va_end (ap); ++ return result; ++} +diff --git a/support/xbind.c b/support/xbind.c +new file mode 100644 +index 0000000..cfc6dd8 +--- /dev/null ++++ b/support/xbind.c +@@ -0,0 +1,30 @@ ++/* bind with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++void ++xbind (int fd, const struct sockaddr *sa, socklen_t sa_len) ++{ ++ if (bind (fd, sa, sa_len) != 0) ++ FAIL_EXIT1 ("bind (%d), family %d: %m", fd, sa->sa_family); ++} +diff --git a/support/xcalloc.c b/support/xcalloc.c +new file mode 100644 +index 0000000..135f42d +--- /dev/null ++++ b/support/xcalloc.c +@@ -0,0 +1,34 @@ ++/* Error-checking wrapper for calloc. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++ ++void * ++xcalloc (size_t n, size_t s) ++{ ++ void *p; ++ ++ p = calloc (n, s); ++ if (p == NULL) ++ oom_error ("calloc", n * s); ++ return p; ++} +diff --git a/support/xconnect.c b/support/xconnect.c +new file mode 100644 +index 0000000..0266dbc +--- /dev/null ++++ b/support/xconnect.c +@@ -0,0 +1,30 @@ ++/* connect with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++void ++xconnect (int fd, const struct sockaddr *sa, socklen_t sa_len) ++{ ++ if (connect (fd, sa, sa_len) != 0) ++ FAIL_EXIT1 ("connect (%d), family %d: %m", fd, sa->sa_family); ++} +diff --git a/support/xfclose.c b/support/xfclose.c +new file mode 100644 +index 0000000..2737f05 +--- /dev/null ++++ b/support/xfclose.c +@@ -0,0 +1,33 @@ ++/* fclose with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++ ++void ++xfclose (FILE *fp) ++{ ++ if (ferror (fp)) ++ FAIL_EXIT1 ("stdio stream closed with pending errors"); ++ if (fflush (fp) != 0) ++ FAIL_EXIT1 ("fflush: %m"); ++ if (fclose (fp) != 0) ++ FAIL_EXIT1 ("fclose: %m"); ++} +diff --git a/support/xfopen.c b/support/xfopen.c +new file mode 100644 +index 0000000..14532a0 +--- /dev/null ++++ b/support/xfopen.c +@@ -0,0 +1,31 @@ ++/* fopen with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++ ++FILE * ++xfopen (const char *path, const char *mode) ++{ ++ FILE *fp = fopen (path, mode); ++ if (fp == NULL) ++ FAIL_EXIT1 ("could not open %s (mode \"%s\"): %m", path, mode); ++ return fp; ++} +diff --git a/support/xfork.c b/support/xfork.c +new file mode 100644 +index 0000000..aa52ba6 +--- /dev/null ++++ b/support/xfork.c +@@ -0,0 +1,32 @@ ++/* fork with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++pid_t ++xfork (void) ++{ ++ pid_t result = fork (); ++ if (result < 0) ++ FAIL_EXIT1 ("fork: %m"); ++ return result; ++} +diff --git a/support/xgetsockname.c b/support/xgetsockname.c +new file mode 100644 +index 0000000..c3bd884 +--- /dev/null ++++ b/support/xgetsockname.c +@@ -0,0 +1,30 @@ ++/* getsockname with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++void ++xgetsockname (int fd, struct sockaddr *sa, socklen_t *plen) ++{ ++ if (getsockname (fd, sa, plen) != 0) ++ FAIL_EXIT1 ("getsockname (%d): %m", fd); ++} +diff --git a/support/xlisten.c b/support/xlisten.c +new file mode 100644 +index 0000000..1953e59 +--- /dev/null ++++ b/support/xlisten.c +@@ -0,0 +1,30 @@ ++/* listen with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++void ++xlisten (int fd, int backlog) ++{ ++ if (listen (fd, backlog) != 0) ++ FAIL_EXIT1 ("listen (%d, %d): %m", fd, backlog); ++} +diff --git a/support/xmalloc.c b/support/xmalloc.c +new file mode 100644 +index 0000000..450f699 +--- /dev/null ++++ b/support/xmalloc.c +@@ -0,0 +1,34 @@ ++/* Error-checking wrapper for malloc. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++void * ++xmalloc (size_t n) ++{ ++ void *p; ++ ++ p = malloc (n); ++ if (p == NULL) ++ oom_error ("malloc", n); ++ return p; ++} +diff --git a/support/xmemstream.c b/support/xmemstream.c +new file mode 100644 +index 0000000..bce6dc9 +--- /dev/null ++++ b/support/xmemstream.c +@@ -0,0 +1,42 @@ ++/* Error-checking wrappers for memstream functions. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++void ++xopen_memstream (struct xmemstream *stream) ++{ ++ int old_errno = errno; ++ *stream = (struct xmemstream) {}; ++ stream->out = open_memstream (&stream->buffer, &stream->length); ++ if (stream->out == NULL) ++ FAIL_EXIT1 ("open_memstream: %m"); ++ errno = old_errno; ++} ++ ++void ++xfclose_memstream (struct xmemstream *stream) ++{ ++ xfclose (stream->out); ++ stream->out = NULL; ++} +diff --git a/support/xmemstream.h b/support/xmemstream.h +new file mode 100644 +index 0000000..e5ba231 +--- /dev/null ++++ b/support/xmemstream.h +@@ -0,0 +1,49 @@ ++/* Error-checking wrappers for memstream functions. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SUPPORT_XMEMSTREAM_H ++#define SUPPORT_XMEMSTREAM_H ++ ++#include ++#include ++ ++__BEGIN_DECLS ++ ++/* Wrappers for other libc functions. */ ++struct xmemstream ++{ ++ FILE *out; ++ char *buffer; ++ size_t length; ++}; ++ ++/* Create a new in-memory stream. Initializes *STREAM. After this ++ function returns, STREAM->out is a stdio stream open for ++ writing. errno is preserved, so that the %m format specifier can ++ be used for writing to STREAM->out. */ ++void xopen_memstream (struct xmemstream *stream); ++ ++/* Closes STREAM->OUT. 
After this function returns, STREAM->buffer ++ and STREAM->length denote a memory range which contains the bytes ++ written to the output stream. The caller should free ++ STREAM->buffer. */ ++void xfclose_memstream (struct xmemstream *stream); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_XMEMSTREAM_H */ +diff --git a/support/xmmap.c b/support/xmmap.c +new file mode 100644 +index 0000000..435b1eb +--- /dev/null ++++ b/support/xmmap.c +@@ -0,0 +1,31 @@ ++/* mmap with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++void * ++xmmap (void *addr, size_t length, int prot, int flags, int fd) ++{ ++ void *result = mmap (addr, length, prot, flags, fd, 0); ++ if (result == MAP_FAILED) ++ FAIL_EXIT1 ("mmap of %zu bytes, prot=0x%x, flags=0x%x: %m", ++ length, prot, flags); ++ return result; ++} +diff --git a/support/xmunmap.c b/support/xmunmap.c +new file mode 100644 +index 0000000..6ef5a4a +--- /dev/null ++++ b/support/xmunmap.c +@@ -0,0 +1,28 @@ ++/* munmap with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++void ++xmunmap (void *addr, size_t length) ++{ ++ if (munmap (addr, length) != 0) ++ FAIL_EXIT1 ("munmap of %zu bytes: %m", length); ++} +diff --git a/support/xpoll.c b/support/xpoll.c +new file mode 100644 +index 0000000..bec2521 +--- /dev/null ++++ b/support/xpoll.c +@@ -0,0 +1,32 @@ ++/* poll with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++ ++int ++xpoll (struct pollfd *fds, nfds_t nfds, int timeout) ++{ ++ int ret = poll (fds, nfds, timeout); ++ if (ret < 0) ++ FAIL_EXIT1 ("poll: %m"); ++ return ret; ++} +diff --git a/support/xpthread_attr_destroy.c b/support/xpthread_attr_destroy.c +new file mode 100644 +index 0000000..664c809 +--- /dev/null ++++ b/support/xpthread_attr_destroy.c +@@ -0,0 +1,26 @@ ++/* pthread_attr_destroy with error checking. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_attr_destroy (pthread_attr_t *attr) ++{ ++ xpthread_check_return ("pthread_attr_destroy", ++ pthread_attr_destroy (attr)); ++} +diff --git a/support/xpthread_attr_init.c b/support/xpthread_attr_init.c +new file mode 100644 +index 0000000..2e30ade +--- /dev/null ++++ b/support/xpthread_attr_init.c +@@ -0,0 +1,25 @@ ++/* pthread_attr_init with error checking. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_attr_init (pthread_attr_t *attr) ++{ ++ xpthread_check_return ("pthread_attr_init", pthread_attr_init (attr)); ++} +diff --git a/support/xpthread_attr_setdetachstate.c b/support/xpthread_attr_setdetachstate.c +new file mode 100644 +index 0000000..b544dba +--- /dev/null ++++ b/support/xpthread_attr_setdetachstate.c +@@ -0,0 +1,27 @@ ++/* pthread_attr_setdetachstate with error checking. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++void ++xpthread_attr_setdetachstate (pthread_attr_t *attr, int detachstate) ++{ ++ xpthread_check_return ("pthread_attr_setdetachstate", ++ pthread_attr_setdetachstate (attr, ++ detachstate)); ++} +diff --git a/support/xpthread_attr_setstacksize.c b/support/xpthread_attr_setstacksize.c +new file mode 100644 +index 0000000..02d0631 +--- /dev/null ++++ b/support/xpthread_attr_setstacksize.c +@@ -0,0 +1,26 @@ ++/* pthread_attr_setstacksize with error checking. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_attr_setstacksize (pthread_attr_t *attr, size_t stacksize) ++{ ++ xpthread_check_return ("pthread_attr_setstacksize", ++ pthread_attr_setstacksize (attr, stacksize)); ++} +diff --git a/support/xpthread_barrier_destroy.c b/support/xpthread_barrier_destroy.c +new file mode 100644 +index 0000000..efc0719 +--- /dev/null ++++ b/support/xpthread_barrier_destroy.c +@@ -0,0 +1,26 @@ ++/* pthread_barrier_destroy with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_barrier_destroy (pthread_barrier_t *barrier) ++{ ++ xpthread_check_return ("pthread_barrier_destroy", ++ pthread_barrier_destroy (barrier)); ++} +diff --git a/support/xpthread_barrier_init.c b/support/xpthread_barrier_init.c +new file mode 100644 +index 0000000..b32dad1 +--- /dev/null ++++ b/support/xpthread_barrier_init.c +@@ -0,0 +1,27 @@ ++/* pthread_barrier_init with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++void ++xpthread_barrier_init (pthread_barrier_t *barrier, ++ pthread_barrierattr_t *attr, unsigned int count) ++{ ++ xpthread_check_return ("pthread_barrier_init", ++ pthread_barrier_init (barrier, attr, count)); ++} +diff --git a/support/xpthread_barrier_wait.c b/support/xpthread_barrier_wait.c +new file mode 100644 +index 0000000..7cee44d +--- /dev/null ++++ b/support/xpthread_barrier_wait.c +@@ -0,0 +1,28 @@ ++/* pthread_barrier_wait with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++int ++xpthread_barrier_wait (pthread_barrier_t *barrier) ++{ ++ int ret = pthread_barrier_wait (barrier); ++ if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) ++ xpthread_check_return ("pthread_barrier_wait", ret); ++ return ret == PTHREAD_BARRIER_SERIAL_THREAD; ++} +diff --git a/support/xpthread_cancel.c b/support/xpthread_cancel.c +new file mode 100644 +index 0000000..3af16f9 +--- /dev/null ++++ b/support/xpthread_cancel.c +@@ -0,0 +1,25 @@ ++/* pthread_cancel with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_cancel (pthread_t thr) ++{ ++ xpthread_check_return ("pthread_cancel", pthread_cancel (thr)); ++} +diff --git a/support/xpthread_check_return.c b/support/xpthread_check_return.c +new file mode 100644 +index 0000000..3094d82 +--- /dev/null ++++ b/support/xpthread_check_return.c +@@ -0,0 +1,34 @@ ++/* Return value checking for pthread functions, exit variant. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++void ++xpthread_check_return (const char *function, int value) ++{ ++ if (value != 0) ++ { ++ errno = value; ++ FAIL_EXIT1 ("%s: %m", function); ++ } ++} +diff --git a/support/xpthread_cond_wait.c b/support/xpthread_cond_wait.c +new file mode 100644 +index 0000000..b0e9b2a +--- /dev/null ++++ b/support/xpthread_cond_wait.c +@@ -0,0 +1,26 @@ ++/* pthread_cond_wait with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex) ++{ ++ xpthread_check_return ++ ("pthread_cond_wait", pthread_cond_wait (cond, mutex)); ++} +diff --git a/support/xpthread_create.c b/support/xpthread_create.c +new file mode 100644 +index 0000000..98c63e5 +--- /dev/null ++++ b/support/xpthread_create.c +@@ -0,0 +1,29 @@ ++/* pthread_create with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++pthread_t ++xpthread_create (pthread_attr_t *attr, ++ void *(*thread_func) (void *), void *closure) ++{ ++ pthread_t thr; ++ xpthread_check_return ++ ("pthread_create", pthread_create (&thr, attr, thread_func, closure)); ++ return thr; ++} +diff --git a/support/xpthread_detach.c b/support/xpthread_detach.c +new file mode 100644 +index 0000000..2088af2 +--- /dev/null ++++ b/support/xpthread_detach.c +@@ -0,0 +1,25 @@ ++/* pthread_detach with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_detach (pthread_t thr) ++{ ++ xpthread_check_return ("pthread_detach", pthread_detach (thr)); ++} +diff --git a/support/xpthread_join.c b/support/xpthread_join.c +new file mode 100644 +index 0000000..f23bb9a +--- /dev/null ++++ b/support/xpthread_join.c +@@ -0,0 +1,27 @@ ++/* pthread_join with error checking. 
++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void * ++xpthread_join (pthread_t thr) ++{ ++ void *result; ++ xpthread_check_return ("pthread_join", pthread_join (thr, &result)); ++ return result; ++} +diff --git a/support/xpthread_mutex_consistent.c b/support/xpthread_mutex_consistent.c +new file mode 100644 +index 0000000..52364be +--- /dev/null ++++ b/support/xpthread_mutex_consistent.c +@@ -0,0 +1,26 @@ ++/* pthread_mutex_consistent with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++void ++xpthread_mutex_consistent (pthread_mutex_t *mutex) ++{ ++ xpthread_check_return ("pthread_mutex_consistent", ++ pthread_mutex_consistent (mutex)); ++} +diff --git a/support/xpthread_mutex_destroy.c b/support/xpthread_mutex_destroy.c +new file mode 100644 +index 0000000..f11f8f0 +--- /dev/null ++++ b/support/xpthread_mutex_destroy.c +@@ -0,0 +1,26 @@ ++/* pthread_mutex_destroy with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_mutex_destroy (pthread_mutex_t *mutex) ++{ ++ xpthread_check_return ("pthread_mutex_destroy", ++ pthread_mutex_destroy (mutex)); ++} +diff --git a/support/xpthread_mutex_init.c b/support/xpthread_mutex_init.c +new file mode 100644 +index 0000000..2d16d1b +--- /dev/null ++++ b/support/xpthread_mutex_init.c +@@ -0,0 +1,26 @@ ++/* pthread_mutex_init with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_mutex_init (pthread_mutex_t *mutex, const pthread_mutexattr_t *attr) ++{ ++ xpthread_check_return ("pthread_mutex_init", ++ pthread_mutex_init (mutex, attr)); ++} +diff --git a/support/xpthread_mutex_lock.c b/support/xpthread_mutex_lock.c +new file mode 100644 +index 0000000..af727b4 +--- /dev/null ++++ b/support/xpthread_mutex_lock.c +@@ -0,0 +1,25 @@ ++/* pthread_mutex_lock with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_mutex_lock (pthread_mutex_t *mutex) ++{ ++ xpthread_check_return ("pthread_mutex_lock", pthread_mutex_lock (mutex)); ++} +diff --git a/support/xpthread_mutex_unlock.c b/support/xpthread_mutex_unlock.c +new file mode 100644 +index 0000000..161b41e +--- /dev/null ++++ b/support/xpthread_mutex_unlock.c +@@ -0,0 +1,25 @@ ++/* pthread_mutex_unlock with error checking. 
++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_mutex_unlock (pthread_mutex_t *mutex) ++{ ++ xpthread_check_return ("pthread_mutex_unlock", pthread_mutex_unlock (mutex)); ++} +diff --git a/support/xpthread_mutexattr_destroy.c b/support/xpthread_mutexattr_destroy.c +new file mode 100644 +index 0000000..c699e32 +--- /dev/null ++++ b/support/xpthread_mutexattr_destroy.c +@@ -0,0 +1,26 @@ ++/* pthread_mutexattr_destroy with error checking. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++void ++xpthread_mutexattr_destroy (pthread_mutexattr_t *attr) ++{ ++ xpthread_check_return ("pthread_mutexattr_destroy", ++ pthread_mutexattr_destroy (attr)); ++} +diff --git a/support/xpthread_mutexattr_init.c b/support/xpthread_mutexattr_init.c +new file mode 100644 +index 0000000..fa93fab +--- /dev/null ++++ b/support/xpthread_mutexattr_init.c +@@ -0,0 +1,25 @@ ++/* pthread_mutexattr_init with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_mutexattr_init (pthread_mutexattr_t *attr) ++{ ++ xpthread_check_return ("pthread_mutexattr_init", pthread_mutexattr_init (attr)); ++} +diff --git a/support/xpthread_mutexattr_setprotocol.c b/support/xpthread_mutexattr_setprotocol.c +new file mode 100644 +index 0000000..353f75e +--- /dev/null ++++ b/support/xpthread_mutexattr_setprotocol.c +@@ -0,0 +1,26 @@ ++/* pthread_mutexattr_setprotocol with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_mutexattr_setprotocol (pthread_mutexattr_t *attr, int flag) ++{ ++ xpthread_check_return ("pthread_mutexattr_setprotocol", ++ pthread_mutexattr_setprotocol (attr, flag)); ++} +diff --git a/support/xpthread_mutexattr_setpshared.c b/support/xpthread_mutexattr_setpshared.c +new file mode 100644 +index 0000000..242da1a +--- /dev/null ++++ b/support/xpthread_mutexattr_setpshared.c +@@ -0,0 +1,26 @@ ++/* pthread_mutexattr_setpshared with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++void ++xpthread_mutexattr_setpshared (pthread_mutexattr_t *attr, int flag) ++{ ++ xpthread_check_return ("pthread_mutexattr_setpshared", ++ pthread_mutexattr_setpshared (attr, flag)); ++} +diff --git a/support/xpthread_mutexattr_setrobust.c b/support/xpthread_mutexattr_setrobust.c +new file mode 100644 +index 0000000..d7d6fa8 +--- /dev/null ++++ b/support/xpthread_mutexattr_setrobust.c +@@ -0,0 +1,26 @@ ++/* pthread_mutexattr_setrobust with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_mutexattr_setrobust (pthread_mutexattr_t *attr, int flag) ++{ ++ xpthread_check_return ("pthread_mutexattr_setrobust", ++ pthread_mutexattr_setrobust (attr, flag)); ++} +diff --git a/support/xpthread_mutexattr_settype.c b/support/xpthread_mutexattr_settype.c +new file mode 100644 +index 0000000..cf22170 +--- /dev/null ++++ b/support/xpthread_mutexattr_settype.c +@@ -0,0 +1,26 @@ ++/* pthread_mutexattr_settype with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_mutexattr_settype (pthread_mutexattr_t *attr, int flag) ++{ ++ xpthread_check_return ("pthread_mutexattr_settype", ++ pthread_mutexattr_settype (attr, flag)); ++} +diff --git a/support/xpthread_once.c b/support/xpthread_once.c +new file mode 100644 +index 0000000..70d58db +--- /dev/null ++++ b/support/xpthread_once.c +@@ -0,0 +1,25 @@ ++/* pthread_once with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++void ++xpthread_once (pthread_once_t *guard, void (*func) (void)) ++{ ++ xpthread_check_return ("pthread_once", pthread_once (guard, func)); ++} +diff --git a/support/xpthread_sigmask.c b/support/xpthread_sigmask.c +new file mode 100644 +index 0000000..0ba9ca0 +--- /dev/null ++++ b/support/xpthread_sigmask.c +@@ -0,0 +1,34 @@ ++/* pthread_sigmask with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#include ++ ++void ++xpthread_sigmask (int how, const sigset_t *set, sigset_t *oldset) ++{ ++ if (pthread_sigmask (how, set, oldset) != 0) ++ { ++ write_message ("error: pthread_setmask failed\n"); ++ /* Do not use exit because pthread_sigmask can be called from a ++ signal handler. */ ++ _exit (1); ++ } ++} +diff --git a/support/xpthread_spin_lock.c b/support/xpthread_spin_lock.c +new file mode 100644 +index 0000000..6975215 +--- /dev/null ++++ b/support/xpthread_spin_lock.c +@@ -0,0 +1,25 @@ ++/* pthread_spin_lock with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++xpthread_spin_lock (pthread_spinlock_t *lock) ++{ ++ xpthread_check_return ("pthread_spin_lock", pthread_spin_lock (lock)); ++} +diff --git a/support/xpthread_spin_unlock.c b/support/xpthread_spin_unlock.c +new file mode 100644 +index 0000000..4f19a44 +--- /dev/null ++++ b/support/xpthread_spin_unlock.c +@@ -0,0 +1,25 @@ ++/* pthread_spin_unlock with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++void ++xpthread_spin_unlock (pthread_spinlock_t *lock) ++{ ++ xpthread_check_return ("pthread_spin_unlock", pthread_spin_unlock (lock)); ++} +diff --git a/support/xrealloc.c b/support/xrealloc.c +new file mode 100644 +index 0000000..00c3138 +--- /dev/null ++++ b/support/xrealloc.c +@@ -0,0 +1,32 @@ ++/* Error-checking wrapper for realloc. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++void * ++xrealloc (void *p, size_t n) ++{ ++ void *result = realloc (p, n); ++ if (result == NULL && (n > 0 || p == NULL)) ++ oom_error ("realloc", n); ++ return result; ++} +diff --git a/support/xrecvfrom.c b/support/xrecvfrom.c +new file mode 100644 +index 0000000..17809c4 +--- /dev/null ++++ b/support/xrecvfrom.c +@@ -0,0 +1,33 @@ ++/* recvfrom with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++size_t ++xrecvfrom (int fd, void *buf, size_t buflen, int flags, ++ struct sockaddr *sa, socklen_t *salen) ++{ ++ ssize_t ret = recvfrom (fd, buf, buflen, flags, sa, salen); ++ if (ret < 0) ++ FAIL_EXIT1 ("error: recvfrom (%d), %zu bytes buffer: %m", fd, buflen); ++ return ret; ++} +diff --git a/support/xsendto.c b/support/xsendto.c +new file mode 100644 +index 0000000..20bddf6 +--- /dev/null ++++ b/support/xsendto.c +@@ -0,0 +1,35 @@ ++/* sendto with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++ ++void ++xsendto (int fd, const void *buf, size_t buflen, int flags, ++ const struct sockaddr *sa, socklen_t salen) ++{ ++ ssize_t ret = sendto (fd, buf, buflen, flags, sa, salen); ++ if (ret < 0) ++ FAIL_EXIT1 ("sendto (%d), %zu bytes, family %d: %m", ++ fd, buflen, sa->sa_family); ++ if (ret != buflen) ++ FAIL_EXIT1 ("sendto (%d) sent %zd bytes instead of %zu", fd, ret, buflen); ++} +diff --git a/support/xsetsockopt.c b/support/xsetsockopt.c +new file mode 100644 +index 0000000..9931882 +--- /dev/null ++++ b/support/xsetsockopt.c +@@ -0,0 +1,31 @@ ++/* setsockopt with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++void ++xsetsockopt (int fd, int level, int name, const void *val, socklen_t vallen) ++{ ++ if (setsockopt (fd, level, name, val, vallen) != 0) ++ FAIL_EXIT1 ("setsockopt (%d, %d, %d), %zu bytes: %m", ++ fd, level, name, (size_t) vallen); ++} +diff --git a/support/xsignal.h b/support/xsignal.h +new file mode 100644 +index 0000000..3dc0d9d +--- /dev/null ++++ b/support/xsignal.h +@@ -0,0 +1,34 @@ ++/* Support functionality for using signals. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef SUPPORT_SIGNAL_H ++#define SUPPORT_SIGNAL_H ++ ++#include <signal.h> ++#include <sys/cdefs.h> ++ ++__BEGIN_DECLS ++ ++/* The following functions call the corresponding libpthread functions ++ and terminate the process on error. */ ++ ++void xpthread_sigmask (int how, const sigset_t *set, sigset_t *oldset); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_SIGNAL_H */ +diff --git a/support/xsocket.c b/support/xsocket.c +new file mode 100644 +index 0000000..c1deaee +--- /dev/null ++++ b/support/xsocket.c +@@ -0,0 +1,32 @@ ++/* socket with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. 
*/ ++ ++#include ++ ++#include ++#include ++#include ++ ++int ++xsocket (int domain, int type, int protocol) ++{ ++ int fd = socket (domain, type, protocol); ++ if (fd < 0) ++ FAIL_EXIT1 ("socket (%d, %d, %d): %m\n", domain, type, protocol); ++ return fd; ++} +diff --git a/support/xsocket.h b/support/xsocket.h +new file mode 100644 +index 0000000..0dbf13a +--- /dev/null ++++ b/support/xsocket.h +@@ -0,0 +1,38 @@ ++/* Error-checking wrappers for socket functions. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#ifndef SUPPORT_XSOCKET_H ++#define SUPPORT_XSOCKET_H ++ ++#include <poll.h> ++#include <sys/socket.h> ++#include <sys/types.h> ++ ++int xsocket (int, int, int); ++void xsetsockopt (int, int, int, const void *, socklen_t); ++void xgetsockname (int, struct sockaddr *, socklen_t *); ++void xconnect (int, const struct sockaddr *, socklen_t); ++void xbind (int, const struct sockaddr *, socklen_t); ++void xlisten (int, int); ++int xaccept (int, struct sockaddr *, socklen_t *); ++void xsendto (int, const void *, size_t, int, ++ const struct sockaddr *, socklen_t); ++size_t xrecvfrom (int, void *, size_t, int, struct sockaddr *, socklen_t *); ++int xpoll (struct pollfd *, nfds_t, int); ++ ++#endif /* SUPPORT_XSOCKET_H */ +diff --git a/support/xstdio.h b/support/xstdio.h +new file mode 100644 +index 0000000..bcc2e86 +--- /dev/null ++++ b/support/xstdio.h +@@ -0,0 +1,32 @@ ++/* Error-checking wrappers for stdio functions. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. 
*/ ++ ++#ifndef SUPPORT_XSTDIO_H ++#define SUPPORT_XSTDIO_H ++ ++#include ++#include ++ ++__BEGIN_DECLS ++ ++FILE *xfopen (const char *path, const char *mode); ++void xfclose (FILE *); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_XSTDIO_H */ +diff --git a/support/xstrdup.c b/support/xstrdup.c +new file mode 100644 +index 0000000..d6a8c04 +--- /dev/null ++++ b/support/xstrdup.c +@@ -0,0 +1,30 @@ ++/* strdup with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++ ++char * ++xstrdup (const char *s) ++{ ++ char *p = strdup (s); ++ if (p == NULL) ++ oom_error ("strdup", strlen (s)); ++ return p; ++} +diff --git a/support/xthread.h b/support/xthread.h +new file mode 100644 +index 0000000..6dd7e70 +--- /dev/null ++++ b/support/xthread.h +@@ -0,0 +1,77 @@ ++/* Support functionality for using threads. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef SUPPORT_THREAD_H ++#define SUPPORT_THREAD_H ++ ++#include <pthread.h> ++#include <sys/cdefs.h> ++ ++__BEGIN_DECLS ++ ++/* Terminate the process (with exit status 0) after SECONDS have ++ elapsed, from a helper thread. The process is terminated with the ++ exit function, so atexit handlers are executed. */ ++void delayed_exit (int seconds); ++ ++/* Terminate the process (with exit status 1) if VALUE is not zero. ++ In that case, print a failure message to standard output mentioning ++ FUNCTION. The process is terminated with the exit function, so ++ atexit handlers are executed. */ ++void xpthread_check_return (const char *function, int value); ++ ++/* The following functions call the corresponding libpthread functions ++ and terminate the process on error. 
*/ ++ ++void xpthread_barrier_init (pthread_barrier_t *barrier, ++ pthread_barrierattr_t *attr, unsigned int count); ++void xpthread_barrier_destroy (pthread_barrier_t *barrier); ++void xpthread_mutexattr_destroy (pthread_mutexattr_t *); ++void xpthread_mutexattr_init (pthread_mutexattr_t *); ++void xpthread_mutexattr_setprotocol (pthread_mutexattr_t *, int); ++void xpthread_mutexattr_setpshared (pthread_mutexattr_t *, int); ++void xpthread_mutexattr_setrobust (pthread_mutexattr_t *, int); ++void xpthread_mutexattr_settype (pthread_mutexattr_t *, int); ++void xpthread_mutex_init (pthread_mutex_t *, const pthread_mutexattr_t *); ++void xpthread_mutex_destroy (pthread_mutex_t *); ++void xpthread_mutex_lock (pthread_mutex_t *mutex); ++void xpthread_mutex_unlock (pthread_mutex_t *mutex); ++void xpthread_mutex_consistent (pthread_mutex_t *); ++void xpthread_spin_lock (pthread_spinlock_t *lock); ++void xpthread_spin_unlock (pthread_spinlock_t *lock); ++void xpthread_cond_wait (pthread_cond_t * cond, pthread_mutex_t * mutex); ++pthread_t xpthread_create (pthread_attr_t *attr, ++ void *(*thread_func) (void *), void *closure); ++void xpthread_detach (pthread_t thr); ++void xpthread_cancel (pthread_t thr); ++void *xpthread_join (pthread_t thr); ++void xpthread_once (pthread_once_t *guard, void (*func) (void)); ++void xpthread_attr_destroy (pthread_attr_t *attr); ++void xpthread_attr_init (pthread_attr_t *attr); ++void xpthread_attr_setdetachstate (pthread_attr_t *attr, ++ int detachstate); ++void xpthread_attr_setstacksize (pthread_attr_t *attr, ++ size_t stacksize); ++ ++/* This function returns non-zero if pthread_barrier_wait returned ++ PTHREAD_BARRIER_SERIAL_THREAD. */ ++int xpthread_barrier_wait (pthread_barrier_t *barrier); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_THREAD_H */ +diff --git a/support/xunistd.h b/support/xunistd.h +new file mode 100644 +index 0000000..a83b1f4 +--- /dev/null ++++ b/support/xunistd.h +@@ -0,0 +1,43 @@ ++/* POSIX-specific extra functions. 
++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* These wrapper functions use POSIX types and therefore cannot be ++ declared in <support/support.h>. */ ++ ++#ifndef SUPPORT_XUNISTD_H ++#define SUPPORT_XUNISTD_H ++ ++#include <unistd.h> ++#include <sys/cdefs.h> ++ ++__BEGIN_DECLS ++ ++pid_t xfork (void); ++pid_t xwaitpid (pid_t, int *status, int flags); ++ ++/* Write the buffer. Retry on short writes. */ ++void xwrite (int, const void *, size_t); ++ ++/* Invoke mmap with a zero file offset. */ ++void *xmmap (void *addr, size_t length, int prot, int flags, int fd); ++ ++void xmunmap (void *addr, size_t length); ++ ++__END_DECLS ++ ++#endif /* SUPPORT_XUNISTD_H */ +diff --git a/support/xwaitpid.c b/support/xwaitpid.c +new file mode 100644 +index 0000000..204795e +--- /dev/null ++++ b/support/xwaitpid.c +@@ -0,0 +1,33 @@ ++/* waitpid with error checking. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
/* waitpid with error checking.

   Call waitpid (PID, STATUS, FLAGS).  If waitpid reports an error
   (negative result), terminate the test through FAIL_EXIT1, which
   formats the errno text via %m.  Otherwise return whatever waitpid
   returned.

   The parameter and return types are pid_t so that this definition
   agrees with the prototype in support/xunistd.h:
     pid_t xwaitpid (pid_t, int *status, int flags);  */
pid_t
xwaitpid (pid_t pid, int *status, int flags)
{
  pid_t result = waitpid (pid, status, flags);
  if (result < 0)
    FAIL_EXIT1 ("waitpid: %m\n");
  return result;
}
/* write with error checking and retries.

   Write LENGTH bytes starting at BUFFER to the descriptor FD.  A
   short write is not an error: the loop simply continues with the
   bytes that are still pending.  A negative or zero return value
   from write terminates the test through FAIL_EXIT1, reporting how
   many bytes had already been written.  */
void
xwrite (int fd, const void *buffer, size_t length)
{
  const char *const start = buffer;
  const char *cursor = start;
  size_t remaining = length;

  for (; remaining > 0; )
    {
      ssize_t written = write (fd, cursor, remaining);

      /* Hard failure: write reported an error.  */
      if (written < 0)
        FAIL_EXIT1 ("write of %zu bytes failed after %td: %m",
                    length, cursor - start);

      /* No progress at all: report it rather than spin forever.  */
      if (written == 0)
        FAIL_EXIT1 ("write return 0 after writing %td bytes of %zu",
                    cursor - start, length);

      cursor += written;
      remaining -= written;
    }
}
*/ + #ifdef FE_DOWNWARD diff --git a/SOURCES/glibc-rh1418978-2-3.patch b/SOURCES/glibc-rh1418978-2-3.patch new file mode 100644 index 0000000..10c2d56 --- /dev/null +++ b/SOURCES/glibc-rh1418978-2-3.patch @@ -0,0 +1,20 @@ +commit d3c827e7c8208afeaed880cf8cf2515c86d10f17 +Author: Andreas Krebbel +Date: Fri Sep 19 11:26:31 2014 +0200 + + stdlib/longlong.h: Add __udiv_w_sdiv prototype. + +Index: b/stdlib/longlong.h +=================================================================== +--- a/stdlib/longlong.h ++++ b/stdlib/longlong.h +@@ -1642,7 +1642,8 @@ extern UHItype __stormy16_count_leading_ + #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) + #define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ +- USItype __r; \ ++ extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); \ ++ UWtype __r; \ + (q) = __udiv_w_sdiv (&__r, nh, nl, d); \ + (r) = __r; \ + } while (0) diff --git a/SOURCES/glibc-rh1418978-2-4.patch b/SOURCES/glibc-rh1418978-2-4.patch new file mode 100644 index 0000000..882ea30 --- /dev/null +++ b/SOURCES/glibc-rh1418978-2-4.patch @@ -0,0 +1,18 @@ +commit a849e800352ac5068dc6f1191c86ff62ba014c61 +Author: Andreas Schwab +Date: Thu May 1 22:00:34 2014 +0200 + + Fix implicit declaration + +Index: b/nscd/nscd-client.h +=================================================================== +--- a/nscd/nscd-client.h ++++ b/nscd/nscd-client.h +@@ -24,6 +24,7 @@ + + #include + #include ++#include + #include + #include + #include diff --git a/SOURCES/glibc-rh1418978-2-5.patch b/SOURCES/glibc-rh1418978-2-5.patch new file mode 100644 index 0000000..dd11eb8 --- /dev/null +++ b/SOURCES/glibc-rh1418978-2-5.patch @@ -0,0 +1,56 @@ +commit ffaa74cf68a370e232279a9a9b0a02ade287cc99 +Author: Siddhesh Poyarekar +Date: Mon Feb 18 18:17:05 2013 +0530 + + Fix build warnings in some test cases + + Include stdlib.h to get declaration of exit(3) + +Index: b/misc/tst-pselect.c +=================================================================== +--- a/misc/tst-pselect.c ++++ 
b/misc/tst-pselect.c +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + + + static volatile int handler_called; +Index: b/nptl/sysdeps/pthread/tst-timer.c +=================================================================== +--- a/nptl/sysdeps/pthread/tst-timer.c ++++ b/nptl/sysdeps/pthread/tst-timer.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + + static void +Index: b/nptl/tst-barrier4.c +=================================================================== +--- a/nptl/tst-barrier4.c ++++ b/nptl/tst-barrier4.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + + static pthread_barrier_t b1; +Index: b/nptl/tst-robust7.c +=================================================================== +--- a/nptl/tst-robust7.c ++++ b/nptl/tst-robust7.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + + static pthread_barrier_t b; diff --git a/SOURCES/glibc-rh1418978-2-6.patch b/SOURCES/glibc-rh1418978-2-6.patch new file mode 100644 index 0000000..34095c3 --- /dev/null +++ b/SOURCES/glibc-rh1418978-2-6.patch @@ -0,0 +1,24 @@ +commit e6fb95871cd3557e5882a6f969c11bc6a3cbe781 +Author: Florian Weimer +Date: Mon Sep 8 13:49:36 2014 +0200 + + Turn on -Werror=implicit-function-declaration + + GCC 4.4, the minimum compiler version, supports this option. Unlike + other warnings, -Wimplicit-function-declaration warnings should be + independent of compiler versions, so this change should not cause + compiler-specific build failures. + +Index: b/Makeconfig +=================================================================== +--- a/Makeconfig ++++ b/Makeconfig +@@ -643,7 +643,7 @@ ifeq ($(all-warnings),yes) + else + +gccwarn := -Wall -Wwrite-strings -Winline + endif +-+gccwarn-c = -Wstrict-prototypes +++gccwarn-c = -Wstrict-prototypes -Werror=implicit-function-declaration + + # We do not depend on the address of constants in different files to be + # actually different, so allow the compiler to merge them all. 
diff --git a/SOURCES/glibc-rh1418978-3-1.patch b/SOURCES/glibc-rh1418978-3-1.patch new file mode 100644 index 0000000..9997ad3 --- /dev/null +++ b/SOURCES/glibc-rh1418978-3-1.patch @@ -0,0 +1,127 @@ +commit c5c13355132e73578bbc0c612ddff964e6199747 +Author: Will Newton +Date: Fri Apr 11 15:21:23 2014 +0100 + + test-skeleton.c: Use stdout for error messages + + At the moment the test skeleton uses a mixture of stdout and + stderr for error message output. Using stdout for all test output + keeps all output correctly ordered and properly redirected to the + output file. The suggestion to use stdout is also made on the wiki: + + https://sourceware.org/glibc/wiki/Testing/Testsuite#Writing_a_test_case + + ChangeLog: + + 2014-06-23 Will Newton + + * test-skeleton.c (signal_handler): Use printf and %m + rather than perror. Use printf rather than fprintf to + stderr. Use puts rather than fputs to stderr. + (main): Likewise. + +Index: b/test-skeleton.c +=================================================================== +--- a/test-skeleton.c ++++ b/test-skeleton.c +@@ -188,7 +188,7 @@ signal_handler (int sig __attribute__ (( + } + if (killed != 0 && killed != pid) + { +- perror ("Failed to kill test process"); ++ printf ("Failed to kill test process: %m\n"); + exit (1); + } + +@@ -209,16 +209,16 @@ signal_handler (int sig __attribute__ (( + #endif + + if (killed == 0 || (WIFSIGNALED (status) && WTERMSIG (status) == SIGKILL)) +- fputs ("Timed out: killed the child process\n", stderr); ++ puts ("Timed out: killed the child process"); + else if (WIFSTOPPED (status)) +- fprintf (stderr, "Timed out: the child process was %s\n", +- strsignal (WSTOPSIG (status))); ++ printf ("Timed out: the child process was %s\n", ++ strsignal (WSTOPSIG (status))); + else if (WIFSIGNALED (status)) +- fprintf (stderr, "Timed out: the child process got signal %s\n", +- strsignal (WTERMSIG (status))); ++ printf ("Timed out: the child process got signal %s\n", ++ strsignal (WTERMSIG (status))); + 
else +- fprintf (stderr, "Timed out: killed the child process but it exited %d\n", +- WEXITSTATUS (status)); ++ printf ("Timed out: killed the child process but it exited %d\n", ++ WEXITSTATUS (status)); + + /* Exit with an error. */ + exit (1); +@@ -308,7 +308,7 @@ main (int argc, char *argv[]) + + if (chdir (test_dir) < 0) + { +- perror ("chdir"); ++ printf ("chdir: %m\n"); + exit (1); + } + } +@@ -367,10 +367,10 @@ main (int argc, char *argv[]) + data_limit.rlim_cur = MIN ((rlim_t) TEST_DATA_LIMIT, + data_limit.rlim_max); + if (setrlimit (RLIMIT_DATA, &data_limit) < 0) +- perror ("setrlimit: RLIMIT_DATA"); ++ printf ("setrlimit: RLIMIT_DATA: %m\n"); + } + else +- perror ("getrlimit: RLIMIT_DATA"); ++ printf ("getrlimit: RLIMIT_DATA: %m\n"); + #endif + + /* We put the test process in its own pgrp so that if it bogusly +@@ -382,7 +382,7 @@ main (int argc, char *argv[]) + } + else if (pid < 0) + { +- perror ("Cannot fork test program"); ++ printf ("Cannot fork test program: %m\n"); + exit (1); + } + +@@ -420,18 +420,16 @@ main (int argc, char *argv[]) + if (EXPECTED_SIGNAL != 0) + { + if (WTERMSIG (status) == 0) +- fprintf (stderr, +- "Expected signal '%s' from child, got none\n", +- strsignal (EXPECTED_SIGNAL)); ++ printf ("Expected signal '%s' from child, got none\n", ++ strsignal (EXPECTED_SIGNAL)); + else +- fprintf (stderr, +- "Incorrect signal from child: got `%s', need `%s'\n", +- strsignal (WTERMSIG (status)), +- strsignal (EXPECTED_SIGNAL)); ++ printf ("Incorrect signal from child: got `%s', need `%s'\n", ++ strsignal (WTERMSIG (status)), ++ strsignal (EXPECTED_SIGNAL)); + } + else +- fprintf (stderr, "Didn't expect signal from child: got `%s'\n", +- strsignal (WTERMSIG (status))); ++ printf ("Didn't expect signal from child: got `%s'\n", ++ strsignal (WTERMSIG (status))); + exit (1); + } + +@@ -441,8 +439,8 @@ main (int argc, char *argv[]) + #else + if (WEXITSTATUS (status) != EXPECTED_STATUS) + { +- fprintf (stderr, "Expected status %d, got %d\n", +- 
EXPECTED_STATUS, WEXITSTATUS (status)); ++ printf ("Expected status %d, got %d\n", ++ EXPECTED_STATUS, WEXITSTATUS (status)); + exit (1); + } + diff --git a/SOURCES/glibc-rh1418978-3-2.patch b/SOURCES/glibc-rh1418978-3-2.patch new file mode 100644 index 0000000..f85c01a --- /dev/null +++ b/SOURCES/glibc-rh1418978-3-2.patch @@ -0,0 +1,41 @@ +commit 900056024b75eae8b550d7fee1dec9e71f28344e +Author: Florian Weimer +Date: Mon Mar 7 13:48:47 2016 +0100 + + test-skeleton.c: Do not set RLIMIT_DATA [BZ #19648] + + With older kernels, it is mostly ineffective because it causes malloc + to switch from sbrk to mmap (potentially invalidating malloc testing + compared to what real applications do). With newer kernels which + have switched to enforcing RLIMIT_DATA for mmap as well, some test + cases will fail in an unintended fashion because the limit which was + set previously does not include room for all mmap mappings. + +Index: b/test-skeleton.c +=================================================================== +--- a/test-skeleton.c ++++ b/test-skeleton.c +@@ -356,23 +356,6 @@ main (int argc, char *argv[]) + setrlimit (RLIMIT_CORE, &core_limit); + #endif + +-#ifdef RLIMIT_DATA +- /* Try to avoid eating all memory if a test leaks. */ +- struct rlimit data_limit; +- if (getrlimit (RLIMIT_DATA, &data_limit) == 0) +- { +- if (TEST_DATA_LIMIT == RLIM_INFINITY) +- data_limit.rlim_cur = data_limit.rlim_max; +- else if (data_limit.rlim_cur > (rlim_t) TEST_DATA_LIMIT) +- data_limit.rlim_cur = MIN ((rlim_t) TEST_DATA_LIMIT, +- data_limit.rlim_max); +- if (setrlimit (RLIMIT_DATA, &data_limit) < 0) +- printf ("setrlimit: RLIMIT_DATA: %m\n"); +- } +- else +- printf ("getrlimit: RLIMIT_DATA: %m\n"); +-#endif +- + /* We put the test process in its own pgrp so that if it bogusly + generates any job control signals, they won't hit the whole build.
*/ + setpgid (0, 0); diff --git a/SOURCES/glibc-rh1418997.patch b/SOURCES/glibc-rh1418997.patch new file mode 100644 index 0000000..973029d --- /dev/null +++ b/SOURCES/glibc-rh1418997.patch @@ -0,0 +1,51 @@ +commit 78b7adbaea643f2f213bb113e3ec933416a769a8 +Author: Joseph Myers +Date: Tue Oct 25 15:54:16 2016 +0000 + + Fix cmpli usage in power6 memset. + + Building glibc for powerpc64 with recent (2.27.51.20161012) binutils, + with multi-arch enabled, I get the error: + + ../sysdeps/powerpc/powerpc64/power6/memset.S: Assembler messages: + ../sysdeps/powerpc/powerpc64/power6/memset.S:254: Error: operand out of range (5 is not between 0 and 1) + ../sysdeps/powerpc/powerpc64/power6/memset.S:254: Error: operand out of range (128 is not between 0 and 31) + ../sysdeps/powerpc/powerpc64/power6/memset.S:254: Error: missing operand + + Indeed, cmpli is documented as a four-operand instruction, and looking + at nearby code it seems likely cmpldi was intended. This patch fixes + this powerpc64 code accordingly, and makes a corresponding change to + the powerpc32 code. + + Tested for powerpc, powerpc64 and powerpc64le by Tulio Magno Quites + Machado Filho + + * sysdeps/powerpc/powerpc32/power6/memset.S (memset): Use cmplwi + instead of cmpli. + * sysdeps/powerpc/powerpc64/power6/memset.S (memset): Use cmpldi + instead of cmpli. + +diff -rup a/sysdeps/powerpc/powerpc32/power6/memset.S b/sysdeps/powerpc/powerpc32/power6/memset.S +--- a/sysdeps/powerpc/powerpc32/power6/memset.S 2017-03-06 13:52:27.000000000 -0500 ++++ b/sysdeps/powerpc/powerpc32/power6/memset.S 2017-03-06 13:54:49.705201476 -0500 +@@ -396,7 +396,7 @@ L(cacheAlignedx): + /* A simple loop for the longer (>640 bytes) lengths. 
This form limits + the branch miss-predicted to exactly 1 at loop exit.*/ + L(cacheAligned512): +- cmpli cr1,rLEN,128 ++ cmplwi cr1,rLEN,128 + blt cr1,L(cacheAligned1) + dcbz 0,rMEMP + addi rLEN,rLEN,-128 +diff -rup a/sysdeps/powerpc/powerpc64/power6/memset.S b/sysdeps/powerpc/powerpc64/power6/memset.S +--- a/sysdeps/powerpc/powerpc64/power6/memset.S 2017-03-06 13:52:22.000000000 -0500 ++++ b/sysdeps/powerpc/powerpc64/power6/memset.S 2017-03-06 13:54:35.824216755 -0500 +@@ -269,7 +269,7 @@ L(cacheAlignedx): + /* A simple loop for the longer (>640 bytes) lengths. This form limits + the branch miss-predicted to exactly 1 at loop exit.*/ + L(cacheAligned512): +- cmpli cr1,rLEN,128 ++ cmpldi cr1,rLEN,128 + blt cr1,L(cacheAligned1) + dcbz 0,rMEMP + addi rLEN,rLEN,-128 diff --git a/SOURCES/glibc-rh1421155.patch b/SOURCES/glibc-rh1421155.patch new file mode 100644 index 0000000..08e0b1f --- /dev/null +++ b/SOURCES/glibc-rh1421155.patch @@ -0,0 +1,2278 @@ +Full backports of the following patches: + +commit b97eb2bdb1ed72982a7821c3078be591051cef59 +Author: H.J. Lu +Date: Mon Mar 16 14:58:43 2015 -0700 + + Preserve bound registers in _dl_runtime_resolve + + We need to add a BND prefix before indirect branch at the end of + _dl_runtime_resolve to preserve bound registers. + +commit ddd85a65b6e3d6ec1e756c1f78559f99a2c943ca +Author: H.J. Lu +Date: Tue Jul 7 05:23:24 2015 -0700 + + Add and use sysdeps/i386/link-defines.sym + + Define macros for fields in La_i86_regs and La_i86_retval and use them + in dl-trampoline.S, instead of hardcoded values. + +commit 14c5cbabc2d11004ab223ae5eae761ddf83ef99e +Author: Igor Zamyatin +Date: Thu Jul 9 06:50:12 2015 -0700 + + Preserve bound registers for pointer pass/return + + We need to save/restore bound registers and add a BND prefix before + branches in _dl_runtime_profile so that bound registers for pointer + pass and return are preserved when LD_AUDIT is used. + + +commit f3dcae82d54e5097e18e1d6ef4ff55c2ea4e621e +Author: H.J. 
Lu +Date: Tue Aug 25 04:33:54 2015 -0700 + + Save and restore vector registers in x86-64 ld.so + + This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve + and _dl_runtime_profile, which save and restore the first 8 vector + registers used for parameter passing. elf_machine_runtime_setup + selects the proper _dl_runtime_resolve or _dl_runtime_profile based + on _dl_x86_cpu_features. It avoids race condition caused by + FOREIGN_CALL macros, which are only used for x86-64. + + Performance impact of saving and restoring 8 vector registers are + negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when + ld.so is optimized with SSE2. + +commit fb0f7a6755c1bfaec38f490fbfcaa39a66ee3604 +Author: H.J. Lu +Date: Tue Sep 6 08:50:55 2016 -0700 + + X86-64: Add _dl_runtime_resolve_avx[512]_{opt|slow} [BZ #20508] + + There is transition penalty when SSE instructions are mixed with 256-bit + AVX or 512-bit AVX512 load instructions. Since _dl_runtime_resolve_avx + and _dl_runtime_profile_avx512 save/restore 256-bit YMM/512-bit ZMM + registers, there is transition penalty when SSE instructions are used + with lazy binding on AVX and AVX512 processors. + + To avoid SSE transition penalty, if only the lower 128 bits of the first + 8 vector registers are non-zero, we can preserve %xmm0 - %xmm7 registers + with the zero upper bits. + + For AVX and AVX512 processors which support XGETBV with ECX == 1, we can + use XGETBV with ECX == 1 to check if the upper 128 bits of YMM registers + or the upper 256 bits of ZMM registers are zero. We can restore only the + non-zero portion of vector registers with AVX/AVX512 load instructions + which will zero-extend upper bits of vector registers. + + This patch adds _dl_runtime_resolve_sse_vex which saves and restores + XMM registers with 128-bit AVX store/load instructions. It is used to + preserve YMM/ZMM registers when only the lower 128 bits are non-zero. 
+ _dl_runtime_resolve_avx_opt and _dl_runtime_resolve_avx512_opt are added + and used on AVX/AVX512 processors supporting XGETBV with ECX == 1 so + that we store and load only the non-zero portion of vector registers. + This avoids SSE transition penalty caused by _dl_runtime_resolve_avx and + _dl_runtime_profile_avx512 when only the lower 128 bits of vector + registers are used. + + _dl_runtime_resolve_avx_slow is added and used for AVX processors which + don't support XGETBV with ECX == 1. Since there is no SSE transition + penalty on AVX512 processors which don't support XGETBV with ECX == 1, + _dl_runtime_resolve_avx512_slow isn't provided. + +commit 3403a17fea8ccef7dc5f99553a13231acf838744 +Author: H.J. Lu +Date: Thu Feb 9 12:19:44 2017 -0800 + + x86-64: Verify that _dl_runtime_resolve preserves vector registers + + On x86-64, _dl_runtime_resolve must preserve the first 8 vector + registers. Add 3 _dl_runtime_resolve tests to verify that SSE, + AVX and AVX512 registers are preserved. + +commit c15f8eb50cea7ad1a4ccece6e0982bf426d52c00 +Author: H.J. Lu +Date: Tue Mar 21 10:59:31 2017 -0700 + + x86-64: Improve branch predication in _dl_runtime_resolve_avx512_opt [BZ #21258] + + On Skylake server, _dl_runtime_resolve_avx512_opt is used to preserve + the first 8 vector registers. The code layout is + + if only %xmm0 - %xmm7 registers are used + preserve %xmm0 - %xmm7 registers + if only %ymm0 - %ymm7 registers are used + preserve %ymm0 - %ymm7 registers + preserve %zmm0 - %zmm7 registers + + Branch predication always executes the fallthrough code path to preserve + %zmm0 - %zmm7 registers speculatively, even though only %xmm0 - %xmm7 + registers are used. This leads to lower CPU frequency on Skylake + server. 
This patch changes the fallthrough code path to preserve + %xmm0 - %xmm7 registers instead: + + if whole %zmm0 - %zmm7 registers are used + preserve %zmm0 - %zmm7 registers + if only %ymm0 - %ymm7 registers are used + preserve %ymm0 - %ymm7 registers + preserve %xmm0 - %xmm7 registers + + Tested on Skylake server. + + [BZ #21258] + * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_resolve_opt): + Define only if _dl_runtime_resolve is defined to + _dl_runtime_resolve_sse_vex. + * sysdeps/x86_64/dl-trampoline.h (_dl_runtime_resolve_opt): + Fallthrough to _dl_runtime_resolve_sse_vex. + +Index: glibc-2.17-c758a686/nptl/sysdeps/x86_64/tcb-offsets.sym +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/x86_64/tcb-offsets.sym ++++ glibc-2.17-c758a686/nptl/sysdeps/x86_64/tcb-offsets.sym +@@ -15,7 +15,6 @@ VGETCPU_CACHE_OFFSET offsetof (tcbhead_t + #ifndef __ASSUME_PRIVATE_FUTEX + PRIVATE_FUTEX offsetof (tcbhead_t, private_futex) + #endif +-RTLD_SAVESPACE_SSE offsetof (tcbhead_t, rtld_savespace_sse) + + -- Not strictly offsets, but these values are also used in the TCB. + TCB_CANCELSTATE_BITMASK CANCELSTATE_BITMASK +Index: glibc-2.17-c758a686/nptl/sysdeps/x86_64/tls.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/x86_64/tls.h ++++ glibc-2.17-c758a686/nptl/sysdeps/x86_64/tls.h +@@ -67,12 +67,13 @@ typedef struct + # else + int __unused1; + # endif +- int rtld_must_xmm_save; ++ int __glibc_unused1; + /* Reservation of some values for the TM ABI. */ + void *__private_tm[5]; + long int __unused2; +- /* Have space for the post-AVX register size. */ +- __128bits rtld_savespace_sse[8][4] __attribute__ ((aligned (32))); ++ /* Must be kept even if it is no longer used by glibc since programs, ++ like AddressSanitizer, depend on the size of tcbhead_t. 
*/ ++ __128bits __glibc_unused2[8][4] __attribute__ ((aligned (32))); + + void *__padding[8]; + } tcbhead_t; +@@ -380,41 +381,6 @@ typedef struct + # define THREAD_GSCOPE_WAIT() \ + GL(dl_wait_lookup_done) () + +- +-# ifdef SHARED +-/* Defined in dl-trampoline.S. */ +-extern void _dl_x86_64_save_sse (void); +-extern void _dl_x86_64_restore_sse (void); +- +-# define RTLD_CHECK_FOREIGN_CALL \ +- (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0) +- +-/* NB: Don't use the xchg operation because that would imply a lock +- prefix which is expensive and unnecessary. The cache line is also +- not contested at all. */ +-# define RTLD_ENABLE_FOREIGN_CALL \ +- int old_rtld_must_xmm_save = THREAD_GETMEM (THREAD_SELF, \ +- header.rtld_must_xmm_save); \ +- THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1) +- +-# define RTLD_PREPARE_FOREIGN_CALL \ +- do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save)) \ +- { \ +- _dl_x86_64_save_sse (); \ +- THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \ +- } \ +- while (0) +- +-# define RTLD_FINALIZE_FOREIGN_CALL \ +- do { \ +- if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0) \ +- _dl_x86_64_restore_sse (); \ +- THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, \ +- old_rtld_must_xmm_save); \ +- } while (0) +-# endif +- +- + #endif /* __ASSEMBLER__ */ + + #endif /* tls.h */ +Index: glibc-2.17-c758a686/sysdeps/i386/Makefile +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/i386/Makefile ++++ glibc-2.17-c758a686/sysdeps/i386/Makefile +@@ -33,6 +33,7 @@ sysdep-CFLAGS += -mpreferred-stack-bound + else + ifeq ($(subdir),csu) + sysdep-CFLAGS += -mpreferred-stack-boundary=4 ++gen-as-const-headers += link-defines.sym + else + # Likewise, any function which calls user callbacks + uses-callbacks += -mpreferred-stack-boundary=4 +Index: glibc-2.17-c758a686/sysdeps/i386/configure 
+=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/i386/configure ++++ glibc-2.17-c758a686/sysdeps/i386/configure +@@ -179,5 +179,32 @@ fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_novzeroupper" >&5 + $as_echo "$libc_cv_cc_novzeroupper" >&6; } + ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5 ++$as_echo_n "checking for Intel MPX support... " >&6; } ++if ${libc_cv_asm_mpx+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ cat > conftest.s <<\EOF ++ bndmov %bnd0,(%esp) ++EOF ++if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; }; then ++ libc_cv_asm_mpx=yes ++else ++ libc_cv_asm_mpx=no ++fi ++rm -f conftest* ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5 ++$as_echo "$libc_cv_asm_mpx" >&6; } ++if test $libc_cv_asm_mpx == yes; then ++ $as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h ++ ++fi ++ + $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h + +Index: glibc-2.17-c758a686/sysdeps/i386/configure.in +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/i386/configure.in ++++ glibc-2.17-c758a686/sysdeps/i386/configure.in +@@ -53,6 +53,21 @@ LIBC_TRY_CC_OPTION([-mno-vzeroupper], + [libc_cv_cc_novzeroupper=no]) + ]) + ++dnl Check whether asm supports Intel MPX ++AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl ++cat > conftest.s <<\EOF ++ bndmov %bnd0,(%esp) ++EOF ++if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then ++ libc_cv_asm_mpx=yes ++else ++ libc_cv_asm_mpx=no ++fi ++rm -f conftest*]) ++if test $libc_cv_asm_mpx == yes; then ++ AC_DEFINE(HAVE_MPX_SUPPORT) ++fi ++ + dnl It is always possible to access static and hidden symbols in 
an + dnl position independent way. + AC_DEFINE(PI_STATIC_AND_HIDDEN) +Index: glibc-2.17-c758a686/sysdeps/i386/dl-trampoline.S +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/i386/dl-trampoline.S ++++ glibc-2.17-c758a686/sysdeps/i386/dl-trampoline.S +@@ -17,6 +17,13 @@ + . */ + + #include ++#include ++ ++#ifdef HAVE_MPX_SUPPORT ++# define PRESERVE_BND_REGS_PREFIX bnd ++#else ++# define PRESERVE_BND_REGS_PREFIX .byte 0xf2 ++#endif + + .text + .globl _dl_runtime_resolve +@@ -161,24 +168,47 @@ _dl_runtime_profile: + +4 free + %esp free + */ +- subl $20, %esp +- cfi_adjust_cfa_offset (20) +- movl %eax, (%esp) +- movl %edx, 4(%esp) +- fstpt 8(%esp) +- fstpt 20(%esp) ++#if LONG_DOUBLE_SIZE != 12 ++# error "long double size must be 12 bytes" ++#endif ++ # Allocate space for La_i86_retval and subtract 12 free bytes. ++ subl $(LRV_SIZE - 12), %esp ++ cfi_adjust_cfa_offset (LRV_SIZE - 12) ++ movl %eax, LRV_EAX_OFFSET(%esp) ++ movl %edx, LRV_EDX_OFFSET(%esp) ++ fstpt LRV_ST0_OFFSET(%esp) ++ fstpt LRV_ST1_OFFSET(%esp) ++#ifdef HAVE_MPX_SUPPORT ++ bndmov %bnd0, LRV_BND0_OFFSET(%esp) ++ bndmov %bnd1, LRV_BND1_OFFSET(%esp) ++#else ++ .byte 0x66,0x0f,0x1b,0x44,0x24,LRV_BND0_OFFSET ++ .byte 0x66,0x0f,0x1b,0x4c,0x24,LRV_BND1_OFFSET ++#endif + pushl %esp + cfi_adjust_cfa_offset (4) +- leal 36(%esp), %ecx +- movl 56(%esp), %eax +- movl 60(%esp), %edx ++ # Address of La_i86_regs area. 
++ leal (LRV_SIZE + 4)(%esp), %ecx ++ # PLT2 ++ movl (LRV_SIZE + 4 + LR_SIZE)(%esp), %eax ++ # PLT1 ++ movl (LRV_SIZE + 4 + LR_SIZE + 4)(%esp), %edx + call _dl_call_pltexit +- movl (%esp), %eax +- movl 4(%esp), %edx +- fldt 20(%esp) +- fldt 8(%esp) +- addl $60, %esp +- cfi_adjust_cfa_offset (-60) ++ movl LRV_EAX_OFFSET(%esp), %eax ++ movl LRV_EDX_OFFSET(%esp), %edx ++ fldt LRV_ST1_OFFSET(%esp) ++ fldt LRV_ST0_OFFSET(%esp) ++#ifdef HAVE_MPX_SUPPORT ++ bndmov LRV_BND0_OFFSET(%esp), %bnd0 ++ bndmov LRV_BND1_OFFSET(%esp), %bnd1 ++#else ++ .byte 0x66,0x0f,0x1a,0x44,0x24,LRV_BND0_OFFSET ++ .byte 0x66,0x0f,0x1a,0x4c,0x24,LRV_BND1_OFFSET ++#endif ++ # Restore stack before return. ++ addl $(LRV_SIZE + 4 + LR_SIZE + 4), %esp ++ cfi_adjust_cfa_offset (-(LRV_SIZE + 4 + LR_SIZE + 4)) ++ PRESERVE_BND_REGS_PREFIX + ret + cfi_endproc + .size _dl_runtime_profile, .-_dl_runtime_profile +Index: glibc-2.17-c758a686/sysdeps/i386/link-defines.sym +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/i386/link-defines.sym +@@ -0,0 +1,20 @@ ++#include "link.h" ++#include ++ ++-- ++LONG_DOUBLE_SIZE sizeof (long double) ++ ++LR_SIZE sizeof (struct La_i86_regs) ++LR_EDX_OFFSET offsetof (struct La_i86_regs, lr_edx) ++LR_ECX_OFFSET offsetof (struct La_i86_regs, lr_ecx) ++LR_EAX_OFFSET offsetof (struct La_i86_regs, lr_eax) ++LR_EBP_OFFSET offsetof (struct La_i86_regs, lr_ebp) ++LR_ESP_OFFSET offsetof (struct La_i86_regs, lr_esp) ++ ++LRV_SIZE sizeof (struct La_i86_retval) ++LRV_EAX_OFFSET offsetof (struct La_i86_retval, lrv_eax) ++LRV_EDX_OFFSET offsetof (struct La_i86_retval, lrv_edx) ++LRV_ST0_OFFSET offsetof (struct La_i86_retval, lrv_st0) ++LRV_ST1_OFFSET offsetof (struct La_i86_retval, lrv_st1) ++LRV_BND0_OFFSET offsetof (struct La_i86_retval, lrv_bnd0) ++LRV_BND1_OFFSET offsetof (struct La_i86_retval, lrv_bnd1) +Index: glibc-2.17-c758a686/sysdeps/x86/bits/link.h 
+=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/x86/bits/link.h ++++ glibc-2.17-c758a686/sysdeps/x86/bits/link.h +@@ -38,6 +38,8 @@ typedef struct La_i86_retval + uint32_t lrv_edx; + long double lrv_st0; + long double lrv_st1; ++ uint64_t lrv_bnd0; ++ uint64_t lrv_bnd1; + } La_i86_retval; + + +Index: glibc-2.17-c758a686/sysdeps/x86/cpu-features.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/x86/cpu-features.c ++++ glibc-2.17-c758a686/sysdeps/x86/cpu-features.c +@@ -130,6 +130,20 @@ init_cpu_features (struct cpu_features * + break; + } + } ++ ++ /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow. ++ If XGETBV supports ECX == 1, use _dl_runtime_resolve_opt. */ ++ cpu_features->feature[index_Use_dl_runtime_resolve_slow] ++ |= bit_Use_dl_runtime_resolve_slow; ++ if (cpu_features->max_cpuid >= 0xd) ++ { ++ unsigned int eax; ++ ++ __cpuid_count (0xd, 1, eax, ebx, ecx, edx); ++ if ((eax & (1 << 2)) != 0) ++ cpu_features->feature[index_Use_dl_runtime_resolve_opt] ++ |= bit_Use_dl_runtime_resolve_opt; ++ } + } + /* This spells out "AuthenticAMD". */ + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) +Index: glibc-2.17-c758a686/sysdeps/x86/cpu-features.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/x86/cpu-features.h ++++ glibc-2.17-c758a686/sysdeps/x86/cpu-features.h +@@ -34,6 +34,9 @@ + #define bit_AVX512DQ_Usable (1 << 13) + #define bit_Prefer_MAP_32BIT_EXEC (1 << 16) + #define bit_Prefer_No_VZEROUPPER (1 << 17) ++#define bit_Use_dl_runtime_resolve_opt (1 << 20) ++#define bit_Use_dl_runtime_resolve_slow (1 << 21) ++ + + /* CPUID Feature flags.
*/ + +@@ -95,6 +98,9 @@ + # define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE + # define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE + # define index_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE ++# define index_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE ++# define index_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE ++ + + # if defined (_LIBC) && !IS_IN (nonlib) + # ifdef __x86_64__ +@@ -273,6 +279,8 @@ extern const struct cpu_features *__get_ + # define index_AVX512DQ_Usable FEATURE_INDEX_1 + # define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1 + # define index_Prefer_No_VZEROUPPER FEATURE_INDEX_1 ++# define index_Use_dl_runtime_resolve_opt FEATURE_INDEX_1 ++# define index_Use_dl_runtime_resolve_slow FEATURE_INDEX_1 + + #endif /* !__ASSEMBLER__ */ + +Index: glibc-2.17-c758a686/sysdeps/x86_64/Makefile +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/x86_64/Makefile ++++ glibc-2.17-c758a686/sysdeps/x86_64/Makefile +@@ -21,6 +21,11 @@ endif + ifeq ($(subdir),elf) + sysdep-dl-routines += tlsdesc dl-tlsdesc + ++tests += ifuncmain8 ++modules-names += ifuncmod8 ++ ++$(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so ++ + tests += tst-quad1 tst-quad2 + modules-names += tst-quadmod1 tst-quadmod2 + +@@ -34,18 +39,32 @@ tests-pie += $(quad-pie-test) + $(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o + $(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o + ++tests += tst-sse tst-avx tst-avx512 ++test-extras += tst-avx-aux tst-avx512-aux ++extra-test-objs += tst-avx-aux.o tst-avx512-aux.o ++ + tests += tst-audit10 +-modules-names += tst-auditmod10a tst-auditmod10b ++modules-names += tst-auditmod10a tst-auditmod10b \ ++ tst-ssemod tst-avxmod tst-avx512mod + + $(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so + $(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so + tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so + ++$(objpfx)tst-sse: $(objpfx)tst-ssemod.so 
++$(objpfx)tst-avx: $(objpfx)tst-avx-aux.o $(objpfx)tst-avxmod.so ++$(objpfx)tst-avx512: $(objpfx)tst-avx512-aux.o $(objpfx)tst-avx512mod.so ++ ++CFLAGS-tst-avx-aux.c += $(AVX-CFLAGS) ++CFLAGS-tst-avxmod.c += $(AVX-CFLAGS) ++ + ifeq (yes,$(config-cflags-avx512)) + AVX512-CFLAGS = -mavx512f + CFLAGS-tst-audit10.c += $(AVX512-CFLAGS) + CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS) + CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS) ++CFLAGS-tst-avx512-aux.c += $(AVX512-CFLAGS) ++CFLAGS-tst-avx512mod.c += $(AVX512-CFLAGS) + endif + endif + +Index: glibc-2.17-c758a686/sysdeps/x86_64/dl-machine.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/x86_64/dl-machine.h ++++ glibc-2.17-c758a686/sysdeps/x86_64/dl-machine.h +@@ -66,8 +66,15 @@ static inline int __attribute__ ((unused + elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + { + Elf64_Addr *got; +- extern void _dl_runtime_resolve (ElfW(Word)) attribute_hidden; +- extern void _dl_runtime_profile (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_resolve_avx_slow (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_resolve_avx_opt (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_resolve_avx512_opt (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden; ++ extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden; + + if (l->l_info[DT_JMPREL] && lazy) + { +@@ -95,7 +102,12 @@ elf_machine_runtime_setup (struct link_m + end in this function. 
*/ + if (__builtin_expect (profile, 0)) + { +- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile; ++ if (HAS_ARCH_FEATURE (AVX512F_Usable)) ++ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512; ++ else if (HAS_ARCH_FEATURE (AVX_Usable)) ++ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx; ++ else ++ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_sse; + + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) +@@ -104,9 +116,34 @@ elf_machine_runtime_setup (struct link_m + GL(dl_profile_map) = l; + } + else +- /* This function will get called to fix up the GOT entry indicated by +- the offset on the stack, and then jump to the resolved address. */ +- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve; ++ { ++ /* This function will get called to fix up the GOT entry ++ indicated by the offset on the stack, and then jump to ++ the resolved address. */ ++ if (HAS_ARCH_FEATURE (AVX512F_Usable)) ++ { ++ if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt)) ++ *(ElfW(Addr) *) (got + 2) ++ = (ElfW(Addr)) &_dl_runtime_resolve_avx512_opt; ++ else ++ *(ElfW(Addr) *) (got + 2) ++ = (ElfW(Addr)) &_dl_runtime_resolve_avx512; ++ } ++ else if (HAS_ARCH_FEATURE (AVX_Usable)) ++ { ++ if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt)) ++ *(ElfW(Addr) *) (got + 2) ++ = (ElfW(Addr)) &_dl_runtime_resolve_avx_opt; ++ else if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_slow)) ++ *(ElfW(Addr) *) (got + 2) ++ = (ElfW(Addr)) &_dl_runtime_resolve_avx_slow; ++ else ++ *(ElfW(Addr) *) (got + 2) ++ = (ElfW(Addr)) &_dl_runtime_resolve_avx; ++ } ++ else ++ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse; ++ } + } + + if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy) +Index: glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/x86_64/dl-trampoline.S ++++ 
glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S +@@ -18,28 +18,52 @@ + + #include + #include ++#include + #include + +-#if (RTLD_SAVESPACE_SSE % 32) != 0 +-# error RTLD_SAVESPACE_SSE must be aligned to 32 bytes ++#ifndef DL_STACK_ALIGNMENT ++/* Due to GCC bug: ++ ++ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 ++ ++ __tls_get_addr may be called with 8-byte stack alignment. Although ++ this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume ++ that stack will be always aligned at 16 bytes. We use unaligned ++ 16-byte move to load and store SSE registers, which has no penalty ++ on modern processors if stack is 16-byte aligned. */ ++# define DL_STACK_ALIGNMENT 8 + #endif + ++#ifndef DL_RUNIME_UNALIGNED_VEC_SIZE ++/* The maximum size of unaligned vector load and store. */ ++# define DL_RUNIME_UNALIGNED_VEC_SIZE 16 ++#endif ++ ++/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */ ++#define DL_RUNIME_RESOLVE_REALIGN_STACK \ ++ (VEC_SIZE > DL_STACK_ALIGNMENT \ ++ && VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE) ++ ++/* Align vector register save area to 16 bytes. */ ++#define REGISTER_SAVE_VEC_OFF 0 ++ + /* Area on stack to save and restore registers used for parameter + passing when calling _dl_fixup. */ + #ifdef __ILP32__ +-/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX. */ +-# define REGISTER_SAVE_AREA (8 * 7) +-# define REGISTER_SAVE_RAX 0 ++# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) ++# define PRESERVE_BND_REGS_PREFIX + #else +-/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0, +- BND1, BND2, BND3. */ +-# define REGISTER_SAVE_AREA (8 * 7 + 16 * 4) + /* Align bound register save area to 16 bytes. 
*/ +-# define REGISTER_SAVE_BND0 0 ++# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) + # define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16) + # define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16) + # define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16) + # define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16) ++# ifdef HAVE_MPX_SUPPORT ++# define PRESERVE_BND_REGS_PREFIX bnd ++# else ++# define PRESERVE_BND_REGS_PREFIX .byte 0xf2 ++# endif + #endif + #define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8) + #define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8) +@@ -48,376 +72,71 @@ + #define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8) + #define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8) + +- .text +- .globl _dl_runtime_resolve +- .type _dl_runtime_resolve, @function +- .align 16 +- cfi_startproc +-_dl_runtime_resolve: +- cfi_adjust_cfa_offset(16) # Incorporate PLT +- subq $REGISTER_SAVE_AREA,%rsp +- cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +- # Preserve registers otherwise clobbered. +- movq %rax, REGISTER_SAVE_RAX(%rsp) +- movq %rcx, REGISTER_SAVE_RCX(%rsp) +- movq %rdx, REGISTER_SAVE_RDX(%rsp) +- movq %rsi, REGISTER_SAVE_RSI(%rsp) +- movq %rdi, REGISTER_SAVE_RDI(%rsp) +- movq %r8, REGISTER_SAVE_R8(%rsp) +- movq %r9, REGISTER_SAVE_R9(%rsp) +-#ifndef __ILP32__ +- # We also have to preserve bound registers. These are nops if +- # Intel MPX isn't available or disabled. +-# ifdef HAVE_MPX_SUPPORT +- bndmov %bnd0, REGISTER_SAVE_BND0(%rsp) +- bndmov %bnd1, REGISTER_SAVE_BND1(%rsp) +- bndmov %bnd2, REGISTER_SAVE_BND2(%rsp) +- bndmov %bnd3, REGISTER_SAVE_BND3(%rsp) +-# else +- .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0 +- .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1 +- .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2 +- .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3 +-# endif +-#endif +- # Copy args pushed by PLT in register. 
+- # %rdi: link_map, %rsi: reloc_index +- movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi +- movq REGISTER_SAVE_AREA(%rsp), %rdi +- call _dl_fixup # Call resolver. +- movq %rax, %r11 # Save return value +-#ifndef __ILP32__ +- # Restore bound registers. These are nops if Intel MPX isn't +- # avaiable or disabled. +-# ifdef HAVE_MPX_SUPPORT +- bndmov REGISTER_SAVE_BND3(%rsp), %bnd3 +- bndmov REGISTER_SAVE_BND2(%rsp), %bnd2 +- bndmov REGISTER_SAVE_BND1(%rsp), %bnd1 +- bndmov REGISTER_SAVE_BND0(%rsp), %bnd0 +-# else +- .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3 +- .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2 +- .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1 +- .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0 +-# endif ++#define VEC_SIZE 64 ++#define VMOVA vmovdqa64 ++#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT ++# define VMOV vmovdqa64 ++#else ++# define VMOV vmovdqu64 + #endif +- # Get register content back. +- movq REGISTER_SAVE_R9(%rsp), %r9 +- movq REGISTER_SAVE_R8(%rsp), %r8 +- movq REGISTER_SAVE_RDI(%rsp), %rdi +- movq REGISTER_SAVE_RSI(%rsp), %rsi +- movq REGISTER_SAVE_RDX(%rsp), %rdx +- movq REGISTER_SAVE_RCX(%rsp), %rcx +- movq REGISTER_SAVE_RAX(%rsp), %rax +- # Adjust stack(PLT did 2 pushes) +- addq $(REGISTER_SAVE_AREA + 16), %rsp +- cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16)) +- jmp *%r11 # Jump to function address. +- cfi_endproc +- .size _dl_runtime_resolve, .-_dl_runtime_resolve +- +- +-#ifndef PROF +- .globl _dl_runtime_profile +- .type _dl_runtime_profile, @function +- .align 16 +- cfi_startproc +- +-_dl_runtime_profile: +- cfi_adjust_cfa_offset(16) # Incorporate PLT +- /* The La_x86_64_regs data structure pointed to by the +- fourth paramater must be 16-byte aligned. This must +- be explicitly enforced. We have the set up a dynamically +- sized stack frame. %rbx points to the top half which +- has a fixed size and preserves the original stack pointer. */ +- +- subq $32, %rsp # Allocate the local storage. 
+- cfi_adjust_cfa_offset(32) +- movq %rbx, (%rsp) +- cfi_rel_offset(%rbx, 0) +- +- /* On the stack: +- 56(%rbx) parameter #1 +- 48(%rbx) return address +- +- 40(%rbx) reloc index +- 32(%rbx) link_map +- +- 24(%rbx) La_x86_64_regs pointer +- 16(%rbx) framesize +- 8(%rbx) rax +- (%rbx) rbx +- */ +- +- movq %rax, 8(%rsp) +- movq %rsp, %rbx +- cfi_def_cfa_register(%rbx) +- +- /* Actively align the La_x86_64_regs structure. */ +- andq $0xfffffffffffffff0, %rsp +-# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT +- /* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers +- to detect if any xmm0-xmm7 registers are changed by audit +- module. */ +- subq $(LR_SIZE + XMM_SIZE*8), %rsp +-# else +- subq $LR_SIZE, %rsp # sizeof(La_x86_64_regs) +-# endif +- movq %rsp, 24(%rbx) +- +- /* Fill the La_x86_64_regs structure. */ +- movq %rdx, LR_RDX_OFFSET(%rsp) +- movq %r8, LR_R8_OFFSET(%rsp) +- movq %r9, LR_R9_OFFSET(%rsp) +- movq %rcx, LR_RCX_OFFSET(%rsp) +- movq %rsi, LR_RSI_OFFSET(%rsp) +- movq %rdi, LR_RDI_OFFSET(%rsp) +- movq %rbp, LR_RBP_OFFSET(%rsp) +- +- leaq 48(%rbx), %rax +- movq %rax, LR_RSP_OFFSET(%rsp) +- +- /* We always store the XMM registers even if AVX is available. +- This is to provide backward binary compatility for existing +- audit modules. */ +- movaps %xmm0, (LR_XMM_OFFSET)(%rsp) +- movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) +- movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) +- movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) +- movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) +- movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) +- movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) +- movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) +- +-# ifndef __ILP32__ +-# ifdef HAVE_MPX_SUPPORT +- bndmov %bnd0, (LR_BND_OFFSET)(%rsp) # Preserve bound +- bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp) # registers. 
Nops if +- bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available +- bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled. +-# else +- .byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET) +- .byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE) +- .byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET + BND_SIZE*2) +- .byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3) +-# endif +-# endif +- +-# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT +- .data +-L(have_avx): +- .zero 4 +- .size L(have_avx), 4 +- .previous +- +- cmpl $0, L(have_avx)(%rip) +- jne L(defined) +- movq %rbx, %r11 # Save rbx +- movl $1, %eax +- cpuid +- movq %r11,%rbx # Restore rbx +- xorl %eax, %eax +- // AVX and XSAVE supported? +- andl $((1 << 28) | (1 << 27)), %ecx +- cmpl $((1 << 28) | (1 << 27)), %ecx +- jne 10f +-# ifdef HAVE_AVX512_ASM_SUPPORT +- // AVX512 supported in processor? +- movq %rbx, %r11 # Save rbx +- xorl %ecx, %ecx +- mov $0x7, %eax +- cpuid +- andl $(1 << 16), %ebx +-# endif +- xorl %ecx, %ecx +- // Get XFEATURE_ENABLED_MASK +- xgetbv +-# ifdef HAVE_AVX512_ASM_SUPPORT +- test %ebx, %ebx +- movq %r11, %rbx # Restore rbx +- je 20f +- // Verify that XCR0[7:5] = '111b' and +- // XCR0[2:1] = '11b' which means +- // that zmm state is enabled +- andl $0xe6, %eax +- cmpl $0xe6, %eax +- jne 20f +- movl %eax, L(have_avx)(%rip) +-L(avx512): +-# define RESTORE_AVX +-# define VMOV vmovdqu64 +-# define VEC(i) zmm##i +-# define MORE_CODE +-# include "dl-trampoline.h" +-# undef VMOV +-# undef VEC +-# undef RESTORE_AVX +-# endif +-20: andl $0x6, %eax +-10: subl $0x5, %eax +- movl %eax, L(have_avx)(%rip) +- cmpl $0, %eax +- +-L(defined): +- js L(no_avx) +-# ifdef HAVE_AVX512_ASM_SUPPORT +- cmpl $0xe6, L(have_avx)(%rip) +- je L(avx512) +-# endif +- +-# define RESTORE_AVX +-# define VMOV vmovdqu +-# define VEC(i) ymm##i +-# define MORE_CODE +-# include "dl-trampoline.h" +- +- .align 16 +-L(no_avx): +-# endif +- +-# undef RESTORE_AVX +-# 
include "dl-trampoline.h" +- +- cfi_endproc +- .size _dl_runtime_profile, .-_dl_runtime_profile ++#define VEC(i) zmm##i ++#define _dl_runtime_resolve _dl_runtime_resolve_avx512 ++#define _dl_runtime_profile _dl_runtime_profile_avx512 ++#define RESTORE_AVX ++#include "dl-trampoline.h" ++#undef _dl_runtime_resolve ++#undef _dl_runtime_profile ++#undef VEC ++#undef VMOV ++#undef VMOVA ++#undef VEC_SIZE ++ ++#define VEC_SIZE 32 ++#define VMOVA vmovdqa ++#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT ++# define VMOV vmovdqa ++#else ++# define VMOV vmovdqu + #endif +- +- +-#ifdef SHARED +- .globl _dl_x86_64_save_sse +- .type _dl_x86_64_save_sse, @function +- .align 16 +- cfi_startproc +-_dl_x86_64_save_sse: +-# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT +- cmpl $0, L(have_avx)(%rip) +- jne L(defined_5) +- movq %rbx, %r11 # Save rbx +- movl $1, %eax +- cpuid +- movq %r11,%rbx # Restore rbx +- xorl %eax, %eax +- // AVX and XSAVE supported? +- andl $((1 << 28) | (1 << 27)), %ecx +- cmpl $((1 << 28) | (1 << 27)), %ecx +- jne 1f +-# ifdef HAVE_AVX512_ASM_SUPPORT +- // AVX512 supported in a processor? 
+- movq %rbx, %r11 # Save rbx +- xorl %ecx,%ecx +- mov $0x7,%eax +- cpuid +- andl $(1 << 16), %ebx +-# endif +- xorl %ecx, %ecx +- // Get XFEATURE_ENABLED_MASK +- xgetbv +-# ifdef HAVE_AVX512_ASM_SUPPORT +- test %ebx, %ebx +- movq %r11, %rbx # Restore rbx +- je 2f +- // Verify that XCR0[7:5] = '111b' and +- // XCR0[2:1] = '11b' which means +- // that zmm state is enabled +- andl $0xe6, %eax +- movl %eax, L(have_avx)(%rip) +- cmpl $0xe6, %eax +- je L(avx512_5) +-# endif +- +-2: andl $0x6, %eax +-1: subl $0x5, %eax +- movl %eax, L(have_avx)(%rip) +- cmpl $0, %eax +- +-L(defined_5): +- js L(no_avx5) +-# ifdef HAVE_AVX512_ASM_SUPPORT +- cmpl $0xe6, L(have_avx)(%rip) +- je L(avx512_5) +-# endif +- +- vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE +- vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE +- vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE +- vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE +- vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE +- vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE +- vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE +- vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE +- ret +-# ifdef HAVE_AVX512_ASM_SUPPORT +-L(avx512_5): +- vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE +- vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE +- vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE +- vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE +- vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE +- vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE +- vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE +- vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE +- ret +-# endif +-L(no_avx5): +-# endif +- movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE +- movdqa %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE +- movdqa %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE +- movdqa %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE +- movdqa %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE +- movdqa %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE +- movdqa %xmm6, 
%fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE +- movdqa %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE +- ret +- cfi_endproc +- .size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse +- +- +- .globl _dl_x86_64_restore_sse +- .type _dl_x86_64_restore_sse, @function +- .align 16 +- cfi_startproc +-_dl_x86_64_restore_sse: +-# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT +- cmpl $0, L(have_avx)(%rip) +- js L(no_avx6) +-# ifdef HAVE_AVX512_ASM_SUPPORT +- cmpl $0xe6, L(have_avx)(%rip) +- je L(avx512_6) +-# endif +- +- vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0 +- vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1 +- vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2 +- vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3 +- vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4 +- vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5 +- vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6 +- vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7 +- ret +-# ifdef HAVE_AVX512_ASM_SUPPORT +-L(avx512_6): +- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0 +- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1 +- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2 +- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3 +- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4 +- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5 +- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6 +- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7 +- ret +-# endif +-L(no_avx6): +-# endif +- movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0 +- movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1 +- movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2 +- movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3 +- movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4 +- movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5 +- movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6 +- movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7 +- ret +- cfi_endproc +- .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse ++#define VEC(i) ymm##i ++#define _dl_runtime_resolve 
_dl_runtime_resolve_avx ++#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx_opt ++#define _dl_runtime_profile _dl_runtime_profile_avx ++#include "dl-trampoline.h" ++#undef _dl_runtime_resolve ++#undef _dl_runtime_resolve_opt ++#undef _dl_runtime_profile ++#undef VEC ++#undef VMOV ++#undef VMOVA ++#undef VEC_SIZE ++ ++/* movaps/movups is 1-byte shorter. */ ++#define VEC_SIZE 16 ++#define VMOVA movaps ++#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT ++# define VMOV movaps ++#else ++# define VMOV movups ++ #endif ++#define VEC(i) xmm##i ++#define _dl_runtime_resolve _dl_runtime_resolve_sse ++#define _dl_runtime_profile _dl_runtime_profile_sse ++#undef RESTORE_AVX ++#include "dl-trampoline.h" ++#undef _dl_runtime_resolve ++#undef _dl_runtime_profile ++#undef VMOV ++#undef VMOVA ++ ++/* Used by _dl_runtime_resolve_avx_opt/_dl_runtime_resolve_avx512_opt ++ to preserve the full vector registers with zero upper bits. */ ++#define VMOVA vmovdqa ++#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT ++# define VMOV vmovdqa ++#else ++# define VMOV vmovdqu + #endif ++#define _dl_runtime_resolve _dl_runtime_resolve_sse_vex ++#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx512_opt ++#include "dl-trampoline.h" +Index: glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/x86_64/dl-trampoline.h ++++ glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h +@@ -1,6 +1,5 @@ +-/* Partial PLT profile trampoline to save and restore x86-64 vector +- registers. +- Copyright (C) 2009, 2011 Free Software Foundation, Inc. ++/* PLT trampolines. x86-64 version. ++ Copyright (C) 2009-2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -17,16 +16,355 @@ + License along with the GNU C Library; if not, see + . 
*/ + +-#ifdef RESTORE_AVX ++#undef REGISTER_SAVE_AREA_RAW ++#ifdef __ILP32__ ++/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to ++ VEC7. */ ++# define REGISTER_SAVE_AREA_RAW (8 * 7 + VEC_SIZE * 8) ++#else ++/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as ++ BND0, BND1, BND2, BND3 and VEC0 to VEC7. */ ++# define REGISTER_SAVE_AREA_RAW (8 * 7 + 16 * 4 + VEC_SIZE * 8) ++#endif ++ ++#undef REGISTER_SAVE_AREA ++#undef LOCAL_STORAGE_AREA ++#undef BASE ++#if DL_RUNIME_RESOLVE_REALIGN_STACK ++# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8) ++/* Local stack area before jumping to function address: RBX. */ ++# define LOCAL_STORAGE_AREA 8 ++# define BASE rbx ++# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0 ++# error REGISTER_SAVE_AREA must be multples of VEC_SIZE ++# endif ++#else ++# define REGISTER_SAVE_AREA REGISTER_SAVE_AREA_RAW ++/* Local stack area before jumping to function address: All saved ++ registers. */ ++# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA ++# define BASE rsp ++# if (REGISTER_SAVE_AREA % 16) != 8 ++# error REGISTER_SAVE_AREA must be odd multples of 8 ++# endif ++#endif ++ ++ .text ++#ifdef _dl_runtime_resolve_opt ++/* Use the smallest vector registers to preserve the full YMM/ZMM ++ registers to avoid SSE transition penalty. */ ++ ++# if VEC_SIZE == 32 ++/* Check if the upper 128 bits in %ymm0 - %ymm7 registers are non-zero ++ and preserve %xmm0 - %xmm7 registers with the zero upper bits. Since ++ there is no SSE transition penalty on AVX512 processors which don't ++ support XGETBV with ECX == 1, _dl_runtime_resolve_avx512_slow isn't ++ provided. 
*/ ++ .globl _dl_runtime_resolve_avx_slow ++ .hidden _dl_runtime_resolve_avx_slow ++ .type _dl_runtime_resolve_avx_slow, @function ++ .align 16 ++_dl_runtime_resolve_avx_slow: ++ cfi_startproc ++ cfi_adjust_cfa_offset(16) # Incorporate PLT ++ vorpd %ymm0, %ymm1, %ymm8 ++ vorpd %ymm2, %ymm3, %ymm9 ++ vorpd %ymm4, %ymm5, %ymm10 ++ vorpd %ymm6, %ymm7, %ymm11 ++ vorpd %ymm8, %ymm9, %ymm9 ++ vorpd %ymm10, %ymm11, %ymm10 ++ vpcmpeqd %xmm8, %xmm8, %xmm8 ++ vorpd %ymm9, %ymm10, %ymm10 ++ vptest %ymm10, %ymm8 ++ # Preserve %ymm0 - %ymm7 registers if the upper 128 bits of any ++ # %ymm0 - %ymm7 registers aren't zero. ++ PRESERVE_BND_REGS_PREFIX ++ jnc _dl_runtime_resolve_avx ++ # Use vzeroupper to avoid SSE transition penalty. ++ vzeroupper ++ # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits ++ # when the upper 128 bits of %ymm0 - %ymm7 registers are zero. ++ PRESERVE_BND_REGS_PREFIX ++ jmp _dl_runtime_resolve_sse_vex ++ cfi_adjust_cfa_offset(-16) # Restore PLT adjustment ++ cfi_endproc ++ .size _dl_runtime_resolve_avx_slow, .-_dl_runtime_resolve_avx_slow ++# endif ++ ++/* Use XGETBV with ECX == 1 to check which bits in vector registers are ++ non-zero and only preserve the non-zero lower bits with zero upper ++ bits. */ ++ .globl _dl_runtime_resolve_opt ++ .hidden _dl_runtime_resolve_opt ++ .type _dl_runtime_resolve_opt, @function ++ .align 16 ++_dl_runtime_resolve_opt: ++ cfi_startproc ++ cfi_adjust_cfa_offset(16) # Incorporate PLT ++ pushq %rax ++ cfi_adjust_cfa_offset(8) ++ cfi_rel_offset(%rax, 0) ++ pushq %rcx ++ cfi_adjust_cfa_offset(8) ++ cfi_rel_offset(%rcx, 0) ++ pushq %rdx ++ cfi_adjust_cfa_offset(8) ++ cfi_rel_offset(%rdx, 0) ++ movl $1, %ecx ++ xgetbv ++ movl %eax, %r11d ++ popq %rdx ++ cfi_adjust_cfa_offset(-8) ++ cfi_restore (%rdx) ++ popq %rcx ++ cfi_adjust_cfa_offset(-8) ++ cfi_restore (%rcx) ++ popq %rax ++ cfi_adjust_cfa_offset(-8) ++ cfi_restore (%rax) ++# if VEC_SIZE == 32 ++ # For YMM registers, check if YMM state is in use. 
++ andl $bit_YMM_state, %r11d ++ # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits if ++ # YMM state isn't in use. ++ PRESERVE_BND_REGS_PREFIX ++ jz _dl_runtime_resolve_sse_vex ++# elif VEC_SIZE == 16 ++ # For ZMM registers, check if YMM state and ZMM state are in ++ # use. ++ andl $(bit_YMM_state | bit_ZMM0_15_state), %r11d ++ cmpl $bit_YMM_state, %r11d ++ # Preserve %zmm0 - %zmm7 registers if ZMM state is in use. ++ PRESERVE_BND_REGS_PREFIX ++ jg _dl_runtime_resolve_avx512 ++ # Preserve %ymm0 - %ymm7 registers with the zero upper 256 bits if ++ # ZMM state isn't in use. ++ PRESERVE_BND_REGS_PREFIX ++ je _dl_runtime_resolve_avx ++ # Preserve %xmm0 - %xmm7 registers with the zero upper 384 bits if ++ # neither YMM state nor ZMM state are in use. ++# else ++# error Unsupported VEC_SIZE! ++# endif ++ cfi_adjust_cfa_offset(-16) # Restore PLT adjustment ++ cfi_endproc ++ .size _dl_runtime_resolve_opt, .-_dl_runtime_resolve_opt ++#endif ++ .globl _dl_runtime_resolve ++ .hidden _dl_runtime_resolve ++ .type _dl_runtime_resolve, @function ++ .align 16 ++ cfi_startproc ++_dl_runtime_resolve: ++ cfi_adjust_cfa_offset(16) # Incorporate PLT ++#if DL_RUNIME_RESOLVE_REALIGN_STACK ++# if LOCAL_STORAGE_AREA != 8 ++# error LOCAL_STORAGE_AREA must be 8 ++# endif ++ pushq %rbx # push subtracts stack by 8. ++ cfi_adjust_cfa_offset(8) ++ cfi_rel_offset(%rbx, 0) ++ mov %RSP_LP, %RBX_LP ++ cfi_def_cfa_register(%rbx) ++ and $-VEC_SIZE, %RSP_LP ++#endif ++ sub $REGISTER_SAVE_AREA, %RSP_LP ++ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) ++ # Preserve registers otherwise clobbered. 
++ movq %rax, REGISTER_SAVE_RAX(%rsp) ++ movq %rcx, REGISTER_SAVE_RCX(%rsp) ++ movq %rdx, REGISTER_SAVE_RDX(%rsp) ++ movq %rsi, REGISTER_SAVE_RSI(%rsp) ++ movq %rdi, REGISTER_SAVE_RDI(%rsp) ++ movq %r8, REGISTER_SAVE_R8(%rsp) ++ movq %r9, REGISTER_SAVE_R9(%rsp) ++ VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp) ++ VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp) ++ VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp) ++ VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp) ++ VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp) ++ VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp) ++ VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp) ++ VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp) ++#ifndef __ILP32__ ++ # We also have to preserve bound registers. These are nops if ++ # Intel MPX isn't available or disabled. ++# ifdef HAVE_MPX_SUPPORT ++ bndmov %bnd0, REGISTER_SAVE_BND0(%rsp) ++ bndmov %bnd1, REGISTER_SAVE_BND1(%rsp) ++ bndmov %bnd2, REGISTER_SAVE_BND2(%rsp) ++ bndmov %bnd3, REGISTER_SAVE_BND3(%rsp) ++# else ++# if REGISTER_SAVE_BND0 == 0 ++ .byte 0x66,0x0f,0x1b,0x04,0x24 ++# else ++ .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0 ++# endif ++ .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1 ++ .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2 ++ .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3 ++# endif ++#endif ++ # Copy args pushed by PLT in register. ++ # %rdi: link_map, %rsi: reloc_index ++ mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP ++ mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP ++ call _dl_fixup # Call resolver. ++ mov %RAX_LP, %R11_LP # Save return value ++#ifndef __ILP32__ ++ # Restore bound registers. These are nops if Intel MPX isn't ++ # available or disabled. 
++# ifdef HAVE_MPX_SUPPORT ++ bndmov REGISTER_SAVE_BND3(%rsp), %bnd3 ++ bndmov REGISTER_SAVE_BND2(%rsp), %bnd2 ++ bndmov REGISTER_SAVE_BND1(%rsp), %bnd1 ++ bndmov REGISTER_SAVE_BND0(%rsp), %bnd0 ++# else ++ .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3 ++ .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2 ++ .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1 ++# if REGISTER_SAVE_BND0 == 0 ++ .byte 0x66,0x0f,0x1a,0x04,0x24 ++# else ++ .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0 ++# endif ++# endif ++#endif ++ # Get register content back. ++ movq REGISTER_SAVE_R9(%rsp), %r9 ++ movq REGISTER_SAVE_R8(%rsp), %r8 ++ movq REGISTER_SAVE_RDI(%rsp), %rdi ++ movq REGISTER_SAVE_RSI(%rsp), %rsi ++ movq REGISTER_SAVE_RDX(%rsp), %rdx ++ movq REGISTER_SAVE_RCX(%rsp), %rcx ++ movq REGISTER_SAVE_RAX(%rsp), %rax ++ VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0) ++ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1) ++ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2) ++ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3) ++ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4) ++ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5) ++ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6) ++ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7) ++#if DL_RUNIME_RESOLVE_REALIGN_STACK ++ mov %RBX_LP, %RSP_LP ++ cfi_def_cfa_register(%rsp) ++ movq (%rsp), %rbx ++ cfi_restore(%rbx) ++#endif ++ # Adjust stack(PLT did 2 pushes) ++ add $(LOCAL_STORAGE_AREA + 16), %RSP_LP ++ cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16)) ++ # Preserve bound registers. ++ PRESERVE_BND_REGS_PREFIX ++ jmp *%r11 # Jump to function address. ++ cfi_endproc ++ .size _dl_runtime_resolve, .-_dl_runtime_resolve ++ ++ ++/* To preserve %xmm0 - %xmm7 registers, dl-trampoline.h is included ++ twice, for _dl_runtime_resolve_sse and _dl_runtime_resolve_sse_vex. ++ But we don't need another _dl_runtime_profile for XMM registers. 
*/ ++#if !defined PROF && defined _dl_runtime_profile ++# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0 ++# error LR_VECTOR_OFFSET must be multples of VEC_SIZE ++# endif ++ ++ .globl _dl_runtime_profile ++ .hidden _dl_runtime_profile ++ .type _dl_runtime_profile, @function ++ .align 16 ++_dl_runtime_profile: ++ cfi_startproc ++ cfi_adjust_cfa_offset(16) # Incorporate PLT ++ /* The La_x86_64_regs data structure pointed to by the ++ fourth parameter must be VEC_SIZE-byte aligned. This must ++ be explicitly enforced. We have to set up a dynamically ++ sized stack frame. %rbx points to the top half which ++ has a fixed size and preserves the original stack pointer. */ ++ ++ sub $32, %RSP_LP # Allocate the local storage. ++ cfi_adjust_cfa_offset(32) ++ movq %rbx, (%rsp) ++ cfi_rel_offset(%rbx, 0) ++ ++ /* On the stack: ++ 56(%rbx) parameter #1 ++ 48(%rbx) return address ++ ++ 40(%rbx) reloc index ++ 32(%rbx) link_map ++ ++ 24(%rbx) La_x86_64_regs pointer ++ 16(%rbx) framesize ++ 8(%rbx) rax ++ (%rbx) rbx ++ */ ++ ++ movq %rax, 8(%rsp) ++ mov %RSP_LP, %RBX_LP ++ cfi_def_cfa_register(%rbx) ++ ++ /* Actively align the La_x86_64_regs structure. */ ++ and $-VEC_SIZE, %RSP_LP ++# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT ++ /* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers ++ to detect if any xmm0-xmm7 registers are changed by audit ++ module. */ ++ sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP ++# else ++ sub $LR_SIZE, %RSP_LP # sizeof(La_x86_64_regs) ++# endif ++ movq %rsp, 24(%rbx) ++ ++ /* Fill the La_x86_64_regs structure. */ ++ movq %rdx, LR_RDX_OFFSET(%rsp) ++ movq %r8, LR_R8_OFFSET(%rsp) ++ movq %r9, LR_R9_OFFSET(%rsp) ++ movq %rcx, LR_RCX_OFFSET(%rsp) ++ movq %rsi, LR_RSI_OFFSET(%rsp) ++ movq %rdi, LR_RDI_OFFSET(%rsp) ++ movq %rbp, LR_RBP_OFFSET(%rsp) ++ ++ lea 48(%rbx), %RAX_LP ++ movq %rax, LR_RSP_OFFSET(%rsp) ++ ++ /* We always store the XMM registers even if AVX is available. 
++ This is to provide backward binary compatibility for existing ++ audit modules. */ ++ movaps %xmm0, (LR_XMM_OFFSET)(%rsp) ++ movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) ++ movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) ++ movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) ++ movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) ++ movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) ++ movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) ++ movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) ++ ++# ifndef __ILP32__ ++# ifdef HAVE_MPX_SUPPORT ++ bndmov %bnd0, (LR_BND_OFFSET)(%rsp) # Preserve bound ++ bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp) # registers. Nops if ++ bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available ++ bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled. ++# else ++ .byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET) ++ .byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE) ++ .byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2) ++ .byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3) ++# endif ++# endif ++ ++# ifdef RESTORE_AVX + /* This is to support AVX audit modules. 
*/ +- VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp) +- VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) +- VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) +- VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) +- VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) +- VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) +- VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) +- VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) ++ VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp) ++ VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) ++ VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) ++ VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) ++ VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) ++ VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) ++ VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) ++ VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) + + /* Save xmm0-xmm7 registers to detect if any of them are + changed by audit module. */ +@@ -38,7 +376,7 @@ + vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp) + vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp) + vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp) +-#endif ++# endif + + mov %RSP_LP, %RCX_LP # La_x86_64_regs pointer to %rcx. + mov 48(%rbx), %RDX_LP # Load return address if needed. +@@ -63,21 +401,7 @@ + movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6 + movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7 + +-#ifndef __ILP32__ +-# ifdef HAVE_MPX_SUPPORT +- bndmov (LR_BND_OFFSET)(%rsp), %bnd0 # Restore bound +- bndmov (LR_BND_OFFSET + BND_SIZE)(%rsp), %bnd1 # registers. 
+- bndmov (LR_BND_OFFSET + BND_SIZE*2)(%rsp), %bnd2 +- bndmov (LR_BND_OFFSET + BND_SIZE*3)(%rsp), %bnd3 +-# else +- .byte 0x66,0x0f,0x1a,0x84,0x24;.long (LR_BND_OFFSET) +- .byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE) +- .byte 0x66,0x0f,0x1a,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2) +- .byte 0x66,0x0f,0x1a,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3) +-# endif +-#endif +- +-#ifdef RESTORE_AVX ++# ifdef RESTORE_AVX + /* Check if any xmm0-xmm7 registers are changed by audit + module. */ + vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8 +@@ -86,7 +410,7 @@ + je 2f + vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp) + jmp 1f +-2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0) ++2: VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0) + vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp) + + 1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8 +@@ -95,7 +419,7 @@ + je 2f + vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) + jmp 1f +-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1) ++2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1) + vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) + + 1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8 +@@ -104,7 +428,7 @@ + je 2f + vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) + jmp 1f +-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2) ++2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2) + vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) + + 1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8 +@@ -113,7 +437,7 @@ + je 2f + vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) + jmp 1f +-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3) ++2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3) + vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) + + 1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8 +@@ -122,7 +446,7 @@ + je 2f + vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) + jmp 1f +-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4) ++2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), 
%VEC(4) + vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) + + 1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8 +@@ -131,7 +455,7 @@ + je 2f + vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) + jmp 1f +-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5) ++2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5) + vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) + + 1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8 +@@ -140,7 +464,7 @@ + je 2f + vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) + jmp 1f +-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6) ++2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6) + vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) + + 1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8 +@@ -149,13 +473,29 @@ + je 2f + vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) + jmp 1f +-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7) ++2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7) + vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) + + 1: +-#endif ++# endif ++ ++# ifndef __ILP32__ ++# ifdef HAVE_MPX_SUPPORT ++ bndmov (LR_BND_OFFSET)(%rsp), %bnd0 # Restore bound ++ bndmov (LR_BND_OFFSET + BND_SIZE)(%rsp), %bnd1 # registers. ++ bndmov (LR_BND_OFFSET + BND_SIZE*2)(%rsp), %bnd2 ++ bndmov (LR_BND_OFFSET + BND_SIZE*3)(%rsp), %bnd3 ++# else ++ .byte 0x66,0x0f,0x1a,0x84,0x24;.long (LR_BND_OFFSET) ++ .byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE) ++ .byte 0x66,0x0f,0x1a,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2) ++ .byte 0x66,0x0f,0x1a,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3) ++# endif ++# endif ++ + mov 16(%rbx), %R10_LP # Anything in framesize? 
+ test %R10_LP, %R10_LP ++ PRESERVE_BND_REGS_PREFIX + jns 3f + + /* There's nothing in the frame size, so there +@@ -166,14 +506,15 @@ + movq LR_RSI_OFFSET(%rsp), %rsi + movq LR_RDI_OFFSET(%rsp), %rdi + +- movq %rbx, %rsp ++ mov %RBX_LP, %RSP_LP + movq (%rsp), %rbx +- cfi_restore(rbx) ++ cfi_restore(%rbx) + cfi_def_cfa_register(%rsp) + +- addq $48, %rsp # Adjust the stack to the return value ++ add $48, %RSP_LP # Adjust the stack to the return value + # (eats the reloc index and link_map) + cfi_adjust_cfa_offset(-48) ++ PRESERVE_BND_REGS_PREFIX + jmp *%r11 # Jump to function address. + + 3: +@@ -186,13 +527,13 @@ + temporary buffer of the size specified by the 'framesize' + returned from _dl_profile_fixup */ + +- leaq LR_RSP_OFFSET(%rbx), %rsi # stack +- addq $8, %r10 +- andq $0xfffffffffffffff0, %r10 +- movq %r10, %rcx +- subq %r10, %rsp +- movq %rsp, %rdi +- shrq $3, %rcx ++ lea LR_RSP_OFFSET(%rbx), %RSI_LP # stack ++ add $8, %R10_LP ++ and $-16, %R10_LP ++ mov %R10_LP, %RCX_LP ++ sub %R10_LP, %RSP_LP ++ mov %RSP_LP, %RDI_LP ++ shr $3, %RCX_LP + rep + movsq + +@@ -200,23 +541,24 @@ + movq 32(%rdi), %rsi + movq 40(%rdi), %rdi + ++ PRESERVE_BND_REGS_PREFIX + call *%r11 + +- mov 24(%rbx), %rsp # Drop the copied stack content ++ mov 24(%rbx), %RSP_LP # Drop the copied stack content + + /* Now we have to prepare the La_x86_64_retval structure for the + _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now, + so we just need to allocate the sizeof(La_x86_64_retval) space on + the stack, since the alignment has already been taken care of. */ +-#ifdef RESTORE_AVX ++# ifdef RESTORE_AVX + /* sizeof(La_x86_64_retval). Need extra space for 2 SSE + registers to detect if xmm0/xmm1 registers are changed + by audit module. */ +- subq $(LRV_SIZE + XMM_SIZE*2), %rsp +-#else +- subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval) +-#endif +- movq %rsp, %rcx # La_x86_64_retval argument to %rcx. 
++ sub $(LRV_SIZE + XMM_SIZE*2), %RSP_LP ++# else ++ sub $LRV_SIZE, %RSP_LP # sizeof(La_x86_64_retval) ++# endif ++ mov %RSP_LP, %RCX_LP # La_x86_64_retval argument to %rcx. + + /* Fill in the La_x86_64_retval structure. */ + movq %rax, LRV_RAX_OFFSET(%rcx) +@@ -225,26 +567,26 @@ + movaps %xmm0, LRV_XMM0_OFFSET(%rcx) + movaps %xmm1, LRV_XMM1_OFFSET(%rcx) + +-#ifdef RESTORE_AVX ++# ifdef RESTORE_AVX + /* This is to support AVX audit modules. */ +- VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx) +- VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx) ++ VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx) ++ VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx) + + /* Save xmm0/xmm1 registers to detect if they are changed + by audit module. */ + vmovdqa %xmm0, (LRV_SIZE)(%rcx) + vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx) +-#endif ++# endif + +-#ifndef __ILP32__ +-# ifdef HAVE_MPX_SUPPORT ++# ifndef __ILP32__ ++# ifdef HAVE_MPX_SUPPORT + bndmov %bnd0, LRV_BND0_OFFSET(%rcx) # Preserve returned bounds. + bndmov %bnd1, LRV_BND1_OFFSET(%rcx) +-# else ++# else + .byte 0x66,0x0f,0x1b,0x81;.long (LRV_BND0_OFFSET) + .byte 0x66,0x0f,0x1b,0x89;.long (LRV_BND1_OFFSET) ++# endif + # endif +-#endif + + fstpt LRV_ST0_OFFSET(%rcx) + fstpt LRV_ST1_OFFSET(%rcx) +@@ -261,49 +603,47 @@ + movaps LRV_XMM0_OFFSET(%rsp), %xmm0 + movaps LRV_XMM1_OFFSET(%rsp), %xmm1 + +-#ifdef RESTORE_AVX ++# ifdef RESTORE_AVX + /* Check if xmm0/xmm1 registers are changed by audit module. */ + vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2 + vpmovmskb %xmm2, %esi + cmpl $0xffff, %esi + jne 1f +- VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0) ++ VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0) + + 1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2 + vpmovmskb %xmm2, %esi + cmpl $0xffff, %esi + jne 1f +- VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1) ++ VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1) + + 1: +-#endif ++# endif + +-#ifndef __ILP32__ +-# ifdef HAVE_MPX_SUPPORT +- bndmov LRV_BND0_OFFSET(%rcx), %bnd0 # Restore bound registers. 
+- bndmov LRV_BND1_OFFSET(%rcx), %bnd1 +-# else +- .byte 0x66,0x0f,0x1a,0x81;.long (LRV_BND0_OFFSET) +- .byte 0x66,0x0f,0x1a,0x89;.long (LRV_BND1_OFFSET) ++# ifndef __ILP32__ ++# ifdef HAVE_MPX_SUPPORT ++ bndmov LRV_BND0_OFFSET(%rsp), %bnd0 # Restore bound registers. ++ bndmov LRV_BND1_OFFSET(%rsp), %bnd1 ++# else ++ .byte 0x66,0x0f,0x1a,0x84,0x24;.long (LRV_BND0_OFFSET) ++ .byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LRV_BND1_OFFSET) ++# endif + # endif +-#endif + + fldt LRV_ST1_OFFSET(%rsp) + fldt LRV_ST0_OFFSET(%rsp) + +- movq %rbx, %rsp ++ mov %RBX_LP, %RSP_LP + movq (%rsp), %rbx +- cfi_restore(rbx) ++ cfi_restore(%rbx) + cfi_def_cfa_register(%rsp) + +- addq $48, %rsp # Adjust the stack to the return value ++ add $48, %RSP_LP # Adjust the stack to the return value + # (eats the reloc index and link_map) + cfi_adjust_cfa_offset(-48) ++ PRESERVE_BND_REGS_PREFIX + retq + +-#ifdef MORE_CODE +- cfi_adjust_cfa_offset(48) +- cfi_rel_offset(%rbx, 0) +- cfi_def_cfa_register(%rbx) +-# undef MORE_CODE ++ cfi_endproc ++ .size _dl_runtime_profile, .-_dl_runtime_profile + #endif +Index: glibc-2.17-c758a686/sysdeps/x86_64/ifuncmain8.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/ifuncmain8.c +@@ -0,0 +1,32 @@ ++/* Test IFUNC selector with floating-point parameters. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++extern float foo (float); ++ ++static int ++do_test (void) ++{ ++ if (foo (2) != 3) ++ abort (); ++ return 0; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" +Index: glibc-2.17-c758a686/sysdeps/x86_64/ifuncmod8.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/ifuncmod8.c +@@ -0,0 +1,36 @@ ++/* Test IFUNC selector with floating-point parameters. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void * foo_ifunc (void) __asm__ ("foo"); ++__asm__(".type foo, %gnu_indirect_function"); ++ ++static float ++foo_impl (float x) ++{ ++ return x + 1; ++} ++ ++void * ++foo_ifunc (void) ++{ ++ __m128i xmm = _mm_set1_epi32 (-1); ++ asm volatile ("movdqa %0, %%xmm0" : : "x" (xmm) : "xmm0" ); ++ return foo_impl; ++} +Index: glibc-2.17-c758a686/sysdeps/x86_64/tst-avx-aux.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/tst-avx-aux.c +@@ -0,0 +1,47 @@ ++/* Test case for preserved AVX registers in dynamic linker, -mavx part. 
++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++int ++tst_avx_aux (void) ++{ ++#ifdef __AVX__ ++ extern __m256i avx_test (__m256i, __m256i, __m256i, __m256i, ++ __m256i, __m256i, __m256i, __m256i); ++ ++ __m256i ymm0 = _mm256_set1_epi32 (0); ++ __m256i ymm1 = _mm256_set1_epi32 (1); ++ __m256i ymm2 = _mm256_set1_epi32 (2); ++ __m256i ymm3 = _mm256_set1_epi32 (3); ++ __m256i ymm4 = _mm256_set1_epi32 (4); ++ __m256i ymm5 = _mm256_set1_epi32 (5); ++ __m256i ymm6 = _mm256_set1_epi32 (6); ++ __m256i ymm7 = _mm256_set1_epi32 (7); ++ __m256i ret = avx_test (ymm0, ymm1, ymm2, ymm3, ++ ymm4, ymm5, ymm6, ymm7); ++ ymm0 = _mm256_set1_epi32 (0x12349876); ++ if (memcmp (&ymm0, &ret, sizeof (ret))) ++ abort (); ++ return 0; ++#else /* __AVX__ */ ++ return 77; ++#endif /* __AVX__ */ ++} +Index: glibc-2.17-c758a686/sysdeps/x86_64/tst-avx.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/tst-avx.c +@@ -0,0 +1,49 @@ ++/* Test case for preserved AVX registers in dynamic linker. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++int tst_avx_aux (void); ++ ++static int ++avx_enabled (void) ++{ ++ unsigned int eax, ebx, ecx, edx; ++ ++ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 ++ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) ++ return 0; ++ ++ /* Check the OS has AVX and SSE saving enabled. */ ++ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); ++ ++ return (eax & 6) == 6; ++} ++ ++static int ++do_test (void) ++{ ++ /* Run AVX test only if AVX is supported. */ ++ if (avx_enabled ()) ++ return tst_avx_aux (); ++ else ++ return 77; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../../test-skeleton.c" +Index: glibc-2.17-c758a686/sysdeps/x86_64/tst-avx512-aux.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/tst-avx512-aux.c +@@ -0,0 +1,48 @@ ++/* Test case for preserved AVX512 registers in dynamic linker, ++ -mavx512 part. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++int ++tst_avx512_aux (void) ++{ ++#ifdef __AVX512F__ ++ extern __m512i avx512_test (__m512i, __m512i, __m512i, __m512i, ++ __m512i, __m512i, __m512i, __m512i); ++ ++ __m512i zmm0 = _mm512_set1_epi32 (0); ++ __m512i zmm1 = _mm512_set1_epi32 (1); ++ __m512i zmm2 = _mm512_set1_epi32 (2); ++ __m512i zmm3 = _mm512_set1_epi32 (3); ++ __m512i zmm4 = _mm512_set1_epi32 (4); ++ __m512i zmm5 = _mm512_set1_epi32 (5); ++ __m512i zmm6 = _mm512_set1_epi32 (6); ++ __m512i zmm7 = _mm512_set1_epi32 (7); ++ __m512i ret = avx512_test (zmm0, zmm1, zmm2, zmm3, ++ zmm4, zmm5, zmm6, zmm7); ++ zmm0 = _mm512_set1_epi32 (0x12349876); ++ if (memcmp (&zmm0, &ret, sizeof (ret))) ++ abort (); ++ return 0; ++#else /* __AVX512F__ */ ++ return 77; ++#endif /* __AVX512F__ */ ++} +Index: glibc-2.17-c758a686/sysdeps/x86_64/tst-avx512.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/tst-avx512.c +@@ -0,0 +1,57 @@ ++/* Test case for preserved AVX512 registers in dynamic linker. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++int tst_avx512_aux (void); ++ ++static int ++avx512_enabled (void) ++{ ++#ifdef bit_AVX512F ++ unsigned int eax, ebx, ecx, edx; ++ ++ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 ++ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) ++ return 0; ++ ++ __cpuid_count (7, 0, eax, ebx, ecx, edx); ++ if (!(ebx & bit_AVX512F)) ++ return 0; ++ ++ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); ++ ++ /* Verify that ZMM, YMM and XMM states are enabled. */ ++ return (eax & 0xe6) == 0xe6; ++#else ++ return 0; ++#endif ++} ++ ++static int ++do_test (void) ++{ ++ /* Run AVX512 test only if AVX512 is supported. */ ++ if (avx512_enabled ()) ++ return tst_avx512_aux (); ++ else ++ return 77; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../../test-skeleton.c" +Index: glibc-2.17-c758a686/sysdeps/x86_64/tst-avx512mod.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/tst-avx512mod.c +@@ -0,0 +1,48 @@ ++/* Test case for x86-64 preserved AVX512 registers in dynamic linker. 
*/ ++ ++#ifdef __AVX512F__ ++#include ++#include ++#include ++ ++__m512i ++avx512_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3, ++ __m512i x4, __m512i x5, __m512i x6, __m512i x7) ++{ ++ __m512i zmm; ++ ++ zmm = _mm512_set1_epi32 (0); ++ if (memcmp (&zmm, &x0, sizeof (zmm))) ++ abort (); ++ ++ zmm = _mm512_set1_epi32 (1); ++ if (memcmp (&zmm, &x1, sizeof (zmm))) ++ abort (); ++ ++ zmm = _mm512_set1_epi32 (2); ++ if (memcmp (&zmm, &x2, sizeof (zmm))) ++ abort (); ++ ++ zmm = _mm512_set1_epi32 (3); ++ if (memcmp (&zmm, &x3, sizeof (zmm))) ++ abort (); ++ ++ zmm = _mm512_set1_epi32 (4); ++ if (memcmp (&zmm, &x4, sizeof (zmm))) ++ abort (); ++ ++ zmm = _mm512_set1_epi32 (5); ++ if (memcmp (&zmm, &x5, sizeof (zmm))) ++ abort (); ++ ++ zmm = _mm512_set1_epi32 (6); ++ if (memcmp (&zmm, &x6, sizeof (zmm))) ++ abort (); ++ ++ zmm = _mm512_set1_epi32 (7); ++ if (memcmp (&zmm, &x7, sizeof (zmm))) ++ abort (); ++ ++ return _mm512_set1_epi32 (0x12349876); ++} ++#endif +Index: glibc-2.17-c758a686/sysdeps/x86_64/tst-ssemod.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/tst-ssemod.c +@@ -0,0 +1,46 @@ ++/* Test case for x86-64 preserved SSE registers in dynamic linker. 
*/ ++ ++#include ++#include ++#include ++ ++__m128i ++sse_test (__m128i x0, __m128i x1, __m128i x2, __m128i x3, ++ __m128i x4, __m128i x5, __m128i x6, __m128i x7) ++{ ++ __m128i xmm; ++ ++ xmm = _mm_set1_epi32 (0); ++ if (memcmp (&xmm, &x0, sizeof (xmm))) ++ abort (); ++ ++ xmm = _mm_set1_epi32 (1); ++ if (memcmp (&xmm, &x1, sizeof (xmm))) ++ abort (); ++ ++ xmm = _mm_set1_epi32 (2); ++ if (memcmp (&xmm, &x2, sizeof (xmm))) ++ abort (); ++ ++ xmm = _mm_set1_epi32 (3); ++ if (memcmp (&xmm, &x3, sizeof (xmm))) ++ abort (); ++ ++ xmm = _mm_set1_epi32 (4); ++ if (memcmp (&xmm, &x4, sizeof (xmm))) ++ abort (); ++ ++ xmm = _mm_set1_epi32 (5); ++ if (memcmp (&xmm, &x5, sizeof (xmm))) ++ abort (); ++ ++ xmm = _mm_set1_epi32 (6); ++ if (memcmp (&xmm, &x6, sizeof (xmm))) ++ abort (); ++ ++ xmm = _mm_set1_epi32 (7); ++ if (memcmp (&xmm, &x7, sizeof (xmm))) ++ abort (); ++ ++ return _mm_set1_epi32 (0x12349876); ++} +Index: glibc-2.17-c758a686/sysdeps/x86_64/tst-sse.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/tst-sse.c +@@ -0,0 +1,46 @@ ++/* Test case for preserved SSE registers in dynamic linker. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++extern __m128i sse_test (__m128i, __m128i, __m128i, __m128i, ++ __m128i, __m128i, __m128i, __m128i); ++ ++static int ++do_test (void) ++{ ++ __m128i xmm0 = _mm_set1_epi32 (0); ++ __m128i xmm1 = _mm_set1_epi32 (1); ++ __m128i xmm2 = _mm_set1_epi32 (2); ++ __m128i xmm3 = _mm_set1_epi32 (3); ++ __m128i xmm4 = _mm_set1_epi32 (4); ++ __m128i xmm5 = _mm_set1_epi32 (5); ++ __m128i xmm6 = _mm_set1_epi32 (6); ++ __m128i xmm7 = _mm_set1_epi32 (7); ++ __m128i ret = sse_test (xmm0, xmm1, xmm2, xmm3, ++ xmm4, xmm5, xmm6, xmm7); ++ xmm0 = _mm_set1_epi32 (0x12349876); ++ if (memcmp (&xmm0, &ret, sizeof (ret))) ++ abort (); ++ return 0; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../../test-skeleton.c" +Index: glibc-2.17-c758a686/sysdeps/x86_64/tst-avxmod.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/x86_64/tst-avxmod.c +@@ -0,0 +1,49 @@ ++ ++/* Test case for x86-64 preserved AVX registers in dynamic linker. 
*/ ++ ++#ifdef __AVX__ ++#include ++#include ++#include ++ ++__m256i ++avx_test (__m256i x0, __m256i x1, __m256i x2, __m256i x3, ++ __m256i x4, __m256i x5, __m256i x6, __m256i x7) ++{ ++ __m256i ymm; ++ ++ ymm = _mm256_set1_epi32 (0); ++ if (memcmp (&ymm, &x0, sizeof (ymm))) ++ abort (); ++ ++ ymm = _mm256_set1_epi32 (1); ++ if (memcmp (&ymm, &x1, sizeof (ymm))) ++ abort (); ++ ++ ymm = _mm256_set1_epi32 (2); ++ if (memcmp (&ymm, &x2, sizeof (ymm))) ++ abort (); ++ ++ ymm = _mm256_set1_epi32 (3); ++ if (memcmp (&ymm, &x3, sizeof (ymm))) ++ abort (); ++ ++ ymm = _mm256_set1_epi32 (4); ++ if (memcmp (&ymm, &x4, sizeof (ymm))) ++ abort (); ++ ++ ymm = _mm256_set1_epi32 (5); ++ if (memcmp (&ymm, &x5, sizeof (ymm))) ++ abort (); ++ ++ ymm = _mm256_set1_epi32 (6); ++ if (memcmp (&ymm, &x6, sizeof (ymm))) ++ abort (); ++ ++ ymm = _mm256_set1_epi32 (7); ++ if (memcmp (&ymm, &x7, sizeof (ymm))) ++ abort (); ++ ++ return _mm256_set1_epi32 (0x12349876); ++} ++#endif diff --git a/SOURCES/glibc-rh1436312.patch b/SOURCES/glibc-rh1436312.patch deleted file mode 100644 index 7b46350..0000000 --- a/SOURCES/glibc-rh1436312.patch +++ /dev/null @@ -1,54 +0,0 @@ -Upstream commits: - -commit a071766ebfd853179ac39f9773f894029bf86d36 -Author: Andreas Schwab -Date: Thu Mar 20 15:05:25 2014 +0100 - - Fix use of half-initialized result in getaddrinfo when using nscd (bug 16743) - - This fixes a bug in the way the results from __nscd_getai are collected: - for every returned result a new entry is first added to the - gaih_addrtuple list, but if that result doesn't match the request this - entry remains uninitialized. So for this non-matching result an extra - result with uninitialized content is returned. 
- - To reproduce (with nscd running): - - $ getent ahostsv4 localhost - 127.0.0.1 STREAM localhost - 127.0.0.1 DGRAM - 127.0.0.1 RAW - (null) STREAM - (null) DGRAM - (null) RAW - -commit 8dc9751764eb1bedf06d19695524b31a16773413 -Author: Andreas Schwab -Date: Wed May 7 11:47:20 2014 +0200 - - Fix parsing of getai result from nscd for IPv6-only request - - -Index: b/sysdeps/posix/getaddrinfo.c -=================================================================== ---- a/sysdeps/posix/getaddrinfo.c -+++ b/sysdeps/posix/getaddrinfo.c -@@ -725,6 +725,18 @@ gaih_inet (const char *name, const struc - { - socklen_t size = (air->family[i] == AF_INET - ? INADDRSZ : IN6ADDRSZ); -+ -+ if (!((air->family[i] == AF_INET -+ && req->ai_family == AF_INET6 -+ && (req->ai_flags & AI_V4MAPPED) != 0) -+ || req->ai_family == AF_UNSPEC -+ || air->family[i] == req->ai_family)) -+ { -+ /* Skip over non-matching result. */ -+ addrs += size; -+ continue; -+ } -+ - if (*pat == NULL) - { - *pat = addrfree++; diff --git a/SOURCES/glibc-rh1439165.patch b/SOURCES/glibc-rh1439165.patch new file mode 100644 index 0000000..90fe64d --- /dev/null +++ b/SOURCES/glibc-rh1439165.patch @@ -0,0 +1,339 @@ +Posted upstream at: + + https://sourceware.org/ml/libc-alpha/2017-04/msg00082.html + +sysdeps/unix/sysv/linux/syscall-names.list is stored as a separate +file (syscall-names.list) in the source RPM for easier updates. + +Author: Florian Weimer + + <bits/syscall.h>: Use an arch-independent system call list on Linux + + This commit changes the way the list of SYS_* system call macros + is created on Linux. glibc now contains a list of all known system + calls, and the generated file defines the SYS_ + macro only if the corresponding __NR_ macro is defined by the kernel + headers. + + As a result, glibc does not have to be rebuilt to pick up + system calls if the glibc sources already know about them.
This + means that glibc can be built with older kernel headers, and if + the installed kernel headers are upgraded afterwards, additional + SYS_ macros become available as long as glibc has a record for + those system calls. + +Index: b/sysdeps/unix/sysv/linux/Makefile +=================================================================== +--- a/sysdeps/unix/sysv/linux/Makefile ++++ b/sysdeps/unix/sysv/linux/Makefile +@@ -39,75 +39,46 @@ sysdep_headers += sys/mount.h sys/acct.h + + tests += tst-clone + +-# Generate the list of SYS_* macros for the system calls (__NR_* macros). ++# Generate the list of SYS_* macros for the system calls (__NR_* ++# macros). The file syscall-names.list contains all possible system ++# call names, and the generated header file produces SYS_* macros for ++# the __NR_* macros which are actually defined. ++ ++generated += bits/syscall.h ++$(objpfx)bits/syscall.h: \ ++ ../sysdeps/unix/sysv/linux/gen-syscall-h.awk \ ++ ../sysdeps/unix/sysv/linux/syscall-names.list ++ $(make-target-directory) ++ $(AWK) -f $^ > $@-tmp ++ $(move-if-change) $@-tmp $@ + +-# If there is more than one syscall list for different architecture +-# variants, the CPU/Makefile defines abi-variants to be a list of names +-# for those variants (e.g. 32 64), and, for each variant, defines +-# abi-$(variant)-options to be compiler options to cause +-# to define the desired list of syscalls and abi-$(variant)-condition to +-# be the condition for those options to use in a C #if condition. +-# abi-includes may be defined to a list of headers to include +-# in the generated header, if the default does not suffice. +-# +-# The generated header is compiled with `-ffreestanding' to avoid any +-# circular dependencies against the installed implementation headers. +-# Such a dependency would require the implementation header to be +-# installed before the generated header could be built (See bug 15711). 
+-# In current practice the generated header dependencies do not include +-# any of the implementation headers removed by the use of `-ffreestanding'. ++# All macros defined by . Include ++# explicitly because skips it if _LIBC is defined. ++$(objpfx)tst-syscall-list-macros.list: \ ++ $(objpfx)bits/syscall.h ../sysdeps/unix/sysv/linux/sys/syscall.h ++ printf '#include \n#include \n' | \ ++ $(CC) -E -o $@-tmp $(CFLAGS) $(CPPFLAGS) -x c - -dM ++ $(move-if-change) $@-tmp $@ ++ ++# __NR_* system call names. Used by the test below. ++$(objpfx)tst-syscall-list-nr.list: \ ++ ../sysdeps/unix/sysv/linux/filter-nr-syscalls.awk \ ++ $(objpfx)tst-syscall-list-macros.list ++ $(AWK) -f $^ > $@-tmp ++ $(move-if-change) $@-tmp $@ ++ ++# SYS_* system call names. Used by the test below. ++$(objpfx)tst-syscall-list-sys.list: $(objpfx)tst-syscall-list-macros.list ++ $(AWK) '/^#define SYS_/ { print substr($$2, 5) }' $< > $@-tmp ++ $(move-if-change) $@-tmp $@ ++ ++tests: $(objpfx)tst-syscall-list.out ++$(objpfx)tst-syscall-list.out: \ ++ ../sysdeps/unix/sysv/linux/tst-syscall-list.sh \ ++ $(objpfx)tst-syscall-list-nr.list $(objpfx)tst-syscall-list-sys.list ++ $(BASH) $^ > $@ + +-$(objpfx)bits/syscall%h $(objpfx)bits/syscall%d: ../sysdeps/unix/sysv/linux/sys/syscall.h +- $(make-target-directory) +- { \ +- echo '/* Generated at libc build time from kernel syscall list. 
*/';\ +- echo ''; \ +- echo '#ifndef _SYSCALL_H'; \ +- echo '# error "Never use directly; include instead."'; \ +- echo '#endif'; \ +- echo ''; \ +- $(foreach h,$(abi-includes), echo '#include <$(h)>';) \ +- echo ''; \ +- $(if $(abi-variants), \ +- $(foreach v,$(abi-variants),\ +- $(CC) -ffreestanding -E -MD -MP -MF $(@:.h=.d)-t$(v) -MT '$(@:.d=.h) $(@:.h=.d)' \ +- -x c $(sysincludes) $< $(abi-$(v)-options) \ +- -D_LIBC -dM | \ +- sed -n 's@^#define __NR_\([^ ]*\) .*$$@#define SYS_\1 __NR_\1@p' | \ +- LC_ALL=C sort > $(@:.d=.h).new$(v); \ +- $(if $(abi-$(v)-condition),\ +- echo '#if $(abi-$(v)-condition)';) \ +- cat $(@:.d=.h).new$(v); \ +- $(if $(abi-$(v)-condition),echo '#endif';) \ +- rm -f $(@:.d=.h).new$(v); \ +- ), \ +- $(CC) -ffreestanding -E -MD -MP -MF $(@:.h=.d)-t$(v) -MT '$(@:.d=.h) $(@:.h=.d)' \ +- -x c $(sysincludes) $< \ +- -D_LIBC -dM | \ +- sed -n 's@^#define __NR_\([^ ]*\) .*$$@#define SYS_\1 __NR_\1@p' | \ +- LC_ALL=C sort;) \ +- } > $(@:.d=.h).new +- mv -f $(@:.d=.h).new $(@:.d=.h) +-ifdef abi-variants +-ifneq (,$(objpfx)) +- sed $(sed-remove-objpfx) \ +- $(foreach v,$(abi-variants),$(@:.h=.d)-t$(v)) > $(@:.h=.d)-t3 +-else +- cat $(foreach v,$(abi-variants),$(@:.h=.d)-t$(v)) \ +- > $(@:.h=.d)-t3 +-endif +- rm -f $(foreach v,$(abi-variants),$(@:.h=.d)-t$(v)) +- mv -f $(@:.h=.d)-t3 $(@:.h=.d) +-else +- mv -f $(@:.h=.d)-t $(@:.h=.d) +-endif +- +-ifndef no_deps +-# Get the generated list of dependencies (probably /usr/include/asm/unistd.h). +--include $(objpfx)bits/syscall.d +-endif +-generated += bits/syscall.h bits/syscall.d +-endif ++endif # $(subdir) == misc + + ifeq ($(subdir),time) + sysdep_headers += sys/timex.h bits/timex.h +Index: b/sysdeps/unix/sysv/linux/filter-nr-syscalls.awk +=================================================================== +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/filter-nr-syscalls.awk +@@ -0,0 +1,35 @@ ++# Filter preprocessor __NR_* macros and extract system call names. 
++# Copyright (C) 2017 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++# Skip reserved system calls. ++/^#define __NR_(unused|reserved)[0-9]+ / { ++ next; ++} ++ ++# Skip pseudo-system calls which describe ranges. ++/^#define __NR_(syscalls|arch_specific_syscall|(OABI_)?SYSCALL_BASE) / { ++ next; ++} ++/^#define __NR_(|64_|[NO]32_)Linux(_syscalls)? / { ++ next; ++} ++ ++# Print the remaining _NR_* macros as system call names. ++/^#define __NR_/ { ++ print substr($2, 6); ++} +Index: b/sysdeps/unix/sysv/linux/gen-syscall-h.awk +=================================================================== +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/gen-syscall-h.awk +@@ -0,0 +1,75 @@ ++# Generate SYS_* macros from a list in a text file. ++# Copyright (C) 2017 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. 
++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++# Emit a conditional definition for SYS_NAME. ++function emit(name) { ++ print "#ifdef __NR_" name; ++ print "# define SYS_" name " __NR_" name; ++ print "#endif"; ++ print ""; ++} ++ ++# Bail out with an error. ++function fatal(message) { ++ print FILENAME ":" FNR ": " message > "/dev/stderr"; ++ exit 1; ++} ++ ++BEGIN { ++ name = ""; ++ kernel = ""; ++} ++ ++# Skip empty lines and comments. ++/^\s*(|#.*)$/ { ++ next; ++} ++ ++# Kernel version. Used for documentation purposes only. ++/^kernel [0-9.]+$/ { ++ if (kernel != "") { ++ fatal("duplicate kernel directive"); ++ } ++ kernel = $2; ++ print "/* Generated at libc build time from syscall list. */"; ++ print "/* The system call list corresponds to kernel " kernel ". */"; ++ print ""; ++ print "#ifndef _SYSCALL_H" ++ print "# error \"Never use directly; include instead.\""; ++ print "#endif"; ++ print ""; ++ next; ++} ++ ++# If there is just one word, it is a system call. ++/^[a-zA-Z_][a-zA-Z0-9_]+$/ { ++ if (kernel == "") { ++ fatal("expected kernel directive before this line"); ++ } ++ if ($1 <= name) { ++ fatal("name " name " violates ordering"); ++ } ++ emit($1); ++ name = $1; ++ next; ++} ++ ++# The rest has to be syntax errors. ++// { ++ fatal("unrecognized syntax"); ++} +Index: b/sysdeps/unix/sysv/linux/tst-syscall-list.sh +=================================================================== +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/tst-syscall-list.sh +@@ -0,0 +1,72 @@ ++#!/bin/bash ++# Consistency checks for the system call list ++# Copyright (C) 2017 Free Software Foundation, Inc. 
++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++export LC_ALL=C ++set -e ++set -o pipefail ++ ++if test $# != 2 ; then ++ echo "error: wrong number of arguments: $#" ++ exit 1 ++fi ++ ++list_nr="$1" ++list_sys="$2" ++ ++errors=0 ++ ++# Use getpid as a system call which is expected to be always defined. ++# alpha uses getxpid instead, so it is permitted as an alternative. ++if ! grep -E -q '^getx?pid$' -- "$list_nr" ; then ++ echo "error: __NR_getpid not defined" ++ errors=1 ++fi ++if ! grep -E -q '^getx?pid$' -- "$list_sys" ; then ++ echo "error: SYS_getpid not defined" ++ errors=1 ++fi ++ ++comm_1="$(mktemp)" ++comm_2="$(mktemp)" ++comm_result="$(mktemp)" ++cleanup () { ++ rm -f -- "$comm_1" "$comm_2" "$comm_result" ++} ++trap cleanup 0 ++ ++sort -o "$comm_1" -- "$list_nr" ++sort -o "$comm_2" -- "$list_sys" ++ ++# Check for missing SYS_* macros. ++comm --check-order -2 -3 -- "$comm_1" "$comm_2" > "$comm_result" ++if test -s "$comm_result"; then ++ echo "error: These system calls need to be added to syscall-names.list:" ++ cat -- "$comm_result" ++ errors=1 ++fi ++ ++# Check for additional SYS_* macros. 
++comm --check-order -1 -3 -- "$comm_1" "$comm_2" > "$comm_result" ++if test -s "$comm_result"; then ++ echo "error: The following system calls have unexpected SYS_* macros:" ++ cat -- "$comm_result" ++ errors=1 ++fi ++ ++exit "$errors" diff --git a/SOURCES/glibc-rh1452720-1.patch b/SOURCES/glibc-rh1452720-1.patch deleted file mode 100644 index 95e615a..0000000 --- a/SOURCES/glibc-rh1452720-1.patch +++ /dev/null @@ -1,23 +0,0 @@ -commit TBD -Author: Florian Weimer -Date: Fri May 19 17:46:47 2017 +0200 - - rtld: Completely ignore LD_LIBRARY_PATH for AT_SECURE=1 programs - -LD_LIBRARY_PATH can only be used to reorder system search paths, which -is not useful functionality. - -Index: glibc-2.17-c758a686/elf/rtld.c -=================================================================== ---- glibc-2.17-c758a686.orig/elf/rtld.c -+++ glibc-2.17-c758a686/elf/rtld.c -@@ -2580,7 +2701,8 @@ process_envvars (enum mode *modep) - - case 12: - /* The library search path. */ -- if (memcmp (envline, "LIBRARY_PATH", 12) == 0) -+ if (!__libc_enable_secure -+ && memcmp (envline, "LIBRARY_PATH", 12) == 0) - { - library_path = &envline[13]; - break; diff --git a/SOURCES/glibc-rh1452720-2.patch b/SOURCES/glibc-rh1452720-2.patch deleted file mode 100644 index ca74956..0000000 --- a/SOURCES/glibc-rh1452720-2.patch +++ /dev/null @@ -1,103 +0,0 @@ -commit TBD -Author: Florian Weimer -Date: Fri May 19 17:46:47 2017 +0200 - - rtld: Reject overly long LD_PRELOAD path elements - -Index: b/elf/rtld.c -=================================================================== ---- a/elf/rtld.c -+++ b/elf/rtld.c -@@ -99,6 +99,22 @@ uintptr_t __pointer_chk_guard_local - strong_alias (__pointer_chk_guard_local, __pointer_chk_guard) - #endif - -+/* Check that AT_SECURE=0, or that the passed name does not contain -+ directories and is not overly long. Reject empty names -+ unconditionally. 
*/ -+static bool -+dso_name_valid_for_suid (const char *p) -+{ -+ if (__builtin_expect (INTUSE(__libc_enable_secure), 0)) -+ { -+ /* Ignore pathnames with directories for AT_SECURE=1 -+ programs, and also skip overlong names. */ -+ size_t len = strlen (p); -+ if (len >= NAME_MAX || memchr (p, '/', len) != NULL) -+ return false; -+ } -+ return *p != '\0'; -+} - - /* List of auditing DSOs. */ - static struct audit_list -@@ -880,6 +896,44 @@ static const char *preloadlist attribute - /* Nonzero if information about versions has to be printed. */ - static int version_info attribute_relro; - -+/* The LD_PRELOAD environment variable gives list of libraries -+ separated by white space or colons that are loaded before the -+ executable's dependencies and prepended to the global scope list. -+ (If the binary is running setuid all elements containing a '/' are -+ ignored since it is insecure.) Return the number of preloads -+ performed. */ -+unsigned int -+handle_ld_preload (const char *preloadlist, struct link_map *main_map) -+{ -+ unsigned int npreloads = 0; -+ const char *p = preloadlist; -+ char fname[PATH_MAX]; -+ -+ while (*p != '\0') -+ { -+ /* Split preload list at space/colon. */ -+ size_t len = strcspn (p, " :"); -+ if (len > 0 && len < PATH_MAX) -+ { -+ memcpy (fname, p, len); -+ fname[len] = '\0'; -+ } -+ else -+ fname[0] = '\0'; -+ -+ /* Skip over the substring and the following delimiter. 
*/ -+ p += len; -+ if (*p == ' ' || *p == ':') -+ ++p; -+ -+ if (dso_name_valid_for_suid (fname)) -+ npreloads += do_preload (fname, main_map, "LD_PRELOAD"); -+ } -+ return npreloads; -+} -+ -+ -+ - static void - dl_main (const ElfW(Phdr) *phdr, - ElfW(Word) phnum, -@@ -1611,23 +1665,8 @@ ERROR: ld.so: object '%s' cannot be load - - if (__builtin_expect (preloadlist != NULL, 0)) - { -- /* The LD_PRELOAD environment variable gives list of libraries -- separated by white space or colons that are loaded before the -- executable's dependencies and prepended to the global scope -- list. If the binary is running setuid all elements -- containing a '/' are ignored since it is insecure. */ -- char *list = strdupa (preloadlist); -- char *p; -- - HP_TIMING_NOW (start); -- -- /* Prevent optimizing strsep. Speed is not important here. */ -- while ((p = (strsep) (&list, " :")) != NULL) -- if (p[0] != '\0' -- && (__builtin_expect (! INTUSE(__libc_enable_secure), 1) -- || strchr (p, '/') == NULL)) -- npreloads += do_preload (p, main_map, "LD_PRELOAD"); -- -+ npreloads += handle_ld_preload (preloadlist, main_map); - HP_TIMING_NOW (stop); - HP_TIMING_DIFF (diff, start, stop); - HP_TIMING_ACCUM_NT (load_time, diff); diff --git a/SOURCES/glibc-rh1452720-3.patch b/SOURCES/glibc-rh1452720-3.patch deleted file mode 100644 index 135ee52..0000000 --- a/SOURCES/glibc-rh1452720-3.patch +++ /dev/null @@ -1,196 +0,0 @@ -commit TBD -Author: Florian Weimer -Date: Fri May 19 17:46:47 2017 +0200 - - rtld: Reject overly long LD_AUDIT path elements - -Also only process the last LD_AUDIT entry. - -Index: b/elf/rtld.c -=================================================================== ---- a/elf/rtld.c -+++ b/elf/rtld.c -@@ -116,13 +116,91 @@ dso_name_valid_for_suid (const char *p) - return *p != '\0'; - } - --/* List of auditing DSOs. */ -+/* LD_AUDIT variable contents. Must be processed before the -+ audit_list below. */ -+const char *audit_list_string; -+ -+/* Cyclic list of auditing DSOs. 
audit_list->next is the first -+ element. */ - static struct audit_list - { - const char *name; - struct audit_list *next; - } *audit_list; - -+/* Iterator for audit_list_string followed by audit_list. */ -+struct audit_list_iter -+{ -+ /* Tail of audit_list_string still needing processing, or NULL. */ -+ const char *audit_list_tail; -+ -+ /* The list element returned in the previous iteration. NULL before -+ the first element. */ -+ struct audit_list *previous; -+ -+ /* Scratch buffer for returning a name which is part of -+ audit_list_string. */ -+ char fname[PATH_MAX]; -+}; -+ -+/* Initialize an audit list iterator. */ -+static void -+audit_list_iter_init (struct audit_list_iter *iter) -+{ -+ iter->audit_list_tail = audit_list_string; -+ iter->previous = NULL; -+} -+ -+/* Iterate through both audit_list_string and audit_list. */ -+static const char * -+audit_list_iter_next (struct audit_list_iter *iter) -+{ -+ if (iter->audit_list_tail != NULL) -+ { -+ /* First iterate over audit_list_string. */ -+ while (*iter->audit_list_tail != '\0') -+ { -+ /* Split audit list at colon. */ -+ size_t len = strcspn (iter->audit_list_tail, ":"); -+ if (len > 0 && len < PATH_MAX) -+ { -+ memcpy (iter->fname, iter->audit_list_tail, len); -+ iter->fname[len] = '\0'; -+ } -+ else -+ /* Do not return this name to the caller. */ -+ iter->fname[0] = '\0'; -+ -+ /* Skip over the substring and the following delimiter. */ -+ iter->audit_list_tail += len; -+ if (*iter->audit_list_tail == ':') -+ ++iter->audit_list_tail; -+ -+ /* If the name is valid, return it. */ -+ if (dso_name_valid_for_suid (iter->fname)) -+ return iter->fname; -+ /* Otherwise, wrap around and try the next name. */ -+ } -+ /* Fall through to the procesing of audit_list. */ -+ } -+ -+ if (iter->previous == NULL) -+ { -+ if (audit_list == NULL) -+ /* No pre-parsed audit list. */ -+ return NULL; -+ /* Start of audit list. The first list element is at -+ audit_list->next (cyclic list). 
*/ -+ iter->previous = audit_list->next; -+ return iter->previous->name; -+ } -+ if (iter->previous == audit_list) -+ /* Cyclic list wrap-around. */ -+ return NULL; -+ iter->previous = iter->previous->next; -+ return iter->previous->name; -+} -+ - /* Set nonzero during loading and initialization of executable and - libraries, cleared before the executable's entry point runs. This - must not be initialized to nonzero, because the unused dynamic -@@ -1441,11 +1519,13 @@ of this helper program; chances are you - GL(dl_rtld_map).l_tls_modid = _dl_next_tls_modid (); - - /* If we have auditing DSOs to load, do it now. */ -- if (__builtin_expect (audit_list != NULL, 0)) -+ bool need_security_init = true; -+ if (__builtin_expect (audit_list != NULL, 0) -+ || __builtin_expect (audit_list_string != NULL, 0)) - { -- /* Iterate over all entries in the list. The order is important. */ - struct audit_ifaces *last_audit = NULL; -- struct audit_list *al = audit_list->next; -+ struct audit_list_iter al_iter; -+ audit_list_iter_init (&al_iter); - - /* Since we start using the auditing DSOs right away we need to - initialize the data structures now. */ -@@ -1456,9 +1536,14 @@ of this helper program; chances are you - use different values (especially the pointer guard) and will - fail later on. */ - security_init (); -+ need_security_init = false; - -- do -+ while (true) - { -+ const char *name = audit_list_iter_next (&al_iter); -+ if (name == NULL) -+ break; -+ - int tls_idx = GL(dl_tls_max_dtv_idx); - - /* Now it is time to determine the layout of the static TLS -@@ -1467,7 +1552,7 @@ of this helper program; chances are you - no DF_STATIC_TLS bit is set. The reason is that we know - glibc will use the static model. 
*/ - struct dlmopen_args dlmargs; -- dlmargs.fname = al->name; -+ dlmargs.fname = name; - dlmargs.map = NULL; - - const char *objname; -@@ -1480,7 +1565,7 @@ of this helper program; chances are you - not_loaded: - _dl_error_printf ("\ - ERROR: ld.so: object '%s' cannot be loaded as audit interface: %s; ignored.\n", -- al->name, err_str); -+ name, err_str); - if (malloced) - free ((char *) err_str); - } -@@ -1584,10 +1669,7 @@ ERROR: ld.so: object '%s' cannot be load - goto not_loaded; - } - } -- -- al = al->next; - } -- while (al != audit_list->next); - - /* If we have any auditing modules, announce that we already - have two objects loaded. */ -@@ -1851,7 +1933,7 @@ ERROR: ld.so: object '%s' cannot be load - if (tcbp == NULL) - tcbp = init_tls (); - -- if (__builtin_expect (audit_list == NULL, 1)) -+ if (need_security_init) - /* Initialize security features. But only if we have not done it - earlier. */ - security_init (); -@@ -2495,9 +2577,7 @@ process_dl_audit (char *str) - char *p; - - while ((p = (strsep) (&str, ":")) != NULL) -- if (p[0] != '\0' -- && (__builtin_expect (! INTUSE(__libc_enable_secure), 1) -- || strchr (p, '/') == NULL)) -+ if (dso_name_valid_for_suid (p)) - { - /* This is using the local malloc, not the system malloc. The - memory can never be freed. 
*/ -@@ -2561,7 +2641,7 @@ process_envvars (enum mode *modep) - break; - } - if (memcmp (envline, "AUDIT", 5) == 0) -- process_dl_audit (&envline[6]); -+ audit_list_string = &envline[6]; - break; - - case 7: diff --git a/SOURCES/glibc-rh1452720-4.patch b/SOURCES/glibc-rh1452720-4.patch deleted file mode 100644 index 5b4d5ea..0000000 --- a/SOURCES/glibc-rh1452720-4.patch +++ /dev/null @@ -1,51 +0,0 @@ -Partial backport (without the test) of: - -commit 1c1243b6fc33c029488add276e56570a07803bfd -Author: Siddhesh Poyarekar -Date: Tue Mar 7 20:52:04 2017 +0530 - - Ignore and remove LD_HWCAP_MASK for AT_SECURE programs (bug #21209) - - The LD_HWCAP_MASK environment variable may alter the selection of - function variants for some architectures. For AT_SECURE process it - means that if an outdated routine has a bug that would otherwise not - affect newer platforms by default, LD_HWCAP_MASK will allow that bug - to be exploited. - - To be on the safe side, ignore and disable LD_HWCAP_MASK for setuid - binaries. - - [BZ #21209] - * elf/rtld.c (process_envvars): Ignore LD_HWCAP_MASK for - AT_SECURE processes. - * sysdeps/generic/unsecvars.h: Add LD_HWCAP_MASK. - * elf/tst-env-setuid.c (test_parent): Test LD_HWCAP_MASK. - (test_child): Likewise. - * elf/Makefile (tst-env-setuid-ENV): Add LD_HWCAP_MASK. - -Index: b/elf/rtld.c -=================================================================== ---- a/elf/rtld.c -+++ b/elf/rtld.c -@@ -2688,7 +2688,8 @@ process_envvars (enum mode *modep) - - case 10: - /* Mask for the important hardware capabilities. 
*/ -- if (memcmp (envline, "HWCAP_MASK", 10) == 0) -+ if (!__libc_enable_secure -+ && memcmp (envline, "HWCAP_MASK", 10) == 0) - GLRO(dl_hwcap_mask) = __strtoul_internal (&envline[11], NULL, - 0, 0); - break; -Index: b/sysdeps/generic/unsecvars.h -=================================================================== ---- a/sysdeps/generic/unsecvars.h -+++ b/sysdeps/generic/unsecvars.h -@@ -9,6 +9,7 @@ - "LD_DEBUG\0" \ - "LD_DEBUG_OUTPUT\0" \ - "LD_DYNAMIC_WEAK\0" \ -+ "LD_HWCAP_MASK\0" \ - "LD_LIBRARY_PATH\0" \ - "LD_ORIGIN_PATH\0" \ - "LD_PRELOAD\0" \ diff --git a/SOURCES/glibc-rh1452721-1.patch b/SOURCES/glibc-rh1452721-1.patch new file mode 100644 index 0000000..95e615a --- /dev/null +++ b/SOURCES/glibc-rh1452721-1.patch @@ -0,0 +1,23 @@ +commit TBD +Author: Florian Weimer +Date: Fri May 19 17:46:47 2017 +0200 + + rtld: Completely ignore LD_LIBRARY_PATH for AT_SECURE=1 programs + +LD_LIBRARY_PATH can only be used to reorder system search paths, which +is not useful functionality. + +Index: glibc-2.17-c758a686/elf/rtld.c +=================================================================== +--- glibc-2.17-c758a686.orig/elf/rtld.c ++++ glibc-2.17-c758a686/elf/rtld.c +@@ -2580,7 +2701,8 @@ process_envvars (enum mode *modep) + + case 12: + /* The library search path. 
*/ +- if (memcmp (envline, "LIBRARY_PATH", 12) == 0) ++ if (!__libc_enable_secure ++ && memcmp (envline, "LIBRARY_PATH", 12) == 0) + { + library_path = &envline[13]; + break; diff --git a/SOURCES/glibc-rh1452721-2.patch b/SOURCES/glibc-rh1452721-2.patch new file mode 100644 index 0000000..ca74956 --- /dev/null +++ b/SOURCES/glibc-rh1452721-2.patch @@ -0,0 +1,103 @@ +commit TBD +Author: Florian Weimer +Date: Fri May 19 17:46:47 2017 +0200 + + rtld: Reject overly long LD_PRELOAD path elements + +Index: b/elf/rtld.c +=================================================================== +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -99,6 +99,22 @@ uintptr_t __pointer_chk_guard_local + strong_alias (__pointer_chk_guard_local, __pointer_chk_guard) + #endif + ++/* Check that AT_SECURE=0, or that the passed name does not contain ++ directories and is not overly long. Reject empty names ++ unconditionally. */ ++static bool ++dso_name_valid_for_suid (const char *p) ++{ ++ if (__builtin_expect (INTUSE(__libc_enable_secure), 0)) ++ { ++ /* Ignore pathnames with directories for AT_SECURE=1 ++ programs, and also skip overlong names. */ ++ size_t len = strlen (p); ++ if (len >= NAME_MAX || memchr (p, '/', len) != NULL) ++ return false; ++ } ++ return *p != '\0'; ++} + + /* List of auditing DSOs. */ + static struct audit_list +@@ -880,6 +896,44 @@ static const char *preloadlist attribute + /* Nonzero if information about versions has to be printed. */ + static int version_info attribute_relro; + ++/* The LD_PRELOAD environment variable gives list of libraries ++ separated by white space or colons that are loaded before the ++ executable's dependencies and prepended to the global scope list. ++ (If the binary is running setuid all elements containing a '/' are ++ ignored since it is insecure.) Return the number of preloads ++ performed. 
*/ ++unsigned int ++handle_ld_preload (const char *preloadlist, struct link_map *main_map) ++{ ++ unsigned int npreloads = 0; ++ const char *p = preloadlist; ++ char fname[PATH_MAX]; ++ ++ while (*p != '\0') ++ { ++ /* Split preload list at space/colon. */ ++ size_t len = strcspn (p, " :"); ++ if (len > 0 && len < PATH_MAX) ++ { ++ memcpy (fname, p, len); ++ fname[len] = '\0'; ++ } ++ else ++ fname[0] = '\0'; ++ ++ /* Skip over the substring and the following delimiter. */ ++ p += len; ++ if (*p == ' ' || *p == ':') ++ ++p; ++ ++ if (dso_name_valid_for_suid (fname)) ++ npreloads += do_preload (fname, main_map, "LD_PRELOAD"); ++ } ++ return npreloads; ++} ++ ++ ++ + static void + dl_main (const ElfW(Phdr) *phdr, + ElfW(Word) phnum, +@@ -1611,23 +1665,8 @@ ERROR: ld.so: object '%s' cannot be load + + if (__builtin_expect (preloadlist != NULL, 0)) + { +- /* The LD_PRELOAD environment variable gives list of libraries +- separated by white space or colons that are loaded before the +- executable's dependencies and prepended to the global scope +- list. If the binary is running setuid all elements +- containing a '/' are ignored since it is insecure. */ +- char *list = strdupa (preloadlist); +- char *p; +- + HP_TIMING_NOW (start); +- +- /* Prevent optimizing strsep. Speed is not important here. */ +- while ((p = (strsep) (&list, " :")) != NULL) +- if (p[0] != '\0' +- && (__builtin_expect (! 
INTUSE(__libc_enable_secure), 1) +- || strchr (p, '/') == NULL)) +- npreloads += do_preload (p, main_map, "LD_PRELOAD"); +- ++ npreloads += handle_ld_preload (preloadlist, main_map); + HP_TIMING_NOW (stop); + HP_TIMING_DIFF (diff, start, stop); + HP_TIMING_ACCUM_NT (load_time, diff); diff --git a/SOURCES/glibc-rh1452721-3.patch b/SOURCES/glibc-rh1452721-3.patch new file mode 100644 index 0000000..135ee52 --- /dev/null +++ b/SOURCES/glibc-rh1452721-3.patch @@ -0,0 +1,196 @@ +commit TBD +Author: Florian Weimer +Date: Fri May 19 17:46:47 2017 +0200 + + rtld: Reject overly long LD_AUDIT path elements + +Also only process the last LD_AUDIT entry. + +Index: b/elf/rtld.c +=================================================================== +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -116,13 +116,91 @@ dso_name_valid_for_suid (const char *p) + return *p != '\0'; + } + +-/* List of auditing DSOs. */ ++/* LD_AUDIT variable contents. Must be processed before the ++ audit_list below. */ ++const char *audit_list_string; ++ ++/* Cyclic list of auditing DSOs. audit_list->next is the first ++ element. */ + static struct audit_list + { + const char *name; + struct audit_list *next; + } *audit_list; + ++/* Iterator for audit_list_string followed by audit_list. */ ++struct audit_list_iter ++{ ++ /* Tail of audit_list_string still needing processing, or NULL. */ ++ const char *audit_list_tail; ++ ++ /* The list element returned in the previous iteration. NULL before ++ the first element. */ ++ struct audit_list *previous; ++ ++ /* Scratch buffer for returning a name which is part of ++ audit_list_string. */ ++ char fname[PATH_MAX]; ++}; ++ ++/* Initialize an audit list iterator. */ ++static void ++audit_list_iter_init (struct audit_list_iter *iter) ++{ ++ iter->audit_list_tail = audit_list_string; ++ iter->previous = NULL; ++} ++ ++/* Iterate through both audit_list_string and audit_list. 
*/ ++static const char * ++audit_list_iter_next (struct audit_list_iter *iter) ++{ ++ if (iter->audit_list_tail != NULL) ++ { ++ /* First iterate over audit_list_string. */ ++ while (*iter->audit_list_tail != '\0') ++ { ++ /* Split audit list at colon. */ ++ size_t len = strcspn (iter->audit_list_tail, ":"); ++ if (len > 0 && len < PATH_MAX) ++ { ++ memcpy (iter->fname, iter->audit_list_tail, len); ++ iter->fname[len] = '\0'; ++ } ++ else ++ /* Do not return this name to the caller. */ ++ iter->fname[0] = '\0'; ++ ++ /* Skip over the substring and the following delimiter. */ ++ iter->audit_list_tail += len; ++ if (*iter->audit_list_tail == ':') ++ ++iter->audit_list_tail; ++ ++ /* If the name is valid, return it. */ ++ if (dso_name_valid_for_suid (iter->fname)) ++ return iter->fname; ++ /* Otherwise, wrap around and try the next name. */ ++ } ++ /* Fall through to the procesing of audit_list. */ ++ } ++ ++ if (iter->previous == NULL) ++ { ++ if (audit_list == NULL) ++ /* No pre-parsed audit list. */ ++ return NULL; ++ /* Start of audit list. The first list element is at ++ audit_list->next (cyclic list). */ ++ iter->previous = audit_list->next; ++ return iter->previous->name; ++ } ++ if (iter->previous == audit_list) ++ /* Cyclic list wrap-around. */ ++ return NULL; ++ iter->previous = iter->previous->next; ++ return iter->previous->name; ++} ++ + /* Set nonzero during loading and initialization of executable and + libraries, cleared before the executable's entry point runs. This + must not be initialized to nonzero, because the unused dynamic +@@ -1441,11 +1519,13 @@ of this helper program; chances are you + GL(dl_rtld_map).l_tls_modid = _dl_next_tls_modid (); + + /* If we have auditing DSOs to load, do it now. */ +- if (__builtin_expect (audit_list != NULL, 0)) ++ bool need_security_init = true; ++ if (__builtin_expect (audit_list != NULL, 0) ++ || __builtin_expect (audit_list_string != NULL, 0)) + { +- /* Iterate over all entries in the list. 
The order is important. */ + struct audit_ifaces *last_audit = NULL; +- struct audit_list *al = audit_list->next; ++ struct audit_list_iter al_iter; ++ audit_list_iter_init (&al_iter); + + /* Since we start using the auditing DSOs right away we need to + initialize the data structures now. */ +@@ -1456,9 +1536,14 @@ of this helper program; chances are you + use different values (especially the pointer guard) and will + fail later on. */ + security_init (); ++ need_security_init = false; + +- do ++ while (true) + { ++ const char *name = audit_list_iter_next (&al_iter); ++ if (name == NULL) ++ break; ++ + int tls_idx = GL(dl_tls_max_dtv_idx); + + /* Now it is time to determine the layout of the static TLS +@@ -1467,7 +1552,7 @@ of this helper program; chances are you + no DF_STATIC_TLS bit is set. The reason is that we know + glibc will use the static model. */ + struct dlmopen_args dlmargs; +- dlmargs.fname = al->name; ++ dlmargs.fname = name; + dlmargs.map = NULL; + + const char *objname; +@@ -1480,7 +1565,7 @@ of this helper program; chances are you + not_loaded: + _dl_error_printf ("\ + ERROR: ld.so: object '%s' cannot be loaded as audit interface: %s; ignored.\n", +- al->name, err_str); ++ name, err_str); + if (malloced) + free ((char *) err_str); + } +@@ -1584,10 +1669,7 @@ ERROR: ld.so: object '%s' cannot be load + goto not_loaded; + } + } +- +- al = al->next; + } +- while (al != audit_list->next); + + /* If we have any auditing modules, announce that we already + have two objects loaded. */ +@@ -1851,7 +1933,7 @@ ERROR: ld.so: object '%s' cannot be load + if (tcbp == NULL) + tcbp = init_tls (); + +- if (__builtin_expect (audit_list == NULL, 1)) ++ if (need_security_init) + /* Initialize security features. But only if we have not done it + earlier. */ + security_init (); +@@ -2495,9 +2577,7 @@ process_dl_audit (char *str) + char *p; + + while ((p = (strsep) (&str, ":")) != NULL) +- if (p[0] != '\0' +- && (__builtin_expect (! 
INTUSE(__libc_enable_secure), 1) +- || strchr (p, '/') == NULL)) ++ if (dso_name_valid_for_suid (p)) + { + /* This is using the local malloc, not the system malloc. The + memory can never be freed. */ +@@ -2561,7 +2641,7 @@ process_envvars (enum mode *modep) + break; + } + if (memcmp (envline, "AUDIT", 5) == 0) +- process_dl_audit (&envline[6]); ++ audit_list_string = &envline[6]; + break; + + case 7: diff --git a/SOURCES/glibc-rh1452721-4.patch b/SOURCES/glibc-rh1452721-4.patch new file mode 100644 index 0000000..5b4d5ea --- /dev/null +++ b/SOURCES/glibc-rh1452721-4.patch @@ -0,0 +1,51 @@ +Partial backport (without the test) of: + +commit 1c1243b6fc33c029488add276e56570a07803bfd +Author: Siddhesh Poyarekar +Date: Tue Mar 7 20:52:04 2017 +0530 + + Ignore and remove LD_HWCAP_MASK for AT_SECURE programs (bug #21209) + + The LD_HWCAP_MASK environment variable may alter the selection of + function variants for some architectures. For AT_SECURE process it + means that if an outdated routine has a bug that would otherwise not + affect newer platforms by default, LD_HWCAP_MASK will allow that bug + to be exploited. + + To be on the safe side, ignore and disable LD_HWCAP_MASK for setuid + binaries. + + [BZ #21209] + * elf/rtld.c (process_envvars): Ignore LD_HWCAP_MASK for + AT_SECURE processes. + * sysdeps/generic/unsecvars.h: Add LD_HWCAP_MASK. + * elf/tst-env-setuid.c (test_parent): Test LD_HWCAP_MASK. + (test_child): Likewise. + * elf/Makefile (tst-env-setuid-ENV): Add LD_HWCAP_MASK. + +Index: b/elf/rtld.c +=================================================================== +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -2688,7 +2688,8 @@ process_envvars (enum mode *modep) + + case 10: + /* Mask for the important hardware capabilities. 
*/ +- if (memcmp (envline, "HWCAP_MASK", 10) == 0) ++ if (!__libc_enable_secure ++ && memcmp (envline, "HWCAP_MASK", 10) == 0) + GLRO(dl_hwcap_mask) = __strtoul_internal (&envline[11], NULL, + 0, 0); + break; +Index: b/sysdeps/generic/unsecvars.h +=================================================================== +--- a/sysdeps/generic/unsecvars.h ++++ b/sysdeps/generic/unsecvars.h +@@ -9,6 +9,7 @@ + "LD_DEBUG\0" \ + "LD_DEBUG_OUTPUT\0" \ + "LD_DYNAMIC_WEAK\0" \ ++ "LD_HWCAP_MASK\0" \ + "LD_LIBRARY_PATH\0" \ + "LD_ORIGIN_PATH\0" \ + "LD_PRELOAD\0" \ diff --git a/SOURCES/glibc-rh1457177-1.patch b/SOURCES/glibc-rh1457177-1.patch new file mode 100644 index 0000000..eba9b41 --- /dev/null +++ b/SOURCES/glibc-rh1457177-1.patch @@ -0,0 +1,158 @@ +commit 38f3458175ecf7c3588bd5b6e465f4d9205fbe1c +Author: Adhemerval Zanella +Date: Wed Jan 8 05:10:41 2014 -0600 + + PowerPC: remove wrong truncl implementation for PowerPC64 + + The truncl assembly implementation (sysdeps/powerpc/powerpc64/fpu/s_truncl.S) + returns wrong results for some inputs where first double is a exact integer + and the precision is determined by second long double. + + Checking on implementation comments and history, I am very confident the + assembly implementation was based on a version before commit + 5c68d401698a58cf7da150d9cce769fa6679ba5f that fixes BZ#2423 (Errors in + long double (ldbl-128ibm) rounding functions in glibc-2.4). + + By just removing the implementation and make the build select + sysdeps/ieee754/ldbl-128ibm/s_truncl.c instead it fixes tgammal + issues regarding wrong result sign. + +Index: b/sysdeps/ieee754/ldbl-128ibm/s_truncl.c +=================================================================== +--- a/sysdeps/ieee754/ldbl-128ibm/s_truncl.c ++++ b/sysdeps/ieee754/ldbl-128ibm/s_truncl.c +@@ -17,9 +17,6 @@ + License along with the GNU C Library; if not, see + . */ + +-/* This has been coded in assembler because GCC makes such a mess of it +- when it's coded in C. 
*/ +- + #include + #include + #include +Index: b/sysdeps/powerpc/powerpc64/fpu/s_truncl.S +=================================================================== +--- a/sysdeps/powerpc/powerpc64/fpu/s_truncl.S ++++ /dev/null +@@ -1,120 +0,0 @@ +-/* long double trunc function. +- IBM extended format long double version. +- Copyright (C) 2004, 2006 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +- +- .section ".toc","aw" +-.LC0: /* 2**52 */ +- .tc FD_43300000_0[TC],0x4330000000000000 +-.LC1: /* 0.5 */ +- .tc FD_3fe00000_0[TC],0x3fe0000000000000 +- .section ".text" +- +-/* long double [fp1,fp2] truncl (long double x [fp1,fp2]) */ +- +-ENTRY (__truncl) +- mffs fp11 /* Save current FPU rounding mode. */ +- lfd fp13,.LC0@toc(2) +- fabs fp0,fp1 +- fabs fp9,fp2 +- fsub fp12,fp13,fp13 /* generate 0.0 */ +- fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ +- fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ +- bnl- cr7,.L2 +- mtfsfi 7,1 /* Set rounding mode toward 0. */ +- ble- cr6,.L1 +- fneg fp2,fp12 +- fadd fp1,fp1,fp13 /* x+= TWO52; */ +- fsub fp1,fp1,fp13 /* x-= TWO52; */ +- fabs fp1,fp1 /* if (x == 0.0) x = 0.0; */ +-.L0: +- mtfsf 0x01,fp11 /* restore previous rounding mode. 
*/ +- blr +-.L1: +- fneg fp2,fp12 +- bge- cr6,.L0 /* if (x < 0.0) */ +- fsub fp1,fp1,fp13 /* x-= TWO52; */ +- fadd fp1,fp1,fp13 /* x+= TWO52; */ +- fnabs fp1,fp1 /* if (x == 0.0) x = -0.0; */ +- mtfsf 0x01,fp11 /* restore previous rounding mode. */ +- blr +- +-/* The high double is > TWO52 so we need to round the low double and +- perhaps the high double. In this case we have to round the low +- double and handle any adjustment to the high double that may be +- caused by rounding (up). This is complicated by the fact that the +- high double may already be rounded and the low double may have the +- opposite sign to compensate.This gets a bit tricky so we use the +- following algorithm: +- +- tau = floor(x_high/TWO52); +- x0 = x_high - tau; +- x1 = x_low + tau; +- r1 = rint(x1); +- y_high = x0 + r1; +- y_low = x0 - y_high + r1; +- return y; */ +-.L2: +- fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ +- fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ +- fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ +- bgelr- cr7 /* return x; */ +- beqlr- cr0 +- mtfsfi 7,1 /* Set rounding mode toward 0. 
*/ +- fdiv fp8,fp1,fp13 /* x_high/TWO52 */ +- +- bng- cr6,.L6 /* if (x > 0.0) */ +- fctidz fp0,fp8 +- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ +- fadd fp8,fp8,fp8 /* tau++; Make tau even */ +- bng cr5,.L4 /* if (x_low > 0.0) */ +- fmr fp3,fp1 +- fmr fp4,fp2 +- b .L5 +-.L4: /* if (x_low < 0.0) */ +- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ +- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +-.L5: +- fadd fp5,fp4,fp13 /* r1 = r1 + TWO52; */ +- fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ +- b .L9 +-.L6: /* if (x < 0.0) */ +- fctidz fp0,fp8 +- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ +- fadd fp8,fp8,fp8 /* tau++; Make tau even */ +- bnl cr5,.L7 /* if (x_low < 0.0) */ +- fmr fp3,fp1 +- fmr fp4,fp2 +- b .L8 +-.L7: /* if (x_low > 0.0) */ +- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ +- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +-.L8: +- fsub fp5,fp4,fp13 /* r1-= TWO52; */ +- fadd fp5,fp5,fp13 /* r1+= TWO52; */ +-.L9: +- mtfsf 0x01,fp11 /* restore previous rounding mode. */ +- fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ +- fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ +- fadd fp2,fp2,fp5 +- blr +-END (__truncl) +- +-long_double_symbol (libm, __truncl, truncl) diff --git a/SOURCES/glibc-rh1457177-2.patch b/SOURCES/glibc-rh1457177-2.patch new file mode 100644 index 0000000..bd41b69 --- /dev/null +++ b/SOURCES/glibc-rh1457177-2.patch @@ -0,0 +1,179 @@ +Backport of this upstream commit (with libm-test.inc adjustments): + +commit 374f7f61214967bb4e2257695aeeeecc2a77f369 +Author: Adhemerval Zanella +Date: Fri Mar 14 07:35:43 2014 -0500 + + PowerPC: remove wrong ceill implementation for PowerPC64 + + The ceill assembly implementation (sysdeps/powerpc/powerpc64/fpu/s_ceill.S) + returns wrong results for some inputs where first double is a exact + integer and the precision is determined by second long double. 
+ + Checking on implementation comments and history, I am very confident the + assembly implementation was based on a version before commit + 5c68d401698a58cf7da150d9cce769fa6679ba5f that fixes BZ#2423 (Errors in + long double (ldbl-128ibm) rounding functions in glibc-2.4). + + By just removing the implementation and make the build select + sysdeps/ieee754/ldbl-128ibm/s_ceill.c instead fixes the failing math. + + Fixes BZ#16701. + +Index: b/math/libm-test.inc +=================================================================== +--- a/math/libm-test.inc ++++ b/math/libm-test.inc +@@ -2241,6 +2241,15 @@ ceil_test (void) + TEST_f_f (ceil, -72057594037927936.75L, -72057594037927936.0L); + TEST_f_f (ceil, -72057594037927937.5L, -72057594037927937.0L); + ++ /* Check cases where first double is a exact integer higher than 2^52 and ++ the precision is determined by second long double for IBM long double. */ ++ TEST_f_f (ceil, 34503599627370498.515625L, 34503599627370499.0L); ++ TEST_f_f (ceil, -34503599627370498.515625L, -34503599627370498.0L); ++# if LDBL_MANT_DIG >= 106 ++ TEST_f_f (ceil, 1192568192774434123539907640624.484375L, 1192568192774434123539907640625.0L); ++ TEST_f_f (ceil, -1192568192774434123539907640624.484375L, -1192568192774434123539907640624.0L); ++# endif ++ + TEST_f_f (ceil, 10141204801825835211973625643007.5L, 10141204801825835211973625643008.0L); + TEST_f_f (ceil, 10141204801825835211973625643008.25L, 10141204801825835211973625643009.0L); + TEST_f_f (ceil, 10141204801825835211973625643008.5L, 10141204801825835211973625643009.0L); +Index: b/sysdeps/powerpc/powerpc64/fpu/s_ceill.S +=================================================================== +--- a/sysdeps/powerpc/powerpc64/fpu/s_ceill.S ++++ /dev/null +@@ -1,132 +0,0 @@ +-/* s_ceill.S IBM extended format long double version. +- Copyright (C) 2004, 2006 Free Software Foundation, Inc. +- This file is part of the GNU C Library. 
+- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +- +- .section ".toc","aw" +-.LC0: /* 2**52 */ +- .tc FD_43300000_0[TC],0x4330000000000000 +- +- .section ".text" +- +-/* long double [fp1,fp2] ceill (long double x [fp1,fp2]) +- IEEE 1003.1 ceil function. +- +- PowerPC64 long double uses the IBM extended format which is +- represented two 64-floating point double values. The values are +- non-overlapping giving an effective precision of 106 bits. The first +- double contains the high order bits of mantisa and is always ceiled +- to represent a normal ceiling of long double to double. Since the +- long double value is sum of the high and low values, the low double +- normally has the opposite sign to compensate for the this ceiling. +- +- For long double there are two cases: +- 1) |x| < 2**52, all the integer bits are in the high double. +- ceil the high double and set the low double to -0.0. +- 2) |x| >= 2**52, ceiling involves both doubles. +- See the comment before lable .L2 for details. +- */ +- +-ENTRY (__ceill) +- mffs fp11 /* Save current FPU rounding mode. */ +- lfd fp13,.LC0@toc(2) +- fabs fp0,fp1 +- fabs fp9,fp2 +- fsub fp12,fp13,fp13 /* generate 0.0 */ +- fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ +- fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ +- bnl- cr7,.L2 +- mtfsfi 7,2 /* Set rounding mode toward +inf. 
*/ +- fneg fp2,fp12 +- ble- cr6,.L1 +- fadd fp1,fp1,fp13 /* x+= TWO52; */ +- fsub fp1,fp1,fp13 /* x-= TWO52; */ +- fabs fp1,fp1 /* if (x == 0.0) */ +-.L0: +- mtfsf 0x01,fp11 /* restore previous rounding mode. */ +- blr /* x = 0.0; */ +-.L1: +- bge- cr6,.L0 /* if (x < 0.0) */ +- fsub fp1,fp1,fp13 /* x-= TWO52; */ +- fadd fp1,fp1,fp13 /* x+= TWO52; */ +- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ +- mtfsf 0x01,fp11 /* restore previous rounding mode. */ +- fnabs fp1,fp1 /* if (x == 0.0) */ +- blr /* x = -0.0; */ +- +-/* The high double is > TWO52 so we need to round the low double and +- perhaps the high double. In this case we have to round the low +- double and handle any adjustment to the high double that may be +- caused by rounding (up). This is complicated by the fact that the +- high double may already be rounded and the low double may have the +- opposite sign to compensate.This gets a bit tricky so we use the +- following algorithm: +- +- tau = floor(x_high/TWO52); +- x0 = x_high - tau; +- x1 = x_low + tau; +- r1 = rint(x1); +- y_high = x0 + r1; +- y_low = x0 - y_high + r1; +- return y; */ +-.L2: +- fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ +- fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ +- fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ +- bgelr- cr7 /* return x; */ +- beqlr- cr0 +- mtfsfi 7,2 /* Set rounding mode toward +inf. 
*/ +- fdiv fp8,fp1,fp13 /* x_high/TWO52 */ +- +- bng- cr6,.L6 /* if (x > 0.0) */ +- fctidz fp0,fp8 +- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ +- bng cr5,.L4 /* if (x_low > 0.0) */ +- fmr fp3,fp1 +- fmr fp4,fp2 +- b .L5 +-.L4: /* if (x_low < 0.0) */ +- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ +- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +-.L5: +- fadd fp5,fp4,fp13 /* r1 = r1 + TWO52; */ +- fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ +- b .L9 +-.L6: /* if (x < 0.0) */ +- fctidz fp0,fp8 +- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ +- bnl cr5,.L7 /* if (x_low < 0.0) */ +- fmr fp3,fp1 +- fmr fp4,fp2 +- b .L8 +-.L7: /* if (x_low > 0.0) */ +- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ +- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +-.L8: +- fsub fp5,fp4,fp13 /* r1-= TWO52; */ +- fadd fp5,fp5,fp13 /* r1+= TWO52; */ +-.L9: +- mtfsf 0x01,fp11 /* restore previous rounding mode. */ +- fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ +- fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ +- fadd fp2,fp2,fp5 +- blr +-END (__ceill) +- +-long_double_symbol (libm, __ceill, ceill) diff --git a/SOURCES/glibc-rh1457177-3.patch b/SOURCES/glibc-rh1457177-3.patch new file mode 100644 index 0000000..535be75 --- /dev/null +++ b/SOURCES/glibc-rh1457177-3.patch @@ -0,0 +1,163 @@ +Backport of this upstream commit (with libm-test.inc adjustments): + +commit 98fb27a373f37554232e0060eef1a5bb00a07eb0 +Author: Adhemerval Zanella +Date: Fri Mar 14 12:27:52 2014 -0500 + + PowerPC: remove wrong nearbyintl implementation for PPC64 + + The nearbyintl assembly implementation + (sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S) + returns wrong results for some inputs where first double is a exact + integer and the precision is determined by second long double. 
+ + Checking on implementation comments and history, I am very confident the + assembly implementation was based on a version before commit + 5c68d401698a58cf7da150d9cce769fa6679ba5f that fixes BZ#2423 (Errors in + long double (ldbl-128ibm) rounding functions in glibc-2.4). + + By just removing the implementation and make the build select + sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c instead fixes the failing + math. + + Fixes BZ#16706. + +Index: b/math/libm-test.inc +=================================================================== +--- a/math/libm-test.inc ++++ b/math/libm-test.inc +@@ -7619,6 +7619,16 @@ nearbyint_test (void) + TEST_f_f (nearbyint, -562949953421312.75, -562949953421313.0); + TEST_f_f (nearbyint, -1125899906842624.75, -1125899906842625.0); + #endif ++#ifdef TEST_LDOUBLE ++ /* Check cases where first double is a exact integer higher than 2^52 and ++ the precision is determined by second long double for IBM long double. */ ++ TEST_f_f (nearbyint, 34503599627370498.515625L, 34503599627370499.0L); ++ TEST_f_f (nearbyint, -34503599627370498.515625L, -34503599627370499.0L); ++# if LDBL_MANT_DIG >= 106 ++ TEST_f_f (nearbyint, 1192568192774434123539907640624.484375L, 1192568192774434123539907640624.0L); ++ TEST_f_f (nearbyint, -1192568192774434123539907640624.484375L, -1192568192774434123539907640624.0L); ++# endif ++#endif + + END (nearbyint); + } +Index: b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S +=================================================================== +--- a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S ++++ /dev/null +@@ -1,113 +0,0 @@ +-/* nearbyint long double. +- IBM extended format long double version. +- Copyright (C) 2004, 2006 Free Software Foundation, Inc. +- This file is part of the GNU C Library. 
+- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +- +- .section ".toc","aw" +-.LC0: /* 2**52 */ +- .tc FD_43300000_0[TC],0x4330000000000000 +- .section ".text" +- +-/* long double [fp1,fp2] nearbyintl (long double x [fp1,fp2]) +- IEEE 1003.1 nearbyintl function. nearbyintl is simular to the rintl +- but does raise the "inexact" exception. This implementation is +- based on rintl but explicitly maskes the inexact exception on entry +- and clears any pending inexact before restoring the exception mask +- on exit. +- +- PowerPC64 long double uses the IBM extended format which is +- represented two 64-floating point double values. The values are +- non-overlapping giving an effective precision of 106 bits. The first +- double contains the high order bits of mantisa and is always rounded +- to represent a normal rounding of long double to double. Since the +- long double value is sum of the high and low values, the low double +- normally has the opposite sign to compensate for the this rounding. +- +- For long double there are two cases: +- 1) |x| < 2**52, all the integer bits are in the high double. +- floor the high double and set the low double to -0.0. +- 2) |x| >= 2**52, Rounding involves both doubles. +- See the comment before lable .L2 for details. +- */ +-ENTRY (__nearbyintl) +- mffs fp11 /* Save current FPSCR. 
*/ +- lfd fp13,.LC0@toc(2) +- fabs fp0,fp1 +- mtfsb0 28 /* Disable "inexact" exceptions. */ +- fsub fp12,fp13,fp13 /* generate 0.0 */ +- fabs fp9,fp2 +- fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ +- fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ +- bnl- cr7,.L2 +- fmr fp2,fp12 +- bng- cr6,.L4 +- fadd fp1,fp1,fp13 /* x+= TWO52; */ +- fsub fp1,fp1,fp13 /* x-= TWO52; */ +- b .L9 +-.L4: +- bnl- cr6,.L9 /* if (x < 0.0) */ +- fsub fp1,fp13,fp1 /* x = TWO52 - x; */ +- fsub fp0,fp1,fp13 /* x = - (x - TWO52); */ +- fneg fp1,fp0 +-.L9: +- mtfsb0 6 /* Clear any pending "inexact" exceptions. */ +- mtfsf 0x01,fp11 /* restore exception mask. */ +- blr +- +-/* The high double is > TWO52 so we need to round the low double and +- perhaps the high double. This gets a bit tricky so we use the +- following algorithm: +- +- tau = floor(x_high/TWO52); +- x0 = x_high - tau; +- x1 = x_low + tau; +- r1 = nearbyint(x1); +- y_high = x0 + r1; +- y_low = r1 - tau; +- return y; */ +-.L2: +- fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ +- fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ +- bge- cr7,.L9 /* return x; */ +- beq- cr0,.L9 +- fdiv fp8,fp1,fp13 /* x_high/TWO52 */ +- fctidz fp0,fp8 +- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ +- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ +- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +- +- fcmpu cr6,fp4,fp12 /* if (x1 > 0.0) */ +- bng- cr6,.L8 +- fadd fp5,fp4,fp13 /* r1 = x1 + TWO52; */ +- fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ +- b .L6 +-.L8: +- fmr fp5,fp4 +- bge- cr6,.L6 /* if (x1 < 0.0) */ +- fsub fp5,fp13,fp4 /* r1 = TWO52 - x1; */ +- fsub fp0,fp5,fp13 /* r1 = - (r1 - TWO52); */ +- fneg fp5,fp0 +-.L6: +- fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ +- fsub fp2,fp5,fp8 /* y_low = r1 - tau; */ +- b .L9 +-END (__nearbyintl) +- +-long_double_symbol (libm, __nearbyintl, nearbyintl) diff --git a/SOURCES/glibc-rh1457177-4.patch b/SOURCES/glibc-rh1457177-4.patch new file mode 100644 index 0000000..f4d731d --- /dev/null +++ b/SOURCES/glibc-rh1457177-4.patch @@ -0,0 
+1,180 @@ +Backport of this upstream commit (with libm-test.inc adjustments): + +commit c7de50250367167d8c9f35594b264f6a0af8dd0c +Author: Adhemerval Zanella +Date: Fri Mar 14 12:49:45 2014 -0500 + + PowerPC: remove wrong roundl implementation for PowerPC64 + + The roundl assembly implementation + (sysdeps/powerpc/powerpc64/fpu/s_roundl.S) + returns wrong results for some inputs where first double is a exact + integer and the precision is determined by second long double. + + Checking on implementation comments and history, I am very confident the + assembly implementation was based on a version before commit + 5c68d401698a58cf7da150d9cce769fa6679ba5f that fixes BZ#2423 (Errors in + long double (ldbl-128ibm) rounding functions in glibc-2.4). + + By just removing the implementation and make the build select + sysdeps/ieee754/ldbl-128ibm/s_roundl.c instead fixes the failing math. + + This fixes 16707. + +Index: b/math/libm-test.inc +=================================================================== +--- a/math/libm-test.inc ++++ b/math/libm-test.inc +@@ -9268,6 +9268,15 @@ round_test (void) + TEST_f_f (round, -72057594037927936.75L, -72057594037927937.0L); + TEST_f_f (round, -72057594037927937.5L, -72057594037927938.0L); + ++ /* Check cases where first double is a exact integer higher than 2^52 and ++ the precision is determined by second long double for IBM long double. 
*/ ++ TEST_f_f (round, 34503599627370498.515625L, 34503599627370499.0L); ++ TEST_f_f (round, -34503599627370498.515625L, -34503599627370499.0L); ++# if LDBL_MANT_DIG >= 106 ++ TEST_f_f (round, 1192568192774434123539907640624.484375L, 1192568192774434123539907640624.0L); ++ TEST_f_f (round, -1192568192774434123539907640624.484375L, -1192568192774434123539907640624.0L); ++# endif ++ + TEST_f_f (round, 10141204801825835211973625643007.5L, 10141204801825835211973625643008.0L); + TEST_f_f (round, 10141204801825835211973625643008.25L, 10141204801825835211973625643008.0L); + TEST_f_f (round, 10141204801825835211973625643008.5L, 10141204801825835211973625643009.0L); +Index: b/sysdeps/powerpc/powerpc64/fpu/s_roundl.S +=================================================================== +--- a/sysdeps/powerpc/powerpc64/fpu/s_roundl.S ++++ /dev/null +@@ -1,132 +0,0 @@ +-/* long double round function. +- IBM extended format long double version. +- Copyright (C) 2004, 2006 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . 
*/ +- +-#include +-#include +- +- .section ".toc","aw" +-.LC0: /* 2**52 */ +- .tc FD_43300000_0[TC],0x4330000000000000 +-.LC1: /* 0.5 */ +- .tc FD_3fe00000_0[TC],0x3fe0000000000000 +- .section ".text" +- +-/* long double [fp1,fp2] roundl (long double x [fp1,fp2]) +- IEEE 1003.1 round function. IEEE specifies "round to the nearest +- integer value, rounding halfway cases away from zero, regardless of +- the current rounding mode." However PowerPC Architecture defines +- "Round to Nearest" as "Choose the best approximation. In case of a +- tie, choose the one that is even (least significant bit o).". +- So we can't use the PowerPC "Round to Nearest" mode. Instead we set +- "Round toward Zero" mode and round by adding +-0.5 before rounding +- to the integer value. */ +- +-ENTRY (__roundl) +- mffs fp11 /* Save current FPU rounding mode. */ +- lfd fp13,.LC0@toc(2) +- fabs fp0,fp1 +- fabs fp9,fp2 +- fsub fp12,fp13,fp13 /* generate 0.0 */ +- fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ +- fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ +- bnl- cr7,.L2 +- mtfsfi 7,1 /* Set rounding mode toward 0. */ +- lfd fp10,.LC1@toc(2) +- ble- cr6,.L1 +- fneg fp2,fp12 +- fadd fp1,fp1,fp10 /* x+= 0.5; */ +- fadd fp1,fp1,fp13 /* x+= TWO52; */ +- fsub fp1,fp1,fp13 /* x-= TWO52; */ +- fabs fp1,fp1 /* if (x == 0.0) x = 0.0; */ +-.L0: +- mtfsf 0x01,fp11 /* restore previous rounding mode. */ +- blr +-.L1: +- fsub fp9,fp1,fp10 /* x-= 0.5; */ +- fneg fp2,fp12 +- bge- cr6,.L0 /* if (x < 0.0) */ +- fsub fp1,fp9,fp13 /* x-= TWO52; */ +- fadd fp1,fp1,fp13 /* x+= TWO52; */ +- fnabs fp1,fp1 /* if (x == 0.0) x = -0.0; */ +- mtfsf 0x01,fp11 /* restore previous rounding mode. */ +- blr +- +-/* The high double is > TWO52 so we need to round the low double and +- perhaps the high double. In this case we have to round the low +- double and handle any adjustment to the high double that may be +- caused by rounding (up). 
This is complicated by the fact that the +- high double may already be rounded and the low double may have the +- opposite sign to compensate.This gets a bit tricky so we use the +- following algorithm: +- +- tau = floor(x_high/TWO52); +- x0 = x_high - tau; +- x1 = x_low + tau; +- r1 = rint(x1); +- y_high = x0 + r1; +- y_low = x0 - y_high + r1; +- return y; */ +-.L2: +- fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ +- fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ +- fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ +- lfd fp10,.LC1@toc(2) +- bgelr- cr7 /* return x; */ +- beqlr- cr0 +- mtfsfi 7,1 /* Set rounding mode toward 0. */ +- fdiv fp8,fp1,fp13 /* x_high/TWO52 */ +- +- bng- cr6,.L6 /* if (x > 0.0) */ +- fctidz fp0,fp8 +- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ +- bng cr5,.L4 /* if (x_low > 0.0) */ +- fmr fp3,fp1 +- fmr fp4,fp2 +- b .L5 +-.L4: /* if (x_low < 0.0) */ +- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ +- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +-.L5: +- fadd fp5,fp4,fp10 /* r1 = x1 + 0.5; */ +- fadd fp5,fp5,fp13 /* r1 = r1 + TWO52; */ +- fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ +- b .L9 +-.L6: /* if (x < 0.0) */ +- fctidz fp0,fp8 +- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ +- bnl cr5,.L7 /* if (x_low < 0.0) */ +- fmr fp3,fp1 +- fmr fp4,fp2 +- b .L8 +-.L7: /* if (x_low > 0.0) */ +- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ +- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +-.L8: +- fsub fp5,fp4,fp10 /* r1 = x1 - 0.5; */ +- fsub fp5,fp5,fp13 /* r1-= TWO52; */ +- fadd fp5,fp5,fp13 /* r1+= TWO52; */ +-.L9: +- mtfsf 0x01,fp11 /* restore previous rounding mode. 
*/ +- fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ +- fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ +- fadd fp2,fp2,fp5 +- blr +-END (__roundl) +- +-long_double_symbol (libm, __roundl, roundl) diff --git a/SOURCES/glibc-rh1463274-1.patch b/SOURCES/glibc-rh1463274-1.patch deleted file mode 100644 index eba9b41..0000000 --- a/SOURCES/glibc-rh1463274-1.patch +++ /dev/null @@ -1,158 +0,0 @@ -commit 38f3458175ecf7c3588bd5b6e465f4d9205fbe1c -Author: Adhemerval Zanella -Date: Wed Jan 8 05:10:41 2014 -0600 - - PowerPC: remove wrong truncl implementation for PowerPC64 - - The truncl assembly implementation (sysdeps/powerpc/powerpc64/fpu/s_truncl.S) - returns wrong results for some inputs where first double is a exact integer - and the precision is determined by second long double. - - Checking on implementation comments and history, I am very confident the - assembly implementation was based on a version before commit - 5c68d401698a58cf7da150d9cce769fa6679ba5f that fixes BZ#2423 (Errors in - long double (ldbl-128ibm) rounding functions in glibc-2.4). - - By just removing the implementation and make the build select - sysdeps/ieee754/ldbl-128ibm/s_truncl.c instead it fixes tgammal - issues regarding wrong result sign. - -Index: b/sysdeps/ieee754/ldbl-128ibm/s_truncl.c -=================================================================== ---- a/sysdeps/ieee754/ldbl-128ibm/s_truncl.c -+++ b/sysdeps/ieee754/ldbl-128ibm/s_truncl.c -@@ -17,9 +17,6 @@ - License along with the GNU C Library; if not, see - . */ - --/* This has been coded in assembler because GCC makes such a mess of it -- when it's coded in C. */ -- - #include - #include - #include -Index: b/sysdeps/powerpc/powerpc64/fpu/s_truncl.S -=================================================================== ---- a/sysdeps/powerpc/powerpc64/fpu/s_truncl.S -+++ /dev/null -@@ -1,120 +0,0 @@ --/* long double trunc function. -- IBM extended format long double version. -- Copyright (C) 2004, 2006 Free Software Foundation, Inc. 
-- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- . */ -- --#include --#include -- -- .section ".toc","aw" --.LC0: /* 2**52 */ -- .tc FD_43300000_0[TC],0x4330000000000000 --.LC1: /* 0.5 */ -- .tc FD_3fe00000_0[TC],0x3fe0000000000000 -- .section ".text" -- --/* long double [fp1,fp2] truncl (long double x [fp1,fp2]) */ -- --ENTRY (__truncl) -- mffs fp11 /* Save current FPU rounding mode. */ -- lfd fp13,.LC0@toc(2) -- fabs fp0,fp1 -- fabs fp9,fp2 -- fsub fp12,fp13,fp13 /* generate 0.0 */ -- fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ -- fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ -- bnl- cr7,.L2 -- mtfsfi 7,1 /* Set rounding mode toward 0. */ -- ble- cr6,.L1 -- fneg fp2,fp12 -- fadd fp1,fp1,fp13 /* x+= TWO52; */ -- fsub fp1,fp1,fp13 /* x-= TWO52; */ -- fabs fp1,fp1 /* if (x == 0.0) x = 0.0; */ --.L0: -- mtfsf 0x01,fp11 /* restore previous rounding mode. */ -- blr --.L1: -- fneg fp2,fp12 -- bge- cr6,.L0 /* if (x < 0.0) */ -- fsub fp1,fp1,fp13 /* x-= TWO52; */ -- fadd fp1,fp1,fp13 /* x+= TWO52; */ -- fnabs fp1,fp1 /* if (x == 0.0) x = -0.0; */ -- mtfsf 0x01,fp11 /* restore previous rounding mode. */ -- blr -- --/* The high double is > TWO52 so we need to round the low double and -- perhaps the high double. In this case we have to round the low -- double and handle any adjustment to the high double that may be -- caused by rounding (up). 
This is complicated by the fact that the -- high double may already be rounded and the low double may have the -- opposite sign to compensate.This gets a bit tricky so we use the -- following algorithm: -- -- tau = floor(x_high/TWO52); -- x0 = x_high - tau; -- x1 = x_low + tau; -- r1 = rint(x1); -- y_high = x0 + r1; -- y_low = x0 - y_high + r1; -- return y; */ --.L2: -- fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ -- fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ -- fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ -- bgelr- cr7 /* return x; */ -- beqlr- cr0 -- mtfsfi 7,1 /* Set rounding mode toward 0. */ -- fdiv fp8,fp1,fp13 /* x_high/TWO52 */ -- -- bng- cr6,.L6 /* if (x > 0.0) */ -- fctidz fp0,fp8 -- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ -- fadd fp8,fp8,fp8 /* tau++; Make tau even */ -- bng cr5,.L4 /* if (x_low > 0.0) */ -- fmr fp3,fp1 -- fmr fp4,fp2 -- b .L5 --.L4: /* if (x_low < 0.0) */ -- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ -- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ --.L5: -- fadd fp5,fp4,fp13 /* r1 = r1 + TWO52; */ -- fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ -- b .L9 --.L6: /* if (x < 0.0) */ -- fctidz fp0,fp8 -- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ -- fadd fp8,fp8,fp8 /* tau++; Make tau even */ -- bnl cr5,.L7 /* if (x_low < 0.0) */ -- fmr fp3,fp1 -- fmr fp4,fp2 -- b .L8 --.L7: /* if (x_low > 0.0) */ -- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ -- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ --.L8: -- fsub fp5,fp4,fp13 /* r1-= TWO52; */ -- fadd fp5,fp5,fp13 /* r1+= TWO52; */ --.L9: -- mtfsf 0x01,fp11 /* restore previous rounding mode. 
*/ -- fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ -- fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ -- fadd fp2,fp2,fp5 -- blr --END (__truncl) -- --long_double_symbol (libm, __truncl, truncl) diff --git a/SOURCES/glibc-rh1463274-2.patch b/SOURCES/glibc-rh1463274-2.patch deleted file mode 100644 index bd41b69..0000000 --- a/SOURCES/glibc-rh1463274-2.patch +++ /dev/null @@ -1,179 +0,0 @@ -Backport of this upstream commit (with libm-test.inc adjustments): - -commit 374f7f61214967bb4e2257695aeeeecc2a77f369 -Author: Adhemerval Zanella -Date: Fri Mar 14 07:35:43 2014 -0500 - - PowerPC: remove wrong ceill implementation for PowerPC64 - - The ceill assembly implementation (sysdeps/powerpc/powerpc64/fpu/s_ceill.S) - returns wrong results for some inputs where first double is a exact - integer and the precision is determined by second long double. - - Checking on implementation comments and history, I am very confident the - assembly implementation was based on a version before commit - 5c68d401698a58cf7da150d9cce769fa6679ba5f that fixes BZ#2423 (Errors in - long double (ldbl-128ibm) rounding functions in glibc-2.4). - - By just removing the implementation and make the build select - sysdeps/ieee754/ldbl-128ibm/s_ceill.c instead fixes the failing math. - - Fixes BZ#16701. - -Index: b/math/libm-test.inc -=================================================================== ---- a/math/libm-test.inc -+++ b/math/libm-test.inc -@@ -2241,6 +2241,15 @@ ceil_test (void) - TEST_f_f (ceil, -72057594037927936.75L, -72057594037927936.0L); - TEST_f_f (ceil, -72057594037927937.5L, -72057594037927937.0L); - -+ /* Check cases where first double is a exact integer higher than 2^52 and -+ the precision is determined by second long double for IBM long double. 
*/ -+ TEST_f_f (ceil, 34503599627370498.515625L, 34503599627370499.0L); -+ TEST_f_f (ceil, -34503599627370498.515625L, -34503599627370498.0L); -+# if LDBL_MANT_DIG >= 106 -+ TEST_f_f (ceil, 1192568192774434123539907640624.484375L, 1192568192774434123539907640625.0L); -+ TEST_f_f (ceil, -1192568192774434123539907640624.484375L, -1192568192774434123539907640624.0L); -+# endif -+ - TEST_f_f (ceil, 10141204801825835211973625643007.5L, 10141204801825835211973625643008.0L); - TEST_f_f (ceil, 10141204801825835211973625643008.25L, 10141204801825835211973625643009.0L); - TEST_f_f (ceil, 10141204801825835211973625643008.5L, 10141204801825835211973625643009.0L); -Index: b/sysdeps/powerpc/powerpc64/fpu/s_ceill.S -=================================================================== ---- a/sysdeps/powerpc/powerpc64/fpu/s_ceill.S -+++ /dev/null -@@ -1,132 +0,0 @@ --/* s_ceill.S IBM extended format long double version. -- Copyright (C) 2004, 2006 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- . */ -- --#include --#include -- -- .section ".toc","aw" --.LC0: /* 2**52 */ -- .tc FD_43300000_0[TC],0x4330000000000000 -- -- .section ".text" -- --/* long double [fp1,fp2] ceill (long double x [fp1,fp2]) -- IEEE 1003.1 ceil function. 
-- -- PowerPC64 long double uses the IBM extended format which is -- represented two 64-floating point double values. The values are -- non-overlapping giving an effective precision of 106 bits. The first -- double contains the high order bits of mantisa and is always ceiled -- to represent a normal ceiling of long double to double. Since the -- long double value is sum of the high and low values, the low double -- normally has the opposite sign to compensate for the this ceiling. -- -- For long double there are two cases: -- 1) |x| < 2**52, all the integer bits are in the high double. -- ceil the high double and set the low double to -0.0. -- 2) |x| >= 2**52, ceiling involves both doubles. -- See the comment before lable .L2 for details. -- */ -- --ENTRY (__ceill) -- mffs fp11 /* Save current FPU rounding mode. */ -- lfd fp13,.LC0@toc(2) -- fabs fp0,fp1 -- fabs fp9,fp2 -- fsub fp12,fp13,fp13 /* generate 0.0 */ -- fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ -- fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ -- bnl- cr7,.L2 -- mtfsfi 7,2 /* Set rounding mode toward +inf. */ -- fneg fp2,fp12 -- ble- cr6,.L1 -- fadd fp1,fp1,fp13 /* x+= TWO52; */ -- fsub fp1,fp1,fp13 /* x-= TWO52; */ -- fabs fp1,fp1 /* if (x == 0.0) */ --.L0: -- mtfsf 0x01,fp11 /* restore previous rounding mode. */ -- blr /* x = 0.0; */ --.L1: -- bge- cr6,.L0 /* if (x < 0.0) */ -- fsub fp1,fp1,fp13 /* x-= TWO52; */ -- fadd fp1,fp1,fp13 /* x+= TWO52; */ -- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ -- mtfsf 0x01,fp11 /* restore previous rounding mode. */ -- fnabs fp1,fp1 /* if (x == 0.0) */ -- blr /* x = -0.0; */ -- --/* The high double is > TWO52 so we need to round the low double and -- perhaps the high double. In this case we have to round the low -- double and handle any adjustment to the high double that may be -- caused by rounding (up). 
This is complicated by the fact that the -- high double may already be rounded and the low double may have the -- opposite sign to compensate.This gets a bit tricky so we use the -- following algorithm: -- -- tau = floor(x_high/TWO52); -- x0 = x_high - tau; -- x1 = x_low + tau; -- r1 = rint(x1); -- y_high = x0 + r1; -- y_low = x0 - y_high + r1; -- return y; */ --.L2: -- fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ -- fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ -- fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ -- bgelr- cr7 /* return x; */ -- beqlr- cr0 -- mtfsfi 7,2 /* Set rounding mode toward +inf. */ -- fdiv fp8,fp1,fp13 /* x_high/TWO52 */ -- -- bng- cr6,.L6 /* if (x > 0.0) */ -- fctidz fp0,fp8 -- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ -- bng cr5,.L4 /* if (x_low > 0.0) */ -- fmr fp3,fp1 -- fmr fp4,fp2 -- b .L5 --.L4: /* if (x_low < 0.0) */ -- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ -- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ --.L5: -- fadd fp5,fp4,fp13 /* r1 = r1 + TWO52; */ -- fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ -- b .L9 --.L6: /* if (x < 0.0) */ -- fctidz fp0,fp8 -- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ -- bnl cr5,.L7 /* if (x_low < 0.0) */ -- fmr fp3,fp1 -- fmr fp4,fp2 -- b .L8 --.L7: /* if (x_low > 0.0) */ -- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ -- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ --.L8: -- fsub fp5,fp4,fp13 /* r1-= TWO52; */ -- fadd fp5,fp5,fp13 /* r1+= TWO52; */ --.L9: -- mtfsf 0x01,fp11 /* restore previous rounding mode. 
*/ -- fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ -- fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ -- fadd fp2,fp2,fp5 -- blr --END (__ceill) -- --long_double_symbol (libm, __ceill, ceill) diff --git a/SOURCES/glibc-rh1463274-3.patch b/SOURCES/glibc-rh1463274-3.patch deleted file mode 100644 index 535be75..0000000 --- a/SOURCES/glibc-rh1463274-3.patch +++ /dev/null @@ -1,163 +0,0 @@ -Backport of this upstream commit (with libm-test.inc adjustments): - -commit 98fb27a373f37554232e0060eef1a5bb00a07eb0 -Author: Adhemerval Zanella -Date: Fri Mar 14 12:27:52 2014 -0500 - - PowerPC: remove wrong nearbyintl implementation for PPC64 - - The nearbyintl assembly implementation - (sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S) - returns wrong results for some inputs where first double is a exact - integer and the precision is determined by second long double. - - Checking on implementation comments and history, I am very confident the - assembly implementation was based on a version before commit - 5c68d401698a58cf7da150d9cce769fa6679ba5f that fixes BZ#2423 (Errors in - long double (ldbl-128ibm) rounding functions in glibc-2.4). - - By just removing the implementation and make the build select - sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c instead fixes the failing - math. - - Fixes BZ#16706. - -Index: b/math/libm-test.inc -=================================================================== ---- a/math/libm-test.inc -+++ b/math/libm-test.inc -@@ -7619,6 +7619,16 @@ nearbyint_test (void) - TEST_f_f (nearbyint, -562949953421312.75, -562949953421313.0); - TEST_f_f (nearbyint, -1125899906842624.75, -1125899906842625.0); - #endif -+#ifdef TEST_LDOUBLE -+ /* Check cases where first double is a exact integer higher than 2^52 and -+ the precision is determined by second long double for IBM long double. 
*/ -+ TEST_f_f (nearbyint, 34503599627370498.515625L, 34503599627370499.0L); -+ TEST_f_f (nearbyint, -34503599627370498.515625L, -34503599627370499.0L); -+# if LDBL_MANT_DIG >= 106 -+ TEST_f_f (nearbyint, 1192568192774434123539907640624.484375L, 1192568192774434123539907640624.0L); -+ TEST_f_f (nearbyint, -1192568192774434123539907640624.484375L, -1192568192774434123539907640624.0L); -+# endif -+#endif - - END (nearbyint); - } -Index: b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S -=================================================================== ---- a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S -+++ /dev/null -@@ -1,113 +0,0 @@ --/* nearbyint long double. -- IBM extended format long double version. -- Copyright (C) 2004, 2006 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- . */ -- --#include --#include -- -- .section ".toc","aw" --.LC0: /* 2**52 */ -- .tc FD_43300000_0[TC],0x4330000000000000 -- .section ".text" -- --/* long double [fp1,fp2] nearbyintl (long double x [fp1,fp2]) -- IEEE 1003.1 nearbyintl function. nearbyintl is simular to the rintl -- but does raise the "inexact" exception. This implementation is -- based on rintl but explicitly maskes the inexact exception on entry -- and clears any pending inexact before restoring the exception mask -- on exit. 
-- -- PowerPC64 long double uses the IBM extended format which is -- represented two 64-floating point double values. The values are -- non-overlapping giving an effective precision of 106 bits. The first -- double contains the high order bits of mantisa and is always rounded -- to represent a normal rounding of long double to double. Since the -- long double value is sum of the high and low values, the low double -- normally has the opposite sign to compensate for the this rounding. -- -- For long double there are two cases: -- 1) |x| < 2**52, all the integer bits are in the high double. -- floor the high double and set the low double to -0.0. -- 2) |x| >= 2**52, Rounding involves both doubles. -- See the comment before lable .L2 for details. -- */ --ENTRY (__nearbyintl) -- mffs fp11 /* Save current FPSCR. */ -- lfd fp13,.LC0@toc(2) -- fabs fp0,fp1 -- mtfsb0 28 /* Disable "inexact" exceptions. */ -- fsub fp12,fp13,fp13 /* generate 0.0 */ -- fabs fp9,fp2 -- fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ -- fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ -- bnl- cr7,.L2 -- fmr fp2,fp12 -- bng- cr6,.L4 -- fadd fp1,fp1,fp13 /* x+= TWO52; */ -- fsub fp1,fp1,fp13 /* x-= TWO52; */ -- b .L9 --.L4: -- bnl- cr6,.L9 /* if (x < 0.0) */ -- fsub fp1,fp13,fp1 /* x = TWO52 - x; */ -- fsub fp0,fp1,fp13 /* x = - (x - TWO52); */ -- fneg fp1,fp0 --.L9: -- mtfsb0 6 /* Clear any pending "inexact" exceptions. */ -- mtfsf 0x01,fp11 /* restore exception mask. */ -- blr -- --/* The high double is > TWO52 so we need to round the low double and -- perhaps the high double. 
This gets a bit tricky so we use the -- following algorithm: -- -- tau = floor(x_high/TWO52); -- x0 = x_high - tau; -- x1 = x_low + tau; -- r1 = nearbyint(x1); -- y_high = x0 + r1; -- y_low = r1 - tau; -- return y; */ --.L2: -- fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ -- fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ -- bge- cr7,.L9 /* return x; */ -- beq- cr0,.L9 -- fdiv fp8,fp1,fp13 /* x_high/TWO52 */ -- fctidz fp0,fp8 -- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ -- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ -- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ -- -- fcmpu cr6,fp4,fp12 /* if (x1 > 0.0) */ -- bng- cr6,.L8 -- fadd fp5,fp4,fp13 /* r1 = x1 + TWO52; */ -- fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ -- b .L6 --.L8: -- fmr fp5,fp4 -- bge- cr6,.L6 /* if (x1 < 0.0) */ -- fsub fp5,fp13,fp4 /* r1 = TWO52 - x1; */ -- fsub fp0,fp5,fp13 /* r1 = - (r1 - TWO52); */ -- fneg fp5,fp0 --.L6: -- fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ -- fsub fp2,fp5,fp8 /* y_low = r1 - tau; */ -- b .L9 --END (__nearbyintl) -- --long_double_symbol (libm, __nearbyintl, nearbyintl) diff --git a/SOURCES/glibc-rh1463274-4.patch b/SOURCES/glibc-rh1463274-4.patch deleted file mode 100644 index f4d731d..0000000 --- a/SOURCES/glibc-rh1463274-4.patch +++ /dev/null @@ -1,180 +0,0 @@ -Backport of this upstream commit (with libm-test.inc adjustments): - -commit c7de50250367167d8c9f35594b264f6a0af8dd0c -Author: Adhemerval Zanella -Date: Fri Mar 14 12:49:45 2014 -0500 - - PowerPC: remove wrong roundl implementation for PowerPC64 - - The roundl assembly implementation - (sysdeps/powerpc/powerpc64/fpu/s_roundl.S) - returns wrong results for some inputs where first double is a exact - integer and the precision is determined by second long double. 
- - Checking on implementation comments and history, I am very confident the - assembly implementation was based on a version before commit - 5c68d401698a58cf7da150d9cce769fa6679ba5f that fixes BZ#2423 (Errors in - long double (ldbl-128ibm) rounding functions in glibc-2.4). - - By just removing the implementation and make the build select - sysdeps/ieee754/ldbl-128ibm/s_roundl.c instead fixes the failing math. - - This fixes 16707. - -Index: b/math/libm-test.inc -=================================================================== ---- a/math/libm-test.inc -+++ b/math/libm-test.inc -@@ -9268,6 +9268,15 @@ round_test (void) - TEST_f_f (round, -72057594037927936.75L, -72057594037927937.0L); - TEST_f_f (round, -72057594037927937.5L, -72057594037927938.0L); - -+ /* Check cases where first double is a exact integer higher than 2^52 and -+ the precision is determined by second long double for IBM long double. */ -+ TEST_f_f (round, 34503599627370498.515625L, 34503599627370499.0L); -+ TEST_f_f (round, -34503599627370498.515625L, -34503599627370499.0L); -+# if LDBL_MANT_DIG >= 106 -+ TEST_f_f (round, 1192568192774434123539907640624.484375L, 1192568192774434123539907640624.0L); -+ TEST_f_f (round, -1192568192774434123539907640624.484375L, -1192568192774434123539907640624.0L); -+# endif -+ - TEST_f_f (round, 10141204801825835211973625643007.5L, 10141204801825835211973625643008.0L); - TEST_f_f (round, 10141204801825835211973625643008.25L, 10141204801825835211973625643008.0L); - TEST_f_f (round, 10141204801825835211973625643008.5L, 10141204801825835211973625643009.0L); -Index: b/sysdeps/powerpc/powerpc64/fpu/s_roundl.S -=================================================================== ---- a/sysdeps/powerpc/powerpc64/fpu/s_roundl.S -+++ /dev/null -@@ -1,132 +0,0 @@ --/* long double round function. -- IBM extended format long double version. -- Copyright (C) 2004, 2006 Free Software Foundation, Inc. -- This file is part of the GNU C Library. 
-- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- . */ -- --#include --#include -- -- .section ".toc","aw" --.LC0: /* 2**52 */ -- .tc FD_43300000_0[TC],0x4330000000000000 --.LC1: /* 0.5 */ -- .tc FD_3fe00000_0[TC],0x3fe0000000000000 -- .section ".text" -- --/* long double [fp1,fp2] roundl (long double x [fp1,fp2]) -- IEEE 1003.1 round function. IEEE specifies "round to the nearest -- integer value, rounding halfway cases away from zero, regardless of -- the current rounding mode." However PowerPC Architecture defines -- "Round to Nearest" as "Choose the best approximation. In case of a -- tie, choose the one that is even (least significant bit o).". -- So we can't use the PowerPC "Round to Nearest" mode. Instead we set -- "Round toward Zero" mode and round by adding +-0.5 before rounding -- to the integer value. */ -- --ENTRY (__roundl) -- mffs fp11 /* Save current FPU rounding mode. */ -- lfd fp13,.LC0@toc(2) -- fabs fp0,fp1 -- fabs fp9,fp2 -- fsub fp12,fp13,fp13 /* generate 0.0 */ -- fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ -- fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ -- bnl- cr7,.L2 -- mtfsfi 7,1 /* Set rounding mode toward 0. 
*/ -- lfd fp10,.LC1@toc(2) -- ble- cr6,.L1 -- fneg fp2,fp12 -- fadd fp1,fp1,fp10 /* x+= 0.5; */ -- fadd fp1,fp1,fp13 /* x+= TWO52; */ -- fsub fp1,fp1,fp13 /* x-= TWO52; */ -- fabs fp1,fp1 /* if (x == 0.0) x = 0.0; */ --.L0: -- mtfsf 0x01,fp11 /* restore previous rounding mode. */ -- blr --.L1: -- fsub fp9,fp1,fp10 /* x-= 0.5; */ -- fneg fp2,fp12 -- bge- cr6,.L0 /* if (x < 0.0) */ -- fsub fp1,fp9,fp13 /* x-= TWO52; */ -- fadd fp1,fp1,fp13 /* x+= TWO52; */ -- fnabs fp1,fp1 /* if (x == 0.0) x = -0.0; */ -- mtfsf 0x01,fp11 /* restore previous rounding mode. */ -- blr -- --/* The high double is > TWO52 so we need to round the low double and -- perhaps the high double. In this case we have to round the low -- double and handle any adjustment to the high double that may be -- caused by rounding (up). This is complicated by the fact that the -- high double may already be rounded and the low double may have the -- opposite sign to compensate.This gets a bit tricky so we use the -- following algorithm: -- -- tau = floor(x_high/TWO52); -- x0 = x_high - tau; -- x1 = x_low + tau; -- r1 = rint(x1); -- y_high = x0 + r1; -- y_low = x0 - y_high + r1; -- return y; */ --.L2: -- fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ -- fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ -- fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ -- lfd fp10,.LC1@toc(2) -- bgelr- cr7 /* return x; */ -- beqlr- cr0 -- mtfsfi 7,1 /* Set rounding mode toward 0. 
*/ -- fdiv fp8,fp1,fp13 /* x_high/TWO52 */ -- -- bng- cr6,.L6 /* if (x > 0.0) */ -- fctidz fp0,fp8 -- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ -- bng cr5,.L4 /* if (x_low > 0.0) */ -- fmr fp3,fp1 -- fmr fp4,fp2 -- b .L5 --.L4: /* if (x_low < 0.0) */ -- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ -- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ --.L5: -- fadd fp5,fp4,fp10 /* r1 = x1 + 0.5; */ -- fadd fp5,fp5,fp13 /* r1 = r1 + TWO52; */ -- fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ -- b .L9 --.L6: /* if (x < 0.0) */ -- fctidz fp0,fp8 -- fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ -- bnl cr5,.L7 /* if (x_low < 0.0) */ -- fmr fp3,fp1 -- fmr fp4,fp2 -- b .L8 --.L7: /* if (x_low > 0.0) */ -- fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ -- fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ --.L8: -- fsub fp5,fp4,fp10 /* r1 = x1 - 0.5; */ -- fsub fp5,fp5,fp13 /* r1-= TWO52; */ -- fadd fp5,fp5,fp13 /* r1+= TWO52; */ --.L9: -- mtfsf 0x01,fp11 /* restore previous rounding mode. */ -- fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ -- fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ -- fadd fp2,fp2,fp5 -- blr --END (__roundl) -- --long_double_symbol (libm, __roundl, roundl) diff --git a/SOURCES/glibc-rh731835-0.patch b/SOURCES/glibc-rh731835-0.patch new file mode 100644 index 0000000..602faf7 --- /dev/null +++ b/SOURCES/glibc-rh731835-0.patch @@ -0,0 +1,761 @@ +commit 8d2c0a593bdefd220be0822fb70de6b8d3bfd39d +Author: Adhemerval Zanella +Date: Fri Nov 7 12:25:32 2014 -0500 + + powerpc: Add the lock elision using HTM + + This patch adds support for lock elision using ISA 2.07 hardware + transactional memory instructions for pthread_mutex primitives. + Similar to s390 version, the for elision logic defined in + 'force-elision.h' is only enabled if ENABLE_LOCK_ELISION is defined. + + Also, the lock elision code should be able to be built even with + a compiler that does not provide HTM support with builtins. + However I have noted the performance is sub-optimal due scheduling + pressures. 
+ +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-conf.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-conf.c +@@ -0,0 +1,80 @@ ++/* elision-conf.c: Lock elision tunable parameters. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "config.h" ++#include ++#include ++#include ++#include ++ ++/* Reasonable initial tuning values, may be revised in the future. ++ This is a conservative initial value. */ ++ ++struct elision_config __elision_aconf = ++ { ++ /* How many times to use a non-transactional lock after a transactional ++ failure has occurred because the lock is already acquired. Expressed ++ in number of lock acquisition attempts. */ ++ .skip_lock_busy = 3, ++ /* How often to not attempt to use elision if a transaction aborted due ++ to reasons other than other threads' memory accesses. Expressed in ++ number of lock acquisition attempts. */ ++ .skip_lock_internal_abort = 3, ++ /* How often to not attempt to use elision if a lock used up all retries ++ without success. Expressed in number of lock acquisition attempts. 
*/ ++ .skip_lock_out_of_tbegin_retries = 3, ++ /* How often we retry using elision if there is chance for the transaction ++ to finish execution (e.g., it wasn't aborted due to the lock being ++ already acquired. */ ++ .try_tbegin = 3, ++ /* Same as SKIP_LOCK_INTERNAL_ABORT but for trylock. */ ++ .skip_trylock_internal_abort = 3, ++ }; ++ ++/* Force elision for all new locks. This is used to decide whether existing ++ DEFAULT locks should be automatically use elision in pthread_mutex_lock(). ++ Disabled for suid programs. Only used when elision is available. */ ++ ++int __pthread_force_elision attribute_hidden; ++ ++/* Initialize elision. */ ++ ++static void ++elision_init (int argc __attribute__ ((unused)), ++ char **argv __attribute__ ((unused)), ++ char **environ) ++{ ++#ifdef ENABLE_LOCK_ELISION ++ int elision_available = (GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_HTM) ? 1 : 0; ++ __pthread_force_elision = __libc_enable_secure ? 0 : elision_available; ++#endif ++} ++ ++#ifdef SHARED ++# define INIT_SECTION ".init_array" ++# define MAYBE_CONST ++#else ++# define INIT_SECTION ".preinit_array" ++# define MAYBE_CONST const ++#endif ++ ++void (*MAYBE_CONST __pthread_init_array []) (int, char **, char **) ++ __attribute__ ((section (INIT_SECTION), aligned (sizeof (void *)))) = ++{ ++ &elision_init ++}; +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-conf.h +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-conf.h +@@ -0,0 +1,42 @@ ++/* elision-conf.h: Lock elision tunable parameters. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _ELISION_CONF_H ++#define _ELISION_CONF_H 1 ++ ++#include ++#include ++ ++/* Should make sure there is no false sharing on this. */ ++struct elision_config ++{ ++ int skip_lock_busy; ++ int skip_lock_internal_abort; ++ int skip_lock_out_of_tbegin_retries; ++ int try_tbegin; ++ int skip_trylock_internal_abort; ++} __attribute__ ((__aligned__ (128))); ++ ++extern struct elision_config __elision_aconf attribute_hidden; ++ ++extern int __pthread_force_elision attribute_hidden; ++ ++/* Tell the test suite to test elision for this architecture. */ ++#define HAVE_ELISION 1 ++ ++#endif +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-lock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-lock.c +@@ -0,0 +1,107 @@ ++/* elision-lock.c: Elided pthread mutex lock. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "htm.h" ++ ++/* PowerISA 2.0.7 Section B.5.5 defines isync to be insufficient as a ++ barrier in acquire mechanism for HTM operations, a strong 'sync' is ++ required. */ ++#undef __arch_compare_and_exchange_val_32_acq ++#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \ ++ ({ \ ++ __typeof (*(mem)) __tmp; \ ++ __typeof (mem) __memp = (mem); \ ++ __asm __volatile ( \ ++ "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ ++ " cmpw %0,%2\n" \ ++ " bne 2f\n" \ ++ " stwcx. %3,0,%1\n" \ ++ " bne- 1b\n" \ ++ "2: sync" \ ++ : "=&r" (__tmp) \ ++ : "b" (__memp), "r" (oldval), "r" (newval) \ ++ : "cr0", "memory"); \ ++ __tmp; \ ++ }) ++ ++#if !defined(LLL_LOCK) && !defined(EXTRAARG) ++/* Make sure the configuration code is always linked in for static ++ libraries. */ ++#include "elision-conf.c" ++#endif ++ ++#ifndef EXTRAARG ++# define EXTRAARG ++#endif ++#ifndef LLL_LOCK ++# define LLL_LOCK(a,b) lll_lock(a,b), 0 ++#endif ++ ++#define aconf __elision_aconf ++ ++/* Adaptive lock using transactions. ++ By default the lock region is run as a transaction, and when it ++ aborts or the lock is busy the lock adapts itself. */ ++ ++int ++__lll_lock_elision (int *lock, short *adapt_count, EXTRAARG int pshared) ++{ ++ if (*adapt_count > 0) ++ { ++ (*adapt_count)--; ++ goto use_lock; ++ } ++ ++ int try_begin = aconf.try_tbegin; ++ while (1) ++ { ++ if (__builtin_tbegin (0)) ++ { ++ if (*lock == 0) ++ return 0; ++ /* Lock was busy. Fall back to normal locking. */ ++ __builtin_tabort (_ABORT_LOCK_BUSY); ++ } ++ else ++ { ++ /* A persistent failure indicates that a retry will probably ++ result in another failure. Use normal locking now and ++ for the next couple of calls. 
*/ ++ if (try_begin-- <= 0 ++ || _TEXASRU_FAILURE_PERSISTENT (__builtin_get_texasru ())) ++ { ++ if (aconf.skip_lock_internal_abort > 0) ++ *adapt_count = aconf.skip_lock_internal_abort; ++ goto use_lock; ++ } ++ /* Same logic as above, but for for a number of temporary failures ++ in a row. */ ++ else if (aconf.skip_lock_out_of_tbegin_retries > 0 ++ && aconf.try_tbegin > 0) ++ *adapt_count = aconf.skip_lock_out_of_tbegin_retries; ++ } ++ } ++ ++use_lock: ++ return LLL_LOCK ((*lock), pshared); ++} +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-timed.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-timed.c +@@ -0,0 +1,28 @@ ++/* elision-timed.c: Lock elision timed lock. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#define __lll_lock_elision __lll_timedlock_elision ++#define EXTRAARG const struct timespec *t, ++#undef LLL_LOCK ++#define LLL_LOCK(a, b) lll_timedlock(a, t, b) ++ ++#include "elision-lock.c" +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c +@@ -0,0 +1,68 @@ ++/* elision-trylock.c: Lock eliding trylock for pthreads. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include "htm.h" ++ ++#define aconf __elision_aconf ++ ++/* Try to elide a futex trylock. FUTEX is the futex variable. ADAPT_COUNT is ++ the adaptation counter in the mutex. */ ++ ++int ++__lll_trylock_elision (int *futex, short *adapt_count) ++{ ++ /* Implement POSIX semantics by forbiding nesting elided trylocks. */ ++ __builtin_tabort (_ABORT_NESTED_TRYLOCK); ++ ++ /* Only try a transaction if it's worth it. */ ++ if (*adapt_count > 0) ++ { ++ (*adapt_count)--; ++ goto use_lock; ++ } ++ ++ if (__builtin_tbegin (0)) ++ { ++ if (*futex == 0) ++ return 0; ++ ++ /* Lock was busy. Fall back to normal locking. 
*/ ++ __builtin_tabort (_ABORT_LOCK_BUSY); ++ } ++ else ++ { ++ if (_TEXASRU_FAILURE_PERSISTENT (__builtin_get_texasru ())) ++ { ++ /* A persistent failure indicates that a retry will probably ++ result in another failure. Use normal locking now and ++ for the next couple of calls. */ ++ if (aconf.skip_trylock_internal_abort > 0) ++ *adapt_count = aconf.skip_trylock_internal_abort; ++ } ++ ++ if (aconf.skip_lock_busy > 0) ++ *adapt_count = aconf.skip_lock_busy; ++ } ++ ++use_lock: ++ return lll_trylock (*futex); ++} +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c +@@ -0,0 +1,32 @@ ++/* elision-unlock.c: Commit an elided pthread lock. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "pthreadP.h" ++#include ++#include "htm.h" ++ ++int ++__lll_unlock_elision(int *lock, int pshared) ++{ ++ /* When the lock was free we're in a transaction. 
*/ ++ if (*lock == 0) ++ __builtin_tend (0); ++ else ++ lll_unlock ((*lock), pshared); ++ return 0; ++} +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/force-elision.h +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/force-elision.h +@@ -0,0 +1,28 @@ ++/* force-elision.h: Automatic enabling of elision for mutexes ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifdef ENABLE_LOCK_ELISION ++/* Automatically enable elision for existing user lock kinds. */ ++#define FORCE_ELISION(m, s) \ ++ if (__pthread_force_elision \ ++ && (m->__data.__kind & PTHREAD_MUTEX_ELISION_FLAGS_NP) == 0) \ ++ { \ ++ mutex->__data.__kind |= PTHREAD_MUTEX_ELISION_NP; \ ++ s; \ ++ } ++#endif +Index: glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/htm.h +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/htm.h +@@ -0,0 +1,138 @@ ++/* Shared HTM header. Emulate transactional execution facility intrinsics for ++ compilers and assemblers that do not support the intrinsics and instructions ++ yet. ++ ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _HTM_H ++#define _HTM_H 1 ++ ++#ifdef __ASSEMBLER__ ++ ++/* tbegin. */ ++.macro TBEGIN ++ .long 0x7c00051d ++.endm ++ ++/* tend. 0 */ ++.macro TEND ++ .long 0x7c00055d ++.endm ++ ++/* tabort. code */ ++.macro TABORT code ++ .byte 0x7c ++ .byte \code ++ .byte 0x07 ++ .byte 0x1d ++.endm ++ ++/*"TEXASR - Transaction EXception And Summary Register" ++ mfspr %dst,130 */ ++.macro TEXASR dst ++ mfspr \dst,130 ++.endm ++ ++#else ++ ++#include ++ ++/* Official HTM intrinsics interface matching GCC, but works ++ on older GCC compatible compilers and binutils. ++ We should somehow detect if the compiler supports it, because ++ it may be able to generate slightly better code. 
*/ ++ ++#define TBEGIN ".long 0x7c00051d" ++#define TEND ".long 0x7c00055d" ++#if __BYTE_ORDER == __LITTLE_ENDIAN ++# define TABORT ".byte 0x1d,0x07,%1,0x7c" /* Byte-reversed form of the big-endian encoding below. */ ++#else ++# define TABORT ".byte 0x7c,%1,0x07,0x1d" ++#endif ++ ++#define __force_inline inline __attribute__((__always_inline__)) ++ ++#ifndef __HTM__ ++ ++#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \ ++ (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1)) ++#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \ ++ _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1) ++ ++#define _tbegin() \ ++ ({ unsigned int __ret; \ ++ asm volatile ( \ ++ TBEGIN "\t\n" \ ++ "mfcr %0\t\n" \ ++ "rlwinm %0,%0,3,1\t\n" \ ++ "xori %0,%0,1\t\n" \ ++ : "=r" (__ret) : \ ++ : "cr0", "memory"); \ ++ __ret; \ ++ }) ++ ++#define _tend() \ ++ ({ unsigned int __ret; \ ++ asm volatile ( \ ++ TEND "\t\n" \ ++ "mfcr %0\t\n" \ ++ "rlwinm %0,%0,3,1\t\n" \ ++ "xori %0,%0,1\t\n" \ ++ : "=r" (__ret) : \ ++ : "cr0", "memory"); \ ++ __ret; \ ++ }) ++ ++#define _tabort(__code) \ ++ ({ unsigned int __ret; \ ++ asm volatile ( \ ++ TABORT "\t\n" \ ++ "mfcr %0\t\n" \ ++ "rlwinm %0,%0,3,1\t\n" \ ++ "xori %0,%0,1\t\n" \ ++ : "=r" (__ret) : "r" (__code) \ ++ : "cr0", "memory"); \ ++ __ret; \ ++ }) ++ ++#define _texasru() \ ++ ({ unsigned long __ret; \ ++ asm volatile ( \ ++ "mfspr %0,131\t\n" \ ++ : "=r" (__ret)); \ ++ __ret; \ ++ }) ++ ++#define __builtin_tbegin(tdb) _tbegin () ++#define __builtin_tend(nested) _tend () ++#define __builtin_tabort(abortcode) _tabort (abortcode) ++#define __builtin_get_texasru() _texasru () ++ ++#else ++# include ++#endif /* __HTM__ */ ++ ++#endif /* __ASSEMBLER__ */ ++ ++/* Definitions used for TEXASR Failure code (bits 0:6), they need to be even ++ because tabort. always sets the first bit. */ ++#define _ABORT_LOCK_BUSY 0x3f /* Lock already used. */ ++#define _ABORT_NESTED_TRYLOCK 0x3e /* Write operation in trylock. */ ++#define _ABORT_SYSCALL 0x3d /* Syscall issued.
*/ ++ ++#endif +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h +@@ -326,4 +326,28 @@ extern int __lll_timedwait_tid (int *, c + __res; \ + }) + ++/* Transactional lock elision definitions. */ ++extern int __lll_timedlock_elision ++ (int *futex, short *adapt_count, const struct timespec *timeout, int private) ++ attribute_hidden; ++ ++#define lll_timedlock_elision(futex, adapt_count, timeout, private) \ ++ __lll_timedlock_elision(&(futex), &(adapt_count), timeout, private) ++ ++extern int __lll_lock_elision (int *futex, short *adapt_count, int private) ++ attribute_hidden; ++ ++extern int __lll_unlock_elision(int *lock, int private) ++ attribute_hidden; ++ ++extern int __lll_trylock_elision(int *lock, short *adapt_count) ++ attribute_hidden; ++ ++#define lll_lock_elision(futex, adapt_count, private) \ ++ __lll_lock_elision (&(futex), &(adapt_count), private) ++#define lll_unlock_elision(futex, private) \ ++ __lll_unlock_elision (&(futex), private) ++#define lll_trylock_elision(futex, adapt_count) \ ++ __lll_trylock_elision (&(futex), &(adapt_count)) ++ + #endif /* lowlevellock.h */ +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_mutex_cond_lock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_mutex_cond_lock.c +@@ -0,0 +1,22 @@ ++/* Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* The cond lock is not actually elided yet, but we still need to handle ++ already elided locks. */ ++#include ++ ++#include "sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c" +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_mutex_lock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_mutex_lock.c +@@ -0,0 +1,22 @@ ++/* Elided version of pthread_mutex_lock. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#include +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_mutex_timedlock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_mutex_timedlock.c +@@ -0,0 +1,22 @@ ++/* Elided version of pthread_mutex_timedlock. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#include +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_mutex_trylock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_mutex_trylock.c +@@ -0,0 +1,22 @@ ++/* Elided version of pthread_mutex_trylock. ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#include +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h +@@ -90,14 +90,25 @@ typedef union + binary compatibility. */ + int __kind; + #if __WORDSIZE == 64 +- int __spins; ++ short __spins; ++ short __elision; + __pthread_list_t __list; + # define __PTHREAD_MUTEX_HAVE_PREV 1 ++# define __PTHREAD_SPINS 0, 0 ++# define __PTHREAD_MUTEX_HAVE_ELISION 1 + #else + unsigned int __nusers; + __extension__ union + { +- int __spins; ++ struct ++ { ++ short __espins; ++ short __elision; ++# define __spins __elision_data.__espins ++# define __elision __elision_data.__elision ++# define __PTHREAD_SPINS { 0, 0 } ++# define __PTHREAD_MUTEX_HAVE_ELISION 2 ++ } __elision_data; + __pthread_slist_t __list; + }; + #endif +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/Makefile +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/powerpc/Makefile ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/Makefile +@@ -1,2 +1,4 @@ + # pull in __syscall_error routine + libpthread-routines += sysdep ++libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \ ++ elision-trylock diff --git a/SOURCES/glibc-rh731835-1.patch b/SOURCES/glibc-rh731835-1.patch new file mode 100644 index 0000000..a47d2aa 
--- /dev/null +++ b/SOURCES/glibc-rh731835-1.patch @@ -0,0 +1,215 @@ +commit 56cf2763819d2f721c98f2b8bcc04a3c673837d3 +Author: Adhemerval Zanella +Date: Fri Nov 7 12:34:52 2014 -0500 + + powerpc: abort transaction in syscalls + + Linux kernel powerpc documentation states issuing a syscall inside a + transaction is not recommended and may lead to undefined behavior. It + also states syscalls do not abort transactions, nor do they run in + transactional state. + + To avoid side-effects being visible outside transactions, GLIBC with + lock elision enabled will issue a transaction abort instruction just + before all syscalls if hardware supports hardware transactions. + +Index: glibc-2.17-c758a686/nptl/sysdeps/powerpc/tcb-offsets.sym +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/powerpc/tcb-offsets.sym ++++ glibc-2.17-c758a686/nptl/sysdeps/powerpc/tcb-offsets.sym +@@ -15,6 +15,7 @@ MULTIPLE_THREADS_OFFSET thread_offsetof + PID thread_offsetof (pid) + TID thread_offsetof (tid) + POINTER_GUARD (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) ++TM_CAPABLE (offsetof (tcbhead_t, tm_capable) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) + #ifndef __ASSUME_PRIVATE_FUTEX + PRIVATE_FUTEX_OFFSET thread_offsetof (header.private_futex) + #endif +Index: glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/powerpc/tls.h ++++ glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h +@@ -61,6 +61,15 @@ typedef union dtv + are private. */ + typedef struct + { ++ /* Indicate if HTM capable (ISA 2.07). */ ++ uint32_t tm_capable; ++ /* Reservation for AT_PLATFORM data - powerpc64. */ ++#ifdef __powerpc64__ ++ uint32_t at_platform; ++#endif ++ /* Reservation for Dynamic System Optimizer ABI. */ ++ uintptr_t dso_slot2; ++ uintptr_t dso_slot1; + /* GCC split stack support.
*/ + void *__private_ss; + /* Reservation for the Event-Based Branching ABI. */ +@@ -123,7 +132,11 @@ register void *__thread_register __asm__ + special attention since 'errno' is not yet available and if the + operation can cause a failure 'errno' must not be touched. */ + # define TLS_INIT_TP(tcbp, secondcall) \ +- (__thread_register = (void *) (tcbp) + TLS_TCB_OFFSET, NULL) ++ ({ \ ++ __thread_register = (void *) (tcbp) + TLS_TCB_OFFSET; \ ++ THREAD_SET_TM_CAPABLE (GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_HTM ? 1 : 0); \ ++ NULL; \ ++ }) + + /* Return the address of the dtv for the current thread. */ + # define THREAD_DTV() \ +@@ -177,6 +190,13 @@ register void *__thread_register __asm__ + + TLS_PRE_TCB_SIZE))[-1].pointer_guard \ + = THREAD_GET_POINTER_GUARD()) + ++/* tm_capable field in TCB head. */ ++# define THREAD_GET_TM_CAPABLE() \ ++ (((tcbhead_t *) ((char *) __thread_register \ ++ - TLS_TCB_OFFSET))[-1].tm_capable) ++# define THREAD_SET_TM_CAPABLE(value) \ ++ (THREAD_GET_TM_CAPABLE () = (value)) ++ + /* l_tls_offset == 0 is perfectly valid on PPC, so we have to use some + different value to mean unset l_tls_offset. */ + # define NO_TLS_OFFSET -1 +Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/sysdep.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/powerpc/powerpc32/sysdep.h ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/sysdep.h +@@ -89,7 +89,23 @@ GOT_LABEL: ; \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(name) + ++#if ! IS_IN(rtld) && defined (ENABLE_LOCK_ELISION) ++# define ABORT_TRANSACTION \ ++ cmpwi 2,0; \ ++ beq 1f; \ ++ lwz 0,TM_CAPABLE(2); \ ++ cmpwi 0,0; \ ++ beq 1f; \ ++ li 0,_ABORT_SYSCALL; \ ++ tabort. 
0; \ ++ .align 4; \ ++1: ++#else ++# define ABORT_TRANSACTION ++#endif ++ + #define DO_CALL(syscall) \ ++ ABORT_TRANSACTION \ + li 0,syscall; \ + sc + +Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/powerpc/powerpc64/sysdep.h ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h +@@ -283,7 +283,23 @@ LT_LABELSUFFIX(name,_name_end): ; \ + TRACEBACK_MASK(name,mask) \ + END_2(name) + ++#if !IS_IN(rtld) && defined (ENABLE_LOCK_ELISION) ++# define ABORT_TRANSACTION \ ++ cmpdi 13,0; \ ++ beq 1f; \ ++ lwz 0,TM_CAPABLE(13); \ ++ cmpwi 0,0; \ ++ beq 1f; \ ++ li 0,_ABORT_SYSCALL; \ ++ tabort. 0; \ ++ .align 4; \ ++1: ++#else ++# define ABORT_TRANSACTION ++#endif ++ + #define DO_CALL(syscall) \ ++ ABORT_TRANSACTION \ + li 0,syscall; \ + sc + +Index: glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/powerpc/sysdep.h ++++ glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h +@@ -21,6 +21,10 @@ + */ + #define _SYS_AUXV_H 1 + #include ++#ifdef ENABLE_LOCK_ELISION ++#include ++#include ++#endif + + #define PPC_FEATURE_970 (PPC_FEATURE_POWER4 + PPC_FEATURE_HAS_ALTIVEC) + +@@ -164,4 +168,22 @@ + #define ALIGNARG(log2) log2 + #define ASM_SIZE_DIRECTIVE(name) .size name,.-name + ++#else ++ ++/* Linux kernel powerpc documentation [1] states issuing a syscall inside a ++ transaction is not recommended and may lead to undefined behavior. It ++ also states syscalls do not abort transactions. To avoid such traps, ++ we abort transaction just before syscalls. 
++ ++ [1] Documentation/powerpc/transactional_memory.txt [Syscalls] */ ++#if !IS_IN(rtld) && defined (ENABLE_LOCK_ELISION) ++# define ABORT_TRANSACTION \ ++ ({ \ ++ if (THREAD_GET_TM_CAPABLE ()) \ ++ __builtin_tabort (_ABORT_SYSCALL); \ ++ }) ++#else ++# define ABORT_TRANSACTION ++#endif ++ + #endif /* __ASSEMBLER__ */ +Index: glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h +@@ -194,6 +194,7 @@ + register long int r11 __asm__ ("r11"); \ + register long int r12 __asm__ ("r12"); \ + LOADARGS_##nr(name, args); \ ++ ABORT_TRANSACTION; \ + __asm__ __volatile__ \ + ("sc \n\t" \ + "mfcr %0" \ +Index: glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h +@@ -201,6 +201,7 @@ + register long int r7 __asm__ ("r7"); \ + register long int r8 __asm__ ("r8"); \ + LOADARGS_##nr (name, ##args); \ ++ ABORT_TRANSACTION; \ + __asm__ __volatile__ \ + ("sc\n\t" \ + "mfcr %0\n\t" \ +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/createthread.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/powerpc/createthread.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/createthread.c +@@ -16,9 +16,16 @@ + License along with the GNU C Library; if not, see + . */ + ++/* RHEL 7-specific changes: The functions PREPARE_CREATE and TLS_VALUE ++ are used by createthread.c to override thread setup. In upstream ++ they appear in TLS_DEFINE_INIT_TP. 
*/ ++# define PREPARE_CREATE \ ++ void *tp = (void *) (pd) + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE; \ ++ (((tcbhead_t *) ((char *) tp - TLS_TCB_OFFSET))[-1].tm_capable) = \ ++ THREAD_GET_TM_CAPABLE (); ++ + /* Value passed to 'clone' for initialization of the thread register. */ +-#define TLS_VALUE ((void *) (pd) \ +- + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE) ++# define TLS_VALUE tp + + /* Get the real implementation. */ + #include diff --git a/SOURCES/glibc-rh731835-2.patch b/SOURCES/glibc-rh731835-2.patch new file mode 100644 index 0000000..ea314ac --- /dev/null +++ b/SOURCES/glibc-rh731835-2.patch @@ -0,0 +1,43 @@ +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-conf.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/powerpc/elision-conf.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/elision-conf.c +@@ -61,7 +61,14 @@ elision_init (int argc __attribute__ ((u + { + #ifdef ENABLE_LOCK_ELISION + int elision_available = (GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_HTM) ? 1 : 0; +- __pthread_force_elision = __libc_enable_secure ? 0 : elision_available; ++ if (!__libc_enable_secure && elision_available) ++ { ++ __pthread_force_elision = GLRO(dl_elision_enabled); ++ } ++ else ++ { ++ __pthread_force_elision = 0; ++ } + #endif + } + +Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/multiarch/strstr.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/powerpc/powerpc64/multiarch/strstr.c ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/multiarch/strstr.c +@@ -17,7 +17,10 @@ + . */ + + /* Define multiple versions only for definition in libc. */ +-#if IS_IN (libc) ++/* RHEL 7-specific: Define multiple versions only for the definition in ++ libc. Don't define multiple versions for strstr in static library ++ since we need strstr before initialization has happened. 
*/ ++#if defined SHARED && IS_IN (libc) + # include + # include + # include "init-arch.h" +@@ -31,4 +34,6 @@ libc_ifunc (strstr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strstr_power7 + : __strstr_ppc); ++#else ++#include "string/strstr.c" + #endif diff --git a/SOURCES/glibc-rh841653-0.patch b/SOURCES/glibc-rh841653-0.patch new file mode 100644 index 0000000..e57b33f --- /dev/null +++ b/SOURCES/glibc-rh841653-0.patch @@ -0,0 +1,761 @@ +In RHEL7 we already have the newer cpu-feature support so we need +to backport b376899d2 to get 1cdbe5794 to compile. The goal with elision +is an incremental set of patches each of which compiles and introduces the +required functionality for elision. + +Partial backport of: + +commit b376899d27e5ac892f0339cf1bbb3d2158347db8 +Author: H.J. Lu +Date: Thu Aug 13 03:40:40 2015 -0700 + + Update x86 elision-conf.c for + + This patch updates x86 elision-conf.c to use the newly defined + HAS_CPU_FEATURE from . + + * sysdeps/unix/sysv/linux/x86/elision-conf.c (elision_init): + Replace HAS_RTM with HAS_CPU_FEATURE (RTM). + + +Full backport of: + +commit 1717da59aed9612becd56aaa1249aac695af4c8a +Author: Andi Kleen +Date: Thu May 16 19:17:14 2013 -0700 + + Add a configure option to enable lock elision and disable by default + + Can be enabled with --enable-lock-elision=yes at configure time. + +commit 1cdbe579482c07e9f4bb3baa4864da2d3e7eb837 +Author: Andi Kleen +Date: Sat Nov 10 00:51:26 2012 -0800 + + Add the low level infrastructure for pthreads lock elision with TSX + + Lock elision using TSX is a technique to optimize lock scaling. + It allows to run locks in parallel using hardware support for + a transactional execution mode in 4th generation Intel Core CPUs. + See http://www.intel.com/software/tsx for more Information. + + This patch implements a simple adaptive lock elision algorithm based + on RTM. It enables elision for the pthread mutexes and rwlocks.
+ The algorithm keeps track whether a mutex successfully elides or not, + and stops eliding for some time when it is not. + + When the CPU supports RTM the elision path is automatically tried, + otherwise any elision is disabled. + + The adaptation algorithm and its tuning is currently preliminary. + + The code adds some checks to the lock fast paths. Micro-benchmarks + show little to no difference without RTM. + + This patch implements the low level "lll_" code for lock elision. + Followon patches hook this into the pthread implementation + + Changes with the RTM mutexes: + ----------------------------- + Lock elision in pthreads is generally compatible with existing programs. + There are some obscure exceptions, which are expected to be uncommon. + See the manual for more details. + + - A broken program that unlocks a free lock will crash. + There are ways around this with some tradeoffs (more code in hot paths) + I'm still undecided on what approach to take here; have to wait for testing reports. + - pthread_mutex_destroy of a lock mutex will not return EBUSY but 0. + - There's also a similar situation with trylock outside the mutex, + "knowing" that the mutex must be held due to some other condition. + In this case an assert failure cannot be recovered. This situation is + usually an existing bug in the program. + - Same applies to the rwlocks. Some of the return values changes + (for example there is no EDEADLK for an elided lock, unless it aborts. + However when elided it will also never deadlock of course) + - Timing changes, so broken programs that make assumptions about specific timing + may expose already existing latent problems. Note that these broken programs will + break in other situations too (loaded system, new faster hardware, compiler + optimizations etc.) + - Programs with non recursive mutexes that take them recursively in a thread and + which would always deadlock without elision may not always see a deadlock. 
+ The deadlock will only happen on an early or delayed abort (which typically + happens at some point) + This only happens for mutexes not explicitely set to PTHREAD_MUTEX_NORMAL + or PTHREAD_MUTEX_ADAPTIVE_NP. PTHREAD_MUTEX_NORMAL mutexes do not elide. + + The elision default can be set at configure time. + + This patch implements the basic infrastructure for elision. +Index: glibc-2.17-c758a686/nptl/elision-conf.h +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/elision-conf.h +@@ -0,0 +1 @@ ++/* empty */ +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h +@@ -430,6 +430,12 @@ LLL_STUB_UNWIND_INFO_END + : "memory"); \ + result; }) + ++extern int __lll_timedlock_elision (int *futex, short *adapt_count, ++ const struct timespec *timeout, ++ int private) attribute_hidden; ++ ++#define lll_timedlock_elision(futex, adapt_count, timeout, private) \ ++ __lll_timedlock_elision(&(futex), &(adapt_count), timeout, private) + + #define lll_robust_timedlock(futex, timeout, id, private) \ + ({ int result, ignore1, ignore2, ignore3; \ +@@ -583,6 +589,22 @@ extern int __lll_timedwait_tid (int *tid + } \ + __result; }) + ++extern int __lll_lock_elision (int *futex, short *adapt_count, int private) ++ attribute_hidden; ++ ++extern int __lll_unlock_elision(int *lock, int private) ++ attribute_hidden; ++ ++extern int __lll_trylock_elision(int *lock, short *adapt_count) ++ attribute_hidden; ++ ++#define lll_lock_elision(futex, adapt_count, private) \ ++ __lll_lock_elision (&(futex), &(adapt_count), private) ++#define lll_unlock_elision(futex, private) \ ++ __lll_unlock_elision (&(futex), private) ++#define lll_trylock_elision(futex, adapt_count) \ ++ 
__lll_trylock_elision(&(futex), &(adapt_count)) ++ + #endif /* !__ASSEMBLER__ */ + + #endif /* lowlevellock.h */ +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/Makefile +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/Makefile +@@ -0,0 +1,2 @@ ++libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \ ++ elision-trylock +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c +@@ -0,0 +1,90 @@ ++/* elision-conf.c: Lock elision tunable parameters. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "config.h" ++#include ++#include ++#include ++#include ++ ++/* Reasonable initial tuning values, may be revised in the future. ++ This is a conservative initial value. */ ++ ++struct elision_config __elision_aconf = ++ { ++ /* How often to not attempt to use elision if a transaction aborted ++ because the lock is already acquired. Expressed in number of lock ++ acquisition attempts. 
*/ ++ .skip_lock_busy = 3, ++ /* How often to not attempt to use elision if a transaction aborted due ++ to reasons other than other threads' memory accesses. Expressed in ++ number of lock acquisition attempts. */ ++ .skip_lock_internal_abort = 3, ++ /* How often we retry using elision if there is chance for the transaction ++ to finish execution (e.g., it wasn't aborted due to the lock being ++ already acquired. */ ++ .retry_try_xbegin = 3, ++ /* Same as SKIP_LOCK_INTERNAL_ABORT but for trylock. */ ++ .skip_trylock_internal_abort = 3, ++ }; ++ ++/* Elided rwlock toggle, set when elision is available and is ++ enabled for rwlocks. */ ++ ++int __rwlock_rtm_enabled attribute_hidden; ++ ++/* Retries for elided rwlocks on read. Conservative initial value. */ ++ ++int __rwlock_rtm_read_retries attribute_hidden = 3; ++ ++/* Set when the CPU supports elision. When false elision is never attempted. */ ++ ++int __elision_available attribute_hidden; ++ ++/* Force elision for all new locks. This is used to decide whether existing ++ DEFAULT locks should be automatically upgraded to elision in ++ pthread_mutex_lock(). Disabled for suid programs. Only used when elision ++ is available. */ ++ ++int __pthread_force_elision attribute_hidden; ++ ++/* Initialize elison. */ ++ ++static void ++elision_init (int argc __attribute__ ((unused)), ++ char **argv __attribute__ ((unused)), ++ char **environ) ++{ ++ __elision_available = HAS_CPU_FEATURE (RTM); ++#ifdef ENABLE_LOCK_ELISION ++ __pthread_force_elision = __libc_enable_secure ? 0 : __elision_available; ++ __rwlock_rtm_enabled = __libc_enable_secure ? 
0 : __elision_available; ++#endif ++} ++ ++#ifdef SHARED ++# define INIT_SECTION ".init_array" ++#else ++# define INIT_SECTION ".preinit_array" ++#endif ++ ++void (*const __pthread_init_array []) (int, char **, char **) ++ __attribute__ ((section (INIT_SECTION), aligned (sizeof (void *)))) = ++{ ++ &elision_init ++}; +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.h +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.h +@@ -0,0 +1,44 @@ ++/* elision-conf.h: Lock elision tunable parameters. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++#ifndef _ELISION_CONF_H ++#define _ELISION_CONF_H 1 ++ ++#include ++#include ++#include ++ ++/* Should make sure there is no false sharing on this. */ ++ ++struct elision_config ++{ ++ int skip_lock_busy; ++ int skip_lock_internal_abort; ++ int retry_try_xbegin; ++ int skip_trylock_internal_abort; ++}; ++ ++extern struct elision_config __elision_aconf attribute_hidden; ++ ++extern int __rwlock_rtm_enabled attribute_hidden; ++extern int __elision_available attribute_hidden; ++extern int __pthread_force_elision attribute_hidden; ++ ++/* Tell the test suite to test elision for this architecture. 
*/ ++#define HAVE_ELISION 1 ++ ++#endif +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c +@@ -0,0 +1,95 @@ ++/* elision-lock.c: Elided pthread mutex lock. ++ Copyright (C) 2011-2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include "pthreadP.h" ++#include "lowlevellock.h" ++#include "hle.h" ++#include ++ ++#if !defined(LLL_LOCK) && !defined(EXTRAARG) ++/* Make sure the configuration code is always linked in for static ++ libraries. */ ++#include "elision-conf.c" ++#endif ++ ++#ifndef EXTRAARG ++#define EXTRAARG ++#endif ++#ifndef LLL_LOCK ++#define LLL_LOCK(a,b) lll_lock(a,b), 0 ++#endif ++ ++#define aconf __elision_aconf ++ ++/* Adaptive lock using transactions. ++ By default the lock region is run as a transaction, and when it ++ aborts or the lock is busy the lock adapts itself. 
*/ ++ ++int ++__lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) ++{ ++ if (*adapt_count <= 0) ++ { ++ unsigned status; ++ int try_xbegin; ++ ++ for (try_xbegin = aconf.retry_try_xbegin; ++ try_xbegin > 0; ++ try_xbegin--) ++ { ++ if ((status = _xbegin()) == _XBEGIN_STARTED) ++ { ++ if (*futex == 0) ++ return 0; ++ ++ /* Lock was busy. Fall back to normal locking. ++ Could also _xend here but xabort with 0xff code ++ is more visible in the profiler. */ ++ _xabort (_ABORT_LOCK_BUSY); ++ } ++ ++ if (!(status & _XABORT_RETRY)) ++ { ++ if ((status & _XABORT_EXPLICIT) ++ && _XABORT_CODE (status) == _ABORT_LOCK_BUSY) ++ { ++ /* Right now we skip here. Better would be to wait a bit ++ and retry. This likely needs some spinning. */ ++ if (*adapt_count != aconf.skip_lock_busy) ++ *adapt_count = aconf.skip_lock_busy; ++ } ++ /* Internal abort. There is no chance for retry. ++ Use the normal locking and next time use lock. ++ Be careful to avoid writing to the lock. */ ++ else if (*adapt_count != aconf.skip_lock_internal_abort) ++ *adapt_count = aconf.skip_lock_internal_abort; ++ break; ++ } ++ } ++ } ++ else ++ { ++ /* Use a normal lock until the threshold counter runs out. ++ Lost updates possible. */ ++ (*adapt_count)--; ++ } ++ ++ /* Use a normal lock here. */ ++ return LLL_LOCK ((*futex), private); ++} +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-timed.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-timed.c +@@ -0,0 +1,26 @@ ++/* elision-timed.c: Lock elision timed lock. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include "lowlevellock.h" ++#define __lll_lock_elision __lll_timedlock_elision ++#define EXTRAARG const struct timespec *t, ++#undef LLL_LOCK ++#define LLL_LOCK(a, b) lll_timedlock(a, t, b) ++#include "elision-lock.c" +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c +@@ -0,0 +1,72 @@ ++/* elision-trylock.c: Lock eliding trylock for pthreads. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include "hle.h" ++#include ++ ++#define aconf __elision_aconf ++ ++/* Try to elide a futex trylock. FUTEX is the futex variable. ADAPT_COUNT is ++ the adaptation counter in the mutex; a transaction is only attempted ++ while it is <= 0. */ ++ ++int ++__lll_trylock_elision (int *futex, short *adapt_count) ++{ ++ /* Implement POSIX semantics by forbidding nesting ++ trylock. Sorry. After the abort the code is re-executed ++ non transactional and if the lock was already locked ++ return an error. */ ++ _xabort (_ABORT_NESTED_TRYLOCK); ++ ++ /* Only try a transaction if it's worth it. */ ++ if (*adapt_count <= 0) ++ { ++ unsigned status; ++ ++ if ((status = _xbegin()) == _XBEGIN_STARTED) ++ { ++ if (*futex == 0) ++ return 0; ++ ++ /* Lock was busy. Fall back to normal locking. ++ Could also _xend here but xabort with 0xff code ++ is more visible in the profiler. */ ++ _xabort (_ABORT_LOCK_BUSY); ++ } ++ ++ if (!(status & _XABORT_RETRY)) ++ { ++ /* Internal abort. No chance for retry. For future ++ locks don't try speculation for some time. */ ++ if (*adapt_count != aconf.skip_trylock_internal_abort) ++ *adapt_count = aconf.skip_trylock_internal_abort; ++ } ++ /* Could do some retries here. */ ++ } ++ else ++ { ++ /* Lost updates are possible, but harmless. */ ++ (*adapt_count)--; ++ } ++ ++ return lll_trylock (*futex); ++} +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-unlock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-unlock.c +@@ -0,0 +1,33 @@ ++/* elision-unlock.c: Commit an elided pthread lock. ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "pthreadP.h" ++#include "lowlevellock.h" ++#include "hle.h" ++ ++int ++__lll_unlock_elision(int *lock, int private) ++{ ++ /* When the lock was free we're in a transaction. ++ When you crash here you unlocked a free lock. */ ++ if (*lock == 0) ++ _xend(); ++ else ++ lll_unlock ((*lock), private); ++ return 0; ++} +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/hle.h +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/hle.h +@@ -0,0 +1,75 @@ ++/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers ++ that do not support the intrinsics and instructions yet. */ ++#ifndef _HLE_H ++#define _HLE_H 1 ++ ++#ifdef __ASSEMBLER__ ++ ++.macro XBEGIN target ++ .byte 0xc7,0xf8 ++ .long \target-1f ++1: ++.endm ++ ++.macro XEND ++ .byte 0x0f,0x01,0xd5 ++.endm ++ ++.macro XABORT code ++ .byte 0xc6,0xf8,\code ++.endm ++ ++.macro XTEST ++ .byte 0x0f,0x01,0xd6 ++.endm ++ ++#endif ++ ++/* Official RTM intrinsics interface matching gcc/icc, but works ++ on older gcc compatible compilers and binutils. ++ We should somehow detect if the compiler supports it, because ++ it may be able to generate slightly better code. 
*/ ++ ++#define _XBEGIN_STARTED (~0u) ++#define _XABORT_EXPLICIT (1 << 0) ++#define _XABORT_RETRY (1 << 1) ++#define _XABORT_CONFLICT (1 << 2) ++#define _XABORT_CAPACITY (1 << 3) ++#define _XABORT_DEBUG (1 << 4) ++#define _XABORT_NESTED (1 << 5) ++#define _XABORT_CODE(x) (((x) >> 24) & 0xff) ++ ++#define _ABORT_LOCK_BUSY 0xff ++#define _ABORT_LOCK_IS_LOCKED 0xfe ++#define _ABORT_NESTED_TRYLOCK 0xfd ++ ++#ifndef __ASSEMBLER__ ++ ++#define __force_inline __attribute__((__always_inline__)) inline ++ ++static __force_inline int _xbegin(void) ++{ ++ int ret = _XBEGIN_STARTED; ++ asm volatile (".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory"); ++ return ret; ++} ++ ++static __force_inline void _xend(void) ++{ ++ asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory"); ++} ++ ++static __force_inline void _xabort(const unsigned int status) ++{ ++ asm volatile (".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); ++} ++ ++static __force_inline int _xtest(void) ++{ ++ unsigned char out; ++ asm volatile (".byte 0x0f,0x01,0xd6 ; setnz %0" : "=r" (out) :: "memory"); ++ return out; ++} ++ ++#endif ++#endif +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h +@@ -427,6 +427,13 @@ LLL_STUB_UNWIND_INFO_END + : "memory", "cx", "cc", "r10", "r11"); \ + result; }) + ++extern int __lll_timedlock_elision (int *futex, short *adapt_count, ++ const struct timespec *timeout, ++ int private) attribute_hidden; ++ ++#define lll_timedlock_elision(futex, adapt_count, timeout, private) \ ++ __lll_timedlock_elision(&(futex), &(adapt_count), timeout, private) ++ + #define lll_robust_timedlock(futex, timeout, id, private) \ + ({ int result, ignore1, ignore2, ignore3; \ + __asm __volatile (LOCK_INSTR "cmpxchgl %1, %4\n\t" \ +@@ -597,6 +604,22 @@ extern 
int __lll_timedwait_tid (int *tid + } \ + __result; }) + ++extern int __lll_lock_elision (int *futex, short *adapt_count, int private) ++ attribute_hidden; ++ ++extern int __lll_unlock_elision (int *lock, int private) ++ attribute_hidden; ++ ++extern int __lll_trylock_elision (int *lock, short *adapt_count) ++ attribute_hidden; ++ ++#define lll_lock_elision(futex, adapt_count, private) \ ++ __lll_lock_elision (&(futex), &(adapt_count), private) ++#define lll_unlock_elision(futex, private) \ ++ __lll_unlock_elision (&(futex), private) ++#define lll_trylock_elision(futex, adapt_count) \ ++ __lll_trylock_elision (&(futex), &(adapt_count)) ++ + #endif /* !__ASSEMBLER__ */ + + #endif /* lowlevellock.h */ +Index: glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/init-arch.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/i386/i686/multiarch/init-arch.c ++++ /dev/null +@@ -1 +0,0 @@ +-#include +Index: glibc-2.17-c758a686/INSTALL +=================================================================== +--- glibc-2.17-c758a686.orig/INSTALL ++++ glibc-2.17-c758a686/INSTALL +@@ -140,6 +140,9 @@ will be used, and CFLAGS sets optimizati + additional security risks to the system and you should enable it + only if you understand and accept those risks. + ++`--enable-lock-elision=yes' ++ Enable lock elision for pthread mutexes and rwlocks by default. ++ + `--build=BUILD-SYSTEM' + `--host=HOST-SYSTEM' + These options are for cross-compiling. If you specify both +Index: glibc-2.17-c758a686/config.h.in +=================================================================== +--- glibc-2.17-c758a686.orig/config.h.in ++++ glibc-2.17-c758a686/config.h.in +@@ -180,6 +180,9 @@ + /* Define if __stack_chk_guard canary should be randomized at program startup. */ + #undef ENABLE_STACKGUARD_RANDOMIZE + ++/* Define if lock elision should be enabled by default. */ ++#undef ENABLE_LOCK_ELISION ++ + /* Package description. 
*/ + #undef PKGVERSION + +Index: glibc-2.17-c758a686/configure +=================================================================== +--- glibc-2.17-c758a686.orig/configure ++++ glibc-2.17-c758a686/configure +@@ -750,6 +750,7 @@ enable_profile + enable_versioning + enable_oldest_abi + enable_stackguard_randomization ++enable_lock_elision + enable_add_ons + enable_hidden_plt + enable_bind_now +@@ -1405,6 +1406,9 @@ Optional Features: + --enable-stackguard-randomization + initialize __stack_chk_guard canary with a random + number at program start ++ --enable-lock-elision=yes/no ++ Enable lock elision for pthread mutexes and rwlocks ++ by default + --enable-add-ons[=DIRS...] + configure and build add-ons in DIR1,DIR2,... search + for add-ons if no parameter given +@@ -3716,6 +3720,18 @@ if test "$enable_stackguard_randomize" = + + fi + ++# Check whether --enable-lock-elision was given. ++if test "${enable_lock_elision+set}" = set; then : ++ enableval=$enable_lock_elision; enable_lock_elision=$enableval ++else ++ enable_lock_elision=no ++fi ++ ++if test "$enable_lock_elision" = yes ; then ++ $as_echo "#define ENABLE_LOCK_ELISION 1" >>confdefs.h ++ ++fi ++ + # Check whether --enable-add-ons was given. + if test "${enable_add_ons+set}" = set; then : + enableval=$enable_add_ons; +Index: glibc-2.17-c758a686/configure.in +=================================================================== +--- glibc-2.17-c758a686.orig/configure.in ++++ glibc-2.17-c758a686/configure.in +@@ -155,6 +155,15 @@ if test "$enable_stackguard_randomize" = + AC_DEFINE(ENABLE_STACKGUARD_RANDOMIZE) + fi + ++AC_ARG_ENABLE([lock-elision], ++ AC_HELP_STRING([--enable-lock-elision[=yes/no]], ++ [Enable lock elision for pthread mutexes and rwlocks by default]), ++ [enable_lock_elision=$enableval], ++ [enable_lock_elision=no]) ++if test "$enable_lock_elision" = yes ; then ++ AC_DEFINE(ENABLE_LOCK_ELISION) ++fi ++ + dnl Generic infrastructure for drop-in additions to libc. 
+ AC_ARG_ENABLE([add-ons], + AC_HELP_STRING([--enable-add-ons@<:@=DIRS...@:>@], +Index: glibc-2.17-c758a686/manual/install.texi +=================================================================== +--- glibc-2.17-c758a686.orig/manual/install.texi ++++ glibc-2.17-c758a686/manual/install.texi +@@ -174,6 +174,9 @@ setuid and owned by @code{root}. The us + additional security risks to the system and you should enable it only if + you understand and accept those risks. + ++@item --enable-lock-elision=yes ++Enable lock elision for pthread mutexes by default. ++ + @item --build=@var{build-system} + @itemx --host=@var{host-system} + These options are for cross-compiling. If you specify both options and diff --git a/SOURCES/glibc-rh841653-1.patch b/SOURCES/glibc-rh841653-1.patch new file mode 100644 index 0000000..a438831 --- /dev/null +++ b/SOURCES/glibc-rh841653-1.patch @@ -0,0 +1,103 @@ +commit b023e4ca99f5e81f90d87d23cd267ef2abd2388c +Author: Andi Kleen +Date: Sat Dec 22 00:58:34 2012 -0800 + + Add new internal mutex type flags for elision. + + Add Enable/disable flags used internally + + Extend the mutex initializers to have the fields needed for + elision. The layout stays the same, and this is not visible + to programs. 
+ + These changes are not exposed outside pthread +Index: glibc-2.17-c758a686/nptl/pthreadP.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthreadP.h ++++ glibc-2.17-c758a686/nptl/pthreadP.h +@@ -61,6 +61,10 @@ + enum + { + PTHREAD_MUTEX_KIND_MASK_NP = 3, ++ ++ PTHREAD_MUTEX_ELISION_NP = 256, ++ PTHREAD_MUTEX_NO_ELISION_NP = 512, ++ + PTHREAD_MUTEX_ROBUST_NORMAL_NP = 16, + PTHREAD_MUTEX_ROBUST_RECURSIVE_NP + = PTHREAD_MUTEX_ROBUST_NORMAL_NP | PTHREAD_MUTEX_RECURSIVE_NP, +@@ -93,12 +97,21 @@ enum + PTHREAD_MUTEX_PP_ERRORCHECK_NP + = PTHREAD_MUTEX_PRIO_PROTECT_NP | PTHREAD_MUTEX_ERRORCHECK_NP, + PTHREAD_MUTEX_PP_ADAPTIVE_NP +- = PTHREAD_MUTEX_PRIO_PROTECT_NP | PTHREAD_MUTEX_ADAPTIVE_NP ++ = PTHREAD_MUTEX_PRIO_PROTECT_NP | PTHREAD_MUTEX_ADAPTIVE_NP, ++ PTHREAD_MUTEX_ELISION_FLAGS_NP ++ = PTHREAD_MUTEX_ELISION_NP | PTHREAD_MUTEX_NO_ELISION_NP, ++ ++ PTHREAD_MUTEX_TIMED_ELISION_NP = ++ PTHREAD_MUTEX_TIMED_NP | PTHREAD_MUTEX_ELISION_NP, ++ PTHREAD_MUTEX_TIMED_NO_ELISION_NP = ++ PTHREAD_MUTEX_TIMED_NP | PTHREAD_MUTEX_NO_ELISION_NP, + }; + #define PTHREAD_MUTEX_PSHARED_BIT 128 + + #define PTHREAD_MUTEX_TYPE(m) \ + ((m)->__data.__kind & 127) ++#define PTHREAD_MUTEX_TYPE_ELISION(m) \ ++ ((m)->__data.__kind & (127|PTHREAD_MUTEX_ELISION_FLAGS_NP)) + + #if LLL_PRIVATE == 0 && LLL_SHARED == 128 + # define PTHREAD_MUTEX_PSHARED(m) \ +Index: glibc-2.17-c758a686/nptl/sysdeps/pthread/pthread.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/pthread/pthread.h ++++ glibc-2.17-c758a686/nptl/sysdeps/pthread/pthread.h +@@ -83,27 +83,39 @@ enum + + + /* Mutex initializers. */ ++#if __PTHREAD_MUTEX_HAVE_ELISION == 1 /* 64bit layout. */ ++#define __PTHREAD_SPINS 0, 0 ++#elif __PTHREAD_MUTEX_HAVE_ELISION == 2 /* 32bit layout. 
*/ ++#define __PTHREAD_SPINS { 0, 0 } ++#else ++#define __PTHREAD_SPINS 0 ++#endif ++ + #ifdef __PTHREAD_MUTEX_HAVE_PREV + # define PTHREAD_MUTEX_INITIALIZER \ +- { { 0, 0, 0, 0, 0, 0, { 0, 0 } } } ++ { { 0, 0, 0, 0, 0, __PTHREAD_SPINS, { 0, 0 } } } + # ifdef __USE_GNU + # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \ +- { { 0, 0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, 0, { 0, 0 } } } ++ { { 0, 0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __PTHREAD_SPINS, { 0, 0 } } } + # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \ +- { { 0, 0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, 0, { 0, 0 } } } ++ { { 0, 0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __PTHREAD_SPINS, { 0, 0 } } } + # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \ +- { { 0, 0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, 0, { 0, 0 } } } ++ { { 0, 0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __PTHREAD_SPINS, { 0, 0 } } } ++# define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \ ++ { { 0, 0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __PTHREAD_SPINS, { 0, 0 } } } ++ + # endif + #else + # define PTHREAD_MUTEX_INITIALIZER \ +- { { 0, 0, 0, 0, 0, { 0 } } } ++ { { 0, 0, 0, 0, 0, { __PTHREAD_SPINS } } } + # ifdef __USE_GNU + # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \ +- { { 0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, 0, { 0 } } } ++ { { 0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, 0, { __PTHREAD_SPINS } } } + # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \ +- { { 0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, 0, { 0 } } } ++ { { 0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, 0, { __PTHREAD_SPINS } } } + # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \ +- { { 0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, 0, { 0 } } } ++ { { 0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, 0, { __PTHREAD_SPINS } } } ++ + # endif + #endif + diff --git a/SOURCES/glibc-rh841653-10.patch b/SOURCES/glibc-rh841653-10.patch new file mode 100644 index 0000000..d94fc16 --- /dev/null +++ b/SOURCES/glibc-rh841653-10.patch @@ -0,0 +1,56 @@ +commit b0a3c1640ab2fb7d16d9b9a8d9c0e524e9cb0001 +Author: Andreas Schwab +Date: Tue Mar 4 13:00:26 2014 +0100 + 
+ Properly handle forced elision in pthread_mutex_trylock (bug 16657) + +Index: glibc-2.17-c758a686/nptl/pthread_mutex_trylock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_trylock.c ++++ glibc-2.17-c758a686/nptl/pthread_mutex_trylock.c +@@ -26,8 +26,8 @@ + #define lll_trylock_elision(a,t) lll_trylock(a) + #endif + +-#ifndef DO_ELISION +-#define DO_ELISION(m) 0 ++#ifndef FORCE_ELISION ++#define FORCE_ELISION(m, s) + #endif + + /* We don't force elision in trylock, because this can lead to inconsistent +@@ -69,7 +69,7 @@ __pthread_mutex_trylock (mutex) + break; + + case PTHREAD_MUTEX_TIMED_ELISION_NP: +- elision: ++ elision: __attribute__((unused)) + if (lll_trylock_elision (mutex->__data.__lock, + mutex->__data.__elision) != 0) + break; +@@ -77,8 +77,7 @@ __pthread_mutex_trylock (mutex) + return 0; + + case PTHREAD_MUTEX_TIMED_NP: +- if (DO_ELISION (mutex)) +- goto elision; ++ FORCE_ELISION (mutex, goto elision); + /*FALL THROUGH*/ + case PTHREAD_MUTEX_ADAPTIVE_NP: + case PTHREAD_MUTEX_ERRORCHECK_NP: +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h +@@ -16,11 +16,6 @@ + License along with the GNU C Library; if not, see + . */ + +-/* Check for elision on this lock without upgrading. */ +-#define DO_ELISION(m) \ +- (__pthread_force_elision \ +- && (m->__data.__kind & PTHREAD_MUTEX_NO_ELISION_NP) == 0) \ +- + /* Automatically enable elision for existing user lock kinds. 
*/ + #define FORCE_ELISION(m, s) \ + if (__pthread_force_elision \ diff --git a/SOURCES/glibc-rh841653-11.patch b/SOURCES/glibc-rh841653-11.patch new file mode 100644 index 0000000..062ac76 --- /dev/null +++ b/SOURCES/glibc-rh841653-11.patch @@ -0,0 +1,26 @@ +commit 7f786dc12bd60f0a134e538429fef98350e4c814 +Author: Torvald Riegel +Date: Mon Dec 15 22:09:55 2014 +0100 + + Fix nptl/tst-mutex5.c: Do not skip tests if elision is enabled. +Index: glibc-2.17-c758a686/nptl/tst-mutex5.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/tst-mutex5.c ++++ glibc-2.17-c758a686/nptl/tst-mutex5.c +@@ -86,8 +86,6 @@ do_test (void) + return 1; + } + +- /* Elided locks do not time out. */ +-#ifndef ENABLE_LOCK_ELISION + if (pthread_mutex_trylock (&m) == 0) + { + puts ("mutex_trylock succeeded"); +@@ -183,7 +181,6 @@ do_test (void) + puts ("3rd timedlock didn't return right away"); + return 1; + } +-#endif + + if (pthread_mutex_unlock (&m) != 0) + { diff --git a/SOURCES/glibc-rh841653-12.patch b/SOURCES/glibc-rh841653-12.patch new file mode 100644 index 0000000..e017067 --- /dev/null +++ b/SOURCES/glibc-rh841653-12.patch @@ -0,0 +1,100 @@ +commit 2868e0703d5b8c8e60c6f60de13e876c4d85daa0 +Author: Andreas Schwab +Date: Mon Aug 11 11:18:26 2014 +0200 + + Filter out PTHREAD_MUTEX_NO_ELISION_NP bit in pthread_mutexattr_gettype (BZ #15790) + + pthread_mutexattr_settype adds PTHREAD_MUTEX_NO_ELISION_NP to kind, + which is an internal flag that pthread_mutexattr_gettype shouldn't + expose, since pthread_mutexattr_settype wouldn't accept it. 
+Index: glibc-2.17-c758a686/nptl/Makefile +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/Makefile ++++ glibc-2.17-c758a686/nptl/Makefile +@@ -252,6 +252,7 @@ tests = tst-typesizes \ + tst-exit1 tst-exit2 tst-exit3 \ + tst-stdio1 tst-stdio2 \ + tst-stack1 tst-stack2 tst-stack3 tst-stack4 tst-pthread-getattr \ ++ tst-pthread-mutexattr \ + tst-unload \ + tst-dlsym1 \ + tst-sysconf \ +Index: glibc-2.17-c758a686/nptl/pthread_mutexattr_gettype.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutexattr_gettype.c ++++ glibc-2.17-c758a686/nptl/pthread_mutexattr_gettype.c +@@ -28,7 +28,8 @@ pthread_mutexattr_gettype (attr, kind) + + iattr = (const struct pthread_mutexattr *) attr; + +- *kind = iattr->mutexkind & ~PTHREAD_MUTEXATTR_FLAG_BITS; ++ *kind = (iattr->mutexkind & ~PTHREAD_MUTEXATTR_FLAG_BITS ++ & ~PTHREAD_MUTEX_NO_ELISION_NP); + + return 0; + } +Index: glibc-2.17-c758a686/nptl/tst-pthread-mutexattr.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/tst-pthread-mutexattr.c +@@ -0,0 +1,60 @@ ++/* Make sure that pthread_mutexattr_gettype returns a valid kind. ++ ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ pthread_mutexattr_t attr; ++ int kind; ++ int error; ++ ++ error = pthread_mutexattr_init (&attr); ++ if (error) ++ { ++ printf ("pthread_mutexattr_init: %s\n", strerror (error)); ++ return 1; ++ } ++ error = pthread_mutexattr_settype (&attr, PTHREAD_MUTEX_DEFAULT); ++ if (error) ++ { ++ printf ("pthread_mutexattr_settype (1): %s\n", strerror (error)); ++ return 1; ++ } ++ error = pthread_mutexattr_gettype (&attr, &kind); ++ if (error) ++ { ++ printf ("pthread_mutexattr_gettype: %s\n", strerror (error)); ++ return 1; ++ } ++ error = pthread_mutexattr_settype (&attr, kind); ++ if (error) ++ { ++ printf ("pthread_mutexattr_settype (2): %s\n", strerror (error)); ++ return 1; ++ } ++ return 0; ++} ++ ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" diff --git a/SOURCES/glibc-rh841653-13.patch b/SOURCES/glibc-rh841653-13.patch new file mode 100644 index 0000000..9f4eac2 --- /dev/null +++ b/SOURCES/glibc-rh841653-13.patch @@ -0,0 +1,103 @@ +commit 5aded6f2abbe19bc77e563b7db10aa9dd037a90d +Author: Andreas Schwab +Date: Wed Jan 13 16:04:42 2016 +0100 + + Don't do lock elision on an error checking mutex (bug 17514) + + Error checking mutexes are not supposed to be subject to lock elision. + That would defeat the error checking nature of the mutex because lock + elision doesn't record ownership. 
+Index: glibc-2.17-c758a686/nptl/Makefile +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/Makefile ++++ glibc-2.17-c758a686/nptl/Makefile +@@ -266,7 +266,8 @@ tests = tst-typesizes \ + tst-abstime \ + tst-vfork1 tst-vfork2 tst-vfork1x tst-vfork2x \ + tst-getpid1 tst-getpid2 tst-getpid3 \ +- tst-initializers1 $(patsubst %,tst-initializers1-%,c89 gnu89 c99 gnu99) ++ tst-initializers1 $(patsubst %,tst-initializers1-%,c89 gnu89 c99 gnu99) \ ++ tst-mutex-errorcheck + xtests = tst-setuid1 tst-setuid1-static tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 + test-srcs = tst-oddstacklimit + +Index: glibc-2.17-c758a686/nptl/pthread_mutex_timedlock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_timedlock.c ++++ glibc-2.17-c758a686/nptl/pthread_mutex_timedlock.c +@@ -87,7 +87,8 @@ pthread_mutex_timedlock (mutex, abstime) + if (__builtin_expect (mutex->__data.__owner == id, 0)) + return EDEADLK; + +- /* FALLTHROUGH */ ++ /* Don't do lock elision on an error checking mutex. */ ++ goto simple; + + case PTHREAD_MUTEX_TIMED_NP: + FORCE_ELISION (mutex, goto elision); +Index: glibc-2.17-c758a686/nptl/tst-mutex-errorcheck.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/tst-mutex-errorcheck.c +@@ -0,0 +1,61 @@ ++/* Check that error checking mutexes are not subject to lock elision. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ struct timespec tms = { 0 }; ++ pthread_mutex_t mutex; ++ pthread_mutexattr_t mutexattr; ++ int ret = 0; ++ ++ if (pthread_mutexattr_init (&mutexattr) != 0) ++ return 1; ++ if (pthread_mutexattr_settype (&mutexattr, PTHREAD_MUTEX_ERRORCHECK) != 0) ++ return 1; ++ ++ if (pthread_mutex_init (&mutex, &mutexattr) != 0) ++ return 1; ++ if (pthread_mutexattr_destroy (&mutexattr) != 0) ++ return 1; ++ ++ /* The call to pthread_mutex_timedlock erroneously enabled lock elision ++ on the mutex, which then triggered an assertion failure in ++ pthread_mutex_unlock. It would also defeat the error checking nature ++ of the mutex. */ ++ if (pthread_mutex_timedlock (&mutex, &tms) != 0) ++ return 1; ++ if (pthread_mutex_timedlock (&mutex, &tms) != EDEADLK) ++ { ++ printf ("Failed error checking on locked mutex\n"); ++ ret = 1; ++ } ++ ++ if (pthread_mutex_unlock (&mutex) != 0) ++ ret = 1; ++ ++ return ret; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" diff --git a/SOURCES/glibc-rh841653-14.patch b/SOURCES/glibc-rh841653-14.patch new file mode 100644 index 0000000..0c1b4a5 --- /dev/null +++ b/SOURCES/glibc-rh841653-14.patch @@ -0,0 +1,172 @@ +Partial backport: + +Skipped elide.h since rw-locks are not backported. + +commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a +Author: Torvald Riegel +Date: Wed Nov 30 17:53:11 2016 +0100 + + Use C11-like atomics instead of plain memory accesses in x86 lock elision. 
+ + This uses atomic operations to access lock elision metadata that is accessed + concurrently (ie, adapt_count fields). The size of the data is less than a + word but accessed only with atomic loads and stores; therefore, we add + support for shorter-size atomic load and stores too. + + * include/atomic.h (__atomic_check_size_ls): New. + (atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed, + atomic_store_release): Use it. + * sysdeps/x86/elide.h (ACCESS_ONCE): Remove. + (elision_adapt, ELIDE_LOCK): Use atomics. + * sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use + atomics and improve code comments. + * sysdeps/unix/sysv/linux/x86/elision-trylock.c + (__lll_trylock_elision): Likewise. + +Index: glibc-2.17-c758a686/include/atomic.h +=================================================================== +--- glibc-2.17-c758a686.orig/include/atomic.h ++++ glibc-2.17-c758a686/include/atomic.h +@@ -567,6 +567,20 @@ void __atomic_link_error (void); + if (sizeof (*mem) != 4) \ + __atomic_link_error (); + # endif ++/* We additionally provide 8b and 16b atomic loads and stores; we do not yet ++ need other atomic operations of such sizes, and restricting the support to ++ loads and stores makes this easier for archs that do not have native ++ support for atomic operations to less-than-word-sized data. 
*/ ++# if __HAVE_64B_ATOMICS == 1 ++# define __atomic_check_size_ls(mem) \ ++ if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && (sizeof (*mem) != 4) \ ++ && (sizeof (*mem) != 8)) \ ++ __atomic_link_error (); ++# else ++# define __atomic_check_size_ls(mem) \ ++ if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \ ++ __atomic_link_error (); ++# endif + + # define atomic_thread_fence_acquire() \ + __atomic_thread_fence (__ATOMIC_ACQUIRE) +@@ -576,18 +590,20 @@ void __atomic_link_error (void); + __atomic_thread_fence (__ATOMIC_SEQ_CST) + + # define atomic_load_relaxed(mem) \ +- ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_RELAXED); }) ++ ({ __atomic_check_size_ls((mem)); \ ++ __atomic_load_n ((mem), __ATOMIC_RELAXED); }) + # define atomic_load_acquire(mem) \ +- ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_ACQUIRE); }) ++ ({ __atomic_check_size_ls((mem)); \ ++ __atomic_load_n ((mem), __ATOMIC_ACQUIRE); }) + + # define atomic_store_relaxed(mem, val) \ + do { \ +- __atomic_check_size((mem)); \ ++ __atomic_check_size_ls((mem)); \ + __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \ + } while (0) + # define atomic_store_release(mem, val) \ + do { \ +- __atomic_check_size((mem)); \ ++ __atomic_check_size_ls((mem)); \ + __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \ + } while (0) + +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c +@@ -44,7 +44,13 @@ + int + __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) + { +- if (*adapt_count <= 0) ++ /* adapt_count can be accessed concurrently; these accesses can be both ++ inside of transactions (if critical sections are nested and the outer ++ critical section uses lock elision) and outside of 
transactions. Thus, ++ we need to use atomic accesses to avoid data races. However, the ++ value of adapt_count is just a hint, so relaxed MO accesses are ++ sufficient. */ ++ if (atomic_load_relaxed (adapt_count) <= 0) + { + unsigned status; + int try_xbegin; +@@ -70,15 +76,20 @@ __lll_lock_elision (int *futex, short *a + && _XABORT_CODE (status) == _ABORT_LOCK_BUSY) + { + /* Right now we skip here. Better would be to wait a bit +- and retry. This likely needs some spinning. */ +- if (*adapt_count != aconf.skip_lock_busy) +- *adapt_count = aconf.skip_lock_busy; ++ and retry. This likely needs some spinning. See ++ above for why relaxed MO is sufficient. */ ++ if (atomic_load_relaxed (adapt_count) ++ != aconf.skip_lock_busy) ++ atomic_store_relaxed (adapt_count, aconf.skip_lock_busy); + } + /* Internal abort. There is no chance for retry. + Use the normal locking and next time use lock. +- Be careful to avoid writing to the lock. */ +- else if (*adapt_count != aconf.skip_lock_internal_abort) +- *adapt_count = aconf.skip_lock_internal_abort; ++ Be careful to avoid writing to the lock. See above for why ++ relaxed MO is sufficient. */ ++ else if (atomic_load_relaxed (adapt_count) ++ != aconf.skip_lock_internal_abort) ++ atomic_store_relaxed (adapt_count, ++ aconf.skip_lock_internal_abort); + break; + } + } +@@ -87,7 +98,8 @@ __lll_lock_elision (int *futex, short *a + { + /* Use a normal lock until the threshold counter runs out. + Lost updates possible. */ +- (*adapt_count)--; ++ atomic_store_relaxed (adapt_count, ++ atomic_load_relaxed (adapt_count) - 1); + } + + /* Use a normal lock here. 
*/ +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c +@@ -36,8 +36,10 @@ __lll_trylock_elision (int *futex, short + return an error. */ + _xabort (_ABORT_NESTED_TRYLOCK); + +- /* Only try a transaction if it's worth it. */ +- if (*adapt_count <= 0) ++ /* Only try a transaction if it's worth it. See __lll_lock_elision for ++ why we need atomic accesses. Relaxed MO is sufficient because this is ++ just a hint. */ ++ if (atomic_load_relaxed (adapt_count) <= 0) + { + unsigned status; + +@@ -55,16 +57,18 @@ __lll_trylock_elision (int *futex, short + if (!(status & _XABORT_RETRY)) + { + /* Internal abort. No chance for retry. For future +- locks don't try speculation for some time. */ +- if (*adapt_count != aconf.skip_trylock_internal_abort) +- *adapt_count = aconf.skip_trylock_internal_abort; ++ locks don't try speculation for some time. See above for MO. */ ++ if (atomic_load_relaxed (adapt_count) ++ != aconf.skip_lock_internal_abort) ++ atomic_store_relaxed (adapt_count, aconf.skip_lock_internal_abort); + } + /* Could do some retries here. */ + } + else + { +- /* Lost updates are possible, but harmless. */ +- (*adapt_count)--; ++ /* Lost updates are possible but harmless (see above). 
*/ ++ atomic_store_relaxed (adapt_count, ++ atomic_load_relaxed (adapt_count) - 1); + } + + return lll_trylock (*futex); diff --git a/SOURCES/glibc-rh841653-15.patch b/SOURCES/glibc-rh841653-15.patch new file mode 100644 index 0000000..8eb551c --- /dev/null +++ b/SOURCES/glibc-rh841653-15.patch @@ -0,0 +1,48 @@ +commit 657c084cd6f69d6cc880c2ae65129a0723d053c5 +Author: Andreas Schwab +Date: Mon Dec 5 12:06:46 2016 +0100 + + Get rid of __elision_available + +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c +@@ -52,11 +52,6 @@ int __rwlock_rtm_enabled attribute_hidde + + int __rwlock_rtm_read_retries attribute_hidden = 3; + +-/* Set when the CPU supports elision. When false elision is never attempted. +- */ +- +-int __elision_available attribute_hidden; +- + /* Force elision for all new locks. This is used to decide whether existing + DEFAULT locks should be automatically upgraded to elision in + pthread_mutex_lock(). Disabled for suid programs. Only used when elision +@@ -71,10 +66,10 @@ elision_init (int argc __attribute__ ((u + char **argv __attribute__ ((unused)), + char **environ) + { +- __elision_available = HAS_CPU_FEATURE (RTM); ++ int elision_available = HAS_CPU_FEATURE (RTM); + #ifdef ENABLE_LOCK_ELISION +- __pthread_force_elision = __libc_enable_secure ? 0 : __elision_available; +- __rwlock_rtm_enabled = __libc_enable_secure ? 0 : __elision_available; ++ __pthread_force_elision = __libc_enable_secure ? 0 : elision_available; ++ __rwlock_rtm_enabled = __libc_enable_secure ? 
0 : elision_available; + #endif + } + +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.h +@@ -35,7 +35,6 @@ struct elision_config + extern struct elision_config __elision_aconf attribute_hidden; + + extern int __rwlock_rtm_enabled attribute_hidden; +-extern int __elision_available attribute_hidden; + extern int __pthread_force_elision attribute_hidden; + + /* Tell the test suite to test elision for this architecture. */ diff --git a/SOURCES/glibc-rh841653-16.patch b/SOURCES/glibc-rh841653-16.patch new file mode 100644 index 0000000..b2b4915 --- /dev/null +++ b/SOURCES/glibc-rh841653-16.patch @@ -0,0 +1,204 @@ +Index: glibc-2.17-c758a686/elf/rtld.c +=================================================================== +--- glibc-2.17-c758a686.orig/elf/rtld.c ++++ glibc-2.17-c758a686/elf/rtld.c +@@ -2478,6 +2478,11 @@ process_envvars (enum mode *modep) + GLRO(dl_profile_output) + = &"/var/tmp\0/var/profile"[INTUSE(__libc_enable_secure) ? 9 : 0]; + ++ /* RHEL 7 specific change: ++ Without the tunables framework we simulate tunables only for ++ use with enabling transactional memory. */ ++ _dl_process_tunable_env_entries (); ++ + while ((envline = _dl_next_ld_env_entry (&runp)) != NULL) + { + size_t len = 0; +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c +@@ -68,8 +68,18 @@ elision_init (int argc __attribute__ ((u + { + int elision_available = HAS_CPU_FEATURE (RTM); + #ifdef ENABLE_LOCK_ELISION +- __pthread_force_elision = __libc_enable_secure ? 
0 : elision_available; +- __rwlock_rtm_enabled = __libc_enable_secure ? 0 : elision_available; ++ if (!__libc_enable_secure && elision_available) ++ { ++ /* RHEL 7 specific change: Check if elision is enabled for the ++ process. */ ++ __pthread_force_elision = GLRO(dl_elision_enabled); ++ __rwlock_rtm_enabled = GLRO(dl_elision_enabled); ++ } ++ else ++ { ++ __pthread_force_elision = 0; ++ __rwlock_rtm_enabled = 0; ++ } + #endif + } + +Index: glibc-2.17-c758a686/sysdeps/generic/ldsodefs.h +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/generic/ldsodefs.h ++++ glibc-2.17-c758a686/sysdeps/generic/ldsodefs.h +@@ -557,6 +557,9 @@ struct rtld_global_ro + platforms. */ + EXTERN uint64_t _dl_hwcap2; + ++ /* RHEL 7 specific change: Is elision enabled for this process? */ ++ EXTERN bool _dl_elision_enabled; ++ + #ifdef SHARED + /* We add a function table to _rtld_global which is then used to + call the function instead of going through the PLT. The result +@@ -893,6 +896,10 @@ extern void _dl_show_auxv (void) interna + other. */ + extern char *_dl_next_ld_env_entry (char ***position) internal_function; + ++/* RHEL 7 specific change: ++ Manually process RHEL 7-specific tunable entries. */ ++extern void _dl_process_tunable_env_entries (void) internal_function; ++ + /* Return an array with the names of the important hardware capabilities. */ + extern const struct r_strlenpair *_dl_important_hwcaps (const char *platform, + size_t paltform_len, +Index: glibc-2.17-c758a686/elf/dl-support.c +=================================================================== +--- glibc-2.17-c758a686.orig/elf/dl-support.c ++++ glibc-2.17-c758a686/elf/dl-support.c +@@ -123,6 +123,10 @@ size_t _dl_phnum; + uint64_t _dl_hwcap __attribute__ ((nocommon)); + uint64_t _dl_hwcap2 __attribute__ ((nocommon)); + ++/* RHEL 7-specific change: Is elision enabled for the process? ++ Static library definition. 
*/ ++bool _dl_elision_enabled; ++ + /* This is not initialized to HWCAP_IMPORTANT, matching the definition + of _dl_important_hwcaps, below, where no hwcap strings are ever + used. This mask is still used to mediate the lookups in the cache +@@ -289,6 +293,9 @@ _dl_non_dynamic_init (void) + _dl_profile_output + = &"/var/tmp\0/var/profile"[__libc_enable_secure ? 9 : 0]; + ++ /* RHEL 7 specific change: Process tunables at startup. */ ++ _dl_process_tunable_env_entries (); ++ + if (__libc_enable_secure) + { + static const char unsecure_envvars[] = +Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-c.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/strstr-c.c ++++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-c.c +@@ -17,6 +17,10 @@ + License along with the GNU C Library; if not, see + . */ + ++/* RHEL 7-specific: Define multiple versions only for the definition in ++ libc. Don't define multiple versions for strstr in static library ++ since we need strstr before initialization has happened. */ ++#if defined SHARED && IS_IN (libc) + /* Redefine strstr so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. 
*/ + #undef strstr +@@ -30,9 +34,11 @@ + # define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2); + #endif ++#endif + + #include "string/strstr.c" + ++#if defined SHARED && IS_IN (libc) + extern __typeof (__redirect_strstr) __strstr_sse42 attribute_hidden; + extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden; + extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; +@@ -48,3 +54,5 @@ libc_ifunc (__libc_strstr, HAS_CPU_FEATU + + #undef strstr + strong_alias (__libc_strstr, strstr) ++#endif ++ +Index: glibc-2.17-c758a686/elf/Makefile +=================================================================== +--- glibc-2.17-c758a686.orig/elf/Makefile ++++ glibc-2.17-c758a686/elf/Makefile +@@ -35,6 +35,10 @@ dl-routines = $(addprefix dl-,load looku + ifeq (yes,$(use-ldconfig)) + dl-routines += dl-cache + endif ++ ++# RHEL 7-specific change: Add rudimentary tunables support. ++dl-routines += dl-tunables ++ + all-dl-routines = $(dl-routines) $(sysdep-dl-routines) + # But they are absent from the shared libc, because that code is in ld.so. + elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \ +Index: glibc-2.17-c758a686/elf/dl-tunables.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/elf/dl-tunables.c +@@ -0,0 +1,60 @@ ++/* RHEL 7-specific rudimentary tunables handling. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++/* RHEL 7 specific change: ++ Manually process RHEL 7-specific tunable entries. */ ++void ++internal_function ++_dl_process_tunable_env_entries (void) ++{ ++#if HAVE_ELISION ++ char **ep; ++ const char *envname = { "RHEL_GLIBC_TUNABLES" }; ++# define TUNABLE_ELISION 0 ++# define TUNABLE_MAX 1 ++ const char *tunables[TUNABLE_MAX] = { "glibc.elision.enable=1" }; ++ ++ ep = __environ; ++ while (*ep != NULL) ++ { ++ size_t cnt = 0; ++ ++ while ((*ep)[cnt] == envname[cnt] && envname[cnt] != '\0') ++ ++cnt; ++ ++ if (envname[cnt] == '\0' && (*ep)[cnt] == '=') ++ { ++ /* Found it. */ ++ char *found; ++ found = strstr (*ep, tunables[TUNABLE_ELISION]); ++ /* Process TUNABLE_ELISION: ++ Note: elision-conf.c applies security checks. */ ++ if (found != NULL) ++ GLRO(dl_elision_enabled) = true; ++ /* Continue the loop in case NAME appears again. */ ++ } ++ ++ ++ep; ++ } ++#endif ++} diff --git a/SOURCES/glibc-rh841653-17.patch b/SOURCES/glibc-rh841653-17.patch new file mode 100644 index 0000000..dba8b9f --- /dev/null +++ b/SOURCES/glibc-rh841653-17.patch @@ -0,0 +1,83 @@ +commit 2702856bf45c82cf8e69f2064f5aa15c0ceb6359 +Author: Andrew Senkevich +Date: Mon Dec 19 13:20:31 2016 +0300 + + Disable TSX on some Haswell processors. + + Patch disables Intel TSX on some Haswell processors to avoid TSX + on kernels that weren't updated with the latest microcode package + (which disables broken feature by default). + + * sysdeps/x86/cpu-features.c (get_common_indeces): Add + stepping identification. + (init_cpu_features): Add handle of Haswell. 
+ +Index: glibc-2.17-c758a686/sysdeps/x86/cpu-features.c +=================================================================== +--- glibc-2.17-c758a686.orig/sysdeps/x86/cpu-features.c ++++ glibc-2.17-c758a686/sysdeps/x86/cpu-features.c +@@ -21,7 +21,8 @@ + + static inline void + get_common_indeces (struct cpu_features *cpu_features, +- unsigned int *family, unsigned int *model) ++ unsigned int *family, unsigned int *model, ++ unsigned int *stepping) + { + unsigned int eax; + __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, +@@ -30,6 +31,7 @@ get_common_indeces (struct cpu_features + GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax; + *family = (eax >> 8) & 0x0f; + *model = (eax >> 4) & 0x0f; ++ *stepping = eax & 0x0f; + } + + static inline void +@@ -45,9 +47,11 @@ init_cpu_features (struct cpu_features * + /* This spells out "GenuineIntel". */ + if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) + { ++ unsigned int stepping; ++ + kind = arch_kind_intel; + +- get_common_indeces (cpu_features, &family, &model); ++ get_common_indeces (cpu_features, &family, &model, &stepping); + + /* Intel processors prefer SSE instruction for memory/string + routines if they are available. */ +@@ -128,6 +132,20 @@ init_cpu_features (struct cpu_features * + | bit_Fast_Unaligned_Load + | bit_Prefer_PMINUB_for_stringop); + break; ++ ++ case 0x3f: ++ /* Xeon E7 v3 with stepping >= 4 has working TSX. */ ++ if (stepping >= 4) ++ break; ++ case 0x3c: ++ case 0x45: ++ case 0x46: ++ /* Disable Intel TSX on Haswell processors (except Xeon E7 v3 ++ with stepping >= 4) to avoid TSX on kernels that weren't ++ updated with the latest microcode package (which disables ++ broken feature by default). */ ++ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx &= ~(bit_RTM); ++ break; + } + } + +@@ -148,9 +166,11 @@ init_cpu_features (struct cpu_features * + /* This spells out "AuthenticAMD". 
*/ + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + { ++ unsigned int stepping; ++ + kind = arch_kind_amd; + +- get_common_indeces (cpu_features, &family, &model); ++ get_common_indeces (cpu_features, &family, &model, &stepping); + + ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; + diff --git a/SOURCES/glibc-rh841653-2.patch b/SOURCES/glibc-rh841653-2.patch new file mode 100644 index 0000000..5738a0e --- /dev/null +++ b/SOURCES/glibc-rh841653-2.patch @@ -0,0 +1,122 @@ +commit 68cc29355f3334c7ad18f648ff9a6383a0916d23 +Author: Andi Kleen +Date: Fri Jun 28 05:19:37 2013 -0700 + + Add minimal test suite changes for elision enabled kernels + + tst-mutex5 and 8 test some behaviour not required by POSIX, + that elision changes. This changes these tests to not check + this when elision is enabled at configure time. +Index: glibc-2.17-c758a686/nptl/tst-mutex5.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/tst-mutex5.c ++++ glibc-2.17-c758a686/nptl/tst-mutex5.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + + #ifndef TYPE +@@ -85,6 +86,8 @@ do_test (void) + return 1; + } + ++ /* Elided locks do not time out. 
*/ ++#ifdef ENABLE_LOCK_ELISION + if (pthread_mutex_trylock (&m) == 0) + { + puts ("mutex_trylock succeeded"); +@@ -180,6 +183,7 @@ do_test (void) + puts ("3rd timedlock didn't return right away"); + return 1; + } ++#endif + + if (pthread_mutex_unlock (&m) != 0) + { +Index: glibc-2.17-c758a686/nptl/tst-mutex8.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/tst-mutex8.c ++++ glibc-2.17-c758a686/nptl/tst-mutex8.c +@@ -93,6 +93,8 @@ tf (void *arg) + static int + check_type (const char *mas, pthread_mutexattr_t *ma) + { ++ int e __attribute__((unused)); ++ + if (pthread_mutex_init (m, ma) != 0) + { + printf ("1st mutex_init failed for %s\n", mas); +@@ -117,7 +119,10 @@ check_type (const char *mas, pthread_mut + return 1; + } + +- int e = pthread_mutex_destroy (m); ++ /* Elided mutexes don't fail destroy. If elision is not explicitly disabled ++ we don't know, so can also not check this. */ ++#ifndef ENABLE_LOCK_ELISION ++ e = pthread_mutex_destroy (m); + if (e == 0) + { + printf ("mutex_destroy of self-locked mutex succeeded for %s\n", mas); +@@ -129,6 +134,7 @@ check_type (const char *mas, pthread_mut + mas); + return 1; + } ++#endif + + if (pthread_mutex_unlock (m) != 0) + { +@@ -142,6 +148,8 @@ check_type (const char *mas, pthread_mut + return 1; + } + ++ /* Elided mutexes don't fail destroy. */ ++#ifndef ENABLE_LOCK_ELISION + e = pthread_mutex_destroy (m); + if (e == 0) + { +@@ -155,6 +163,7 @@ mutex_destroy of self-trylocked mutex di + mas); + return 1; + } ++#endif + + if (pthread_mutex_unlock (m) != 0) + { +@@ -189,6 +198,8 @@ mutex_destroy of self-trylocked mutex di + return 1; + } + ++ /* Elided mutexes don't fail destroy. 
*/ ++#ifndef ENABLE_LOCK_ELISION + e = pthread_mutex_destroy (m); + if (e == 0) + { +@@ -201,6 +212,7 @@ mutex_destroy of self-trylocked mutex di + mutex_destroy of condvar-used mutex did not return EBUSY for %s\n", mas); + return 1; + } ++#endif + + done = true; + if (pthread_cond_signal (&c) != 0) +@@ -259,6 +271,8 @@ mutex_destroy of condvar-used mutex did + return 1; + } + ++ /* Elided mutexes don't fail destroy. */ ++#ifndef ENABLE_LOCK_ELISION + e = pthread_mutex_destroy (m); + if (e == 0) + { +@@ -273,6 +287,7 @@ mutex_destroy of condvar-used mutex did + mas); + return 1; + } ++#endif + + if (pthread_cancel (th) != 0) + { diff --git a/SOURCES/glibc-rh841653-3.patch b/SOURCES/glibc-rh841653-3.patch new file mode 100644 index 0000000..5ce6f08 --- /dev/null +++ b/SOURCES/glibc-rh841653-3.patch @@ -0,0 +1,541 @@ +commit e8c659d74e011346785355eeef03b7fb6f533c61 +Author: Andi Kleen +Date: Sat Dec 22 01:03:04 2012 -0800 + + Add elision to pthread_mutex_{try,timed,un}lock + + Add elision paths to the basic mutex locks. + + The normal path has a check for RTM and upgrades the lock + to RTM when available. Trylocks cannot automatically upgrade, + so they check for elision every time. + + We use a 4 byte value in the mutex to store the lock + elision adaptation state. This is separate from the adaptive + spin state and uses a separate field. + + Condition variables currently do not support elision. + + Recursive mutexes and condition variables may be supported at some point, + but are not in the current implementation. Also "trylock" will + not automatically enable elision unless some other lock call + has been already called on the lock. + + This version does not use IFUNC, so it means every lock has one + additional check for elision. Benchmarking showed the overhead + to be negligible. 
+Index: glibc-2.17-c758a686/nptl/pthreadP.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthreadP.h ++++ glibc-2.17-c758a686/nptl/pthreadP.h +@@ -110,8 +110,10 @@ enum + + #define PTHREAD_MUTEX_TYPE(m) \ + ((m)->__data.__kind & 127) ++/* Don't include NO_ELISION, as that type is always the same ++ as the underlying lock type. */ + #define PTHREAD_MUTEX_TYPE_ELISION(m) \ +- ((m)->__data.__kind & (127|PTHREAD_MUTEX_ELISION_FLAGS_NP)) ++ ((m)->__data.__kind & (127|PTHREAD_MUTEX_ELISION_NP)) + + #if LLL_PRIVATE == 0 && LLL_SHARED == 128 + # define PTHREAD_MUTEX_PSHARED(m) \ +Index: glibc-2.17-c758a686/nptl/pthread_mutex_lock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_lock.c ++++ glibc-2.17-c758a686/nptl/pthread_mutex_lock.c +@@ -25,6 +25,14 @@ + #include + #include + ++#ifndef lll_lock_elision ++#define lll_lock_elision(lock, try_lock, private) ({ \ ++ lll_lock (lock, private); 0; }) ++#endif ++ ++#ifndef lll_trylock_elision ++#define lll_trylock_elision(a,t) lll_trylock(a) ++#endif + + #ifndef LLL_MUTEX_LOCK + # define LLL_MUTEX_LOCK(mutex) \ +@@ -34,39 +42,60 @@ + # define LLL_ROBUST_MUTEX_LOCK(mutex, id) \ + lll_robust_lock ((mutex)->__data.__lock, id, \ + PTHREAD_ROBUST_MUTEX_PSHARED (mutex)) ++# define LLL_MUTEX_LOCK_ELISION(mutex) \ ++ lll_lock_elision ((mutex)->__data.__lock, (mutex)->__data.__elision, \ ++ PTHREAD_MUTEX_PSHARED (mutex)) ++# define LLL_MUTEX_TRYLOCK_ELISION(mutex) \ ++ lll_trylock_elision((mutex)->__data.__lock, (mutex)->__data.__elision, \ ++ PTHREAD_MUTEX_PSHARED (mutex)) + #endif + ++#ifndef FORCE_ELISION ++#define FORCE_ELISION(m, s) ++#endif + + static int __pthread_mutex_lock_full (pthread_mutex_t *mutex) + __attribute_noinline__; + +- + int + __pthread_mutex_lock (mutex) + pthread_mutex_t *mutex; + { + assert (sizeof (mutex->__size) >= sizeof (mutex->__data)); + +- unsigned int type = PTHREAD_MUTEX_TYPE 
(mutex); ++ unsigned int type = PTHREAD_MUTEX_TYPE_ELISION (mutex); + + LIBC_PROBE (mutex_entry, 1, mutex); + +- if (__builtin_expect (type & ~PTHREAD_MUTEX_KIND_MASK_NP, 0)) ++ if (__builtin_expect (type & ~(PTHREAD_MUTEX_KIND_MASK_NP ++ | PTHREAD_MUTEX_ELISION_FLAGS_NP), 0)) + return __pthread_mutex_lock_full (mutex); + +- pid_t id = THREAD_GETMEM (THREAD_SELF, tid); +- +- if (__builtin_expect (type, PTHREAD_MUTEX_TIMED_NP) +- == PTHREAD_MUTEX_TIMED_NP) ++ if (__builtin_expect (type == PTHREAD_MUTEX_TIMED_NP, 1)) + { ++ FORCE_ELISION (mutex, goto elision); + simple: + /* Normal mutex. */ + LLL_MUTEX_LOCK (mutex); + assert (mutex->__data.__owner == 0); + } +- else if (__builtin_expect (type == PTHREAD_MUTEX_RECURSIVE_NP, 1)) ++#ifdef HAVE_ELISION ++ else if (__builtin_expect (type == PTHREAD_MUTEX_TIMED_ELISION_NP, 1)) ++ { ++ elision: __attribute__((unused)) ++ /* This case can never happen on a system without elision, ++ as the mutex type initialization functions will not ++ allow to set the elision flags. */ ++ /* Don't record owner or users for elision case. This is a ++ tail call. */ ++ return LLL_MUTEX_LOCK_ELISION (mutex); ++ } ++#endif ++ else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex) ++ == PTHREAD_MUTEX_RECURSIVE_NP, 1)) + { + /* Recursive mutex. */ ++ pid_t id = THREAD_GETMEM (THREAD_SELF, tid); + + /* Check whether we already hold the mutex. */ + if (mutex->__data.__owner == id) +@@ -87,7 +116,8 @@ __pthread_mutex_lock (mutex) + assert (mutex->__data.__owner == 0); + mutex->__data.__count = 1; + } +- else if (__builtin_expect (type == PTHREAD_MUTEX_ADAPTIVE_NP, 1)) ++ else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex) ++ == PTHREAD_MUTEX_ADAPTIVE_NP, 1)) + { + if (! 
__is_smp) + goto simple; +@@ -117,13 +147,16 @@ __pthread_mutex_lock (mutex) + } + else + { +- assert (type == PTHREAD_MUTEX_ERRORCHECK_NP); ++ pid_t id = THREAD_GETMEM (THREAD_SELF, tid); ++ assert (PTHREAD_MUTEX_TYPE (mutex) == PTHREAD_MUTEX_ERRORCHECK_NP); + /* Check whether we already hold the mutex. */ + if (__builtin_expect (mutex->__data.__owner == id, 0)) + return EDEADLK; + goto simple; + } + ++ pid_t id = THREAD_GETMEM (THREAD_SELF, tid); ++ + /* Record the ownership. */ + mutex->__data.__owner = id; + #ifndef NO_INCR +Index: glibc-2.17-c758a686/nptl/pthread_mutex_timedlock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_timedlock.c ++++ glibc-2.17-c758a686/nptl/pthread_mutex_timedlock.c +@@ -25,6 +25,17 @@ + + #include + ++#ifndef lll_timedlock_elision ++#define lll_timedlock_elision(a,dummy,b,c) lll_timedlock(a, b, c) ++#endif ++ ++#ifndef lll_trylock_elision ++#define lll_trylock_elision(a,t) lll_trylock(a) ++#endif ++ ++#ifndef FORCE_ELISION ++#define FORCE_ELISION(m, s) ++#endif + + int + pthread_mutex_timedlock (mutex, abstime) +@@ -40,10 +51,11 @@ pthread_mutex_timedlock (mutex, abstime) + /* We must not check ABSTIME here. If the thread does not block + abstime must not be checked for a valid value. */ + +- switch (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex), ++ switch (__builtin_expect (PTHREAD_MUTEX_TYPE_ELISION (mutex), + PTHREAD_MUTEX_TIMED_NP)) + { + /* Recursive mutex. */ ++ case PTHREAD_MUTEX_RECURSIVE_NP|PTHREAD_MUTEX_ELISION_NP: + case PTHREAD_MUTEX_RECURSIVE_NP: + /* Check whether we already hold the mutex. */ + if (mutex->__data.__owner == id) +@@ -78,12 +90,22 @@ pthread_mutex_timedlock (mutex, abstime) + /* FALLTHROUGH */ + + case PTHREAD_MUTEX_TIMED_NP: ++ FORCE_ELISION (mutex, goto elision); + simple: + /* Normal mutex. 
*/ + result = lll_timedlock (mutex->__data.__lock, abstime, + PTHREAD_MUTEX_PSHARED (mutex)); + break; + ++ case PTHREAD_MUTEX_TIMED_ELISION_NP: ++ elision: __attribute__((unused)) ++ /* Don't record ownership */ ++ return lll_timedlock_elision (mutex->__data.__lock, ++ mutex->__data.__spins, ++ abstime, ++ PTHREAD_MUTEX_PSHARED (mutex)); ++ ++ + case PTHREAD_MUTEX_ADAPTIVE_NP: + if (! __is_smp) + goto simple; +Index: glibc-2.17-c758a686/nptl/pthread_mutex_trylock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_trylock.c ++++ glibc-2.17-c758a686/nptl/pthread_mutex_trylock.c +@@ -22,6 +22,16 @@ + #include "pthreadP.h" + #include + ++#ifndef lll_trylock_elision ++#define lll_trylock_elision(a,t) lll_trylock(a) ++#endif ++ ++#ifndef DO_ELISION ++#define DO_ELISION(m) 0 ++#endif ++ ++/* We don't force elision in trylock, because this can lead to inconsistent ++ lock state if the lock was actually busy. */ + + int + __pthread_mutex_trylock (mutex) +@@ -30,10 +40,11 @@ __pthread_mutex_trylock (mutex) + int oldval; + pid_t id = THREAD_GETMEM (THREAD_SELF, tid); + +- switch (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex), ++ switch (__builtin_expect (PTHREAD_MUTEX_TYPE_ELISION (mutex), + PTHREAD_MUTEX_TIMED_NP)) + { + /* Recursive mutex. */ ++ case PTHREAD_MUTEX_RECURSIVE_NP|PTHREAD_MUTEX_ELISION_NP: + case PTHREAD_MUTEX_RECURSIVE_NP: + /* Check whether we already hold the mutex. */ + if (mutex->__data.__owner == id) +@@ -57,10 +68,20 @@ __pthread_mutex_trylock (mutex) + } + break; + +- case PTHREAD_MUTEX_ERRORCHECK_NP: ++ case PTHREAD_MUTEX_TIMED_ELISION_NP: ++ elision: ++ if (lll_trylock_elision (mutex->__data.__lock, ++ mutex->__data.__elision) != 0) ++ break; ++ /* Don't record the ownership. */ ++ return 0; ++ + case PTHREAD_MUTEX_TIMED_NP: ++ if (DO_ELISION (mutex)) ++ goto elision; ++ /*FALL THROUGH*/ + case PTHREAD_MUTEX_ADAPTIVE_NP: +- /* Normal mutex. 
*/ ++ case PTHREAD_MUTEX_ERRORCHECK_NP: + if (lll_trylock (mutex->__data.__lock) != 0) + break; + +@@ -378,4 +399,9 @@ __pthread_mutex_trylock (mutex) + + return EBUSY; + } ++ ++#ifndef __pthread_mutex_trylock ++#ifndef pthread_mutex_trylock + strong_alias (__pthread_mutex_trylock, pthread_mutex_trylock) ++#endif ++#endif +Index: glibc-2.17-c758a686/nptl/pthread_mutex_unlock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_unlock.c ++++ glibc-2.17-c758a686/nptl/pthread_mutex_unlock.c +@@ -23,6 +23,10 @@ + #include + #include + ++#ifndef lll_unlock_elision ++#define lll_unlock_elision(a,b) ({ lll_unlock (a,b); 0; }) ++#endif ++ + static int + internal_function + __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr) +@@ -34,8 +38,9 @@ __pthread_mutex_unlock_usercnt (mutex, d + pthread_mutex_t *mutex; + int decr; + { +- int type = PTHREAD_MUTEX_TYPE (mutex); +- if (__builtin_expect (type & ~PTHREAD_MUTEX_KIND_MASK_NP, 0)) ++ int type = PTHREAD_MUTEX_TYPE_ELISION (mutex); ++ if (__builtin_expect (type & ++ ~(PTHREAD_MUTEX_KIND_MASK_NP|PTHREAD_MUTEX_ELISION_FLAGS_NP), 0)) + return __pthread_mutex_unlock_full (mutex, decr); + + if (__builtin_expect (type, PTHREAD_MUTEX_TIMED_NP) +@@ -55,7 +60,14 @@ __pthread_mutex_unlock_usercnt (mutex, d + + return 0; + } +- else if (__builtin_expect (type == PTHREAD_MUTEX_RECURSIVE_NP, 1)) ++ else if (__builtin_expect (type == PTHREAD_MUTEX_TIMED_ELISION_NP, 1)) ++ { ++ /* Don't reset the owner/users fields for elision. */ ++ return lll_unlock_elision (mutex->__data.__lock, ++ PTHREAD_MUTEX_PSHARED (mutex)); ++ } ++ else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex) ++ == PTHREAD_MUTEX_RECURSIVE_NP, 1)) + { + /* Recursive mutex. 
*/ + if (mutex->__data.__owner != THREAD_GETMEM (THREAD_SELF, tid)) +@@ -66,7 +78,8 @@ __pthread_mutex_unlock_usercnt (mutex, d + return 0; + goto normal; + } +- else if (__builtin_expect (type == PTHREAD_MUTEX_ADAPTIVE_NP, 1)) ++ else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex) ++ == PTHREAD_MUTEX_ADAPTIVE_NP, 1)) + goto normal; + else + { +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c +@@ -2,8 +2,15 @@ + + #define LLL_MUTEX_LOCK(mutex) \ + lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)) ++ ++/* Not actually elided so far. Needed? */ ++#define LLL_MUTEX_LOCK_ELISION(mutex) \ ++ ({ lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)); 0; }) ++ + #define LLL_MUTEX_TRYLOCK(mutex) \ + lll_cond_trylock ((mutex)->__data.__lock) ++#define LLL_MUTEX_TRYLOCK_ELISION(mutex) LLL_MUTEX_TRYLOCK(mutex) ++ + #define LLL_ROBUST_MUTEX_LOCK(mutex, id) \ + lll_robust_cond_lock ((mutex)->__data.__lock, id, \ + PTHREAD_ROBUST_MUTEX_PSHARED (mutex)) +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h +@@ -101,14 +101,23 @@ typedef union + binary compatibility. 
*/ + int __kind; + #ifdef __x86_64__ +- int __spins; ++ short __spins; ++ short __elision; + __pthread_list_t __list; + # define __PTHREAD_MUTEX_HAVE_PREV 1 ++# define __PTHREAD_MUTEX_HAVE_ELISION 1 + #else + unsigned int __nusers; + __extension__ union + { +- int __spins; ++ struct ++ { ++ short __espins; ++ short __elision; ++# define __spins d.__espins ++# define __elision d.__elision ++# define __PTHREAD_MUTEX_HAVE_ELISION 2 ++ } d; + __pthread_slist_t __list; + }; + #endif +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c +@@ -24,9 +24,8 @@ + + #define aconf __elision_aconf + +-/* Try to elide a futex trylock. FUTEX is the futex variable. TRY_LOCK is the +- adaptation counter in the mutex. UPGRADED is != 0 when this is for an +- automatically upgraded lock. */ ++/* Try to elide a futex trylock. FUTEX is the futex variable. ADAPT_COUNT is the ++ adaptation counter in the mutex. */ + + int + __lll_trylock_elision (int *futex, short *adapt_count) +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h +@@ -0,0 +1,31 @@ ++/* force-elision.h: Automatic enabling of elision for mutexes ++ Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Check for elision on this lock without upgrading. */ ++#define DO_ELISION(m) \ ++ (__pthread_force_elision \ ++ && (m->__data.__kind & PTHREAD_MUTEX_NO_ELISION_NP) == 0) \ ++ ++/* Automatically enable elision for existing user lock kinds. */ ++#define FORCE_ELISION(m, s) \ ++ if (__pthread_force_elision \ ++ && (m->__data.__kind & PTHREAD_MUTEX_ELISION_FLAGS_NP) == 0) \ ++ { \ ++ mutex->__data.__kind |= PTHREAD_MUTEX_ELISION_NP; \ ++ s; \ ++ } +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c +@@ -0,0 +1,21 @@ ++/* Copyright (C) 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* The cond lock is not actually elided yet, but we still need to handle ++ already elided locks. 
*/ ++#include ++#include "sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c" +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c +@@ -0,0 +1,21 @@ ++/* Elided version of pthread_mutex_lock. ++ Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++#include ++#include "force-elision.h" ++ ++#include "nptl/pthread_mutex_lock.c" +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c +@@ -0,0 +1,20 @@ ++/* Elided version of pthread_mutex_timedlock. ++ Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++#include ++#include "force-elision.h" ++#include "nptl/pthread_mutex_timedlock.c" +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c +=================================================================== +--- /dev/null ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c +@@ -0,0 +1,21 @@ ++/* Elided version of pthread_mutex_trylock. ++ Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++#include ++#include "force-elision.h" ++ ++#include "nptl/pthread_mutex_trylock.c" diff --git a/SOURCES/glibc-rh841653-4.patch b/SOURCES/glibc-rh841653-4.patch new file mode 100644 index 0000000..0477254 --- /dev/null +++ b/SOURCES/glibc-rh841653-4.patch @@ -0,0 +1,29 @@ +commit 49186d21ef2d87986bccaf0a7c45c48c91b265f3 +Author: Andi Kleen +Date: Thu Jun 27 11:15:06 2013 -0700 + + Disable elision for any pthread_mutexattr_settype call + + PTHREAD_MUTEX_NORMAL requires deadlock for nesting, DEFAULT + does not. Since glibc uses the same value (0) disable elision + for any call to pthread_mutexattr_settype() with a 0 value. + This implies that a program can disable elision by doing + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL) + + Based on a original proposal by Rich Felker. +Index: glibc-2.17-c758a686/nptl/pthread_mutexattr_settype.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutexattr_settype.c ++++ glibc-2.17-c758a686/nptl/pthread_mutexattr_settype.c +@@ -30,6 +30,11 @@ __pthread_mutexattr_settype (attr, kind) + if (kind < PTHREAD_MUTEX_NORMAL || kind > PTHREAD_MUTEX_ADAPTIVE_NP) + return EINVAL; + ++ /* Cannot distinguish between DEFAULT and NORMAL. So any settype ++ call disables elision for now. 
*/ ++ if (kind == PTHREAD_MUTEX_DEFAULT) ++ kind |= PTHREAD_MUTEX_NO_ELISION_NP; ++ + iattr = (struct pthread_mutexattr *) attr; + + iattr->mutexkind = (iattr->mutexkind & PTHREAD_MUTEXATTR_FLAG_BITS) | kind; diff --git a/SOURCES/glibc-rh841653-5.patch b/SOURCES/glibc-rh841653-5.patch new file mode 100644 index 0000000..32ed90b --- /dev/null +++ b/SOURCES/glibc-rh841653-5.patch @@ -0,0 +1,45 @@ +commit 52dfbe137e41f2da1f5584f6dd9ea89589c71228 +Author: Siddhesh Poyarekar +Date: Thu Jul 4 20:33:03 2013 +0530 + + Fix lock elision help text in INSTALL and configure +Index: glibc-2.17-c758a686/INSTALL +=================================================================== +--- glibc-2.17-c758a686.orig/INSTALL ++++ glibc-2.17-c758a686/INSTALL +@@ -141,7 +141,7 @@ will be used, and CFLAGS sets optimizati + only if you understand and accept those risks. + + `--enable-lock-elision=yes' +- Enable lock elision for pthread mutexes and rwlocks by default. ++ Enable lock elision for pthread mutexes by default. + + `--build=BUILD-SYSTEM' + `--host=HOST-SYSTEM' +Index: glibc-2.17-c758a686/configure +=================================================================== +--- glibc-2.17-c758a686.orig/configure ++++ glibc-2.17-c758a686/configure +@@ -1407,8 +1407,7 @@ Optional Features: + initialize __stack_chk_guard canary with a random + number at program start + --enable-lock-elision=yes/no +- Enable lock elision for pthread mutexes and rwlocks +- by default ++ Enable lock elision for pthread mutexes by default + --enable-add-ons[=DIRS...] + configure and build add-ons in DIR1,DIR2,... 
search + for add-ons if no parameter given +Index: glibc-2.17-c758a686/configure.in +=================================================================== +--- glibc-2.17-c758a686.orig/configure.in ++++ glibc-2.17-c758a686/configure.in +@@ -157,7 +157,7 @@ fi + + AC_ARG_ENABLE([lock-elision], + AC_HELP_STRING([--enable-lock-elision[=yes/no]], +- [Enable lock elision for pthread mutexes and rwlocks by default]), ++ [Enable lock elision for pthread mutexes by default]), + [enable_lock_elision=$enableval], + [enable_lock_elision=no]) + if test "$enable_lock_elision" = yes ; then diff --git a/SOURCES/glibc-rh841653-6.patch b/SOURCES/glibc-rh841653-6.patch new file mode 100644 index 0000000..047c17c --- /dev/null +++ b/SOURCES/glibc-rh841653-6.patch @@ -0,0 +1,307 @@ +commit 075b9322c9e091b7e139f4c57e07d78d896c7a62 +Author: Dominik Vogt +Date: Fri Jul 19 05:16:28 2013 +0000 + + Clean up whitespace in lock elision patches. + + Signed-off-by: Carlos O'Donell +Index: glibc-2.17-c758a686/nptl/pthread_mutex_lock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_lock.c ++++ glibc-2.17-c758a686/nptl/pthread_mutex_lock.c +@@ -86,7 +86,7 @@ __pthread_mutex_lock (mutex) + /* This case can never happen on a system without elision, + as the mutex type initialization functions will not + allow to set the elision flags. */ +- /* Don't record owner or users for elision case. This is a ++ /* Don't record owner or users for elision case. This is a + tail call. */ + return LLL_MUTEX_LOCK_ELISION (mutex); + } +Index: glibc-2.17-c758a686/nptl/pthread_mutex_trylock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_trylock.c ++++ glibc-2.17-c758a686/nptl/pthread_mutex_trylock.c +@@ -31,7 +31,7 @@ + #endif + + /* We don't force elision in trylock, because this can lead to inconsistent +- lock state if the lock was actually busy. 
*/ ++ lock state if the lock was actually busy. */ + + int + __pthread_mutex_trylock (mutex) +@@ -73,7 +73,7 @@ __pthread_mutex_trylock (mutex) + if (lll_trylock_elision (mutex->__data.__lock, + mutex->__data.__elision) != 0) + break; +- /* Don't record the ownership. */ ++ /* Don't record the ownership. */ + return 0; + + case PTHREAD_MUTEX_TIMED_NP: +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h +@@ -112,7 +112,7 @@ typedef union + { + struct + { +- short __espins; ++ short __espins; + short __elision; + # define __spins d.__espins + # define __elision d.__elision +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c +@@ -14,7 +14,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ + + #include "config.h" + #include +@@ -32,7 +32,7 @@ struct elision_config __elision_aconf = + acquisition attempts. */ + .skip_lock_busy = 3, + /* How often to not attempt to use elision if a transaction aborted due +- to reasons other than other threads' memory accesses. Expressed in ++ to reasons other than other threads' memory accesses. Expressed in + number of lock acquisition attempts. */ + .skip_lock_internal_abort = 3, + /* How often we retry using elision if there is chance for the transaction +@@ -52,13 +52,14 @@ int __rwlock_rtm_enabled attribute_hidde + + int __rwlock_rtm_read_retries attribute_hidden = 3; + +-/* Set when the CPU supports elision. When false elision is never attempted. 
*/ ++/* Set when the CPU supports elision. When false elision is never attempted. ++ */ + + int __elision_available attribute_hidden; + +-/* Force elision for all new locks. This is used to decide whether existing ++/* Force elision for all new locks. This is used to decide whether existing + DEFAULT locks should be automatically upgraded to elision in +- pthread_mutex_lock(). Disabled for suid programs. Only used when elision ++ pthread_mutex_lock(). Disabled for suid programs. Only used when elision + is available. */ + + int __pthread_force_elision attribute_hidden; +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.h +@@ -14,7 +14,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ + #ifndef _ELISION_CONF_H + #define _ELISION_CONF_H 1 + +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c +@@ -14,7 +14,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ + + #include + #include "pthreadP.h" +@@ -58,7 +58,7 @@ __lll_lock_elision (int *futex, short *a + if (*futex == 0) + return 0; + +- /* Lock was busy. Fall back to normal locking. ++ /* Lock was busy. Fall back to normal locking. + Could also _xend here but xabort with 0xff code + is more visible in the profiler. 
*/ + _xabort (_ABORT_LOCK_BUSY); +@@ -69,12 +69,12 @@ __lll_lock_elision (int *futex, short *a + if ((status & _XABORT_EXPLICIT) + && _XABORT_CODE (status) == _ABORT_LOCK_BUSY) + { +- /* Right now we skip here. Better would be to wait a bit +- and retry. This likely needs some spinning. */ ++ /* Right now we skip here. Better would be to wait a bit ++ and retry. This likely needs some spinning. */ + if (*adapt_count != aconf.skip_lock_busy) + *adapt_count = aconf.skip_lock_busy; + } +- /* Internal abort. There is no chance for retry. ++ /* Internal abort. There is no chance for retry. + Use the normal locking and next time use lock. + Be careful to avoid writing to the lock. */ + else if (*adapt_count != aconf.skip_lock_internal_abort) +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-timed.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-timed.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-timed.c +@@ -14,7 +14,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ + + #include + #include +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c +@@ -14,7 +14,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ + + #include + #include +@@ -24,14 +24,14 @@ + + #define aconf __elision_aconf + +-/* Try to elide a futex trylock. FUTEX is the futex variable. ADAPT_COUNT is the +- adaptation counter in the mutex. */ ++/* Try to elide a futex trylock. FUTEX is the futex variable. 
ADAPT_COUNT is ++ the adaptation counter in the mutex. */ + + int + __lll_trylock_elision (int *futex, short *adapt_count) + { + /* Implement POSIX semantics by forbiding nesting +- trylock. Sorry. After the abort the code is re-executed ++ trylock. Sorry. After the abort the code is re-executed + non transactional and if the lock was already locked + return an error. */ + _xabort (_ABORT_NESTED_TRYLOCK); +@@ -46,7 +46,7 @@ __lll_trylock_elision (int *futex, short + if (*futex == 0) + return 0; + +- /* Lock was busy. Fall back to normal locking. ++ /* Lock was busy. Fall back to normal locking. + Could also _xend here but xabort with 0xff code + is more visible in the profiler. */ + _xabort (_ABORT_LOCK_BUSY); +@@ -54,12 +54,12 @@ __lll_trylock_elision (int *futex, short + + if (!(status & _XABORT_RETRY)) + { +- /* Internal abort. No chance for retry. For future ++ /* Internal abort. No chance for retry. For future + locks don't try speculation for some time. */ + if (*adapt_count != aconf.skip_trylock_internal_abort) + *adapt_count = aconf.skip_trylock_internal_abort; + } +- /* Could do some retries here. */ ++ /* Could do some retries here. */ + } + else + { +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/force-elision.h +@@ -14,7 +14,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ + + /* Check for elision on this lock without upgrading. 
*/ + #define DO_ELISION(m) \ +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/hle.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/hle.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/hle.h +@@ -1,5 +1,5 @@ +-/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers +- that do not support the intrinsics and instructions yet. */ ++/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers ++ that do not support the intrinsics and instructions yet. */ + #ifndef _HLE_H + #define _HLE_H 1 + +@@ -28,7 +28,7 @@ + /* Official RTM intrinsics interface matching gcc/icc, but works + on older gcc compatible compilers and binutils. + We should somehow detect if the compiler supports it, because +- it may be able to generate slightly better code. */ ++ it may be able to generate slightly better code. */ + + #define _XBEGIN_STARTED (~0u) + #define _XABORT_EXPLICIT (1 << 0) +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_cond_lock.c +@@ -13,9 +13,10 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ + + /* The cond lock is not actually elided yet, but we still need to handle + already elided locks. 
*/ + #include ++ + #include "sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c" +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_lock.c +@@ -14,7 +14,8 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ ++ + #include + #include "force-elision.h" + +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_timedlock.c +@@ -14,7 +14,9 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ ++ + #include + #include "force-elision.h" ++ + #include "nptl/pthread_mutex_timedlock.c" +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/pthread_mutex_trylock.c +@@ -14,7 +14,8 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . 
*/ ++ + #include + #include "force-elision.h" + diff --git a/SOURCES/glibc-rh841653-7.patch b/SOURCES/glibc-rh841653-7.patch new file mode 100644 index 0000000..9a2c9e8 --- /dev/null +++ b/SOURCES/glibc-rh841653-7.patch @@ -0,0 +1,18 @@ +commit f8bdf1f0b623f05a80cb23890f165cb0cf8bd8c3 +Author: Stefan Liebler +Date: Mon May 26 11:12:44 2014 +0200 + + Fix typo in tst-mutex5 ifndef -> ifdef +Index: glibc-2.17-c758a686/nptl/tst-mutex5.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/tst-mutex5.c ++++ glibc-2.17-c758a686/nptl/tst-mutex5.c +@@ -87,7 +87,7 @@ do_test (void) + } + + /* Elided locks do not time out. */ +-#ifdef ENABLE_LOCK_ELISION ++#ifndef ENABLE_LOCK_ELISION + if (pthread_mutex_trylock (&m) == 0) + { + puts ("mutex_trylock succeeded"); diff --git a/SOURCES/glibc-rh841653-8.patch b/SOURCES/glibc-rh841653-8.patch new file mode 100644 index 0000000..5c1af8f --- /dev/null +++ b/SOURCES/glibc-rh841653-8.patch @@ -0,0 +1,18 @@ +commit 673659263d956f45f1ce0c66900fa7f1129db74a +Author: Stefan Liebler +Date: Mon May 26 11:14:25 2014 +0200 + + Disable lock elision for PTHREAD_MUTEX_NORMAL. +Index: glibc-2.17-c758a686/nptl/pthread_mutexattr_settype.c +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/pthread_mutexattr_settype.c ++++ glibc-2.17-c758a686/nptl/pthread_mutexattr_settype.c +@@ -32,7 +32,7 @@ __pthread_mutexattr_settype (attr, kind) + + /* Cannot distinguish between DEFAULT and NORMAL. So any settype + call disables elision for now. 
*/ +- if (kind == PTHREAD_MUTEX_DEFAULT) ++ if (kind == PTHREAD_MUTEX_NORMAL) + kind |= PTHREAD_MUTEX_NO_ELISION_NP; + + iattr = (struct pthread_mutexattr *) attr; diff --git a/SOURCES/glibc-rh841653-9.patch b/SOURCES/glibc-rh841653-9.patch new file mode 100644 index 0000000..7a98cc4 --- /dev/null +++ b/SOURCES/glibc-rh841653-9.patch @@ -0,0 +1,26 @@ +commit 2307e1261e7ee784afd424a46ad08d3fbed33ba3 +Author: Siddhesh Poyarekar +Date: Tue Jun 24 22:40:07 2014 +0530 + + Fix namespace violation in pthreadtypes.h (BZ #17084) + + This was causing conformtest failures on i386. + +Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h +=================================================================== +--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h +@@ -114,10 +114,10 @@ typedef union + { + short __espins; + short __elision; +-# define __spins d.__espins +-# define __elision d.__elision ++# define __spins __elision_data.__espins ++# define __elision __elision_data.__elision + # define __PTHREAD_MUTEX_HAVE_ELISION 2 +- } d; ++ } __elision_data; + __pthread_slist_t __list; + }; + #endif diff --git a/SOURCES/glibc-rh906468-1.patch b/SOURCES/glibc-rh906468-1.patch new file mode 100644 index 0000000..09f7ebe --- /dev/null +++ b/SOURCES/glibc-rh906468-1.patch @@ -0,0 +1,542 @@ +Backport of these upstream commits: + +commit 29d794863cd6e03115d3670707cc873a9965ba92 +Author: Florian Weimer +Date: Thu Apr 14 09:17:02 2016 +0200 + + malloc: Run fork handler as late as possible [BZ #19431] + + Previously, a thread M invoking fork would acquire locks in this order: + + (M1) malloc arena locks (in the registered fork handler) + (M2) libio list lock + + A thread F invoking flush (NULL) would acquire locks in this order: + + (F1) libio list lock + (F2) individual _IO_FILE locks + + A thread G running getdelim would use this order: + + (G1) _IO_FILE 
lock + (G2) malloc arena lock + + After executing (M1), (F1), (G1), none of the threads can make progress. + + This commit changes the fork lock order to: + + (M'1) libio list lock + (M'2) malloc arena locks + + It explicitly encodes the lock order in the implementations of fork, + and does not rely on the registration order, thus avoiding the deadlock. + +commit 186fe877f3df0b84d57dfbf0386f6332c6aa69bc +Author: Florian Weimer +Date: Thu Apr 14 12:53:03 2016 +0200 + + malloc: Add missing internal_function attributes on function definitions + + Fixes build on i386 after commit 29d794863cd6e03115d3670707cc873a9965ba92. + +Index: b/malloc/Makefile +=================================================================== +--- a/malloc/Makefile ++++ b/malloc/Makefile +@@ -28,7 +28,7 @@ tests := mallocbug tst-malloc tst-valloc + tst-mallocstate tst-mcheck tst-mallocfork tst-trim1 \ + tst-malloc-usable \ + tst-malloc-backtrace tst-malloc-thread-exit \ +- tst-malloc-thread-fail ++ tst-malloc-thread-fail tst-malloc-fork-deadlock + test-srcs = tst-mtrace + + routines = malloc morecore mcheck mtrace obstack +@@ -49,6 +49,7 @@ $(objpfx)tst-malloc-thread-fail: $(commo + $(common-objpfx)nptl/libpthread_nonshared.a + $(objpfx)tst-malloc-thread-exit: $(common-objpfx)nptl/libpthread.so \ + $(common-objpfx)nptl/libpthread_nonshared.a ++$(objpfx)tst-malloc-fork-deadlock: $(shared-thread-library) + + # These should be removed by `make clean'. + extra-objs = mcheck-init.o libmcheck.a +Index: b/malloc/arena.c +=================================================================== +--- a/malloc/arena.c ++++ b/malloc/arena.c +@@ -162,10 +162,6 @@ static void (*save_free_hook) + const __malloc_ptr_t); + static void* save_arena; + +-#ifdef ATFORK_MEM +-ATFORK_MEM; +-#endif +- + /* Magic value for the thread-specific arena pointer when + malloc_atfork() is in use. 
*/ + +@@ -228,14 +224,15 @@ free_atfork(void* mem, const void *calle + /* Counter for number of times the list is locked by the same thread. */ + static unsigned int atfork_recursive_cntr; + +-/* The following two functions are registered via thread_atfork() to +- make sure that the mutexes remain in a consistent state in the +- fork()ed version of a thread. Also adapt the malloc and free hooks +- temporarily, because the `atfork' handler mechanism may use +- malloc/free internally (e.g. in LinuxThreads). */ ++/* The following three functions are called around fork from a ++ multi-threaded process. We do not use the general fork handler ++ mechanism to make sure that our handlers are the last ones being ++ called, so that other fork handlers can use the malloc ++ subsystem. */ + +-static void +-ptmalloc_lock_all (void) ++void ++internal_function ++__malloc_fork_lock_parent (void) + { + mstate ar_ptr; + +@@ -243,7 +240,7 @@ ptmalloc_lock_all (void) + return; + + /* We do not acquire free_list_lock here because we completely +- reconstruct free_list in ptmalloc_unlock_all2. */ ++ reconstruct free_list in __malloc_fork_unlock_child. */ + + if (mutex_trylock(&list_lock)) + { +@@ -268,7 +265,7 @@ ptmalloc_lock_all (void) + __free_hook = free_atfork; + /* Only the current thread may perform malloc/free calls now. + save_arena will be reattached to the current thread, in +- ptmalloc_lock_all, so save_arena->attached_threads is not ++ __malloc_fork_lock_parent, so save_arena->attached_threads is not + updated. */ + tsd_getspecific(arena_key, save_arena); + tsd_setspecific(arena_key, ATFORK_ARENA_PTR); +@@ -276,8 +273,9 @@ ptmalloc_lock_all (void) + ++atfork_recursive_cntr; + } + +-static void +-ptmalloc_unlock_all (void) ++void ++internal_function ++__malloc_fork_unlock_parent (void) + { + mstate ar_ptr; + +@@ -286,8 +284,8 @@ ptmalloc_unlock_all (void) + if (--atfork_recursive_cntr != 0) + return; + /* Replace ATFORK_ARENA_PTR with save_arena. 
+- save_arena->attached_threads was not changed in ptmalloc_lock_all +- and is still correct. */ ++ save_arena->attached_threads was not changed in ++ __malloc_fork_lock_parent and is still correct. */ + tsd_setspecific(arena_key, save_arena); + __malloc_hook = save_malloc_hook; + __free_hook = save_free_hook; +@@ -299,15 +297,9 @@ ptmalloc_unlock_all (void) + (void)mutex_unlock(&list_lock); + } + +-# ifdef __linux__ +- +-/* In NPTL, unlocking a mutex in the child process after a +- fork() is currently unsafe, whereas re-initializing it is safe and +- does not leak resources. Therefore, a special atfork handler is +- installed for the child. */ +- +-static void +-ptmalloc_unlock_all2 (void) ++void ++internal_function ++__malloc_fork_unlock_child (void) + { + mstate ar_ptr; + +@@ -338,12 +330,6 @@ ptmalloc_unlock_all2 (void) + atfork_recursive_cntr = 0; + } + +-# else +- +-# define ptmalloc_unlock_all2 ptmalloc_unlock_all +- +-# endif +- + #endif /* !NO_THREADS */ + + /* Initialization routine. */ +@@ -413,7 +399,6 @@ ptmalloc_init (void) + + tsd_key_create(&arena_key, NULL); + tsd_setspecific(arena_key, (void *)&main_arena); +- thread_atfork(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2); + const char *s = NULL; + if (__builtin_expect (_environ != NULL, 1)) + { +@@ -487,12 +472,6 @@ ptmalloc_init (void) + __malloc_initialized = 1; + } + +-/* There are platforms (e.g. Hurd) with a link-time hook mechanism. */ +-#ifdef thread_atfork_static +-thread_atfork_static(ptmalloc_lock_all, ptmalloc_unlock_all, \ +- ptmalloc_unlock_all2) +-#endif +- + + + /* Managing heaps and arenas (for concurrent threads) */ +@@ -827,7 +806,8 @@ _int_new_arena(size_t size) + limit is reached). At this point, some arena has to be attached + to two threads. We could acquire the arena lock before list_lock + to make it less likely that reused_arena picks this new arena, +- but this could result in a deadlock with ptmalloc_lock_all. 
*/ ++ but this could result in a deadlock with ++ __malloc_fork_lock_parent. */ + + (void) mutex_lock (&a->mutex); + +Index: b/malloc/malloc-internal.h +=================================================================== +--- /dev/null ++++ b/malloc/malloc-internal.h +@@ -0,0 +1,32 @@ ++/* Internal declarations for malloc, for use within libc. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#ifndef _MALLOC_PRIVATE_H ++#define _MALLOC_PRIVATE_H ++ ++/* Called in the parent process before a fork. */ ++void __malloc_fork_lock_parent (void) internal_function attribute_hidden; ++ ++/* Called in the parent process after a fork. */ ++void __malloc_fork_unlock_parent (void) internal_function attribute_hidden; ++ ++/* Called in the child process after a fork. 
*/ ++void __malloc_fork_unlock_child (void) internal_function attribute_hidden; ++ ++ ++#endif /* _MALLOC_PRIVATE_H */ +Index: b/malloc/malloc.c +=================================================================== +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -291,6 +291,7 @@ __malloc_assert (const char *assertion, + } + #endif + ++#include + + /* + INTERNAL_SIZE_T is the word-size used for internal bookkeeping +Index: b/malloc/tst-malloc-fork-deadlock.c +=================================================================== +--- /dev/null ++++ b/malloc/tst-malloc-fork-deadlock.c +@@ -0,0 +1,220 @@ ++/* Test concurrent fork, getline, and fflush (NULL). ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int do_test (void); ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" ++ ++enum { ++ /* Number of threads which call fork. */ ++ fork_thread_count = 4, ++ /* Number of threads which call getline (and, indirectly, ++ malloc). 
*/ ++ read_thread_count = 8, ++}; ++ ++static bool termination_requested; ++ ++static void * ++fork_thread_function (void *closure) ++{ ++ while (!__atomic_load_n (&termination_requested, __ATOMIC_RELAXED)) ++ { ++ pid_t pid = fork (); ++ if (pid < 0) ++ { ++ printf ("error: fork: %m\n"); ++ abort (); ++ } ++ else if (pid == 0) ++ _exit (17); ++ ++ int status; ++ if (waitpid (pid, &status, 0) < 0) ++ { ++ printf ("error: waitpid: %m\n"); ++ abort (); ++ } ++ if (!WIFEXITED (status) || WEXITSTATUS (status) != 17) ++ { ++ printf ("error: waitpid returned invalid status: %d\n", status); ++ abort (); ++ } ++ } ++ return NULL; ++} ++ ++static char *file_to_read; ++ ++static void * ++read_thread_function (void *closure) ++{ ++ FILE *f = fopen (file_to_read, "r"); ++ if (f == NULL) ++ { ++ printf ("error: fopen (%s): %m\n", file_to_read); ++ abort (); ++ } ++ ++ while (!__atomic_load_n (&termination_requested, __ATOMIC_RELAXED)) ++ { ++ rewind (f); ++ char *line = NULL; ++ size_t line_allocated = 0; ++ ssize_t ret = getline (&line, &line_allocated, f); ++ if (ret < 0) ++ { ++ printf ("error: getline: %m\n"); ++ abort (); ++ } ++ free (line); ++ } ++ fclose (f); ++ ++ return NULL; ++} ++ ++static void * ++flushall_thread_function (void *closure) ++{ ++ while (!__atomic_load_n (&termination_requested, __ATOMIC_RELAXED)) ++ if (fflush (NULL) != 0) ++ { ++ printf ("error: fflush (NULL): %m\n"); ++ abort (); ++ } ++ return NULL; ++} ++ ++static void ++create_threads (pthread_t *threads, size_t count, void *(*func) (void *)) ++{ ++ for (size_t i = 0; i < count; ++i) ++ { ++ int ret = pthread_create (threads + i, NULL, func, NULL); ++ if (ret != 0) ++ { ++ errno = ret; ++ printf ("error: pthread_create: %m\n"); ++ abort (); ++ } ++ } ++} ++ ++static void ++join_threads (pthread_t *threads, size_t count) ++{ ++ for (size_t i = 0; i < count; ++i) ++ { ++ int ret = pthread_join (threads[i], NULL); ++ if (ret != 0) ++ { ++ errno = ret; ++ printf ("error: pthread_join: %m\n"); ++ 
abort (); ++ } ++ } ++} ++ ++/* Create a file which consists of a single long line, and assigns ++ file_to_read. The hope is that this triggers an allocation in ++ getline which needs a lock. */ ++static void ++create_file_with_large_line (void) ++{ ++ int fd = create_temp_file ("bug19431-large-line", &file_to_read); ++ if (fd < 0) ++ { ++ printf ("error: create_temp_file: %m\n"); ++ abort (); ++ } ++ FILE *f = fdopen (fd, "w+"); ++ if (f == NULL) ++ { ++ printf ("error: fdopen: %m\n"); ++ abort (); ++ } ++ for (int i = 0; i < 50000; ++i) ++ fputc ('x', f); ++ fputc ('\n', f); ++ if (ferror (f)) ++ { ++ printf ("error: fputc: %m\n"); ++ abort (); ++ } ++ if (fclose (f) != 0) ++ { ++ printf ("error: fclose: %m\n"); ++ abort (); ++ } ++} ++ ++static int ++do_test (void) ++{ ++ /* Make sure that we do not exceed the arena limit with the number ++ of threads we configured. */ ++ if (mallopt (M_ARENA_MAX, 400) == 0) ++ { ++ printf ("error: mallopt (M_ARENA_MAX) failed\n"); ++ return 1; ++ } ++ ++ /* Leave some room for shutting down all threads gracefully. 
*/ ++ int timeout = 3; ++ if (timeout > TIMEOUT) ++ timeout = TIMEOUT - 1; ++ ++ create_file_with_large_line (); ++ ++ pthread_t fork_threads[fork_thread_count]; ++ create_threads (fork_threads, fork_thread_count, fork_thread_function); ++ pthread_t read_threads[read_thread_count]; ++ create_threads (read_threads, read_thread_count, read_thread_function); ++ pthread_t flushall_threads[1]; ++ create_threads (flushall_threads, 1, flushall_thread_function); ++ ++ struct timespec ts = {timeout, 0}; ++ if (nanosleep (&ts, NULL)) ++ { ++ printf ("error: error: nanosleep: %m\n"); ++ abort (); ++ } ++ ++ __atomic_store_n (&termination_requested, true, __ATOMIC_RELAXED); ++ ++ join_threads (flushall_threads, 1); ++ join_threads (read_threads, read_thread_count); ++ join_threads (fork_threads, fork_thread_count); ++ ++ free (file_to_read); ++ ++ return 0; ++} +Index: b/manual/memory.texi +=================================================================== +--- a/manual/memory.texi ++++ b/manual/memory.texi +@@ -1055,14 +1055,6 @@ systems that do not support @w{ISO C11}. 
+ @c _dl_addr_inside_object ok + @c determine_info ok + @c __rtld_lock_unlock_recursive (dl_load_lock) @aculock +-@c thread_atfork @asulock @aculock @acsfd @acsmem +-@c __register_atfork @asulock @aculock @acsfd @acsmem +-@c lll_lock (__fork_lock) @asulock @aculock +-@c fork_handler_alloc @asulock @aculock @acsfd @acsmem +-@c calloc dup @asulock @aculock @acsfd @acsmem +-@c __linkin_atfork ok +-@c catomic_compare_and_exchange_bool_acq ok +-@c lll_unlock (__fork_lock) @aculock + @c *_environ @mtsenv + @c next_env_entry ok + @c strcspn dup ok +Index: b/nptl/sysdeps/unix/sysv/linux/fork.c +=================================================================== +--- a/nptl/sysdeps/unix/sysv/linux/fork.c ++++ b/nptl/sysdeps/unix/sysv/linux/fork.c +@@ -29,7 +29,7 @@ + #include + #include + #include +- ++#include + + unsigned long int *__fork_generation_pointer; + +@@ -116,6 +116,11 @@ __libc_fork (void) + + _IO_list_lock (); + ++ /* Acquire malloc locks. This needs to come last because fork ++ handlers may use malloc, and the libio list lock has an indirect ++ malloc dependency as well (via the getdelim function). */ ++ __malloc_fork_lock_parent (); ++ + #ifndef NDEBUG + pid_t ppid = THREAD_GETMEM (THREAD_SELF, tid); + #endif +@@ -172,6 +177,9 @@ __libc_fork (void) + # endif + #endif + ++ /* Release malloc locks. */ ++ __malloc_fork_unlock_child (); ++ + /* Reset the file list. These are recursive mutexes. */ + fresetlockfiles (); + +@@ -213,6 +221,9 @@ __libc_fork (void) + /* Restore the PID value. */ + THREAD_SETMEM (THREAD_SELF, pid, parentpid); + ++ /* Release malloc locks, parent process variant. */ ++ __malloc_fork_unlock_parent (); ++ + /* We execute this even if the 'fork' call failed. 
*/ + _IO_list_unlock (); + diff --git a/SOURCES/glibc-rh906468-2.patch b/SOURCES/glibc-rh906468-2.patch new file mode 100644 index 0000000..cbe4cb8 --- /dev/null +++ b/SOURCES/glibc-rh906468-2.patch @@ -0,0 +1,811 @@ +Based on the following upstream commit: + +commit ef4f97648dc95849e417dd3e6328165de4c22185 +Author: Florian Weimer +Date: Fri Aug 26 22:40:27 2016 +0200 + + malloc: Simplify static malloc interposition [BZ #20432] + + Existing interposed mallocs do not define the glibc-internal + fork callbacks (and they should not), so statically interposed + mallocs lead to link failures because the strong reference from + fork pulls in glibc's malloc, resulting in multiple definitions + of malloc-related symbols. + +Adjustments: __libc_memalign is defined. is +included because libsupport provides definitions of xthread_* +functions, but not our version of the test skeleton. + +Index: b/include/libc-symbols.h +=================================================================== +--- a/include/libc-symbols.h ++++ b/include/libc-symbols.h +@@ -119,6 +119,21 @@ + # define weak_extern(symbol) _weak_extern (weak symbol) + # define _weak_extern(expr) _Pragma (#expr) + ++/* In shared builds, the expression call_function_static_weak ++ (FUNCTION-SYMBOL, ARGUMENTS) invokes FUNCTION-SYMBOL (an ++ identifier) unconditionally, with the (potentially empty) argument ++ list ARGUMENTS. In static builds, if FUNCTION-SYMBOL has a ++ definition, the function is invoked as before; if FUNCTION-SYMBOL ++ is NULL, no call is performed. */ ++# ifdef SHARED ++# define call_function_static_weak(func, ...) func (__VA_ARGS__) ++# else /* !SHARED */ ++# define call_function_static_weak(func, ...) \ ++ ({ \ ++ extern __typeof__ (func) func weak_function; \ ++ (func != NULL ? 
func (__VA_ARGS__) : (void)0); \ ++ }) ++# endif + + #else /* __ASSEMBLER__ */ + +Index: b/malloc/Makefile +=================================================================== +--- a/malloc/Makefile ++++ b/malloc/Makefile +@@ -28,7 +28,16 @@ tests := mallocbug tst-malloc tst-valloc + tst-mallocstate tst-mcheck tst-mallocfork tst-trim1 \ + tst-malloc-usable \ + tst-malloc-backtrace tst-malloc-thread-exit \ +- tst-malloc-thread-fail tst-malloc-fork-deadlock ++ tst-malloc-thread-fail tst-malloc-fork-deadlock \ ++ tst-interpose-nothread \ ++ tst-interpose-thread \ ++ tst-interpose-static-nothread \ ++ tst-interpose-static-thread \ ++ ++tests-static := \ ++ tst-interpose-static-nothread \ ++ tst-interpose-static-thread \ ++ + test-srcs = tst-mtrace + + routines = malloc morecore mcheck mtrace obstack +@@ -40,6 +49,15 @@ non-lib.a := libmcheck.a + extra-libs = libmemusage + extra-libs-others = $(extra-libs) + ++# Helper objects for some tests. ++extra-tests-objs += \ ++ tst-interpose-aux-nothread.o \ ++ tst-interpose-aux-thread.o \ ++ ++test-extras = \ ++ tst-interpose-aux-nothread \ ++ tst-interpose-aux-thread \ ++ + libmemusage-routines = memusage + libmemusage-inhibit-o = $(filter-out .os,$(object-suffixes)) + +@@ -157,6 +175,17 @@ $(objpfx)libmemusage.so: $(common-objpfx + # Extra dependencies + $(foreach o,$(all-object-suffixes),$(objpfx)malloc$(o)): arena.c hooks.c + ++# max_align_t requires C11. ++CFLAGS-tst-interpose-aux-nothread.c = -std=gnu11 ++CFLAGS-tst-interpose-aux-thread.c = -std=gnu11 ++ ++$(objpfx)tst-interpose-nothread: $(objpfx)tst-interpose-aux-nothread.o ++$(objpfx)tst-interpose-thread: \ ++ $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library) ++$(objpfx)tst-interpose-static-nothread: $(objpfx)tst-interpose-aux-nothread.o ++$(objpfx)tst-interpose-static-thread: \ ++ $(objpfx)tst-interpose-aux-thread.o $(static-thread-library) ++ + # Compile the tests with a flag which suppresses the mallopt call in + # the test skeleton. 
+ $(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT +Index: b/malloc/tst-interpose-aux-nothread.c +=================================================================== +--- /dev/null ++++ b/malloc/tst-interpose-aux-nothread.c +@@ -0,0 +1,20 @@ ++/* Interposed malloc, version without threading support. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#define INTERPOSE_THREADS 0 ++#include "tst-interpose-aux.c" +Index: b/malloc/tst-interpose-aux-thread.c +=================================================================== +--- /dev/null ++++ b/malloc/tst-interpose-aux-thread.c +@@ -0,0 +1,20 @@ ++/* Interposed malloc, version with threading support. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#define INTERPOSE_THREADS 1 ++#include "tst-interpose-aux.c" +Index: b/malloc/tst-interpose-aux.c +=================================================================== +--- /dev/null ++++ b/malloc/tst-interpose-aux.c +@@ -0,0 +1,283 @@ ++/* Minimal malloc implementation for interposition tests. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#include "tst-interpose-aux.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if INTERPOSE_THREADS ++#include ++#endif ++ ++/* Print the error message and terminate the process with status 1. */ ++__attribute__ ((noreturn)) ++__attribute__ ((format (printf, 1, 2))) ++static void * ++fail (const char *format, ...) ++{ ++ /* This assumes that vsnprintf will not call malloc. It does not do ++ so for the format strings we use. 
*/ ++ char message[4096]; ++ va_list ap; ++ va_start (ap, format); ++ vsnprintf (message, sizeof (message), format, ap); ++ va_end (ap); ++ ++ enum { count = 3 }; ++ struct iovec iov[count]; ++ ++ iov[0].iov_base = (char *) "error: "; ++ iov[1].iov_base = (char *) message; ++ iov[2].iov_base = (char *) "\n"; ++ ++ for (int i = 0; i < count; ++i) ++ iov[i].iov_len = strlen (iov[i].iov_base); ++ ++ int unused __attribute__ ((unused)); ++ unused = writev (STDOUT_FILENO, iov, count); ++ _exit (1); ++} ++ ++#if INTERPOSE_THREADS ++static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; ++#endif ++ ++static void ++lock (void) ++{ ++#if INTERPOSE_THREADS ++ int ret = pthread_mutex_lock (&mutex); ++ if (ret != 0) ++ { ++ errno = ret; ++ fail ("pthread_mutex_lock: %m"); ++ } ++#endif ++} ++ ++static void ++unlock (void) ++{ ++#if INTERPOSE_THREADS ++ int ret = pthread_mutex_unlock (&mutex); ++ if (ret != 0) ++ { ++ errno = ret; ++ fail ("pthread_mutex_unlock: %m"); ++ } ++#endif ++} ++ ++struct __attribute__ ((aligned (__alignof__ (max_align_t)))) allocation_header ++{ ++ size_t allocation_index; ++ size_t allocation_size; ++}; ++ ++/* Array of known allocations, to track invalid frees. */ ++enum { max_allocations = 65536 }; ++static struct allocation_header *allocations[max_allocations]; ++static size_t allocation_index; ++static size_t deallocation_count; ++ ++/* Sanity check for successful malloc interposition. */ ++__attribute__ ((destructor)) ++static void ++check_for_allocations (void) ++{ ++ if (allocation_index == 0) ++ { ++ /* Make sure that malloc is called at least once from libc. */ ++ void *volatile ptr = strdup ("ptr"); ++ free (ptr); ++ /* Compiler barrier. The strdup function calls malloc, which ++ updates allocation_index, but strdup is marked __THROW, so ++ the compiler could optimize away the reload. */ ++ __asm__ volatile ("" ::: "memory"); ++ /* If the allocation count is still zero, it means we did not ++ interpose malloc successfully. 
*/ ++ if (allocation_index == 0) ++ fail ("malloc does not seem to have been interposed"); ++ } ++} ++ ++static struct allocation_header *get_header (const char *op, void *ptr) ++{ ++ struct allocation_header *header = ((struct allocation_header *) ptr) - 1; ++ if (header->allocation_index >= allocation_index) ++ fail ("%s: %p: invalid allocation index: %zu (not less than %zu)", ++ op, ptr, header->allocation_index, allocation_index); ++ if (allocations[header->allocation_index] != header) ++ fail ("%s: %p: allocation pointer does not point to header, but %p", ++ op, ptr, allocations[header->allocation_index]); ++ return header; ++} ++ ++/* Internal helper functions. Those must be called while the lock is ++ acquired. */ ++ ++static void * ++malloc_internal (size_t size) ++{ ++ if (allocation_index == max_allocations) ++ { ++ errno = ENOMEM; ++ return NULL; ++ } ++ size_t allocation_size = size + sizeof (struct allocation_header); ++ if (allocation_size < size) ++ { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ size_t index = allocation_index++; ++ void *result = mmap (NULL, allocation_size, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ++ if (result == MAP_FAILED) ++ return NULL; ++ allocations[index] = result; ++ *allocations[index] = (struct allocation_header) ++ { ++ .allocation_index = index, ++ .allocation_size = allocation_size ++ }; ++ return allocations[index] + 1; ++} ++ ++static void ++free_internal (const char *op, struct allocation_header *header) ++{ ++ size_t index = header->allocation_index; ++ int result = mprotect (header, header->allocation_size, PROT_NONE); ++ if (result != 0) ++ fail ("%s: mprotect (%p, %zu): %m", op, header, header->allocation_size); ++ /* Catch double-free issues. 
*/ ++ allocations[index] = NULL; ++ ++deallocation_count; ++} ++ ++static void * ++realloc_internal (void *ptr, size_t new_size) ++{ ++ struct allocation_header *header = get_header ("realloc", ptr); ++ size_t old_size = header->allocation_size - sizeof (struct allocation_header); ++ if (old_size >= new_size) ++ return ptr; ++ ++ void *newptr = malloc_internal (new_size); ++ if (newptr == NULL) ++ return NULL; ++ memcpy (newptr, ptr, old_size); ++ free_internal ("realloc", header); ++ return newptr; ++} ++ ++/* Public interfaces. These functions must perform locking. */ ++ ++size_t ++malloc_allocation_count (void) ++{ ++ lock (); ++ size_t count = allocation_index; ++ unlock (); ++ return count; ++} ++ ++size_t ++malloc_deallocation_count (void) ++{ ++ lock (); ++ size_t count = deallocation_count; ++ unlock (); ++ return count; ++} ++ ++void * ++malloc (size_t size) ++{ ++ lock (); ++ void *result = malloc_internal (size); ++ unlock (); ++ return result; ++} ++ ++void ++free (void *ptr) ++{ ++ if (ptr == NULL) ++ return; ++ lock (); ++ struct allocation_header *header = get_header ("free", ptr); ++ free_internal ("free", header); ++ unlock (); ++} ++ ++void * ++calloc (size_t a, size_t b) ++{ ++ if (b > 0 && a > SIZE_MAX / b) ++ { ++ errno = ENOMEM; ++ return NULL; ++ } ++ lock (); ++ /* malloc_internal uses mmap, so the memory is zeroed. */ ++ void *result = malloc_internal (a * b); ++ unlock (); ++ return result; ++} ++ ++void * ++realloc (void *ptr, size_t n) ++{ ++ if (n ==0) ++ { ++ free (ptr); ++ return NULL; ++ } ++ else if (ptr == NULL) ++ return malloc (n); ++ else ++ { ++ lock (); ++ void *result = realloc_internal (ptr, n); ++ unlock (); ++ return result; ++ } ++} ++ ++/* The dynamic linker still uses __libc_memalign because we have not ++ backported the fix for swbz#17730. It does not normally request ++ large alignments, so we can call malloc directly. 
*/ ++void * ++__libc_memalign (size_t alignment, size_t size) ++{ ++ void *result = malloc (size); ++ if (((uintptr_t) result % alignment) != 0) ++ fail ("could not fulfill requested alignment %zu", alignment); ++ return result; ++} +Index: b/malloc/tst-interpose-aux.h +=================================================================== +--- /dev/null ++++ b/malloc/tst-interpose-aux.h +@@ -0,0 +1,30 @@ ++/* Statistics interface for the minimal malloc implementation. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#ifndef TST_INTERPOSE_AUX_H ++#define TST_INTERPOSE_AUX_H ++ ++#include ++ ++/* Return the number of allocations performed. */ ++size_t malloc_allocation_count (void); ++ ++/* Return the number of deallocations performed. */ ++size_t malloc_deallocation_count (void); ++ ++#endif /* TST_INTERPOSE_AUX_H */ +Index: b/malloc/tst-interpose-nothread.c +=================================================================== +--- /dev/null ++++ b/malloc/tst-interpose-nothread.c +@@ -0,0 +1,20 @@ ++/* Malloc interposition test, dynamically-linked version without threads. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#define INTERPOSE_THREADS 0 ++#include "tst-interpose-skeleton.c" +Index: b/malloc/tst-interpose-skeleton.c +=================================================================== +--- /dev/null ++++ b/malloc/tst-interpose-skeleton.c +@@ -0,0 +1,204 @@ ++/* Test driver for malloc interposition tests. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . 
*/ ++ ++#include ++#include ++#include ++#include ++ ++#if INTERPOSE_THREADS ++#include ++#endif ++ ++static int do_test (void); ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" ++ ++/* Fills BUFFER with a test string. */ ++static void ++line_string (int number, char *buffer, size_t length) ++{ ++ for (size_t i = 0; i < length - 2; ++i) ++ buffer[i] = 'A' + ((number + i) % 26); ++ buffer[length - 2] = '\n'; ++ buffer[length - 1] = '\0'; ++} ++ ++/* Perform the tests. */ ++static void * ++run_tests (void *closure) ++{ ++ char *temp_file_path; ++ int fd = create_temp_file ("tst-malloc-interpose", &temp_file_path); ++ if (fd < 0) ++ _exit (1); ++ ++ /* Line lengths excluding the line terminator. */ ++ static const int line_lengths[] = { 0, 45, 80, 2, 8201, 0, 17, -1 }; ++ ++ /* Fill the test file with data. */ ++ { ++ FILE *fp = fdopen (fd, "w"); ++ for (int lineno = 0; line_lengths[lineno] >= 0; ++lineno) ++ { ++ char buffer[line_lengths[lineno] + 2]; ++ line_string (lineno, buffer, sizeof (buffer)); ++ fprintf (fp, "%s", buffer); ++ } ++ ++ if (ferror (fp)) ++ { ++ printf ("error: fprintf: %m\n"); ++ _exit (1); ++ } ++ if (fclose (fp) != 0) ++ { ++ printf ("error: fclose: %m\n"); ++ _exit (1); ++ } ++ } ++ ++ /* Read the test file. This tests libc-internal allocation with ++ realloc. 
*/ ++ { ++ FILE *fp = fopen (temp_file_path, "r"); ++ ++ char *actual = NULL; ++ size_t actual_size = 0; ++ for (int lineno = 0; ; ++lineno) ++ { ++ errno = 0; ++ ssize_t result = getline (&actual, &actual_size, fp); ++ if (result == 0) ++ { ++ printf ("error: invalid return value 0 from getline\n"); ++ _exit (1); ++ } ++ if (result < 0 && errno != 0) ++ { ++ printf ("error: getline: %m\n"); ++ _exit (1); ++ } ++ if (result < 0 && line_lengths[lineno] >= 0) ++ { ++ printf ("error: unexpected end of file after line %d\n", lineno); ++ _exit (1); ++ } ++ if (result > 0 && line_lengths[lineno] < 0) ++ { ++ printf ("error: no end of file after line %d\n", lineno); ++ _exit (1); ++ } ++ if (result == -1 && line_lengths[lineno] == -1) ++ /* End of file reached as expected. */ ++ break; ++ ++ if (result != line_lengths[lineno] + 1) ++ { ++ printf ("error: line length mismatch: expected %d, got %zd\n", ++ line_lengths[lineno], result); ++ _exit (1); ++ } ++ ++ char expected[line_lengths[lineno] + 2]; ++ line_string (lineno, expected, sizeof (expected)); ++ if (strcmp (actual, expected) != 0) ++ { ++ printf ("error: line mismatch\n"); ++ printf ("error: expected: [[%s]]\n", expected); ++ printf ("error: actual: [[%s]]\n", actual); ++ _exit (1); ++ } ++ } ++ ++ if (fclose (fp) != 0) ++ { ++ printf ("error: fclose (after reading): %m\n"); ++ _exit (1); ++ } ++ } ++ ++ free (temp_file_path); ++ ++ /* Make sure that fork is working. 
*/ ++ pid_t pid = fork (); ++ if (pid == -1) ++ { ++ printf ("error: fork: %m\n"); ++ _exit (1); ++ } ++ enum { exit_code = 55 }; ++ if (pid == 0) ++ _exit (exit_code); ++ int status; ++ int ret = waitpid (pid, &status, 0); ++ if (ret < 0) ++ { ++ printf ("error: waitpid: %m\n"); ++ _exit (1); ++ } ++ if (!WIFEXITED (status) || WEXITSTATUS (status) != exit_code) ++ { ++ printf ("error: unexpected exit status from child process: %d\n", ++ status); ++ _exit (1); ++ } ++ ++ return NULL; ++} ++ ++/* This is used to detect if malloc has not been successfully ++ interposed. The interposed malloc does not use brk/sbrk. */ ++static void *initial_brk; ++__attribute__ ((constructor)) ++static void ++set_initial_brk (void) ++{ ++ initial_brk = sbrk (0); ++} ++ ++/* Terminate the process if the break value has been changed. */ ++__attribute__ ((destructor)) ++static void ++check_brk (void) ++{ ++ void *current = sbrk (0); ++ if (current != initial_brk) ++ { ++ printf ("error: brk changed from %p to %p; no interposition?\n", ++ initial_brk, current); ++ _exit (1); ++ } ++} ++ ++static int ++do_test (void) ++{ ++ check_brk (); ++ ++#if INTERPOSE_THREADS ++ pthread_t thr = xpthread_create (NULL, run_tests, NULL); ++ xpthread_join (thr); ++#else ++ run_tests (NULL); ++#endif ++ ++ check_brk (); ++ ++ return 0; ++} +Index: b/malloc/tst-interpose-static-nothread.c +=================================================================== +--- /dev/null ++++ b/malloc/tst-interpose-static-nothread.c +@@ -0,0 +1,19 @@ ++/* Malloc interposition test, static version without threads. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#include "tst-interpose-nothread.c" +Index: b/malloc/tst-interpose-static-thread.c +=================================================================== +--- /dev/null ++++ b/malloc/tst-interpose-static-thread.c +@@ -0,0 +1,19 @@ ++/* Malloc interposition test, static version with threads. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#include "tst-interpose-nothread.c" +Index: b/malloc/tst-interpose-thread.c +=================================================================== +--- /dev/null ++++ b/malloc/tst-interpose-thread.c +@@ -0,0 +1,20 @@ ++/* Malloc interposition test, dynamically-linked version with threads. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#define INTERPOSE_THREADS 1 ++#include "tst-interpose-skeleton.c" +Index: b/nptl/sysdeps/unix/sysv/linux/fork.c +=================================================================== +--- a/nptl/sysdeps/unix/sysv/linux/fork.c ++++ b/nptl/sysdeps/unix/sysv/linux/fork.c +@@ -119,7 +119,7 @@ __libc_fork (void) + /* Acquire malloc locks. This needs to come last because fork + handlers may use malloc, and the libio list lock has an indirect + malloc dependency as well (via the getdelim function). */ +- __malloc_fork_lock_parent (); ++ call_function_static_weak (__malloc_fork_lock_parent); + + #ifndef NDEBUG + pid_t ppid = THREAD_GETMEM (THREAD_SELF, tid); +@@ -178,7 +178,7 @@ __libc_fork (void) + #endif + + /* Release malloc locks. */ +- __malloc_fork_unlock_child (); ++ call_function_static_weak (__malloc_fork_unlock_child); + + /* Reset the file list. These are recursive mutexes. */ + fresetlockfiles (); +@@ -222,7 +222,7 @@ __libc_fork (void) + THREAD_SETMEM (THREAD_SELF, pid, parentpid); + + /* Release malloc locks, parent process variant. */ +- __malloc_fork_unlock_parent (); ++ call_function_static_weak (__malloc_fork_unlock_parent); + + /* We execute this even if the 'fork' call failed. 
*/ + _IO_list_unlock (); diff --git a/SOURCES/glibc-rh988869.patch b/SOURCES/glibc-rh988869.patch new file mode 100644 index 0000000..bb73346 --- /dev/null +++ b/SOURCES/glibc-rh988869.patch @@ -0,0 +1,22 @@ +commit 705a79f82560ff6472cebed86aa5db04cdea3bce +Author: Florian Weimer +Date: Wed Nov 30 14:59:27 2016 +0100 + + libio: Limit buffer size to 8192 bytes [BZ #4099] + + This avoids overly large buffers with network file systems which report + very large block sizes. + +Index: b/libio/filedoalloc.c +=================================================================== +--- a/libio/filedoalloc.c ++++ b/libio/filedoalloc.c +@@ -121,7 +121,7 @@ _IO_file_doallocate (fp) + fp->_flags |= _IO_LINE_BUF; + } + #if _IO_HAVE_ST_BLKSIZE +- if (st.st_blksize > 0) ++ if (st.st_blksize > 0 && st.st_blksize < _IO_BUFSIZ) + size = st.st_blksize; + #endif + } diff --git a/SOURCES/syscall-names.list b/SOURCES/syscall-names.list new file mode 100644 index 0000000..9a47755 --- /dev/null +++ b/SOURCES/syscall-names.list @@ -0,0 +1,594 @@ +# List of all known Linux system calls. +# Copyright (C) 2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# This file contains the list of system call names. It has to +# remain in alphabetical order. Lines which start with # are treated +# as comments. 
This file can list all potential system calls. The +# names are only used if the installed kernel headers also provide +# them. + +# The list of system calls is current as of Linux 4.10. +kernel 4.10 + +FAST_atomic_update +FAST_cmpxchg +FAST_cmpxchg64 +_llseek +_newselect +_sysctl +accept +accept4 +access +acct +add_key +adjtimex +afs_syscall +alarm +arch_prctl +arm_fadvise64_64 +arm_sync_file_range +atomic_barrier +atomic_cmpxchg_32 +bdflush +bind +bpf +break +brk +cachectl +cacheflush +capget +capset +chdir +chmod +chown +chown32 +chroot +clock_adjtime +clock_getres +clock_gettime +clock_nanosleep +clock_settime +clone +clone2 +close +cmpxchg_badaddr +connect +copy_file_range +creat +create_module +delete_module +dipc +dup +dup2 +dup3 +epoll_create +epoll_create1 +epoll_ctl +epoll_ctl_old +epoll_pwait +epoll_wait +epoll_wait_old +eventfd +eventfd2 +exec_with_loader +execv +execve +execveat +exit +exit_group +faccessat +fadvise64 +fadvise64_64 +fallocate +fanotify_init +fanotify_mark +fchdir +fchmod +fchmodat +fchown +fchown32 +fchownat +fcntl +fcntl64 +fdatasync +fgetxattr +finit_module +flistxattr +flock +fork +fremovexattr +fsetxattr +fstat +fstat64 +fstatat64 +fstatfs +fstatfs64 +fsync +ftime +ftruncate +ftruncate64 +futex +futimesat +get_kernel_syms +get_mempolicy +get_robust_list +get_thread_area +getcpu +getcwd +getdents +getdents64 +getdomainname +getdtablesize +getegid +getegid32 +geteuid +geteuid32 +getgid +getgid32 +getgroups +getgroups32 +gethostname +getitimer +getpagesize +getpeername +getpgid +getpgrp +getpid +getpmsg +getppid +getpriority +getrandom +getresgid +getresgid32 +getresuid +getresuid32 +getrlimit +getrusage +getsid +getsockname +getsockopt +gettid +gettimeofday +getuid +getuid32 +getunwind +getxattr +getxgid +getxpid +getxuid +gtty +idle +init_module +inotify_add_watch +inotify_init +inotify_init1 +inotify_rm_watch +io_cancel +io_destroy +io_getevents +io_setup +io_submit +ioctl +ioperm +iopl +ioprio_get +ioprio_set +ipc +kcmp 
+kern_features +kexec_file_load +kexec_load +keyctl +kill +lchown +lchown32 +lgetxattr +link +linkat +listen +listxattr +llistxattr +llseek +lock +lookup_dcookie +lremovexattr +lseek +lsetxattr +lstat +lstat64 +madvise +mbind +membarrier +memfd_create +memory_ordering +migrate_pages +mincore +mkdir +mkdirat +mknod +mknodat +mlock +mlock2 +mlockall +mmap +mmap2 +modify_ldt +mount +move_pages +mprotect +mpx +mq_getsetattr +mq_notify +mq_open +mq_timedreceive +mq_timedsend +mq_unlink +mremap +msgctl +msgget +msgrcv +msgsnd +msync +multiplexer +munlock +munlockall +munmap +name_to_handle_at +nanosleep +newfstatat +nfsservctl +ni_syscall +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open +open_by_handle_at +openat +osf_adjtime +osf_afs_syscall +osf_alt_plock +osf_alt_setsid +osf_alt_sigpending +osf_asynch_daemon +osf_audcntl +osf_audgen +osf_chflags +osf_execve +osf_exportfs +osf_fchflags +osf_fdatasync +osf_fpathconf +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_fuser +osf_getaddressconf +osf_getdirentries +osf_getdomainname +osf_getfh +osf_getfsstat +osf_gethostid +osf_getitimer +osf_getlogin +osf_getmnt +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_kloadcall +osf_kmodcall +osf_lstat +osf_memcntl +osf_mincore +osf_mount +osf_mremap +osf_msfs_syscall +osf_msleep +osf_mvalid +osf_mwakeup +osf_naccept +osf_nfssvc +osf_ngetpeername +osf_ngetsockname +osf_nrecvfrom +osf_nrecvmsg +osf_nsendmsg +osf_ntp_adjtime +osf_ntp_gettime +osf_old_creat +osf_old_fstat +osf_old_getpgrp +osf_old_killpg +osf_old_lstat +osf_old_open +osf_old_sigaction +osf_old_sigblock +osf_old_sigreturn +osf_old_sigsetmask +osf_old_sigvec +osf_old_stat +osf_old_vadvise +osf_old_vtrace +osf_old_wait +osf_oldquota +osf_pathconf +osf_pid_block +osf_pid_unblock +osf_plock +osf_priocntlset +osf_profil +osf_proplist_syscall +osf_reboot +osf_revoke +osf_sbrk +osf_security +osf_select +osf_set_program_attributes +osf_set_speculative +osf_sethostid +osf_setitimer 
+osf_setlogin +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_signal +osf_sigprocmask +osf_sigsendset +osf_sigstack +osf_sigwaitprim +osf_sstk +osf_stat +osf_statfs +osf_statfs64 +osf_subsys_info +osf_swapctl +osf_swapon +osf_syscall +osf_sysinfo +osf_table +osf_uadmin +osf_usleep_thread +osf_uswitch +osf_utc_adjtime +osf_utc_gettime +osf_utimes +osf_utsname +osf_wait4 +osf_waitid +pause +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open +perfctr +perfmonctl +personality +pipe +pipe2 +pivot_root +pkey_alloc +pkey_free +pkey_mprotect +poll +ppoll +prctl +pread64 +preadv +preadv2 +prlimit64 +process_vm_readv +process_vm_writev +prof +profil +pselect6 +ptrace +putpmsg +pwrite64 +pwritev +pwritev2 +query_module +quotactl +read +readahead +readdir +readlink +readlinkat +readv +reboot +recv +recvfrom +recvmmsg +recvmsg +remap_file_pages +removexattr +rename +renameat +renameat2 +request_key +restart_syscall +rmdir +rt_sigaction +rt_sigpending +rt_sigprocmask +rt_sigqueueinfo +rt_sigreturn +rt_sigsuspend +rt_sigtimedwait +rt_tgsigqueueinfo +rtas +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +sched_get_affinity +sched_get_priority_max +sched_get_priority_min +sched_getaffinity +sched_getattr +sched_getparam +sched_getscheduler +sched_rr_get_interval +sched_set_affinity +sched_setaffinity +sched_setattr +sched_setparam +sched_setscheduler +sched_yield +seccomp +security +select +semctl +semget +semop +semtimedop +send +sendfile +sendfile64 +sendmmsg +sendmsg +sendto +set_mempolicy +set_robust_list +set_thread_area +set_tid_address +setdomainname +setfsgid +setfsgid32 +setfsuid +setfsuid32 +setgid +setgid32 +setgroups +setgroups32 +sethae +sethostname +setitimer +setns +setpgid +setpgrp +setpriority +setregid +setregid32 +setresgid +setresgid32 +setresuid +setresuid32 +setreuid +setreuid32 +setrlimit +setsid +setsockopt +settimeofday +setuid +setuid32 +setxattr +sgetmask +shmat +shmctl +shmdt +shmget +shutdown +sigaction +sigaltstack +signal 
+signalfd +signalfd4 +sigpending +sigprocmask +sigreturn +sigsuspend +socket +socketcall +socketpair +splice +spu_create +spu_run +ssetmask +stat +stat64 +statfs +statfs64 +stime +stty +subpage_prot +swapcontext +swapoff +swapon +switch_endian +symlink +symlinkat +sync +sync_file_range +sync_file_range2 +syncfs +sys_debug_setcontext +sys_epoll_create +sys_epoll_ctl +sys_epoll_wait +syscall +sysfs +sysinfo +syslog +sysmips +tee +tgkill +time +timer_create +timer_delete +timer_getoverrun +timer_gettime +timer_settime +timerfd +timerfd_create +timerfd_gettime +timerfd_settime +times +tkill +truncate +truncate64 +tuxcall +ugetrlimit +ulimit +umask +umount +umount2 +uname +unlink +unlinkat +unshare +uselib +userfaultfd +ustat +utime +utimensat +utimes +utrap_install +vfork +vhangup +vm86 +vm86old +vmsplice +vserver +wait4 +waitid +waitpid +write +writev diff --git a/SPECS/glibc.spec b/SPECS/glibc.spec index 5f0b1a4..1d01f86 100644 --- a/SPECS/glibc.spec +++ b/SPECS/glibc.spec @@ -1,6 +1,6 @@ %define glibcsrcdir glibc-2.17-c758a686 %define glibcversion 2.17 -%define glibcrelease 157%{?dist}.5 +%define glibcrelease 196%{?dist} ############################################################################## # We support the following options: # --with/--without, @@ -75,7 +75,6 @@ # - POWER8 LE (default) # 32-bit BE: # - POWER7 (default) -# - POWER8 (enabled via AT_PLATFORM) # # The POWER5 and POWER6 runtimes are now deprecated and no longer provided # or supported. This means that RHEL7 BE will only run on POWER7 or newer @@ -117,6 +116,13 @@ # execution is provided by STT_GNU_IFUNC. %define multiarcharches ppc %{power64} %{ix86} x86_64 %{sparc} s390 s390x ############################################################################## +# If the architecture has elision support in glibc then it should be listed +# here to enable elision for default pthread mutexes and rwlocks. 
The elision +# is not enabled automatically and each process has to opt-in to elision via +# the environment variable RHEL_GLIBC_TUNABLES by setting it to enabled e.g. +# RHEL_GLIBC_TUNABLES="glibc.elision.enable=1". +%define elisionarches x86_64 %{power64} +############################################################################## # Add -s for a less verbose build output. %define silentrules PARALLELMFLAGS= ############################################################################## @@ -925,11 +931,173 @@ Patch1755: glibc-rh1298526-4.patch # RHBZ #1350733 locale-archive.tmpl cannot be processed by build-locale-archive Patch1756: glibc-rh1350733-1.patch -# RHBZ #:1463274 Rounding issues on POWER -Patch1857: glibc-rh1463274-1.patch -Patch1858: glibc-rh1463274-2.patch -Patch1859: glibc-rh1463274-3.patch -Patch1860: glibc-rh1463274-4.patch +# Fix tst-cancel17/tst-cancelx17, which sometimes segfaults while exiting. +Patch1757: glibc-rh1337242.patch + +# RHBZ #1418978: backport upstream support/ directory +Patch1758: glibc-rh1418978-0.patch +Patch1759: glibc-rh1418978-1.patch +Patch1760: glibc-rh1418978-2-1.patch +Patch1761: glibc-rh1418978-2-2.patch +Patch1762: glibc-rh1418978-2-3.patch +Patch1763: glibc-rh1418978-2-4.patch +Patch1764: glibc-rh1418978-2-5.patch +Patch1765: glibc-rh1418978-2-6.patch +Patch1766: glibc-rh1418978-3-1.patch +Patch1767: glibc-rh1418978-3-2.patch + +# RHBZ #906468: Fix deadlock between fork, malloc, flush (NULL) +Patch1768: glibc-rh906468-1.patch +Patch1769: glibc-rh906468-2.patch + +# RHBZ #988869: stdio buffer auto-tuning should reject large buffer sizes +Patch1770: glibc-rh988869.patch + +# RHBZ #1398244 - RHEL7.3 - glibc: Fix TOC stub on powerpc64 clone() +Patch1771: glibc-rh1398244.patch + +# RHBZ #1228114: Fix sunrpc UDP client timeout handling +Patch1772: glibc-rh1228114-1.patch +Patch1773: glibc-rh1228114-2.patch + +# RHBZ #1298975 - [RFE] Backport the groups merging feature +Patch1774: glibc-rh1298975.patch + +# RHBZ #1318877 - 
Per C11 and C++11, <stdint.h> should not look at +# __STDC_LIMIT_MACROS or __STDC_CONSTANT_MACROS +Patch1775: glibc-rh1318877.patch + +# RHBZ #1417205: Add AF_VSOCK/PF_VSOCK, TCP_TIMESTAMP +Patch1776: glibc-rh1417205.patch + +# RHBZ #1338672: GCC 6 enablement for struct sockaddr_storage +Patch1777: glibc-rh1338672.patch + +# RHBZ #1325138 - glibc: Corrupted aux-cache causes ldconfig to segfault +Patch1778: glibc-rh1325138.patch + +# RHBZ #1374652: Unbounded stack allocation in nan* functions +Patch1779: glibc-rh1374652.patch + +# RHBZ #1374654: Unbounded stack allocation in catopen function +Patch1780: glibc-rh1374654.patch + +# RHBZ #1322544: Segmentation violation can occur within glibc if fork() +# is used in a multi-threaded application +Patch1781: glibc-rh1322544.patch + +# RHBZ #1418997: does not build with binutils 2.27 due to misuse of the cmpli instruction on ppc64 +Patch1782: glibc-rh1418997.patch + +# RHBZ #1383951: LD_POINTER_GUARD in the environment is not sanitized +Patch1783: glibc-rh1383951.patch + +# RHBZ #1385004: [7.4 FEAT] POWER8 IFUNC update from upstream +Patch1784: glibc-rh1385004-1.patch +Patch1785: glibc-rh1385004-2.patch +Patch1786: glibc-rh1385004-3.patch +Patch1787: glibc-rh1385004-4.patch +Patch1788: glibc-rh1385004-5.patch +Patch1789: glibc-rh1385004-6.patch +Patch1790: glibc-rh1385004-7.patch +Patch1791: glibc-rh1385004-8.patch +Patch1792: glibc-rh1385004-9.patch +Patch1793: glibc-rh1385004-10.patch +Patch1794: glibc-rh1385004-11.patch +Patch1795: glibc-rh1385004-12.patch +Patch1796: glibc-rh1385004-13.patch +Patch1797: glibc-rh1385004-14.patch +Patch1798: glibc-rh1385004-15.patch +Patch1799: glibc-rh1385004-16.patch +Patch1800: glibc-rh1385004-17.patch +Patch1801: glibc-rh1385004-18.patch +Patch1802: glibc-rh1385004-19.patch +Patch1803: glibc-rh1385004-20.patch +Patch1804: glibc-rh1385004-21.patch +Patch1805: glibc-rh1385004-22.patch +Patch1806: glibc-rh1385004-23.patch +Patch1807: glibc-rh1385004-24.patch + +# RHBZ 1380680 - [7.4 FEAT] z13 
exploitation in glibc - stage 2 +Patch1808: glibc-rh1380680-1.patch +Patch1809: glibc-rh1380680-2.patch +Patch1810: glibc-rh1380680-3.patch +Patch1811: glibc-rh1380680-4.patch +Patch1812: glibc-rh1380680-5.patch +Patch1813: glibc-rh1380680-6.patch +Patch1814: glibc-rh1380680-7.patch +Patch1815: glibc-rh1380680-8.patch +Patch1816: glibc-rh1380680-9.patch +Patch1817: glibc-rh1380680-10.patch +Patch1818: glibc-rh1380680-11.patch +Patch1819: glibc-rh1380680-12.patch +Patch1820: glibc-rh1380680-13.patch +Patch1821: glibc-rh1380680-14.patch +Patch1822: glibc-rh1380680-15.patch +Patch1823: glibc-rh1380680-16.patch +Patch1824: glibc-rh1380680-17.patch + +# RHBZ #1326739: malloc: additional unlink hardening for non-small bins +Patch1825: glibc-rh1326739.patch + +# RHBZ #1374657: CVE-2015-8778: Integer overflow in hcreate and hcreate_r +Patch1826: glibc-rh1374657.patch + +# RHBZ #1374658 - CVE-2015-8776: Segmentation fault caused by passing +# out-of-range data to strftime() +Patch1827: glibc-rh1374658.patch + +# RHBZ #1385003 - SIZE_MAX evaluates to an expression of the wrong type +# on s390 +Patch1828: glibc-rh1385003.patch + +# RHBZ #1387874 - MSG_FASTOPEN definition missing +Patch1829: glibc-rh1387874.patch + +# RHBZ #1409611 - poor performance with exp() +Patch1830: glibc-rh1409611.patch + +# RHBZ #1421155 - Update dynamic loader trampoline for Intel SSE, AVX, and AVX512 usage. +Patch1831: glibc-rh1421155.patch + +# RHBZ #841653 - [Intel 7.0 FEAT] [RFE] TSX-based lock elision enabled in glibc. 
+Patch1832: glibc-rh841653-0.patch +Patch1833: glibc-rh841653-1.patch +Patch1834: glibc-rh841653-2.patch +Patch1835: glibc-rh841653-3.patch +Patch1836: glibc-rh841653-4.patch +Patch1837: glibc-rh841653-5.patch +Patch1838: glibc-rh841653-6.patch +Patch1839: glibc-rh841653-7.patch +Patch1840: glibc-rh841653-8.patch +Patch1841: glibc-rh841653-9.patch +Patch1842: glibc-rh841653-10.patch +Patch1843: glibc-rh841653-11.patch +Patch1844: glibc-rh841653-12.patch +Patch1845: glibc-rh841653-13.patch +Patch1846: glibc-rh841653-14.patch +Patch1847: glibc-rh841653-15.patch +Patch1848: glibc-rh841653-16.patch +Patch1849: glibc-rh841653-17.patch + +# RHBZ #731835 - [RFE] [7.4 FEAT] Hardware Transactional Memory in GLIBC +Patch1850: glibc-rh731835-0.patch +Patch1851: glibc-rh731835-1.patch +Patch1852: glibc-rh731835-2.patch + +# RHBZ #1413638: Inhibit FMA while compiling sqrt, pow +Patch1853: glibc-rh1413638-1.patch +Patch1854: glibc-rh1413638-2.patch + +# RHBZ #1439165: Use a built-in list of known syscalls for <bits/syscall.h> +Patch1855: glibc-rh1439165.patch +Patch1856: syscall-names.list + +# RHBZ #1457177: Rounding issues on POWER +Patch1857: glibc-rh1457177-1.patch +Patch1858: glibc-rh1457177-2.patch +Patch1859: glibc-rh1457177-3.patch +Patch1860: glibc-rh1457177-4.patch ############################################################################## # @@ -1011,13 +1179,32 @@ Patch2075: glibc-rh1318890.patch Patch2076: glibc-rh1213603.patch Patch2077: glibc-rh1370630.patch +# Add internal-only support for O_TMPFILE. +Patch2078: glibc-rh1330705-1.patch +Patch2079: glibc-rh1330705-2.patch +Patch2080: glibc-rh1330705-3.patch +Patch2081: glibc-rh1330705-4.patch +Patch2082: glibc-rh1330705-5.patch +# The following patch *removes* the public definition of O_TMPFILE. 
+Patch2083: glibc-rh1330705-6.patch + # getaddrinfo with nscd fixes -Patch2078: glibc-rh1436312.patch +Patch2084: glibc-rh1324568.patch + +# RHBZ #1404435 - Remove power8 platform directory +Patch2085: glibc-rh1404435.patch + +# RHBZ #1144516 - aarch64 profil fix +Patch2086: glibc-rh1144516.patch + +# RHBZ #1392540 - Add "sss" service to the automount database in nsswitch.conf +Patch2087: glibc-rh1392540.patch -Patch2079: glibc-rh1452720-1.patch -Patch2080: glibc-rh1452720-2.patch -Patch2081: glibc-rh1452720-3.patch -Patch2082: glibc-rh1452720-4.patch +# RHBZ #1452721: Avoid large allocas in the dynamic linker +Patch2088: glibc-rh1452721-1.patch +Patch2089: glibc-rh1452721-2.patch +Patch2090: glibc-rh1452721-3.patch +Patch2091: glibc-rh1452721-4.patch ############################################################################## # End of glibc patches. @@ -1636,6 +1823,15 @@ package or when debugging this package. %patch2080 -p1 %patch2081 -p1 %patch2082 -p1 +%patch2083 -p1 +%patch2084 -p1 +%patch2085 -p1 +%patch2086 -p1 +%patch2087 -p1 +%patch2088 -p1 +%patch2089 -p1 +%patch2090 -p1 +%patch2091 -p1 # Rebase of microbenchmarks. %patch1607 -p1 @@ -1824,6 +2020,113 @@ package or when debugging this package. 
%patch1754 -p1 %patch1755 -p1 %patch1756 -p1 +%patch1757 -p1 +%patch1758 -p1 +%patch1759 -p1 +%patch1760 -p1 +%patch1761 -p1 +%patch1762 -p1 +%patch1763 -p1 +%patch1764 -p1 +%patch1765 -p1 +%patch1766 -p1 +%patch1767 -p1 +%patch1768 -p1 +%patch1769 -p1 +%patch1770 -p1 +%patch1771 -p1 +%patch1772 -p1 +%patch1773 -p1 +%patch1774 -p1 +%patch1775 -p1 +%patch1776 -p1 +%patch1777 -p1 +%patch1778 -p1 +%patch1779 -p1 +%patch1780 -p1 +%patch1781 -p1 +%patch1782 -p1 +%patch1783 -p1 +%patch1784 -p1 +%patch1785 -p1 +%patch1786 -p1 +%patch1787 -p1 +%patch1788 -p1 +%patch1789 -p1 +%patch1790 -p1 +%patch1791 -p1 +%patch1792 -p1 +%patch1793 -p1 +%patch1794 -p1 +%patch1795 -p1 +%patch1796 -p1 +%patch1797 -p1 +%patch1798 -p1 +%patch1799 -p1 +%patch1800 -p1 +%patch1801 -p1 +%patch1802 -p1 +%patch1803 -p1 +%patch1804 -p1 +%patch1805 -p1 +%patch1806 -p1 +%patch1807 -p1 +%patch1808 -p1 +%patch1809 -p1 +%patch1810 -p1 +%patch1811 -p1 +%patch1812 -p1 +%patch1813 -p1 +%patch1814 -p1 +%patch1815 -p1 +%patch1816 -p1 +%patch1817 -p1 +%patch1818 -p1 +%patch1819 -p1 +%patch1820 -p1 +%patch1821 -p1 +%patch1822 -p1 +%patch1823 -p1 +%patch1824 -p1 +%patch1825 -p1 +%patch1826 -p1 +%patch1827 -p1 +%patch1828 -p1 +%patch1829 -p1 +%patch1830 -p1 +%patch1831 -p1 +# RHBZ #841653 - Intel lock elision patch set. +%patch1832 -p1 +%patch1833 -p1 +%patch1834 -p1 +%patch1835 -p1 +%patch1836 -p1 +%patch1837 -p1 +%patch1838 -p1 +%patch1839 -p1 +%patch1840 -p1 +%patch1841 -p1 +%patch1842 -p1 +%patch1843 -p1 +%patch1844 -p1 +%patch1845 -p1 +%patch1846 -p1 +%patch1847 -p1 +%patch1848 -p1 +%patch1849 -p1 +# End of Intel lock elision patch set. +# RHBZ #731835 - IBM POWER lock elision patch set. +%patch1850 -p1 +%patch1851 -p1 +%patch1852 -p1 +# End of IBM POWER lock elision patch set. + +%patch1853 -p1 +%patch1854 -p1 + +# Built-in list of syscall names. 
+%patch1855 -p1 +cp %{_sourcedir}/syscall-names.list sysdeps/unix/sysv/linux/ %patch1857 -p1 %patch1858 -p1 @@ -1986,6 +2289,9 @@ configure_CFLAGS="$build_CFLAGS -fno-asynchronous-unwind-tables" %ifarch %{multiarcharches} --enable-multi-arch \ %endif +%ifarch %{elisionarches} + --enable-lock-elision=yes \ +%endif --enable-obsolete-rpc \ --enable-systemtap \ ${core_with_options} \ @@ -2976,20 +3282,134 @@ rm -f *.filelist* %endif %changelog -* Tue Jun 20 2017 Florian Weimer - 2.17-157.5 -- Rounding issues on POWER (#1463274) +* Fri Jun 16 2017 Florian Weimer - 2.17-196 +- Avoid large allocas in the dynamic linker (#1452721) + +* Fri Jun 9 2017 Florian Weimer - 2.17-195 +- Rounding issues on POWER (#1457177) + +* Wed Apr 26 2017 Florian Weimer - 2.17-194 +- Use a built-in list of system call names (#1439165) + +* Tue Apr 18 2017 Florian Weimer - 2.17-193 +- Inhibit FMA while compiling sqrt, pow (#1413638) + +* Wed Mar 29 2017 Carlos O'Donell - 2.17-192 +- Exclude lock elision support for older Intel hardware with + Intel TSX that has hardware errata (#841653). + +* Tue Mar 28 2017 Carlos O'Donell - 2.17-191 +- Add transparent lock elision for default POSIX mutexes on + IBM POWER hardware with support for IBM POWER HTM (#731835). + +* Tue Mar 28 2017 Carlos O'Donell - 2.17-190 +- Add transparent lock elision for default POSIX mutexes on + Intel hardware with support for Intel TSX (#841653). +- Update dynamic loader trampoline for Intel Skylake server (#1421155). + +* Wed Mar 15 2017 Carlos O'Donell - 2.17-189 +- Update dynamic loader trampoline for Intel SSE, AVX, and AVX512 usage (#1421155) + +* Wed Mar 15 2017 Carlos O'Donell - 2.17-188 +- Improve exp() and pow() performance in libm (#1409611) +- Add optimized strcmp and strncmp for IBM POWER9 hardware (#1320947) + +* Tue Mar 14 2017 Patsy Franklin - 2.17-187 +- Define MSG_FASTOPEN. 
(#1387874) + +* Tue Mar 14 2017 Patsy Franklin - 2.17-186 +- Update patch for glibc-rh1288613.patch to include tst-res_hconf_reorder + in the list of tests to be built and run. (#1367804) + +* Tue Mar 14 2017 Florian Weimer - 2.17-185 +- math: Regenerate ULPs for POWER (#1385004) + +* Thu Mar 9 2017 Martin Sebor - 2.17-184 +- Correct s390 definition of SIZE_MAX (#1385003) + +* Thu Mar 9 2017 Martin Sebor - 2.17-183 +- Fix CVE-2015-8776 glibc: Segmentation fault caused by passing + out-of-range data to strftime() (#1374658) + +* Thu Mar 9 2017 Martin Sebor - 2.17-182 +- Fix CVE-2015-8778: Integer overflow in hcreate and hcreate_r (#1374657) + +* Wed Mar 8 2017 DJ Delorie - 2.17-181 +- Fix rare case where calloc may not zero memory properly (#1430477) + +* Wed Mar 8 2017 Florian Weimer - 2.17-180 +- malloc: additional unlink hardening for non-small bins (#1326739) + +* Wed Mar 8 2017 Martin Sebor - 2.17-179 +- Add improvements and optimizations to take advantage of the new + z13 processor design (#1380680) + +* Wed Mar 8 2017 Martin Sebor - 2.17-178 +- Backport the latest POWER8 performance optimizations (#1385004) + +* Tue Mar 7 2017 DJ Delorie - 2.17-177 +- LD_POINTER_GUARD in the environment is not sanitized (#1383951) + +* Tue Mar 7 2017 DJ Delorie - 2.17-176 +- Fix cmpli usage in power6 memset. 
(#1418997) + +* Mon Mar 6 2017 Martin Sebor - 2.17-175 +- Avoid accessing user-controlled stdio locks in forked child (#1322544) + +* Mon Mar 6 2017 DJ Delorie - 2.17-174 +- Fix unbounded stack allocation in catopen function (#1374654) + +* Mon Mar 6 2017 DJ Delorie - 2.17-173 +- Fix unbounded stack allocation in nan* functions (#1374652) + +* Fri Mar 3 2017 Martin Sebor - 2.17-172 +- Handle /var/cache/ldconfig/aux-cache corruption (#1325138) + +* Wed Mar 1 2017 DJ Delorie - 2.17-171 +- Make padding in struct sockaddr_storage explicit (#1338672) + +* Wed Mar 1 2017 Florian Weimer - 2.17-170 +- Add AF_VSOCK/PF_VSOCK, TCP_TIMESTAMP (#1417205) + +* Tue Feb 28 2017 Martin Sebor - 2.17-169 +- Define and macros unconditionally (#1318877) + +* Tue Feb 28 2017 Martin Sebor - 2.17-168 +- Backport the groups merging feature (#1298975) + +* Tue Feb 28 2017 Florian Weimer - 2.17-167 +- Fix sunrpc UDP client timeout handling (#1228114) + +* Tue Feb 28 2017 Florian Weimer - 2.17-166 +- Add "sss" service to the automount database in nsswitch.conf (#1392540) + +* Mon Feb 27 2017 Florian Weimer - 2.17-165 +- Fix use of uninitialized data in getaddrinfo with nscd (#1324568) +- Remove the "power8" AT_PLATFORM directory (#1404435) +- Fix profil on aarch64 (#1144516) + +* Tue Feb 21 2017 Martin Sebor - 2.17-164 +- Fix TOC stub on powerpc64 clone() (#1398244) + +* Wed Feb 15 2017 Florian Weimer - 2.17-163 +- stdio buffer auto-tuning should reject large buffer sizes (#988869) + +* Tue Feb 14 2017 Florian Weimer - 2.17-162 +- Backport support/ subdirectory from upstream (#1418978) +- Fix deadlock between fork, malloc, flush (NULL) (#906468) -* Fri May 26 2017 Florian Weimer - 2.17-157.4 -- Avoid large allocas in the dynamic linker (#1452720) +* Fri Jan 27 2017 Patsy Franklin - 2.17-161 +- Fix tst-cancel17/tst-cancelx17 was sometimes segfaulting. + Wait for the read to finish before returning. 
(#1337242) -* Tue Mar 28 2017 DJ Delorie - 2.17-157.2 -- Fix use of uninitialized data in getaddrinfo with nscd (#1436312) +* Wed Jan 25 2017 Florian Weimer - 2.17-160 +- Add internal-only support for O_TMPFILE (#1330705) -* Thu Oct 27 2016 Carlos O'Donell - 2.17-157.1 -- Do not set initgroups in default nsswitch.conf (#1388638) -- nss_db: Request larger buffers for long group entries (#1388637) -- nss_db: Fix get*ent crash without preceding set*ent (#1388635) -- nss_db: Fix endless loop in services database processing (#1388639) +* Thu Oct 20 2016 Carlos O'Donell - 2.17-158 +- Do not set initgroups in default nsswitch.conf (#1366569) +- nss_db: Request larger buffers for long group entries (#1318890) +- nss_db: Fix get*ent crash without preceding set*ent (#1213603) +- nss_db: Fix endless loop in services database processing (#1370630) * Thu Aug 11 2016 Florian Weimer - 2.17-157 - Rebuild with updated binutils (#1268008)