diff --git a/SOURCES/glibc-rh1821531-1.patch b/SOURCES/glibc-rh1821531-1.patch new file mode 100644 index 0000000..c09ace9 --- /dev/null +++ b/SOURCES/glibc-rh1821531-1.patch @@ -0,0 +1,136 @@ +commit 284f42bc778e487dfd5dff5c01959f93b9e0c4f5 +Author: Wilco Dijkstra +Date: Fri Aug 3 17:24:12 2018 +0100 + + Simplify and speedup strstr/strcasestr first match + + Looking at the benchtests, both strstr and strcasestr spend a lot of time + in a slow initialization loop handling one character per iteration. + This can be simplified and use the much faster strlen/strnlen/strchr/memcmp. + Read ahead a few cachelines to reduce the number of strnlen calls, which + improves performance by ~3-4%. This patch improves the time taken for the + full strstr benchtest by >40%. + + * string/strcasestr.c (STRCASESTR): Simplify and speedup first match. + * string/strstr.c (AVAILABLE): Likewise. + +diff --git a/string/strcasestr.c b/string/strcasestr.c +index 421764bd1b0ff22e..8aa76037dcc052f3 100644 +--- a/string/strcasestr.c ++++ b/string/strcasestr.c +@@ -59,31 +59,22 @@ + case-insensitive comparison. This function gives unspecified + results in multibyte locales. */ + char * +-STRCASESTR (const char *haystack_start, const char *needle_start) ++STRCASESTR (const char *haystack, const char *needle) + { +- const char *haystack = haystack_start; +- const char *needle = needle_start; + size_t needle_len; /* Length of NEEDLE. */ + size_t haystack_len; /* Known minimum length of HAYSTACK. */ +- bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */ +- +- /* Determine length of NEEDLE, and in the process, make sure +- HAYSTACK is at least as long (no point processing all of a long +- NEEDLE if HAYSTACK is too short). */ +- while (*haystack && *needle) +- { +- ok &= (TOLOWER ((unsigned char) *haystack) +- == TOLOWER ((unsigned char) *needle)); +- haystack++; +- needle++; +- } +- if (*needle) ++ ++ /* Handle empty NEEDLE special case. */ ++ if (needle[0] == '\0') ++ return (char *) haystack; ++ ++ /* Ensure HAYSTACK length is at least as long as NEEDLE length. ++ Since a match may occur early on in a huge HAYSTACK, use strnlen ++ and read ahead a few cachelines for improved performance. */ ++ needle_len = strlen (needle); ++ haystack_len = __strnlen (haystack, needle_len + 256); ++ if (haystack_len < needle_len) + return NULL; +- if (ok) +- return (char *) haystack_start; +- needle_len = needle - needle_start; +- haystack = haystack_start + 1; +- haystack_len = needle_len - 1; + + /* Perform the search. Abstract memory is considered to be an array + of 'unsigned char' values, not an array of 'char' values. See +@@ -91,10 +82,10 @@ STRCASESTR (const char *haystack_start, const char *needle_start) + if (needle_len < LONG_NEEDLE_THRESHOLD) + return two_way_short_needle ((const unsigned char *) haystack, + haystack_len, +- (const unsigned char *) needle_start, ++ (const unsigned char *) needle, + needle_len); + return two_way_long_needle ((const unsigned char *) haystack, haystack_len, +- (const unsigned char *) needle_start, ++ (const unsigned char *) needle, + needle_len); + } + +diff --git a/string/strstr.c b/string/strstr.c +index 79ebcc75329d0b17..f74d7189ed1319f6 100644 +--- a/string/strstr.c ++++ b/string/strstr.c +@@ -51,33 +51,32 @@ + if NEEDLE is empty, otherwise NULL if NEEDLE is not found in + HAYSTACK. */ + char * +-STRSTR (const char *haystack_start, const char *needle_start) ++STRSTR (const char *haystack, const char *needle) + { +- const char *haystack = haystack_start; +- const char *needle = needle_start; + size_t needle_len; /* Length of NEEDLE. */ + size_t haystack_len; /* Known minimum length of HAYSTACK. */ +- bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */ +- +- /* Determine length of NEEDLE, and in the process, make sure +- HAYSTACK is at least as long (no point processing all of a long +- NEEDLE if HAYSTACK is too short). */ +- while (*haystack && *needle) +- ok &= *haystack++ == *needle++; +- if (*needle) ++ ++ /* Handle empty NEEDLE special case. */ ++ if (needle[0] == '\0') ++ return (char *) haystack; ++ ++ /* Skip until we find the first matching char from NEEDLE. */ ++ haystack = strchr (haystack, needle[0]); ++ if (haystack == NULL || needle[1] == '\0') ++ return (char *) haystack; ++ ++ /* Ensure HAYSTACK length is at least as long as NEEDLE length. ++ Since a match may occur early on in a huge HAYSTACK, use strnlen ++ and read ahead a few cachelines for improved performance. */ ++ needle_len = strlen (needle); ++ haystack_len = __strnlen (haystack, needle_len + 256); ++ if (haystack_len < needle_len) + return NULL; +- if (ok) +- return (char *) haystack_start; +- +- /* Reduce the size of haystack using strchr, since it has a smaller +- linear coefficient than the Two-Way algorithm. */ +- needle_len = needle - needle_start; +- haystack = strchr (haystack_start + 1, *needle_start); +- if (!haystack || __builtin_expect (needle_len == 1, 0)) ++ ++ /* Check whether we have a match. This improves performance since we avoid ++ the initialization overhead of the two-way algorithm. */ ++ if (memcmp (haystack, needle, needle_len) == 0) + return (char *) haystack; +- needle -= needle_len; +- haystack_len = (haystack > haystack_start + needle_len ? 1 +- : needle_len + haystack_start - haystack); + + /* Perform the search. Abstract memory is considered to be an array + of 'unsigned char' values, not an array of 'char' values. See diff --git a/SOURCES/glibc-rh1821531-2.patch b/SOURCES/glibc-rh1821531-2.patch new file mode 100644 index 0000000..f97e4a5 --- /dev/null +++ b/SOURCES/glibc-rh1821531-2.patch @@ -0,0 +1,260 @@ +commit 5e0a7ecb6629461b28adc1a5aabcc0ede122f201 +Author: Wilco Dijkstra +Date: Wed Jun 12 11:38:52 2019 +0100 + + Improve performance of strstr + + This patch significantly improves performance of strstr using a novel + modified Horspool algorithm. Needles up to size 256 use a bad-character + table indexed by hashed pairs of characters to quickly skip past mismatches. + Long needles use a self-adapting filtering step to avoid comparing the whole + needle repeatedly. + + By limiting the needle length to 256, the shift table only requires 8 bits + per entry, lowering preprocessing overhead and minimizing cache effects. + This limit also implies worst-case performance is linear. + + Small needles up to size 3 use a dedicated linear search. Very long needles + use the Two-Way algorithm. + + The performance gain using the improved bench-strstr on Cortex-A72 is 5.8 + times basic_strstr and 3.7 times twoway_strstr. + + Tested against GLIBC testsuite, randomized tests and the GNULIB strstr test + (https://git.savannah.gnu.org/cgit/gnulib.git/tree/tests/test-strstr.c). + + Reviewed-by: Szabolcs Nagy + + * string/str-two-way.h (two_way_short_needle): Add inline to avoid + warning. + (two_way_long_needle): Block inlining. + * string/strstr.c (strstr2): Add new function. + (strstr3): Likewise. + (STRSTR): Completely rewrite strstr to improve performance. + +diff --git a/string/str-two-way.h b/string/str-two-way.h +index 523d946c59412e1f..358959bef0fd6f74 100644 +--- a/string/str-two-way.h ++++ b/string/str-two-way.h +@@ -221,7 +221,7 @@ critical_factorization (const unsigned char *needle, size_t needle_len, + most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. + If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * + HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. */ +-static RETURN_TYPE ++static inline RETURN_TYPE + two_way_short_needle (const unsigned char *haystack, size_t haystack_len, + const unsigned char *needle, size_t needle_len) + { +@@ -382,8 +382,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, + and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible. + If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * + HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and +- sublinear performance is not possible. */ +-static RETURN_TYPE ++ sublinear performance is not possible. ++ ++ Since this function is large and complex, block inlining to avoid ++ slowing down the common case of small needles. */ ++__attribute__((noinline)) static RETURN_TYPE + two_way_long_needle (const unsigned char *haystack, size_t haystack_len, + const unsigned char *needle, size_t needle_len) + { +diff --git a/string/strstr.c b/string/strstr.c +index f74d7189ed1319f6..7ffb18ab4285b060 100644 +--- a/string/strstr.c ++++ b/string/strstr.c +@@ -16,29 +16,17 @@ + License along with the GNU C Library; if not, see + . */ + +-/* This particular implementation was written by Eric Blake, 2008. */ +- + #ifndef _LIBC + # include + #endif + +-/* Specification of strstr. */ + #include + +-#include +- +-#ifndef _LIBC +-# define __builtin_expect(expr, val) (expr) +-#endif +- + #define RETURN_TYPE char * + #define AVAILABLE(h, h_l, j, n_l) \ + (((j) + (n_l) <= (h_l)) \ + || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \ + (j) + (n_l) <= (h_l))) +-#define CHECK_EOL (1) +-#define RET0_IF_0(a) if (!a) goto ret0 +-#define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C)) + #include "str-two-way.h" + + #undef strstr +@@ -47,47 +35,128 @@ + #define STRSTR strstr + #endif + +-/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK +- if NEEDLE is empty, otherwise NULL if NEEDLE is not found in +- HAYSTACK. */ +-char * +-STRSTR (const char *haystack, const char *needle) ++static inline char * ++strstr2 (const unsigned char *hs, const unsigned char *ne) + { +- size_t needle_len; /* Length of NEEDLE. */ +- size_t haystack_len; /* Known minimum length of HAYSTACK. */ +- +- /* Handle empty NEEDLE special case. */ +- if (needle[0] == '\0') +- return (char *) haystack; ++ uint32_t h1 = (ne[0] << 16) | ne[1]; ++ uint32_t h2 = 0; ++ for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs) ++ h2 = (h2 << 16) | c; ++ return h1 == h2 ? (char *)hs - 2 : NULL; ++} + +- /* Skip until we find the first matching char from NEEDLE. */ +- haystack = strchr (haystack, needle[0]); +- if (haystack == NULL || needle[1] == '\0') +- return (char *) haystack; ++static inline char * ++strstr3 (const unsigned char *hs, const unsigned char *ne) ++{ ++ uint32_t h1 = ((uint32_t)ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8); ++ uint32_t h2 = 0; ++ for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs) ++ h2 = (h2 | c) << 8; ++ return h1 == h2 ? (char *)hs - 3 : NULL; ++} + +- /* Ensure HAYSTACK length is at least as long as NEEDLE length. +- Since a match may occur early on in a huge HAYSTACK, use strnlen ++/* Hash character pairs so a small shift table can be used. All bits of ++ p[0] are included, but not all bits from p[-1]. So if two equal hashes ++ match on p[-1], p[0] matches too. Hash collisions are harmless and result ++ in smaller shifts. */ ++#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift)) ++ ++/* Fast strstr algorithm with guaranteed linear-time performance. ++ Small needles up to size 3 use a dedicated linear search. Longer needles ++ up to size 256 use a novel modified Horspool algorithm. It hashes pairs ++ of characters to quickly skip past mismatches. The main search loop only ++ exits if the last 2 characters match, avoiding unnecessary calls to memcmp ++ and allowing for a larger skip if there is no match. A self-adapting ++ filtering check is used to quickly detect mismatches in long needles. ++ By limiting the needle length to 256, the shift table can be reduced to 8 ++ bits per entry, lowering preprocessing overhead and minimizing cache effects. ++ The limit also implies worst-case performance is linear. ++ Needles larger than 256 characters use the linear-time Two-Way algorithm. */ ++char * ++STRSTR (const char *haystack, const char *needle) ++{ ++ const unsigned char *hs = (const unsigned char *) haystack; ++ const unsigned char *ne = (const unsigned char *) needle; ++ ++ /* Handle short needle special cases first. */ ++ if (ne[0] == '\0') ++ return (char *)hs; ++ hs = (const unsigned char *)strchr ((const char*)hs, ne[0]); ++ if (hs == NULL || ne[1] == '\0') ++ return (char*)hs; ++ if (ne[2] == '\0') ++ return strstr2 (hs, ne); ++ if (ne[3] == '\0') ++ return strstr3 (hs, ne); ++ ++ /* Ensure haystack length is at least as long as needle length. ++ Since a match may occur early on in a huge haystack, use strnlen + and read ahead a few cachelines for improved performance. */ +- needle_len = strlen (needle); +- haystack_len = __strnlen (haystack, needle_len + 256); +- if (haystack_len < needle_len) ++ size_t ne_len = strlen ((const char*)ne); ++ size_t hs_len = __strnlen ((const char*)hs, ne_len | 512); ++ if (hs_len < ne_len) + return NULL; + +- /* Check whether we have a match. This improves performance since we avoid +- the initialization overhead of the two-way algorithm. */ +- if (memcmp (haystack, needle, needle_len) == 0) +- return (char *) haystack; +- +- /* Perform the search. Abstract memory is considered to be an array +- of 'unsigned char' values, not an array of 'char' values. See +- ISO C 99 section 6.2.6.1. */ +- if (needle_len < LONG_NEEDLE_THRESHOLD) +- return two_way_short_needle ((const unsigned char *) haystack, +- haystack_len, +- (const unsigned char *) needle, needle_len); +- return two_way_long_needle ((const unsigned char *) haystack, haystack_len, +- (const unsigned char *) needle, needle_len); ++ /* Check whether we have a match. This improves performance since we ++ avoid initialization overheads. */ ++ if (memcmp (hs, ne, ne_len) == 0) ++ return (char *) hs; ++ ++ /* Use Two-Way algorithm for very long needles. */ ++ if (__glibc_unlikely (ne_len > 256)) ++ return two_way_long_needle (hs, hs_len, ne, ne_len); ++ ++ const unsigned char *end = hs + hs_len - ne_len; ++ uint8_t shift[256]; ++ size_t tmp, shift1; ++ size_t m1 = ne_len - 1; ++ size_t offset = 0; ++ ++ /* Initialize bad character shift hash table. */ ++ memset (shift, 0, sizeof (shift)); ++ for (int i = 1; i < m1; i++) ++ shift[hash2 (ne + i)] = i; ++ /* Shift1 is the amount we can skip after matching the hash of the ++ needle end but not the full needle. */ ++ shift1 = m1 - shift[hash2 (ne + m1)]; ++ shift[hash2 (ne + m1)] = m1; ++ ++ while (1) ++ { ++ if (__glibc_unlikely (hs > end)) ++ { ++ end += __strnlen ((const char*)end + m1 + 1, 2048); ++ if (hs > end) ++ return NULL; ++ } ++ ++ /* Skip past character pairs not in the needle. */ ++ do ++ { ++ hs += m1; ++ tmp = shift[hash2 (hs)]; ++ } ++ while (tmp == 0 && hs <= end); ++ ++ /* If the match is not at the end of the needle, shift to the end ++ and continue until we match the hash of the needle end. */ ++ hs -= tmp; ++ if (tmp < m1) ++ continue; ++ ++ /* Hash of the last 2 characters matches. If the needle is long, ++ try to quickly filter out mismatches. */ ++ if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0) ++ { ++ if (memcmp (hs, ne, m1) == 0) ++ return (void *) hs; ++ ++ /* Adjust filter offset when it doesn't find the mismatch. */ ++ offset = (offset >= 8 ? offset : m1) - 8; ++ } ++ ++ /* Skip based on matching the hash of the needle end. */ ++ hs += shift1; ++ } + } + libc_hidden_builtin_def (strstr) +- +-#undef LONG_NEEDLE_THRESHOLD diff --git a/SPECS/glibc.spec b/SPECS/glibc.spec index f2da64a..40339bb 100644 --- a/SPECS/glibc.spec +++ b/SPECS/glibc.spec @@ -1,6 +1,6 @@ %define glibcsrcdir glibc-2.28 %define glibcversion 2.28 -%define glibcrelease 126%{?dist} +%define glibcrelease 127%{?dist} # Pre-release tarballs are pulled in from git using a command that is # effectively: # @@ -484,6 +484,8 @@ Patch350: glibc-rh1748197-6.patch Patch351: glibc-rh1748197-7.patch Patch352: glibc-rh1642150-4.patch Patch353: glibc-rh1836867.patch +Patch354: glibc-rh1821531-1.patch +Patch355: glibc-rh1821531-2.patch ############################################################################## # Continued list of core "glibc" package information: @@ -2382,6 +2384,9 @@ fi %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared %changelog +* Tue Jun 09 2020 Carlos O'Donell - 2.28-127 +- Improve performance of library strstr() function (#1821531) + * Wed May 27 2020 Florian Weimer - 2.28-126 - Do not clobber errno in nss_compat (#1836867)