| commit 284f42bc778e487dfd5dff5c01959f93b9e0c4f5 |
| Author: Wilco Dijkstra <wdijkstr@arm.com> |
| Date: Fri Aug 3 17:24:12 2018 +0100 |
| |
| Simplify and speedup strstr/strcasestr first match |
| |
| Looking at the benchtests, both strstr and strcasestr spend a lot of time |
| in a slow initialization loop handling one character per iteration. |
| This can be simplified and use the much faster strlen/strnlen/strchr/memcmp. |
| Read ahead a few cachelines to reduce the number of strnlen calls, which |
| improves performance by ~3-4%. This patch improves the time taken for the |
| full strstr benchtest by >40%. |
| |
| * string/strcasestr.c (STRCASESTR): Simplify and speedup first match. |
| * string/strstr.c (AVAILABLE): Likewise. |
| |
| diff --git a/string/strcasestr.c b/string/strcasestr.c |
| index 421764bd1b0ff22e..8aa76037dcc052f3 100644 |
| |
| |
| @@ -59,31 +59,22 @@ |
| case-insensitive comparison. This function gives unspecified |
| results in multibyte locales. */ |
| char * |
| -STRCASESTR (const char *haystack_start, const char *needle_start) |
| +STRCASESTR (const char *haystack, const char *needle) |
| { |
| - const char *haystack = haystack_start; |
| - const char *needle = needle_start; |
| size_t needle_len; /* Length of NEEDLE. */ |
| size_t haystack_len; /* Known minimum length of HAYSTACK. */ |
| - bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */ |
| - |
| - /* Determine length of NEEDLE, and in the process, make sure |
| - HAYSTACK is at least as long (no point processing all of a long |
| - NEEDLE if HAYSTACK is too short). */ |
| - while (*haystack && *needle) |
| - { |
| - ok &= (TOLOWER ((unsigned char) *haystack) |
| - == TOLOWER ((unsigned char) *needle)); |
| - haystack++; |
| - needle++; |
| - } |
| - if (*needle) |
| + |
| + /* Handle empty NEEDLE special case. */ |
| + if (needle[0] == '\0') |
| + return (char *) haystack; |
| + |
| + /* Ensure HAYSTACK length is at least as long as NEEDLE length. |
| + Since a match may occur early on in a huge HAYSTACK, use strnlen |
| + and read ahead a few cachelines for improved performance. */ |
| + needle_len = strlen (needle); |
| + haystack_len = __strnlen (haystack, needle_len + 256); |
| + if (haystack_len < needle_len) |
| return NULL; |
| - if (ok) |
| - return (char *) haystack_start; |
| - needle_len = needle - needle_start; |
| - haystack = haystack_start + 1; |
| - haystack_len = needle_len - 1; |
| |
| /* Perform the search. Abstract memory is considered to be an array |
| of 'unsigned char' values, not an array of 'char' values. See |
| @@ -91,10 +82,10 @@ STRCASESTR (const char *haystack_start, const char *needle_start) |
| if (needle_len < LONG_NEEDLE_THRESHOLD) |
| return two_way_short_needle ((const unsigned char *) haystack, |
| haystack_len, |
| - (const unsigned char *) needle_start, |
| + (const unsigned char *) needle, |
| needle_len); |
| return two_way_long_needle ((const unsigned char *) haystack, haystack_len, |
| - (const unsigned char *) needle_start, |
| + (const unsigned char *) needle, |
| needle_len); |
| } |
| |
| diff --git a/string/strstr.c b/string/strstr.c |
| index 79ebcc75329d0b17..f74d7189ed1319f6 100644 |
| |
| |
| @@ -51,33 +51,32 @@ |
| if NEEDLE is empty, otherwise NULL if NEEDLE is not found in |
| HAYSTACK. */ |
| char * |
| -STRSTR (const char *haystack_start, const char *needle_start) |
| +STRSTR (const char *haystack, const char *needle) |
| { |
| - const char *haystack = haystack_start; |
| - const char *needle = needle_start; |
| size_t needle_len; /* Length of NEEDLE. */ |
| size_t haystack_len; /* Known minimum length of HAYSTACK. */ |
| - bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */ |
| - |
| - /* Determine length of NEEDLE, and in the process, make sure |
| - HAYSTACK is at least as long (no point processing all of a long |
| - NEEDLE if HAYSTACK is too short). */ |
| - while (*haystack && *needle) |
| - ok &= *haystack++ == *needle++; |
| - if (*needle) |
| + |
| + /* Handle empty NEEDLE special case. */ |
| + if (needle[0] == '\0') |
| + return (char *) haystack; |
| + |
| + /* Skip until we find the first matching char from NEEDLE. */ |
| + haystack = strchr (haystack, needle[0]); |
| + if (haystack == NULL || needle[1] == '\0') |
| + return (char *) haystack; |
| + |
| + /* Ensure HAYSTACK length is at least as long as NEEDLE length. |
| + Since a match may occur early on in a huge HAYSTACK, use strnlen |
| + and read ahead a few cachelines for improved performance. */ |
| + needle_len = strlen (needle); |
| + haystack_len = __strnlen (haystack, needle_len + 256); |
| + if (haystack_len < needle_len) |
| return NULL; |
| - if (ok) |
| - return (char *) haystack_start; |
| - |
| - /* Reduce the size of haystack using strchr, since it has a smaller |
| - linear coefficient than the Two-Way algorithm. */ |
| - needle_len = needle - needle_start; |
| - haystack = strchr (haystack_start + 1, *needle_start); |
| - if (!haystack || __builtin_expect (needle_len == 1, 0)) |
| + |
| + /* Check whether we have a match. This improves performance since we avoid |
| + the initialization overhead of the two-way algorithm. */ |
| + if (memcmp (haystack, needle, needle_len) == 0) |
| return (char *) haystack; |
| - needle -= needle_len; |
| - haystack_len = (haystack > haystack_start + needle_len ? 1 |
| - : needle_len + haystack_start - haystack); |
| |
| /* Perform the search. Abstract memory is considered to be an array |
| of 'unsigned char' values, not an array of 'char' values. See |