commit 99d3c7e1308beb1ce9a3c535ca4b6581ebd653ee
Author: Paolo Bonzini <bonzini@gnu.org>
Date: Tue Sep 21 17:00:55 2010 +0200
dfa: process range expressions consistently with system regex
The actual meaning of range expressions in glibc is not exactly what strcoll defines,
which makes the behavior of grep hard to predict when compiled with the
system regex. Leave to the system regex matcher the decision of which
single-byte characters are matched by a range expression.
This partially reverts a change made in commit 0d38a8bb (which made
sense at the time, but not now that src/dfa.c is not doing multibyte
character set matching anymore).
* src/dfa.c (in_coll_range): Remove.
(parse_bracket_exp): Use system regex to find which single-byte
characters match a range expression.
diff --git a/src/dfa.c b/src/dfa.c
index a2f4174..f3e066f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -697,13 +697,6 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
#endif /* MBS_SUPPORT */
-static int
-in_coll_range (char ch, char from, char to)
-{
- char c[6] = { from, 0, ch, 0, to, 0 };
- return strcoll (&c[0], &c[2]) <= 0 && strcoll (&c[2], &c[4]) <= 0;
-}
-
typedef int predicate (int);
/* The following list maps the names of the Posix named character classes
@@ -979,10 +972,22 @@ parse_bracket_exp (void)
for (c = c1; c <= c2; c++)
setbit_case_fold (c, ccl);
else
- for (c = 0; c < NOTCHAR; ++c)
- if (!(case_fold && isupper (c))
- && in_coll_range (c, c1, c2))
- setbit_case_fold (c, ccl);
+ {
+ /* Defer to the system regex library about the meaning
+ of range expressions. */
+ regex_t re;
+ char pattern[6] = { '[', c1, '-', c2, ']', 0 };
+ char subject[2] = { 0, 0 };
+ regcomp (&re, pattern, REG_NOSUB);
+ for (c = 0; c < NOTCHAR; ++c)
+ {
+ subject[0] = c;
+ if (!(case_fold && isupper (c))
+ && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
+ setbit_case_fold (c, ccl);
+ }
+ regfree (&re);
+ }
}
colon_warning_state |= 8;