|
|
fdca3f |
commit 99d3c7e1308beb1ce9a3c535ca4b6581ebd653ee
|
|
|
fdca3f |
Author: Paolo Bonzini <bonzini@gnu.org>
|
|
|
fdca3f |
Date: Tue Sep 21 17:00:55 2010 +0200
|
|
|
fdca3f |
|
|
|
fdca3f |
dfa: process range expressions consistently with system regex
|
|
|
fdca3f |
|
|
|
fdca3f |
The actual meaning of range expressions in glibc is not exactly strcoll,
|
|
|
fdca3f |
which makes the behavior of grep hard to predict when compiled with the
|
|
|
fdca3f |
system regex. Leave to the system regex matcher the decision of which
|
|
|
fdca3f |
single-byte characters are matched by a range expression.
|
|
|
fdca3f |
|
|
|
fdca3f |
This partially reverts a change made in commit 0d38a8bb (which made
|
|
|
fdca3f |
sense at the time, but not now that src/dfa.c is not doing multibyte
|
|
|
fdca3f |
character set matching anymore).
|
|
|
fdca3f |
|
|
|
fdca3f |
* src/dfa.c (in_coll_range): Remove.
|
|
|
fdca3f |
(parse_bracket_exp): Use system regex to find which single-byte
|
|
|
fdca3f |
characters match a range expression.
|
|
|
fdca3f |
|
|
|
fdca3f |
diff --git a/src/dfa.c b/src/dfa.c
|
|
|
fdca3f |
index a2f4174..f3e066f 100644
|
|
|
fdca3f |
--- a/src/dfa.c
|
|
|
fdca3f |
+++ b/src/dfa.c
|
|
|
fdca3f |
@@ -697,13 +697,6 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
|
|
|
fdca3f |
|
|
|
fdca3f |
#endif /* MBS_SUPPORT */
|
|
|
fdca3f |
|
|
|
fdca3f |
-static int
|
|
|
fdca3f |
-in_coll_range (char ch, char from, char to)
|
|
|
fdca3f |
-{
|
|
|
fdca3f |
- char c[6] = { from, 0, ch, 0, to, 0 };
|
|
|
fdca3f |
- return strcoll (&c[0], &c[2]) <= 0 && strcoll (&c[2], &c[4]) <= 0;
|
|
|
fdca3f |
-}
|
|
|
fdca3f |
-
|
|
|
fdca3f |
typedef int predicate (int);
|
|
|
fdca3f |
|
|
|
fdca3f |
/* The following list maps the names of the Posix named character classes
|
|
|
fdca3f |
@@ -979,10 +972,22 @@ parse_bracket_exp (void)
|
|
|
fdca3f |
for (c = c1; c <= c2; c++)
|
|
|
fdca3f |
setbit_case_fold (c, ccl);
|
|
|
fdca3f |
else
|
|
|
fdca3f |
- for (c = 0; c < NOTCHAR; ++c)
|
|
|
fdca3f |
- if (!(case_fold && isupper (c))
|
|
|
fdca3f |
- && in_coll_range (c, c1, c2))
|
|
|
fdca3f |
- setbit_case_fold (c, ccl);
|
|
|
fdca3f |
+ {
|
|
|
fdca3f |
+ /* Defer to the system regex library about the meaning
|
|
|
fdca3f |
+ of range expressions. */
|
|
|
fdca3f |
+ regex_t re;
|
|
|
fdca3f |
+ char pattern[6] = { '[', c1, '-', c2, ']', 0 };
|
|
|
fdca3f |
+ char subject[2] = { 0, 0 };
|
|
|
fdca3f |
+ regcomp (&re, pattern, REG_NOSUB);
|
|
|
fdca3f |
+ for (c = 0; c < NOTCHAR; ++c)
|
|
|
fdca3f |
+ {
|
|
|
fdca3f |
+ subject[0] = c;
|
|
|
fdca3f |
+ if (!(case_fold && isupper (c))
|
|
|
fdca3f |
+ && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
|
|
|
fdca3f |
+ setbit_case_fold (c, ccl);
|
|
|
fdca3f |
+ }
|
|
|
fdca3f |
+ regfree (&re);
|
|
|
fdca3f |
+ }
|
|
|
fdca3f |
}
|
|
|
fdca3f |
|
|
|
fdca3f |
colon_warning_state |= 8;
|