fdca3f
commit 99d3c7e1308beb1ce9a3c535ca4b6581ebd653ee
fdca3f
Author: Paolo Bonzini <bonzini@gnu.org>
fdca3f
Date:   Tue Sep 21 17:00:55 2010 +0200
fdca3f
fdca3f
    dfa: process range expressions consistently with system regex
fdca3f
    
fdca3f
    The actual meaning of range expressions in glibc is not exactly strcoll,
fdca3f
    which makes the behavior of grep hard to predict when compiled with the
fdca3f
    system regex.  Let the system regex matcher decide which
fdca3f
    single-byte characters a range expression matches.
fdca3f
    
fdca3f
    This partially reverts a change made in commit 0d38a8bb (which made
fdca3f
    sense at the time, but not now that src/dfa.c is not doing multibyte
fdca3f
    character set matching anymore).
fdca3f
    
fdca3f
    * src/dfa.c (in_coll_range): Remove.
fdca3f
    (parse_bracket_exp): Use system regex to find which single-byte
fdca3f
    characters match a range expression.
fdca3f
fdca3f
diff --git a/src/dfa.c b/src/dfa.c
fdca3f
index a2f4174..f3e066f 100644
fdca3f
--- a/src/dfa.c
fdca3f
+++ b/src/dfa.c
fdca3f
@@ -697,13 +697,6 @@ static unsigned char const *buf_end;	/* reference to end in dfaexec().  */
fdca3f
 
fdca3f
 #endif /* MBS_SUPPORT */
fdca3f
 
fdca3f
-static int
fdca3f
-in_coll_range (char ch, char from, char to)
fdca3f
-{
fdca3f
-  char c[6] = { from, 0, ch, 0, to, 0 };
fdca3f
-  return strcoll (&c[0], &c[2]) <= 0 && strcoll (&c[2], &c[4]) <= 0;
fdca3f
-}
fdca3f
-
fdca3f
 typedef int predicate (int);
fdca3f
 
fdca3f
 /* The following list maps the names of the Posix named character classes
fdca3f
@@ -979,10 +972,22 @@ parse_bracket_exp (void)
fdca3f
                 for (c = c1; c <= c2; c++)
fdca3f
                   setbit_case_fold (c, ccl);
fdca3f
               else
fdca3f
-                for (c = 0; c < NOTCHAR; ++c)
fdca3f
-                  if (!(case_fold && isupper (c))
fdca3f
-                      && in_coll_range (c, c1, c2))
fdca3f
-                    setbit_case_fold (c, ccl);
fdca3f
+                {
fdca3f
+                  /* Defer to the system regex library about the meaning
fdca3f
+                     of range expressions.  */
fdca3f
+                  regex_t re;
fdca3f
+                  char pattern[6] = { '[', c1, '-', c2, ']', 0 };
fdca3f
+                  char subject[2] = { 0, 0 };
fdca3f
+                  regcomp (&re, pattern, REG_NOSUB);
fdca3f
+                  for (c = 0; c < NOTCHAR; ++c)
fdca3f
+                    {
fdca3f
+                      subject[0] = c;
fdca3f
+                      if (!(case_fold && isupper (c))
fdca3f
+                          && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
fdca3f
+                        setbit_case_fold (c, ccl);
fdca3f
+                    }
fdca3f
+                  regfree (&re);
fdca3f
+                }
fdca3f
             }
fdca3f
 
fdca3f
           colon_warning_state |= 8;