From a5ca95c10cb6da41568b09b7c646441773147535 Mon Sep 17 00:00:00 2001 From: Tim Waugh Date: Dec 16 2004 17:50:47 +0000 Subject: - Bypass kwset matching when ignoring case and processing multibyte input (bug #143079). --- diff --git a/grep-2.5.1-egf-speedup.patch b/grep-2.5.1-egf-speedup.patch index 87e4dee..0e9a5eb 100644 --- a/grep-2.5.1-egf-speedup.patch +++ b/grep-2.5.1-egf-speedup.patch @@ -1,5 +1,5 @@ ---- grep-2.5.1/src/search.c 2004-12-14 15:55:21.257729918 +0000 -+++ grep-2.5.1/src/search.c 2004-12-16 16:53:01.110921088 +0000 +--- grep-2.5.1/src/search.c 2004-12-16 16:53:01.110921088 +0000 ++++ grep-2.5.1/src/search.c 2004-12-16 17:46:57.039678304 +0000 @@ -39,6 +39,9 @@ #ifdef HAVE_LIBPCRE # include @@ -125,7 +125,7 @@ #endif /* MBS_SUPPORT */ buflim = buf + size; -@@ -373,18 +337,56 @@ +@@ -373,21 +337,60 @@ if (kwset) { /* Find a possible match using the KWset matcher. */ @@ -134,8 +134,8 @@ + size_t bytes_left = 0; +#endif /* MBS_SUPPORT */ + size_t offset; -+ /* kwsexec doesn't work with match_icase and multibyte input. */ +#ifdef MBS_SUPPORT ++ /* kwsexec doesn't work with match_icase and multibyte input. */ + if (match_icase && MB_CUR_MAX > 1) + /* Avoid kwset */ + offset = 0; @@ -184,8 +184,13 @@ +#endif /* MBS_SUPPORT */ while (beg > buf && beg[-1] != eol) --beg; - if (kwsm.index < kwset_exact_matches) -@@ -395,13 +397,47 @@ +- if (kwsm.index < kwset_exact_matches) ++ if (!(match_icase && MB_CUR_MAX > 1) && ++ (kwsm.index < kwset_exact_matches)) + goto success_in_beg_and_end; + if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) + continue; +@@ -395,13 +398,47 @@ else { /* No good fixed strings; start with DFA. */ @@ -233,7 +238,7 @@ while (beg > buf && beg[-1] != eol) --beg; } -@@ -469,15 +505,6 @@ +@@ -469,15 +506,6 @@ } /* for (beg = end ..) */ failure: @@ -249,7 +254,7 @@ return (size_t) -1; success_in_beg_and_end: -@@ -486,15 +513,6 @@ +@@ -486,15 +514,6 @@ /* FALLTHROUGH */ success_in_start_and_len: @@ -265,7 +270,7 @@ *match_size = len; return start; } -@@ -504,6 +522,7 @@ +@@ -504,6 +523,7 @@ { char const *beg, *lim, *err; @@ -273,7 +278,7 @@ kwsinit (); beg = pattern; do -@@ -531,17 +550,8 @@ +@@ -531,17 +551,8 @@ struct kwsmatch kwsmatch; size_t ret_val; #ifdef MBS_SUPPORT @@ -293,7 +298,7 @@ #endif /* MBS_SUPPORT */ for (beg = buf; beg <= buf + size; ++beg) -@@ -550,8 +560,33 @@ +@@ -550,8 +561,33 @@ if (offset == (size_t) -1) goto failure; #ifdef MBS_SUPPORT @@ -329,7 +334,7 @@ #endif /* MBS_SUPPORT */ beg += offset; len = kwsmatch.size[0]; -@@ -587,6 +622,36 @@ +@@ -587,6 +623,36 @@ if (offset == -1) { break; /* Try a different anchor. */ } @@ -366,7 +371,7 @@ beg += offset; len = kwsmatch.size[0]; } -@@ -597,19 +662,31 @@ +@@ -597,19 +663,31 @@ } failure: @@ -406,7 +411,7 @@ end++; while (buf < beg && beg[-1] != eol) --beg; -@@ -618,15 +695,6 @@ +@@ -618,15 +696,6 @@ success_in_beg_and_len: *match_size = len; diff --git a/grep.spec b/grep.spec index 42f9ca9..d022343 100644 --- a/grep.spec +++ b/grep.spec @@ -85,6 +85,10 @@ fi %{_mandir}/*/* %changelog +* Thu Dec 16 2004 Tim Waugh +- Bypass kwset matching when ignoring case and processing multibyte input + (bug #143079). + * Tue Dec 14 2004 Tim Waugh 2.5.1-42 - Further UTF-8 processing avoided since a '\n' byte is always an end-of-line character in that encoding.