From 6f2e162151bc45f9d1dca2e7c5d0d67d4992f57a Mon Sep 17 00:00:00 2001 From: Tim Waugh Date: Nov 04 2004 11:15:52 +0000 Subject: - Small improvements to egf-speedup patch. --- diff --git a/grep-2.5.1-egf-speedup.patch b/grep-2.5.1-egf-speedup.patch index 0578a8c..76d97a0 100644 --- a/grep-2.5.1-egf-speedup.patch +++ b/grep-2.5.1-egf-speedup.patch @@ -1,5 +1,5 @@ ---- grep-2.5.1/src/search.c.egf-speedup 2004-11-03 17:38:36.338557746 +0000 -+++ grep-2.5.1/src/search.c 2004-11-03 17:39:51.853925940 +0000 +--- grep-2.5.1/src/search.c.egf-speedup 2004-11-04 10:42:45.000000000 +0000 ++++ grep-2.5.1/src/search.c 2004-11-04 11:12:44.688292744 +0000 @@ -70,9 +70,6 @@ call the regexp matcher at all. */ static int kwset_exact_matches; @@ -58,7 +58,7 @@ static void Gcompile (char const *pattern, size_t size) { -@@ -350,18 +306,9 @@ +@@ -350,18 +306,8 @@ struct kwsmatch kwsm; size_t i, ret_val; #ifdef MBS_SUPPORT @@ -74,53 +74,38 @@ - if (kwset) - mb_properties = check_multibyte_string(buf, size); - } -+ size_t n; + mbstate_t mbs; + memset (&mbs, '\0', sizeof (mbstate_t)); #endif /* MBS_SUPPORT */ buflim = buf + size; -@@ -376,15 +323,50 @@ +@@ -376,15 +322,34 @@ size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); if (offset == (size_t) -1) goto failure; +#ifdef MBS_SUPPORT -+ n = offset; -+ while (n) ++ if (MB_CUR_MAX > 1) + { -+ size_t len = mbrlen (beg, n, &mbs); -+ if (len == (size_t) -1 || len == (size_t) -2 || len == 0) ++ size_t n = offset; ++ while (n) + { -+ /* Incomplete character. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg += n; -+ break; -+ } -+ -+ beg += len; -+ n -= len; -+ } -+ if (n) -+ continue; ++ size_t len = mbrlen (beg, n, &mbs); ++ if (len == (size_t) -1 || len == (size_t) -2 || len == 0) ++ { ++ /* Incomplete character. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg += n; ++ break; ++ } + -+ /* Narrow down to the line containing the candidate, and -+ run it through DFA. */ -+ end = beg; -+ while (end < buflim) -+ { -+ size_t len = mbrlen (end, buflim - end, &mbs); -+ if (len == (size_t) -1 || len == (size_t) -2 || len == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ continue; ++ beg += len; ++ n -= len; + } -+ if (len == 1 && *end == eol) -+ break; -+ -+ end += len; ++ if (n) ++ continue; + } -+ end++; -+#else ++ else ++#endif /* MBS_SUPPORT */ beg += offset; /* Narrow down to the line containing the candidate, and run it through DFA. */ @@ -130,57 +115,40 @@ - if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) - continue; -#endif -+#endif /* MBS_SUPPORT */ -+ /* Hmm, is this correct for multibyte? */ while (beg > buf && beg[-1] != eol) --beg; if (kwsm.index < kwset_exact_matches) -@@ -399,9 +381,44 @@ +@@ -399,6 +364,29 @@ if (offset == (size_t) -1) break; /* Narrow down to the line we've found. */ +#ifdef MBS_SUPPORT -+ n = offset; -+ while (n) ++ if (MB_CUR_MAX > 1) + { -+ size_t len = mbrlen (beg, n, &mbs); -+ if (len == (size_t) -1 || len == (size_t) -2 || len == 0) ++ size_t n = offset; ++ while (n) + { -+ /* Incomplete character. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg += n; -+ continue; -+ } ++ size_t len = mbrlen (beg, n, &mbs); ++ if (len == (size_t) -1 || len == (size_t) -2 || len == 0) ++ { ++ /* Incomplete character. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg += n; ++ break; ++ } + -+ beg += len; -+ n -= len; -+ } -+ end = beg; -+ while (end < buflim) -+ { -+ size_t len = mbrlen (end, buflim - end, &mbs); -+ if (len == (size_t) -1 || len == (size_t) -2 || len == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ continue; ++ beg += len; ++ n -= len; + } -+ -+ if (len == 1 && *end == eol) -+ break; -+ -+ end += len; ++ if (n) ++ continue; + } -+ end++; -+#else ++ else ++#endif /* MBS_SUPPORT */ beg += offset; end = memchr (beg, eol, buflim - beg); end++; -+#endif /* MBS_SUPPORT */ -+ /* Hmm, is this correct for multibyte? */ - while (beg > buf && beg[-1] != eol) - --beg; - } -@@ -469,15 +486,6 @@ +@@ -469,15 +457,6 @@ } /* for (beg = end ..) */ failure: @@ -196,7 +164,7 @@ return (size_t) -1; success_in_beg_and_end: -@@ -486,15 +494,6 @@ +@@ -486,15 +465,6 @@ /* FALLTHROUGH */ success_in_start_and_len: @@ -212,7 +180,7 @@ *match_size = len; return start; } -@@ -531,29 +530,37 @@ +@@ -531,17 +501,8 @@ struct kwsmatch kwsmatch; size_t ret_val; #ifdef MBS_SUPPORT @@ -232,65 +200,70 @@ #endif /* MBS_SUPPORT */ for (beg = buf; beg <= buf + size; ++beg) - { -+#ifdef MBS_SUPPORT -+ size_t n; -+#endif /* MBS_SUPPORT */ - size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); +@@ -550,8 +511,28 @@ if (offset == (size_t) -1) goto failure; #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) - continue; /* It is a part of multibyte character. */ --#endif /* MBS_SUPPORT */ -+ n = offset; -+ while (n) ++ if (MB_CUR_MAX > 1) + { -+ size_t len = mbrlen (beg, n, &mbs); -+ if (len == (size_t) -1 || len == (size_t) -2 || len == 0) ++ size_t n = offset; ++ while (n) + { -+ /* Incomplete character. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg += n; -+ continue; ++ size_t len = mbrlen (beg, n, &mbs); ++ if (len == (size_t) -1 || len == (size_t) -2 || len == 0) ++ { ++ /* Incomplete character. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg += n; ++ break; ++ } ++ ++ beg += len; ++ n -= len; + } + -+ beg += len; -+ n -= len; ++ if (n) ++ continue; + } -+#else ++ else + #endif /* MBS_SUPPORT */ beg += offset; -+#endif /* MBS_SUPPORT */ len = kwsmatch.size[0]; - if (exact && !match_words) - goto success_in_beg_and_len; -@@ -587,7 +594,25 @@ +@@ -587,6 +568,31 @@ if (offset == -1) { break; /* Try a different anchor. */ } +#ifdef MBS_SUPPORT -+ n = offset; -+ while (n) ++ if (MB_CUR_MAX > 1) + { -+ size_t len = mbrlen (beg, n, &mbs); -+ if (len == (size_t) -1 || len == (size_t) -2 || len == 0) ++ size_t n = offset; ++ while (n) + { -+ /* Incomplete character. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg += n; -+ continue; ++ size_t len = mbrlen (beg, n, &mbs); ++ if (len == (size_t) -1 || len == (size_t) -2 || ++ len == 0) ++ { ++ /* Incomplete character. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg += n; ++ break; ++ } ++ ++ beg += len; ++ n -= len; + } + -+ beg += len; -+ n -= len; ++ if (n) ++ break; /* Try a different anchor. */ + } -+#else - beg += offset; ++ else +#endif /* MBS_SUPPORT */ + beg += offset; len = kwsmatch.size[0]; } - } -@@ -597,20 +622,30 @@ +@@ -597,20 +603,30 @@ } failure: @@ -330,7 +303,7 @@ while (buf < beg && beg[-1] != eol) --beg; len = end - beg; -@@ -618,15 +653,6 @@ +@@ -618,15 +634,6 @@ success_in_beg_and_len: *match_size = len; diff --git a/grep.spec b/grep.spec index 1e3918a..54c0646 100644 --- a/grep.spec +++ b/grep.spec @@ -1,7 +1,7 @@ Summary: The GNU versions of grep pattern matching utilities. Name: grep Version: 2.5.1 -Release: 32 +Release: 33 License: GPL Group: Applications/Text Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.bz2 @@ -81,9 +81,12 @@ fi %{_mandir}/*/* %changelog +* Thu Nov 4 2004 Tim Waugh 2.5.1-33 +- Small improvements to egf-speedup patch. + * Wed Nov 3 2004 Tim Waugh 2.5.1-32 - Remove mb-caching hack. -- Better multibyte handling in EGexecute(). +- Better multibyte handling in EGexecute() and Fexecute(). - Don't need regex.c changes in grep-2.5-i18n.patch. * Wed Oct 13 2004 Tim Waugh 2.5.1-31