|
Tim Waugh |
a8485b |
--- grep-2.5.1a/src/search.c.w 2006-02-20 14:27:27.000000000 +0000
|
|
Tim Waugh |
a8485b |
+++ grep-2.5.1a/src/search.c 2006-02-20 14:32:07.000000000 +0000
|
|
Tim Waugh |
a8485b |
@@ -507,10 +507,114 @@
|
|
Tim Waugh |
f416c4 |
if (match_words)
|
|
Tim Waugh |
f416c4 |
while (start >= 0)
|
|
Tim Waugh |
f416c4 |
{
|
|
Tim Waugh |
f416c4 |
- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
|
|
Tim Waugh |
f416c4 |
- && (len == end - beg - 1
|
|
Tim Waugh |
f416c4 |
- || !WCHAR ((unsigned char) beg[start + len])))
|
|
Tim Waugh |
f416c4 |
- goto success_in_beg_and_end;
|
|
Tim Waugh |
f416c4 |
+ int lword_match = 0;
|
|
Tim Waugh |
f416c4 |
+ if (start == 0)
|
|
Tim Waugh |
f416c4 |
+ lword_match = 1;
|
|
Tim Waugh |
f416c4 |
+ else
|
|
Tim Waugh |
f416c4 |
+ {
|
|
Tim Waugh |
f416c4 |
+ assert (start > 0);
|
|
Tim Waugh |
f416c4 |
+#ifdef MBS_SUPPORT
|
|
Tim Waugh |
f416c4 |
+ if (mb_cur_max > 1)
|
|
Tim Waugh |
f416c4 |
+ {
|
|
Tim Waugh |
f416c4 |
+ const char *s;
|
|
Tim Waugh |
a8485b |
+ size_t mr;
|
|
Tim Waugh |
f416c4 |
+ wchar_t pwc;
|
|
Tim Waugh |
f416c4 |
+
|
|
Tim Waugh |
a8485b |
+ /* Locate the start of the multibyte character
|
|
Tim Waugh |
a8485b |
+ before the match position (== beg + start). */
|
|
Tim Waugh |
f416c4 |
+ if (using_utf8)
|
|
Tim Waugh |
f416c4 |
+ {
|
|
Tim Waugh |
a8485b |
+ /* UTF-8 is a special case: scan backwards
|
|
Tim Waugh |
a8485b |
+ until we find a 7-bit character or a
|
|
Tim Waugh |
a8485b |
+ lead byte. */
|
|
Tim Waugh |
f416c4 |
+ s = beg + start - 1;
|
|
Tim Waugh |
f416c4 |
+ while (s > buf
|
|
Tim Waugh |
f416c4 |
+ && (unsigned char) *s >= 0x80
|
|
Tim Waugh |
f416c4 |
+ && (unsigned char) *s <= 0xbf)
|
|
Tim Waugh |
f416c4 |
+ --s;
|
|
Tim Waugh |
f416c4 |
+ }
|
|
Tim Waugh |
f416c4 |
+ else
|
|
Tim Waugh |
a8485b |
+ {
|
|
Tim Waugh |
a8485b |
+ /* Scan forwards to find the start of the
|
|
Tim Waugh |
a8485b |
+ last complete character before the
|
|
Tim Waugh |
a8485b |
+ match position. */
|
|
Tim Waugh |
a8485b |
+ size_t bytes_left = start - 1;
|
|
Tim Waugh |
a8485b |
+ s = beg;
|
|
Tim Waugh |
a8485b |
+ while (bytes_left > 0)
|
|
Tim Waugh |
a8485b |
+ {
|
|
Tim Waugh |
a8485b |
+ mr = mbrlen (s, bytes_left, &mbs);
|
|
Tim Waugh |
a8485b |
+ if (mr == (size_t) -1 || mr == 0)
|
|
Tim Waugh |
a8485b |
+ {
|
|
Tim Waugh |
a8485b |
+ memset (&mbs, '\0', sizeof (mbs));
|
|
Tim Waugh |
a8485b |
+ s++;
|
|
Tim Waugh |
a8485b |
+ bytes_left--;
|
|
Tim Waugh |
a8485b |
+ continue;
|
|
Tim Waugh |
a8485b |
+ }
|
|
Tim Waugh |
a8485b |
+ if (mr == (size_t) -2)
|
|
Tim Waugh |
a8485b |
+ {
|
|
Tim Waugh |
a8485b |
+ memset (&mbs, '\0', sizeof (mbs));
|
|
Tim Waugh |
a8485b |
+ break;
|
|
Tim Waugh |
a8485b |
+ }
|
|
Tim Waugh |
a8485b |
+ s += mr;
|
|
Tim Waugh |
a8485b |
+ bytes_left -= mr;
|
|
Tim Waugh |
a8485b |
+ }
|
|
Tim Waugh |
a8485b |
+ }
|
|
Tim Waugh |
a8485b |
+ mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
|
|
Tim Waugh |
a8485b |
+ if (mr == (size_t) -2 || mr == (size_t) -1 ||
|
|
Tim Waugh |
a8485b |
+ mr == 0)
|
|
Tim Waugh |
f416c4 |
+ {
|
|
Tim Waugh |
f416c4 |
+ memset (&mbs, '\0', sizeof (mbstate_t));
|
|
Tim Waugh |
f416c4 |
+ lword_match = 1;
|
|
Tim Waugh |
f416c4 |
+ }
|
|
Tim Waugh |
f416c4 |
+ else if (!(iswalnum (pwc) || pwc == L'_')
|
|
Tim Waugh |
a8485b |
+ && mr == beg + start - s)
|
|
Tim Waugh |
f416c4 |
+ lword_match = 1;
|
|
Tim Waugh |
f416c4 |
+ }
|
|
Tim Waugh |
f416c4 |
+ else
|
|
Tim Waugh |
f416c4 |
+#endif /* MBS_SUPPORT */
|
|
Tim Waugh |
f416c4 |
+ if (!WCHAR ((unsigned char) beg[start - 1]))
|
|
Tim Waugh |
f416c4 |
+ lword_match = 1;
|
|
Tim Waugh |
f416c4 |
+ }
|
|
Tim Waugh |
f416c4 |
+
|
|
Tim Waugh |
f416c4 |
+ if (lword_match)
|
|
Tim Waugh |
f416c4 |
+ {
|
|
Tim Waugh |
f416c4 |
+ int rword_match = 0;
|
|
Tim Waugh |
f416c4 |
+ if (start + len == end - beg - 1)
|
|
Tim Waugh |
f416c4 |
+ rword_match = 1;
|
|
Tim Waugh |
f416c4 |
+ else
|
|
Tim Waugh |
f416c4 |
+ {
|
|
Tim Waugh |
f416c4 |
+#ifdef MBS_SUPPORT
|
|
Tim Waugh |
f416c4 |
+ if (mb_cur_max > 1)
|
|
Tim Waugh |
f416c4 |
+ {
|
|
Tim Waugh |
f416c4 |
+ wchar_t nwc;
|
|
Tim Waugh |
f416c4 |
+ int mr;
|
|
Tim Waugh |
f416c4 |
+
|
|
Tim Waugh |
f416c4 |
+ mr = mbtowc (&nwc, beg + start + len,
|
|
Tim Waugh |
f416c4 |
+ end - beg - start - len - 1);
|
|
Tim Waugh |
f416c4 |
+ if (mr <= 0)
|
|
Tim Waugh |
f416c4 |
+ {
|
|
Tim Waugh |
f416c4 |
+ memset (&mbs, '\0', sizeof (mbstate_t));
|
|
Tim Waugh |
f416c4 |
+ rword_match = 1;
|
|
Tim Waugh |
f416c4 |
+ }
|
|
Tim Waugh |
f416c4 |
+ else if (!iswalnum (nwc) && nwc != L'_')
|
|
Tim Waugh |
f416c4 |
+ rword_match = 1;
|
|
Tim Waugh |
f416c4 |
+ }
|
|
Tim Waugh |
f416c4 |
+ else
|
|
Tim Waugh |
f416c4 |
+#endif /* MBS_SUPPORT */
|
|
Tim Waugh |
f416c4 |
+ if (!WCHAR ((unsigned char) beg[start + len]))
|
|
Tim Waugh |
f416c4 |
+ rword_match = 1;
|
|
Tim Waugh |
f416c4 |
+ }
|
|
Tim Waugh |
f416c4 |
+
|
|
Tim Waugh |
f416c4 |
+ if (rword_match)
|
|
Tim Waugh |
f416c4 |
+ {
|
|
Tim Waugh |
f416c4 |
+ if (!exact)
|
|
Tim Waugh |
f416c4 |
+ /* Returns the whole line. */
|
|
Tim Waugh |
f416c4 |
+ goto success_in_beg_and_end;
|
|
Tim Waugh |
f416c4 |
+ else
|
|
Tim Waugh |
f416c4 |
+ /* Returns just this word match. */
|
|
Tim Waugh |
f416c4 |
+ goto success_in_start_and_len;
|
|
Tim Waugh |
f416c4 |
+ }
|
|
Tim Waugh |
f416c4 |
+ }
|
|
Tim Waugh |
f416c4 |
if (len > 0)
|
|
Tim Waugh |
f416c4 |
{
|
|
Tim Waugh |
f416c4 |
/* Try a shorter length anchored at the same place. */
|