Tim Waugh f416c4
--- grep-2.5.1a/src/search.c.w	2005-01-07 15:04:18.766280754 +0000
Tim Waugh f416c4
+++ grep-2.5.1a/src/search.c	2005-01-07 16:59:19.287275172 +0000
Tim Waugh f416c4
@@ -330,6 +330,7 @@
Tim Waugh f416c4
   static int use_dfa;
Tim Waugh f416c4
   static int use_dfa_checked = 0;
Tim Waugh f416c4
 #ifdef MBS_SUPPORT
Tim Waugh f416c4
+  const char *last_char = NULL;
Tim Waugh f416c4
   int mb_cur_max = MB_CUR_MAX;
Tim Waugh f416c4
   mbstate_t mbs;
Tim Waugh f416c4
   memset (&mbs, '\0', sizeof (mbstate_t));
Tim Waugh f416c4
@@ -385,6 +386,8 @@
Tim Waugh f416c4
 		  while (bytes_left)
Tim Waugh f416c4
 		    {
Tim Waugh f416c4
 		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
Tim Waugh f416c4
+
Tim Waugh f416c4
+		      last_char = beg;
Tim Waugh f416c4
 		      if (mlen == (size_t) -1 || mlen == 0)
Tim Waugh f416c4
 			{
Tim Waugh f416c4
 			  /* Incomplete character: treat as single-byte. */
Tim Waugh f416c4
@@ -445,6 +448,8 @@
Tim Waugh f416c4
 		  while (bytes_left)
Tim Waugh f416c4
 		    {
Tim Waugh f416c4
 		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
Tim Waugh f416c4
+
Tim Waugh f416c4
+		      last_char = beg;
Tim Waugh f416c4
 		      if (mlen == (size_t) -1 || mlen == 0)
Tim Waugh f416c4
 			{
Tim Waugh f416c4
 			  /* Incomplete character: treat as single-byte. */
Tim Waugh f416c4
@@ -507,10 +512,84 @@
Tim Waugh f416c4
 	      if (match_words)
Tim Waugh f416c4
 		while (start >= 0)
Tim Waugh f416c4
 		  {
Tim Waugh f416c4
-		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
Tim Waugh f416c4
-			&& (len == end - beg - 1
Tim Waugh f416c4
-			    || !WCHAR ((unsigned char) beg[start + len])))
Tim Waugh f416c4
-		      goto success_in_beg_and_end;
Tim Waugh f416c4
+		    int lword_match = 0;
Tim Waugh f416c4
+		    if (start == 0)
Tim Waugh f416c4
+		      lword_match = 1;
Tim Waugh f416c4
+		    else
Tim Waugh f416c4
+		      {
Tim Waugh f416c4
+			assert (start > 0);
Tim Waugh f416c4
+#ifdef MBS_SUPPORT
Tim Waugh f416c4
+			if (mb_cur_max > 1)
Tim Waugh f416c4
+			  {
Tim Waugh f416c4
+			    const char *s;
Tim Waugh f416c4
+			    int mr;
Tim Waugh f416c4
+			    wchar_t pwc;
Tim Waugh f416c4
+
Tim Waugh f416c4
+			    if (using_utf8)
Tim Waugh f416c4
+			      {
Tim Waugh f416c4
+				s = beg + start - 1;
Tim Waugh f416c4
+				while (s > buf
Tim Waugh f416c4
+				       && (unsigned char) *s >= 0x80
Tim Waugh f416c4
+				       && (unsigned char) *s <= 0xbf)
Tim Waugh f416c4
+				  --s;
Tim Waugh f416c4
+			      }
Tim Waugh f416c4
+			    else
Tim Waugh f416c4
+			      s = last_char;
Tim Waugh f416c4
+			    mr = mbtowc (&pwc, s, beg + start - s);
Tim Waugh f416c4
+			    if (mr <= 0)
Tim Waugh f416c4
+			      {
Tim Waugh f416c4
+				memset (&mbs, '\0', sizeof (mbstate_t));
Tim Waugh f416c4
+				lword_match = 1;
Tim Waugh f416c4
+			      }
Tim Waugh f416c4
+			    else if (!(iswalnum (pwc) || pwc == L'_')
Tim Waugh f416c4
+				     && mr == (int) (beg + start - s))
Tim Waugh f416c4
+			      lword_match = 1;
Tim Waugh f416c4
+			  }
Tim Waugh f416c4
+			else
Tim Waugh f416c4
+#endif /* MBS_SUPPORT */
Tim Waugh f416c4
+			if (!WCHAR ((unsigned char) beg[start - 1]))
Tim Waugh f416c4
+			  lword_match = 1;
Tim Waugh f416c4
+		      }
Tim Waugh f416c4
+
Tim Waugh f416c4
+		    if (lword_match)
Tim Waugh f416c4
+		      {
Tim Waugh f416c4
+			int rword_match = 0;
Tim Waugh f416c4
+			if (start + len == end - beg - 1)
Tim Waugh f416c4
+			  rword_match = 1;
Tim Waugh f416c4
+			else
Tim Waugh f416c4
+			  {
Tim Waugh f416c4
+#ifdef MBS_SUPPORT
Tim Waugh f416c4
+			    if (mb_cur_max > 1)
Tim Waugh f416c4
+			      {
Tim Waugh f416c4
+				wchar_t nwc;
Tim Waugh f416c4
+				int mr;
Tim Waugh f416c4
+
Tim Waugh f416c4
+				mr = mbtowc (&nwc, beg + start + len,
Tim Waugh f416c4
+					     end - beg - start - len - 1);
Tim Waugh f416c4
+				if (mr <= 0)
Tim Waugh f416c4
+				  {
Tim Waugh f416c4
+				    memset (&mbs, '\0', sizeof (mbstate_t));
Tim Waugh f416c4
+				    rword_match = 1;
Tim Waugh f416c4
+				  }
Tim Waugh f416c4
+				else if (!iswalnum (nwc) && nwc != L'_')
Tim Waugh f416c4
+				  rword_match = 1;
Tim Waugh f416c4
+			      }
Tim Waugh f416c4
+			    else
Tim Waugh f416c4
+#endif /* MBS_SUPPORT */
Tim Waugh f416c4
+			    if (!WCHAR ((unsigned char) beg[start + len]))
Tim Waugh f416c4
+			      rword_match = 1;
Tim Waugh f416c4
+			  }
Tim Waugh f416c4
+
Tim Waugh f416c4
+			if (rword_match)
Tim Waugh f416c4
+			  {
Tim Waugh f416c4
+			    if (!exact)
Tim Waugh f416c4
+			      /* Returns the whole line. */
Tim Waugh f416c4
+			      goto success_in_beg_and_end;
Tim Waugh f416c4
+			    else
Tim Waugh f416c4
+			      /* Returns just this word match. */
Tim Waugh f416c4
+			      goto success_in_start_and_len;
Tim Waugh f416c4
+			  }
Tim Waugh f416c4
+		      }
Tim Waugh f416c4
 		    if (len > 0)
Tim Waugh f416c4
 		      {
Tim Waugh f416c4
 			/* Try a shorter length anchored at the same place. */