3ef2ca
To: vim_dev@googlegroups.com
3ef2ca
Subject: Patch 7.4.292
3ef2ca
Fcc: outbox
3ef2ca
From: Bram Moolenaar <Bram@moolenaar.net>
3ef2ca
Mime-Version: 1.0
3ef2ca
Content-Type: text/plain; charset=UTF-8
3ef2ca
Content-Transfer-Encoding: 8bit
3ef2ca
------------
3ef2ca
3ef2ca
Patch 7.4.292
3ef2ca
Problem:    Searching for "a" does not match accented "a" with new regexp
3ef2ca
	    engine, but does match with old engine. (David Bürgin)
3ef2ca
	    "ca" does not match "ca" with accented "a" with either engine.
3ef2ca
Solution:   Change the old engine: check for a following composing character
3ef2ca
	    also for single-byte patterns.
3ef2ca
Files:	    src/regexp.c, src/testdir/test95.in, src/testdir/test95.ok
3ef2ca
3ef2ca
3ef2ca
*** ../vim-7.4.291/src/regexp.c	2014-05-13 16:46:25.693696760 +0200
3ef2ca
--- src/regexp.c	2014-05-13 17:45:50.977727970 +0200
3ef2ca
***************
3ef2ca
*** 4692,4722 ****
3ef2ca
  		    /* match empty string always works; happens when "~" is
3ef2ca
  		     * empty. */
3ef2ca
  		}
3ef2ca
! 		else if (opnd[1] == NUL
3ef2ca
  #ifdef FEAT_MBYTE
3ef2ca
  			    && !(enc_utf8 && ireg_ic)
3ef2ca
  #endif
3ef2ca
  			)
3ef2ca
! 		    ++reginput;		/* matched a single char */
3ef2ca
! 		else
3ef2ca
! 		{
3ef2ca
! 		    len = (int)STRLEN(opnd);
3ef2ca
! 		    /* Need to match first byte again for multi-byte. */
3ef2ca
! 		    if (cstrncmp(opnd, reginput, &len) != 0)
3ef2ca
! 			status = RA_NOMATCH;
3ef2ca
  #ifdef FEAT_MBYTE
3ef2ca
  		    /* Check for following composing character. */
3ef2ca
! 		    else if (enc_utf8
3ef2ca
! 			       && UTF_COMPOSINGLIKE(reginput, reginput + len))
3ef2ca
  		    {
3ef2ca
  			/* raaron: This code makes a composing character get
3ef2ca
  			 * ignored, which is the correct behavior (sometimes)
3ef2ca
  			 * for voweled Hebrew texts. */
3ef2ca
! 			if (!ireg_icombine)
3ef2ca
! 			    status = RA_NOMATCH;
3ef2ca
  		    }
3ef2ca
  #endif
3ef2ca
! 		    else
3ef2ca
  			reginput += len;
3ef2ca
  		}
3ef2ca
  	    }
3ef2ca
--- 4692,4728 ----
3ef2ca
  		    /* match empty string always works; happens when "~" is
3ef2ca
  		     * empty. */
3ef2ca
  		}
3ef2ca
! 		else
3ef2ca
! 		{
3ef2ca
! 		    if (opnd[1] == NUL
3ef2ca
  #ifdef FEAT_MBYTE
3ef2ca
  			    && !(enc_utf8 && ireg_ic)
3ef2ca
  #endif
3ef2ca
  			)
3ef2ca
! 		    {
3ef2ca
! 			len = 1;	/* matched a single byte above */
3ef2ca
! 		    }
3ef2ca
! 		    else
3ef2ca
! 		    {
3ef2ca
! 			/* Need to match first byte again for multi-byte. */
3ef2ca
! 			len = (int)STRLEN(opnd);
3ef2ca
! 			if (cstrncmp(opnd, reginput, &len) != 0)
3ef2ca
! 			    status = RA_NOMATCH;
3ef2ca
! 		    }
3ef2ca
  #ifdef FEAT_MBYTE
3ef2ca
  		    /* Check for following composing character. */
3ef2ca
! 		    if (status != RA_NOMATCH
3ef2ca
! 			    && enc_utf8
3ef2ca
! 			    && UTF_COMPOSINGLIKE(reginput, reginput + len)
3ef2ca
! 			    && !ireg_icombine)
3ef2ca
  		    {
3ef2ca
  			/* raaron: This code makes a composing character get
3ef2ca
  			 * ignored, which is the correct behavior (sometimes)
3ef2ca
  			 * for voweled Hebrew texts. */
3ef2ca
! 			status = RA_NOMATCH;
3ef2ca
  		    }
3ef2ca
  #endif
3ef2ca
! 		    if (status != RA_NOMATCH)
3ef2ca
  			reginput += len;
3ef2ca
  		}
3ef2ca
  	    }
3ef2ca
*** ../vim-7.4.291/src/testdir/test95.in	2013-07-21 16:53:52.000000000 +0200
3ef2ca
--- src/testdir/test95.in	2014-05-13 17:49:00.201729626 +0200
3ef2ca
***************
3ef2ca
*** 50,55 ****
3ef2ca
--- 50,57 ----
3ef2ca
  :call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
3ef2ca
  :call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
3ef2ca
  :call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
3ef2ca
+ :call add(tl, [2, "a", "ca\u0300t"])
3ef2ca
+ :call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
3ef2ca
  
3ef2ca
  
3ef2ca
  :"""" Test \Z
3ef2ca
*** ../vim-7.4.291/src/testdir/test95.ok	2013-07-21 17:01:22.000000000 +0200
3ef2ca
--- src/testdir/test95.ok	2014-05-13 17:49:46.709730033 +0200
3ef2ca
***************
3ef2ca
*** 67,72 ****
3ef2ca
--- 67,78 ----
3ef2ca
  OK 0 - .ֹֻ
3ef2ca
  OK 1 - .ֹֻ
3ef2ca
  OK 2 - .ֹֻ
3ef2ca
+ OK 0 - a
3ef2ca
+ OK 1 - a
3ef2ca
+ OK 2 - a
3ef2ca
+ OK 0 - à
3ef2ca
+ OK 1 - à
3ef2ca
+ OK 2 - à
3ef2ca
  OK 0 - ú\Z
3ef2ca
  OK 1 - ú\Z
3ef2ca
  OK 2 - ú\Z
3ef2ca
*** ../vim-7.4.291/src/version.c	2014-05-13 16:46:25.693696760 +0200
3ef2ca
--- src/version.c	2014-05-13 18:00:22.149735596 +0200
3ef2ca
***************
3ef2ca
*** 736,737 ****
3ef2ca
--- 736,739 ----
3ef2ca
  {   /* Add new patch number below this line */
3ef2ca
+ /**/
3ef2ca
+     292,
3ef2ca
  /**/
3ef2ca
3ef2ca
-- 
3ef2ca
hundred-and-one symptoms of being an internet addict:
3ef2ca
154. You fondle your mouse.
3ef2ca
3ef2ca
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
3ef2ca
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
3ef2ca
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
3ef2ca
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///