Karsten Hopp 31a074
To: vim_dev@googlegroups.com
Karsten Hopp 31a074
Subject: Patch 7.3.1015
Karsten Hopp 31a074
Fcc: outbox
Karsten Hopp 31a074
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp 31a074
Mime-Version: 1.0
Karsten Hopp 31a074
Content-Type: text/plain; charset=UTF-8
Karsten Hopp 31a074
Content-Transfer-Encoding: 8bit
Karsten Hopp 31a074
------------
Karsten Hopp 31a074
Karsten Hopp 31a074
Patch 7.3.1015
Karsten Hopp 31a074
Problem:    New regexp engine: Matching composing characters is wrong.
Karsten Hopp 31a074
Solution:   Fix matching composing characters.
Karsten Hopp 31a074
Files:	    src/regexp_nfa.c, src/testdir/test95.in, src/testdir/test95.ok
Karsten Hopp 31a074
Karsten Hopp 31a074
Karsten Hopp 31a074
*** ../vim-7.3.1014/src/regexp_nfa.c	2013-05-25 12:28:08.000000000 +0200
Karsten Hopp 31a074
--- src/regexp_nfa.c	2013-05-25 14:25:42.000000000 +0200
Karsten Hopp 31a074
***************
Karsten Hopp 31a074
*** 716,721 ****
Karsten Hopp 31a074
--- 716,722 ----
Karsten Hopp 31a074
  	     * the composing char is matched here. */
Karsten Hopp 31a074
  	    if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
Karsten Hopp 31a074
  	    {
Karsten Hopp 31a074
+ 		old_regparse = regparse;
Karsten Hopp 31a074
  		c = getchr();
Karsten Hopp 31a074
  		goto nfa_do_multibyte;
Karsten Hopp 31a074
  	    }
Karsten Hopp 31a074
***************
Karsten Hopp 31a074
*** 1217,1225 ****
Karsten Hopp 31a074
  
Karsten Hopp 31a074
  nfa_do_multibyte:
Karsten Hopp 31a074
  		/* Length of current char with composing chars. */
Karsten Hopp 31a074
! 		if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse)))
Karsten Hopp 31a074
  		{
Karsten Hopp 31a074
! 		    /* A base character plus composing characters.
Karsten Hopp 31a074
  		     * This requires creating a separate atom as if enclosing
Karsten Hopp 31a074
  		     * the characters in (), where NFA_COMPOSING is the ( and
Karsten Hopp 31a074
  		     * NFA_END_COMPOSING is the ). Note that right now we are
Karsten Hopp 31a074
--- 1218,1228 ----
Karsten Hopp 31a074
  
Karsten Hopp 31a074
  nfa_do_multibyte:
Karsten Hopp 31a074
  		/* Length of current char with composing chars. */
Karsten Hopp 31a074
! 		if (enc_utf8 && (clen != (plen = (*mb_ptr2len)(old_regparse))
Karsten Hopp 31a074
! 			    || utf_iscomposing(c)))
Karsten Hopp 31a074
  		{
Karsten Hopp 31a074
! 		    /* A base character plus composing characters, or just one
Karsten Hopp 31a074
! 		     * or more composing characters.
Karsten Hopp 31a074
  		     * This requires creating a separate atom as if enclosing
Karsten Hopp 31a074
  		     * the characters in (), where NFA_COMPOSING is the ( and
Karsten Hopp 31a074
  		     * NFA_END_COMPOSING is the ). Note that right now we are
Karsten Hopp 31a074
***************
Karsten Hopp 31a074
*** 1400,1406 ****
Karsten Hopp 31a074
  	    /* Save pos after the repeated atom and the \{} */
Karsten Hopp 31a074
  	    new_regparse = regparse;
Karsten Hopp 31a074
  
Karsten Hopp 31a074
- 	    new_regparse = regparse;
Karsten Hopp 31a074
  	    quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY);
Karsten Hopp 31a074
  	    for (i = 0; i < maxval; i++)
Karsten Hopp 31a074
  	    {
Karsten Hopp 31a074
--- 1403,1408 ----
Karsten Hopp 31a074
***************
Karsten Hopp 31a074
*** 3218,3228 ****
Karsten Hopp 31a074
  		result = OK;
Karsten Hopp 31a074
  		sta = t->state->out;
Karsten Hopp 31a074
  		len = 0;
Karsten Hopp 31a074
  		if (ireg_icombine)
Karsten Hopp 31a074
  		{
Karsten Hopp 31a074
! 		    /* If \Z was present, then ignore composing characters. */
Karsten Hopp 31a074
  		    /* TODO: How about negated? */
Karsten Hopp 31a074
! 		    if (sta->c != c)
Karsten Hopp 31a074
  			result = FAIL;
Karsten Hopp 31a074
  		    len = n;
Karsten Hopp 31a074
  		    while (sta->c != NFA_END_COMPOSING)
Karsten Hopp 31a074
--- 3220,3238 ----
Karsten Hopp 31a074
  		result = OK;
Karsten Hopp 31a074
  		sta = t->state->out;
Karsten Hopp 31a074
  		len = 0;
Karsten Hopp 31a074
+ 		if (utf_iscomposing(sta->c))
Karsten Hopp 31a074
+ 		{
Karsten Hopp 31a074
+ 		    /* Only match composing character(s), ignore base
Karsten Hopp 31a074
+ 		     * character.  Used for ".{composing}" and "{composing}"
Karsten Hopp 31a074
+ 		     * (no preceding character). */
Karsten Hopp 31a074
+ 		    len += mb_char2len(c);
Karsten Hopp 31a074
+ 		}
Karsten Hopp 31a074
  		if (ireg_icombine)
Karsten Hopp 31a074
  		{
Karsten Hopp 31a074
! 		    /* If \Z was present, then ignore composing characters.
Karsten Hopp 31a074
! 		     * When ignoring the base character this always matches. */
Karsten Hopp 31a074
  		    /* TODO: How about negated? */
Karsten Hopp 31a074
! 		    if (len == 0 && sta->c != c)
Karsten Hopp 31a074
  			result = FAIL;
Karsten Hopp 31a074
  		    len = n;
Karsten Hopp 31a074
  		    while (sta->c != NFA_END_COMPOSING)
Karsten Hopp 31a074
*** ../vim-7.3.1014/src/testdir/test95.in	2013-05-24 23:10:45.000000000 +0200
Karsten Hopp 31a074
--- src/testdir/test95.in	2013-05-25 14:36:50.000000000 +0200
Karsten Hopp 31a074
***************
Karsten Hopp 31a074
*** 38,43 ****
Karsten Hopp 31a074
--- 38,52 ----
Karsten Hopp 31a074
  :"""" Test composing character matching
Karsten Hopp 31a074
  :call add(tl, ['.ม', 'xม่x yมy', 'yม'])
Karsten Hopp 31a074
  :call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
Karsten Hopp 31a074
+ :call add(tl, ["\u05b9", " x\u05b9 ", "x\u05b9"])
Karsten Hopp 31a074
+ :call add(tl, [".\u05b9", " x\u05b9 ", "x\u05b9"])
Karsten Hopp 31a074
+ :call add(tl, ["\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 31a074
+ :call add(tl, [".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 31a074
+ :call add(tl, ["\u05bb\u05b9", " x\u05b9\u05bb "])
Karsten Hopp 31a074
+ :call add(tl, [".\u05bb\u05b9", " x\u05b9\u05bb "])
Karsten Hopp 31a074
+ :call add(tl, ["\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
Karsten Hopp 31a074
+ :call add(tl, [".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
Karsten Hopp 31a074
+ 
Karsten Hopp 31a074
  
Karsten Hopp 31a074
  :"""" Test \Z
Karsten Hopp 31a074
  :call add(tl, ['ú\Z', 'x'])
Karsten Hopp 31a074
***************
Karsten Hopp 31a074
*** 50,55 ****
Karsten Hopp 31a074
--- 59,66 ----
Karsten Hopp 31a074
  :call add(tl, ["ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
Karsten Hopp 31a074
  :call add(tl, ["ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
Karsten Hopp 31a074
  :call add(tl, ["ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
Karsten Hopp 31a074
+ :call add(tl, ["\u05b9\\+\\Z", "xyz", "xyz"])
Karsten Hopp 31a074
+ :call add(tl, ["\\Z\u05b9\\+", "xyz", "xyz"])
Karsten Hopp 31a074
  
Karsten Hopp 31a074
  :"""" Combining different tests and features
Karsten Hopp 31a074
  :call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd'])
Karsten Hopp 31a074
*** ../vim-7.3.1014/src/testdir/test95.ok	2013-05-24 23:10:45.000000000 +0200
Karsten Hopp 31a074
--- src/testdir/test95.ok	2013-05-25 14:36:54.000000000 +0200
Karsten Hopp 31a074
***************
Karsten Hopp 31a074
*** 11,16 ****
Karsten Hopp 31a074
--- 11,24 ----
Karsten Hopp 31a074
  OK - \%#=1\f\+
Karsten Hopp 31a074
  OK - .ม
Karsten Hopp 31a074
  OK - .ม่
Karsten Hopp 31a074
+ OK - ֹ
Karsten Hopp 31a074
+ OK - .ֹ
Karsten Hopp 31a074
+ OK - ֹֻ
Karsten Hopp 31a074
+ OK - .ֹֻ
Karsten Hopp 31a074
+ OK - ֹֻ
Karsten Hopp 31a074
+ OK - .ֹֻ
Karsten Hopp 31a074
+ OK - ֹ
Karsten Hopp 31a074
+ OK - .ֹ
Karsten Hopp 31a074
  OK - ú\Z
Karsten Hopp 31a074
  OK - יהוה\Z
Karsten Hopp 31a074
  OK - יְהוָה\Z
Karsten Hopp 31a074
***************
Karsten Hopp 31a074
*** 21,24 ****
Karsten Hopp 31a074
--- 29,34 ----
Karsten Hopp 31a074
  OK - ק‍ֹx\Z
Karsten Hopp 31a074
  OK - ק‍x\Z
Karsten Hopp 31a074
  OK - ק‍x\Z
Karsten Hopp 31a074
+ OK - ֹ\+\Z
Karsten Hopp 31a074
+ OK - \Zֹ\+
Karsten Hopp 31a074
  OK - [^[=a=]]\+
Karsten Hopp 31a074
*** ../vim-7.3.1014/src/version.c	2013-05-25 12:28:08.000000000 +0200
Karsten Hopp 31a074
--- src/version.c	2013-05-25 14:41:05.000000000 +0200
Karsten Hopp 31a074
***************
Karsten Hopp 31a074
*** 730,731 ****
Karsten Hopp 31a074
--- 730,733 ----
Karsten Hopp 31a074
  {   /* Add new patch number below this line */
Karsten Hopp 31a074
+ /**/
Karsten Hopp 31a074
+     1015,
Karsten Hopp 31a074
  /**/
Karsten Hopp 31a074
Karsten Hopp 31a074
-- 
Karsten Hopp 31a074
If your company is not involved in something called "ISO 9000" you probably
Karsten Hopp 31a074
have no idea what it is.  If your company _is_ involved in ISO 9000 then you
Karsten Hopp 31a074
definitely have no idea what it is.
Karsten Hopp 31a074
				(Scott Adams - The Dilbert principle)
Karsten Hopp 31a074
Karsten Hopp 31a074
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp 31a074
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp 31a074
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp 31a074
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///