Karsten Hopp 1e6789
To: vim_dev@googlegroups.com
Karsten Hopp 1e6789
Subject: Patch 7.3.1021
Karsten Hopp 1e6789
Fcc: outbox
Karsten Hopp 1e6789
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp 1e6789
Mime-Version: 1.0
Karsten Hopp 1e6789
Content-Type: text/plain; charset=UTF-8
Karsten Hopp 1e6789
Content-Transfer-Encoding: 8bit
Karsten Hopp 1e6789
------------
Karsten Hopp 1e6789
Karsten Hopp 1e6789
Patch 7.3.1021
Karsten Hopp 1e6789
Problem:    New regexp engine does not ignore order of composing chars.
Karsten Hopp 1e6789
Solution:   Ignore composing chars order.
Karsten Hopp 1e6789
Files:	    src/regexp_nfa.c, src/testdir/test95.in, src/testdir/test95.ok
Karsten Hopp 1e6789
Karsten Hopp 1e6789
Karsten Hopp 1e6789
*** ../vim-7.3.1020/src/regexp_nfa.c	2013-05-25 22:04:19.000000000 +0200
Karsten Hopp 1e6789
--- src/regexp_nfa.c	2013-05-26 14:27:11.000000000 +0200
Karsten Hopp 1e6789
***************
Karsten Hopp 1e6789
*** 3275,3282 ****
Karsten Hopp 1e6789
  		int	    len = 0;
Karsten Hopp 1e6789
  		nfa_state_T *end;
Karsten Hopp 1e6789
  		nfa_state_T *sta;
Karsten Hopp 1e6789
  
Karsten Hopp 1e6789
- 		result = OK;
Karsten Hopp 1e6789
  		sta = t->state->out;
Karsten Hopp 1e6789
  		len = 0;
Karsten Hopp 1e6789
  		if (utf_iscomposing(sta->c))
Karsten Hopp 1e6789
--- 3275,3284 ----
Karsten Hopp 1e6789
  		int	    len = 0;
Karsten Hopp 1e6789
  		nfa_state_T *end;
Karsten Hopp 1e6789
  		nfa_state_T *sta;
Karsten Hopp 1e6789
+ 		int	    cchars[MAX_MCO];
Karsten Hopp 1e6789
+ 		int	    ccount = 0;
Karsten Hopp 1e6789
+ 		int	    j;
Karsten Hopp 1e6789
  
Karsten Hopp 1e6789
  		sta = t->state->out;
Karsten Hopp 1e6789
  		len = 0;
Karsten Hopp 1e6789
  		if (utf_iscomposing(sta->c))
Karsten Hopp 1e6789
***************
Karsten Hopp 1e6789
*** 3293,3316 ****
Karsten Hopp 1e6789
  		    /* TODO: How about negated? */
Karsten Hopp 1e6789
  		    if (len == 0 && sta->c != c)
Karsten Hopp 1e6789
  			result = FAIL;
Karsten Hopp 1e6789
! 		    len = n;
Karsten Hopp 1e6789
  		    while (sta->c != NFA_END_COMPOSING)
Karsten Hopp 1e6789
  			sta = sta->out;
Karsten Hopp 1e6789
  		}
Karsten Hopp 1e6789
! 		else
Karsten Hopp 1e6789
! 		    while (sta->c != NFA_END_COMPOSING && len < n)
Karsten Hopp 1e6789
  		    {
Karsten Hopp 1e6789
- 			if (len > 0)
Karsten Hopp 1e6789
- 			    mc = mb_ptr2char(reginput + len);
Karsten Hopp 1e6789
- 			if (mc != sta->c)
Karsten Hopp 1e6789
- 			    break;
Karsten Hopp 1e6789
  			len += mb_char2len(mc);
Karsten Hopp 1e6789
  			sta = sta->out;
Karsten Hopp 1e6789
  		    }
Karsten Hopp 1e6789
  
Karsten Hopp 1e6789
! 		/* if input char length doesn't match regexp char length */
Karsten Hopp 1e6789
! 		if (len < n || sta->c != NFA_END_COMPOSING)
Karsten Hopp 1e6789
  		    result = FAIL;
Karsten Hopp 1e6789
  		end = t->state->out1;	    /* NFA_END_COMPOSING */
Karsten Hopp 1e6789
  		ADD_POS_NEG_STATE(end);
Karsten Hopp 1e6789
  		break;
Karsten Hopp 1e6789
--- 3295,3346 ----
Karsten Hopp 1e6789
  		    /* TODO: How about negated? */
Karsten Hopp 1e6789
  		    if (len == 0 && sta->c != c)
Karsten Hopp 1e6789
  			result = FAIL;
Karsten Hopp 1e6789
! 		    else
Karsten Hopp 1e6789
! 			result = OK;
Karsten Hopp 1e6789
  		    while (sta->c != NFA_END_COMPOSING)
Karsten Hopp 1e6789
  			sta = sta->out;
Karsten Hopp 1e6789
  		}
Karsten Hopp 1e6789
! 
Karsten Hopp 1e6789
! 		/* Check base character matches first, unless ignored. */
Karsten Hopp 1e6789
! 		else if (len > 0 || mc == sta->c)
Karsten Hopp 1e6789
! 		{
Karsten Hopp 1e6789
! 		    if (len == 0)
Karsten Hopp 1e6789
  		    {
Karsten Hopp 1e6789
  			len += mb_char2len(mc);
Karsten Hopp 1e6789
  			sta = sta->out;
Karsten Hopp 1e6789
  		    }
Karsten Hopp 1e6789
  
Karsten Hopp 1e6789
! 		    /* We don't care about the order of composing characters.
Karsten Hopp 1e6789
! 		     * Get them into cchars[] first. */
Karsten Hopp 1e6789
! 		    while (len < n)
Karsten Hopp 1e6789
! 		    {
Karsten Hopp 1e6789
! 			mc = mb_ptr2char(reginput + len);
Karsten Hopp 1e6789
! 			cchars[ccount++] = mc;
Karsten Hopp 1e6789
! 			len += mb_char2len(mc);
Karsten Hopp 1e6789
! 			if (ccount == MAX_MCO)
Karsten Hopp 1e6789
! 			    break;
Karsten Hopp 1e6789
! 		    }
Karsten Hopp 1e6789
! 
Karsten Hopp 1e6789
! 		    /* Check that each composing char in the pattern matches a
Karsten Hopp 1e6789
! 		     * composing char in the text.  We do not check if all
Karsten Hopp 1e6789
! 		     * composing chars are matched. */
Karsten Hopp 1e6789
! 		    result = OK;
Karsten Hopp 1e6789
! 		    while (sta->c != NFA_END_COMPOSING)
Karsten Hopp 1e6789
! 		    {
Karsten Hopp 1e6789
! 			for (j = 0; j < ccount; ++j)
Karsten Hopp 1e6789
! 			    if (cchars[j] == sta->c)
Karsten Hopp 1e6789
! 				break;
Karsten Hopp 1e6789
! 			if (j == ccount)
Karsten Hopp 1e6789
! 			{
Karsten Hopp 1e6789
! 			    result = FAIL;
Karsten Hopp 1e6789
! 			    break;
Karsten Hopp 1e6789
! 			}
Karsten Hopp 1e6789
! 			sta = sta->out;
Karsten Hopp 1e6789
! 		    }
Karsten Hopp 1e6789
! 		}
Karsten Hopp 1e6789
! 		else
Karsten Hopp 1e6789
  		    result = FAIL;
Karsten Hopp 1e6789
+ 
Karsten Hopp 1e6789
  		end = t->state->out1;	    /* NFA_END_COMPOSING */
Karsten Hopp 1e6789
  		ADD_POS_NEG_STATE(end);
Karsten Hopp 1e6789
  		break;
Karsten Hopp 1e6789
*** ../vim-7.3.1020/src/testdir/test95.in	2013-05-25 23:15:21.000000000 +0200
Karsten Hopp 1e6789
--- src/testdir/test95.in	2013-05-26 14:12:13.000000000 +0200
Karsten Hopp 1e6789
***************
Karsten Hopp 1e6789
*** 9,14 ****
Karsten Hopp 1e6789
--- 9,15 ----
Karsten Hopp 1e6789
  :so mbyte.vim
Karsten Hopp 1e6789
  :set nocp encoding=utf-8 viminfo+=nviminfo nomore
Karsten Hopp 1e6789
  :" tl is a List of Lists with:
Karsten Hopp 1e6789
+ :"    2: test auto/old/new  0: test auto/old  1: test auto/new
Karsten Hopp 1e6789
  :"    regexp pattern
Karsten Hopp 1e6789
  :"    text to test the pattern on
Karsten Hopp 1e6789
  :"    expected match (optional)
Karsten Hopp 1e6789
***************
Karsten Hopp 1e6789
*** 40,49 ****
Karsten Hopp 1e6789
  :call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
Karsten Hopp 1e6789
  :call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 1e6789
  :call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 1e6789
! :"call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb "])
Karsten Hopp 1e6789
! :"call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb "])
Karsten Hopp 1e6789
  :call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
Karsten Hopp 1e6789
  :call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
Karsten Hopp 1e6789
  
Karsten Hopp 1e6789
  
Karsten Hopp 1e6789
  :"""" Test \Z
Karsten Hopp 1e6789
--- 41,54 ----
Karsten Hopp 1e6789
  :call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
Karsten Hopp 1e6789
  :call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 1e6789
  :call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 1e6789
! :call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 1e6789
! :call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 1e6789
  :call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
Karsten Hopp 1e6789
  :call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
Karsten Hopp 1e6789
+ :call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
Karsten Hopp 1e6789
+ :call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
Karsten Hopp 1e6789
+ :call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 1e6789
+ :call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
Karsten Hopp 1e6789
  
Karsten Hopp 1e6789
  
Karsten Hopp 1e6789
  :"""" Test \Z
Karsten Hopp 1e6789
***************
Karsten Hopp 1e6789
*** 74,80 ****
Karsten Hopp 1e6789
  :  let text = t[2]
Karsten Hopp 1e6789
  :  let matchidx = 3
Karsten Hopp 1e6789
  :  for engine in [0, 1, 2]
Karsten Hopp 1e6789
! :    if engine == 2 && !re
Karsten Hopp 1e6789
  :      continue
Karsten Hopp 1e6789
  :    endif
Karsten Hopp 1e6789
  :    let &regexpengine = engine
Karsten Hopp 1e6789
--- 79,85 ----
Karsten Hopp 1e6789
  :  let text = t[2]
Karsten Hopp 1e6789
  :  let matchidx = 3
Karsten Hopp 1e6789
  :  for engine in [0, 1, 2]
Karsten Hopp 1e6789
! :    if engine == 2 && re == 0 || engine == 1 && re == 1
Karsten Hopp 1e6789
  :      continue
Karsten Hopp 1e6789
  :    endif
Karsten Hopp 1e6789
  :    let &regexpengine = engine
Karsten Hopp 1e6789
*** ../vim-7.3.1020/src/testdir/test95.ok	2013-05-25 23:15:21.000000000 +0200
Karsten Hopp 1e6789
--- src/testdir/test95.ok	2013-05-26 14:12:36.000000000 +0200
Karsten Hopp 1e6789
***************
Karsten Hopp 1e6789
*** 41,52 ****
Karsten Hopp 1e6789
--- 41,69 ----
Karsten Hopp 1e6789
  OK 0 - .ֹֻ
Karsten Hopp 1e6789
  OK 1 - .ֹֻ
Karsten Hopp 1e6789
  OK 2 - .ֹֻ
Karsten Hopp 1e6789
+ OK 0 - ֹֻ
Karsten Hopp 1e6789
+ OK 1 - ֹֻ
Karsten Hopp 1e6789
+ OK 2 - ֹֻ
Karsten Hopp 1e6789
+ OK 0 - .ֹֻ
Karsten Hopp 1e6789
+ OK 1 - .ֹֻ
Karsten Hopp 1e6789
+ OK 2 - .ֹֻ
Karsten Hopp 1e6789
  OK 0 - ֹ
Karsten Hopp 1e6789
  OK 1 - ֹ
Karsten Hopp 1e6789
  OK 2 - ֹ
Karsten Hopp 1e6789
  OK 0 - .ֹ
Karsten Hopp 1e6789
  OK 1 - .ֹ
Karsten Hopp 1e6789
  OK 2 - .ֹ
Karsten Hopp 1e6789
+ OK 0 - ֹ
Karsten Hopp 1e6789
+ OK 1 - ֹ
Karsten Hopp 1e6789
+ OK 2 - ֹ
Karsten Hopp 1e6789
+ OK 0 - .ֹ
Karsten Hopp 1e6789
+ OK 1 - .ֹ
Karsten Hopp 1e6789
+ OK 2 - .ֹ
Karsten Hopp 1e6789
+ OK 0 - ֹֻ
Karsten Hopp 1e6789
+ OK 2 - ֹֻ
Karsten Hopp 1e6789
+ OK 0 - .ֹֻ
Karsten Hopp 1e6789
+ OK 1 - .ֹֻ
Karsten Hopp 1e6789
+ OK 2 - .ֹֻ
Karsten Hopp 1e6789
  OK 0 - ú\Z
Karsten Hopp 1e6789
  OK 1 - ú\Z
Karsten Hopp 1e6789
  OK 2 - ú\Z
Karsten Hopp 1e6789
*** ../vim-7.3.1020/src/version.c	2013-05-25 23:15:21.000000000 +0200
Karsten Hopp 1e6789
--- src/version.c	2013-05-26 13:54:16.000000000 +0200
Karsten Hopp 1e6789
***************
Karsten Hopp 1e6789
*** 730,731 ****
Karsten Hopp 1e6789
--- 730,733 ----
Karsten Hopp 1e6789
  {   /* Add new patch number below this line */
Karsten Hopp 1e6789
+ /**/
Karsten Hopp 1e6789
+     1021,
Karsten Hopp 1e6789
  /**/
Karsten Hopp 1e6789
Karsten Hopp 1e6789
-- 
Karsten Hopp 1e6789
Engineers are always delighted to share wisdom, even in areas in which they
Karsten Hopp 1e6789
have no experience whatsoever.  Their logic provides them with inherent
Karsten Hopp 1e6789
insight into any field of expertise.  This can be a problem when dealing with
Karsten Hopp 1e6789
the illogical people who believe that knowledge can only be derived through
Karsten Hopp 1e6789
experience.
Karsten Hopp 1e6789
				(Scott Adams - The Dilbert principle)
Karsten Hopp 1e6789
Karsten Hopp 1e6789
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp 1e6789
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp 1e6789
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp 1e6789
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///