Karsten Hopp c6c14b
To: vim_dev@googlegroups.com
Karsten Hopp c6c14b
Subject: Patch 7.4.036
Karsten Hopp c6c14b
Fcc: outbox
Karsten Hopp c6c14b
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp c6c14b
Mime-Version: 1.0
Karsten Hopp c6c14b
Content-Type: text/plain; charset=UTF-8
Karsten Hopp c6c14b
Content-Transfer-Encoding: 8bit
Karsten Hopp c6c14b
------------
Karsten Hopp c6c14b
Karsten Hopp c6c14b
Patch 7.4.036
Karsten Hopp c6c14b
Problem:    NFA engine does not capture groups correctly when using \@>. (ZyX)
Karsten Hopp c6c14b
Solution:   Copy submatches before doing the recursive match.
Karsten Hopp c6c14b
Files:	    src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok
Karsten Hopp c6c14b
Karsten Hopp c6c14b
Karsten Hopp c6c14b
*** ../vim-7.4.035/src/regexp_nfa.c	2013-09-22 13:57:19.000000000 +0200
Karsten Hopp c6c14b
--- src/regexp_nfa.c	2013-09-25 16:35:54.000000000 +0200
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 36,42 ****
Karsten Hopp c6c14b
  {
Karsten Hopp c6c14b
      NFA_SPLIT = -1024,
Karsten Hopp c6c14b
      NFA_MATCH,
Karsten Hopp c6c14b
!     NFA_SKIP_CHAR,		    /* matches a 0-length char */
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
      NFA_START_COLL,		    /* [abc] start */
Karsten Hopp c6c14b
      NFA_END_COLL,		    /* [abc] end */
Karsten Hopp c6c14b
--- 36,42 ----
Karsten Hopp c6c14b
  {
Karsten Hopp c6c14b
      NFA_SPLIT = -1024,
Karsten Hopp c6c14b
      NFA_MATCH,
Karsten Hopp c6c14b
!     NFA_EMPTY,			    /* matches 0-length */
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
      NFA_START_COLL,		    /* [abc] start */
Karsten Hopp c6c14b
      NFA_END_COLL,		    /* [abc] end */
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 2005,2012 ****
Karsten Hopp c6c14b
  	    {
Karsten Hopp c6c14b
  		/* Ignore result of previous call to nfa_regatom() */
Karsten Hopp c6c14b
  		post_ptr = post_start + my_post_start;
Karsten Hopp c6c14b
! 		/* NFA_SKIP_CHAR has 0-length and works everywhere */
Karsten Hopp c6c14b
! 		EMIT(NFA_SKIP_CHAR);
Karsten Hopp c6c14b
  		return OK;
Karsten Hopp c6c14b
  	    }
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
--- 2005,2012 ----
Karsten Hopp c6c14b
  	    {
Karsten Hopp c6c14b
  		/* Ignore result of previous call to nfa_regatom() */
Karsten Hopp c6c14b
  		post_ptr = post_start + my_post_start;
Karsten Hopp c6c14b
! 		/* NFA_EMPTY is 0-length and works everywhere */
Karsten Hopp c6c14b
! 		EMIT(NFA_EMPTY);
Karsten Hopp c6c14b
  		return OK;
Karsten Hopp c6c14b
  	    }
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 2170,2185 ****
Karsten Hopp c6c14b
  	old_post_pos = (int)(post_ptr - post_start);
Karsten Hopp c6c14b
  	if (nfa_regconcat() == FAIL)
Karsten Hopp c6c14b
  	    return FAIL;
Karsten Hopp c6c14b
! 	/* if concat is empty, skip a input char. But do emit a node */
Karsten Hopp c6c14b
  	if (old_post_pos == (int)(post_ptr - post_start))
Karsten Hopp c6c14b
! 	    EMIT(NFA_SKIP_CHAR);
Karsten Hopp c6c14b
  	EMIT(NFA_CONCAT);
Karsten Hopp c6c14b
  	ch = peekchr();
Karsten Hopp c6c14b
      }
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
!     /* Even if a branch is empty, emit one node for it */
Karsten Hopp c6c14b
      if (old_post_pos == (int)(post_ptr - post_start))
Karsten Hopp c6c14b
! 	EMIT(NFA_SKIP_CHAR);
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
      return OK;
Karsten Hopp c6c14b
  }
Karsten Hopp c6c14b
--- 2170,2185 ----
Karsten Hopp c6c14b
  	old_post_pos = (int)(post_ptr - post_start);
Karsten Hopp c6c14b
  	if (nfa_regconcat() == FAIL)
Karsten Hopp c6c14b
  	    return FAIL;
Karsten Hopp c6c14b
! 	/* if concat is empty do emit a node */
Karsten Hopp c6c14b
  	if (old_post_pos == (int)(post_ptr - post_start))
Karsten Hopp c6c14b
! 	    EMIT(NFA_EMPTY);
Karsten Hopp c6c14b
  	EMIT(NFA_CONCAT);
Karsten Hopp c6c14b
  	ch = peekchr();
Karsten Hopp c6c14b
      }
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
!     /* if a branch is empty, emit one node for it */
Karsten Hopp c6c14b
      if (old_post_pos == (int)(post_ptr - post_start))
Karsten Hopp c6c14b
! 	EMIT(NFA_EMPTY);
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
      return OK;
Karsten Hopp c6c14b
  }
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 2423,2429 ****
Karsten Hopp c6c14b
  	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
Karsten Hopp c6c14b
  	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
Karsten Hopp c6c14b
  	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
Karsten Hopp c6c14b
! 	case NFA_SKIP_CHAR:	STRCPY(code, "NFA_SKIP_CHAR"); break;
Karsten Hopp c6c14b
  	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
  	case NFA_START_COLL:	STRCPY(code, "NFA_START_COLL"); break;
Karsten Hopp c6c14b
--- 2423,2429 ----
Karsten Hopp c6c14b
  	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
Karsten Hopp c6c14b
  	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
Karsten Hopp c6c14b
  	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
Karsten Hopp c6c14b
! 	case NFA_EMPTY:		STRCPY(code, "NFA_EMPTY"); break;
Karsten Hopp c6c14b
  	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
  	case NFA_START_COLL:	STRCPY(code, "NFA_START_COLL"); break;
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 3067,3073 ****
Karsten Hopp c6c14b
  	    case NFA_ZSTART:
Karsten Hopp c6c14b
  	    case NFA_ZEND:
Karsten Hopp c6c14b
  	    case NFA_OPT_CHARS:
Karsten Hopp c6c14b
! 	    case NFA_SKIP_CHAR:
Karsten Hopp c6c14b
  	    case NFA_START_PATTERN:
Karsten Hopp c6c14b
  	    case NFA_END_PATTERN:
Karsten Hopp c6c14b
  	    case NFA_COMPOSING:
Karsten Hopp c6c14b
--- 3067,3073 ----
Karsten Hopp c6c14b
  	    case NFA_ZSTART:
Karsten Hopp c6c14b
  	    case NFA_ZEND:
Karsten Hopp c6c14b
  	    case NFA_OPT_CHARS:
Karsten Hopp c6c14b
! 	    case NFA_EMPTY:
Karsten Hopp c6c14b
  	    case NFA_START_PATTERN:
Karsten Hopp c6c14b
  	    case NFA_END_PATTERN:
Karsten Hopp c6c14b
  	    case NFA_COMPOSING:
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 3265,3279 ****
Karsten Hopp c6c14b
  	    PUSH(frag(e1.start, e2.out));
Karsten Hopp c6c14b
  	    break;
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
! 	case NFA_SKIP_CHAR:
Karsten Hopp c6c14b
! 	    /* Symbol of 0-length, Used in a repetition
Karsten Hopp c6c14b
! 	     * with max/min count of 0 */
Karsten Hopp c6c14b
  	    if (nfa_calc_size == TRUE)
Karsten Hopp c6c14b
  	    {
Karsten Hopp c6c14b
  		nstate++;
Karsten Hopp c6c14b
  		break;
Karsten Hopp c6c14b
  	    }
Karsten Hopp c6c14b
! 	    s = alloc_state(NFA_SKIP_CHAR, NULL, NULL);
Karsten Hopp c6c14b
  	    if (s == NULL)
Karsten Hopp c6c14b
  		goto theend;
Karsten Hopp c6c14b
  	    PUSH(frag(s, list1(&s->out)));
Karsten Hopp c6c14b
--- 3265,3278 ----
Karsten Hopp c6c14b
  	    PUSH(frag(e1.start, e2.out));
Karsten Hopp c6c14b
  	    break;
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
! 	case NFA_EMPTY:
Karsten Hopp c6c14b
! 	    /* 0-length, used in a repetition with max/min count of 0 */
Karsten Hopp c6c14b
  	    if (nfa_calc_size == TRUE)
Karsten Hopp c6c14b
  	    {
Karsten Hopp c6c14b
  		nstate++;
Karsten Hopp c6c14b
  		break;
Karsten Hopp c6c14b
  	    }
Karsten Hopp c6c14b
! 	    s = alloc_state(NFA_EMPTY, NULL, NULL);
Karsten Hopp c6c14b
  	    if (s == NULL)
Karsten Hopp c6c14b
  		goto theend;
Karsten Hopp c6c14b
  	    PUSH(frag(s, list1(&s->out)));
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 4209,4215 ****
Karsten Hopp c6c14b
  	case NFA_MOPEN:
Karsten Hopp c6c14b
  	case NFA_ZEND:
Karsten Hopp c6c14b
  	case NFA_SPLIT:
Karsten Hopp c6c14b
! 	case NFA_SKIP_CHAR:
Karsten Hopp c6c14b
  	    /* These nodes are not added themselves but their "out" and/or
Karsten Hopp c6c14b
  	     * "out1" may be added below.  */
Karsten Hopp c6c14b
  	    break;
Karsten Hopp c6c14b
--- 4208,4214 ----
Karsten Hopp c6c14b
  	case NFA_MOPEN:
Karsten Hopp c6c14b
  	case NFA_ZEND:
Karsten Hopp c6c14b
  	case NFA_SPLIT:
Karsten Hopp c6c14b
! 	case NFA_EMPTY:
Karsten Hopp c6c14b
  	    /* These nodes are not added themselves but their "out" and/or
Karsten Hopp c6c14b
  	     * "out1" may be added below.  */
Karsten Hopp c6c14b
  	    break;
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 4337,4343 ****
Karsten Hopp c6c14b
  	    subs = addstate(l, state->out1, subs, pim, off);
Karsten Hopp c6c14b
  	    break;
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
! 	case NFA_SKIP_CHAR:
Karsten Hopp c6c14b
  	case NFA_NOPEN:
Karsten Hopp c6c14b
  	case NFA_NCLOSE:
Karsten Hopp c6c14b
  	    subs = addstate(l, state->out, subs, pim, off);
Karsten Hopp c6c14b
--- 4336,4342 ----
Karsten Hopp c6c14b
  	    subs = addstate(l, state->out1, subs, pim, off);
Karsten Hopp c6c14b
  	    break;
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
! 	case NFA_EMPTY:
Karsten Hopp c6c14b
  	case NFA_NOPEN:
Karsten Hopp c6c14b
  	case NFA_NCLOSE:
Karsten Hopp c6c14b
  	    subs = addstate(l, state->out, subs, pim, off);
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 5604,5612 ****
Karsten Hopp c6c14b
  		    {
Karsten Hopp c6c14b
  			int in_use = m->norm.in_use;
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
! 			/* Copy submatch info for the recursive call, so that
Karsten Hopp c6c14b
! 			 * \1 can be matched. */
Karsten Hopp c6c14b
  			copy_sub_off(&m->norm, &t->subs.norm);
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
  			/*
Karsten Hopp c6c14b
  			 * First try matching the invisible match, then what
Karsten Hopp c6c14b
--- 5603,5615 ----
Karsten Hopp c6c14b
  		    {
Karsten Hopp c6c14b
  			int in_use = m->norm.in_use;
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
! 			/* Copy submatch info for the recursive call, opposite
Karsten Hopp c6c14b
! 			 * of what happens on success below. */
Karsten Hopp c6c14b
  			copy_sub_off(&m->norm, &t->subs.norm);
Karsten Hopp c6c14b
+ #ifdef FEAT_SYN_HL
Karsten Hopp c6c14b
+ 			if (nfa_has_zsubexpr)
Karsten Hopp c6c14b
+ 			    copy_sub_off(&m->synt, &t->subs.synt);
Karsten Hopp c6c14b
+ #endif
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
  			/*
Karsten Hopp c6c14b
  			 * First try matching the invisible match, then what
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 5713,5718 ****
Karsten Hopp c6c14b
--- 5716,5728 ----
Karsten Hopp c6c14b
  #endif
Karsten Hopp c6c14b
  		    break;
Karsten Hopp c6c14b
  		}
Karsten Hopp c6c14b
+ 		/* Copy submatch info to the recursive call, opposite of what
Karsten Hopp c6c14b
+ 		 * happens afterwards. */
Karsten Hopp c6c14b
+ 		copy_sub_off(&m->norm, &t->subs.norm);
Karsten Hopp c6c14b
+ #ifdef FEAT_SYN_HL
Karsten Hopp c6c14b
+ 		if (nfa_has_zsubexpr)
Karsten Hopp c6c14b
+ 		    copy_sub_off(&m->synt, &t->subs.synt);
Karsten Hopp c6c14b
+ #endif
Karsten Hopp c6c14b
  
Karsten Hopp c6c14b
  		/* First try matching the pattern. */
Karsten Hopp c6c14b
  		result = recursive_regmatch(t->state, NULL, prog,
Karsten Hopp c6c14b
*** ../vim-7.4.035/src/testdir/test64.in	2013-09-22 13:57:19.000000000 +0200
Karsten Hopp c6c14b
--- src/testdir/test64.in	2013-09-25 15:51:12.000000000 +0200
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 430,435 ****
Karsten Hopp c6c14b
--- 430,436 ----
Karsten Hopp c6c14b
  :call add(tl, [2, '\(a*\)\@>a', 'aaaa'])
Karsten Hopp c6c14b
  :call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa'])
Karsten Hopp c6c14b
  :call add(tl, [2, '^\(.\{-}b\)\@>.', '  abcbd', '  abc', '  ab'])
Karsten Hopp c6c14b
+ :call add(tl, [2, '\(.\{-}\)\(\)\@>$', 'abc', 'abc', 'abc', ''])
Karsten Hopp c6c14b
  :" TODO: BT engine does not restore submatch after failure
Karsten Hopp c6c14b
  :call add(tl, [1, '\(a*\)\@>a\|a\+', 'aaaa', 'aaaa'])
Karsten Hopp c6c14b
  :"
Karsten Hopp c6c14b
*** ../vim-7.4.035/src/testdir/test64.ok	2013-09-22 13:57:19.000000000 +0200
Karsten Hopp c6c14b
--- src/testdir/test64.ok	2013-09-25 16:39:31.000000000 +0200
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 992,997 ****
Karsten Hopp c6c14b
--- 992,1000 ----
Karsten Hopp c6c14b
  OK 0 - ^\(.\{-}b\)\@>.
Karsten Hopp c6c14b
  OK 1 - ^\(.\{-}b\)\@>.
Karsten Hopp c6c14b
  OK 2 - ^\(.\{-}b\)\@>.
Karsten Hopp c6c14b
+ OK 0 - \(.\{-}\)\(\)\@>$
Karsten Hopp c6c14b
+ OK 1 - \(.\{-}\)\(\)\@>$
Karsten Hopp c6c14b
+ OK 2 - \(.\{-}\)\(\)\@>$
Karsten Hopp c6c14b
  OK 0 - \(a*\)\@>a\|a\+
Karsten Hopp c6c14b
  OK 2 - \(a*\)\@>a\|a\+
Karsten Hopp c6c14b
  OK 0 - \_[^8-9]\+
Karsten Hopp c6c14b
*** ../vim-7.4.035/src/version.c	2013-09-22 15:43:34.000000000 +0200
Karsten Hopp c6c14b
--- src/version.c	2013-09-25 16:40:01.000000000 +0200
Karsten Hopp c6c14b
***************
Karsten Hopp c6c14b
*** 740,741 ****
Karsten Hopp c6c14b
--- 740,743 ----
Karsten Hopp c6c14b
  {   /* Add new patch number below this line */
Karsten Hopp c6c14b
+ /**/
Karsten Hopp c6c14b
+     36,
Karsten Hopp c6c14b
  /**/
Karsten Hopp c6c14b
Karsten Hopp c6c14b
-- 
Karsten Hopp c6c14b
There is a fine line between courage and foolishness.
Karsten Hopp c6c14b
Unfortunately, it's not a fence.
Karsten Hopp c6c14b
Karsten Hopp c6c14b
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp c6c14b
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp c6c14b
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp c6c14b
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///