To: vim_dev@googlegroups.com
Subject: Patch 7.3.1122
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 7.3.1122
Problem:    New regexp engine: \@> not supported.
Solution:   Implement \@>.
Files:	    src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok


*** ../vim-7.3.1121/src/regexp_nfa.c	2013-06-05 16:51:53.000000000 +0200
--- src/regexp_nfa.c	2013-06-05 18:45:57.000000000 +0200
***************
*** 57,63 ****
--- 57,65 ----
      NFA_NCLOSE,			    /* End of subexpr. marked with \%( ... \) */
      NFA_START_INVISIBLE,
      NFA_START_INVISIBLE_BEFORE,
+     NFA_START_PATTERN,
      NFA_END_INVISIBLE,
+     NFA_END_PATTERN,
      NFA_COMPOSING,		    /* Next nodes in NFA are part of the
  				       composing multibyte char */
      NFA_END_COMPOSING,		    /* End of a composing char in the NFA */
***************
*** 1505,1513 ****
  			i = NFA_PREV_ATOM_JUST_BEFORE_NEG;
  		    break;
  		case '>':
! 		    /* \@> Not supported yet */
! 		    /* i = NFA_PREV_ATOM_LIKE_PATTERN; */
! 		    return FAIL;
  	    }
  	    if (i == 0)
  	    {
--- 1507,1515 ----
  			i = NFA_PREV_ATOM_JUST_BEFORE_NEG;
  		    break;
  		case '>':
! 		    /* \@>  */
! 		    i = NFA_PREV_ATOM_LIKE_PATTERN;
! 		    break;
  	    }
  	    if (i == 0)
  	    {
***************
*** 1885,1896 ****
--- 1887,1903 ----
  			    STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE"); break;
  	case NFA_PREV_ATOM_JUST_BEFORE_NEG:
  			 STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE_NEG"); break;
+ 	case NFA_PREV_ATOM_LIKE_PATTERN:
+ 			    STRCPY(code, "NFA_PREV_ATOM_LIKE_PATTERN"); break;
+ 
  	case NFA_NOPEN:		    STRCPY(code, "NFA_NOPEN"); break;
  	case NFA_NCLOSE:	    STRCPY(code, "NFA_NCLOSE"); break;
  	case NFA_START_INVISIBLE:   STRCPY(code, "NFA_START_INVISIBLE"); break;
  	case NFA_START_INVISIBLE_BEFORE:
  			    STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break;
+ 	case NFA_START_PATTERN:   STRCPY(code, "NFA_START_PATTERN"); break;
  	case NFA_END_INVISIBLE:	    STRCPY(code, "NFA_END_INVISIBLE"); break;
+ 	case NFA_END_PATTERN:	    STRCPY(code, "NFA_END_PATTERN"); break;
  
  	case NFA_COMPOSING:	    STRCPY(code, "NFA_COMPOSING"); break;
  	case NFA_END_COMPOSING:	    STRCPY(code, "NFA_END_COMPOSING"); break;
***************
*** 2601,2612 ****
  	case NFA_PREV_ATOM_NO_WIDTH_NEG:
  	case NFA_PREV_ATOM_JUST_BEFORE:
  	case NFA_PREV_ATOM_JUST_BEFORE_NEG:
  	  {
  	    int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG
  				      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG);
  	    int before = (*p == NFA_PREV_ATOM_JUST_BEFORE
  				      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG);
! 	    int n;
  
  	    if (before)
  		n = *++p; /* get the count */
--- 2608,2633 ----
  	case NFA_PREV_ATOM_NO_WIDTH_NEG:
  	case NFA_PREV_ATOM_JUST_BEFORE:
  	case NFA_PREV_ATOM_JUST_BEFORE_NEG:
+ 	case NFA_PREV_ATOM_LIKE_PATTERN:
  	  {
  	    int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG
  				      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG);
  	    int before = (*p == NFA_PREV_ATOM_JUST_BEFORE
  				      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG);
! 	    int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN);
! 	    int start_state = NFA_START_INVISIBLE;
! 	    int end_state = NFA_END_INVISIBLE;
! 	    int n = 0;
! 	    nfa_state_T *zend;
! 	    nfa_state_T *skip;
! 
! 	    if (before)
! 		start_state = NFA_START_INVISIBLE_BEFORE;
! 	    else if (pattern)
! 	    {
! 		start_state = NFA_START_PATTERN;
! 		end_state = NFA_END_PATTERN;
! 	    }
  
  	    if (before)
  		n = *++p; /* get the count */
***************
*** 2620,2635 ****
  
  	    if (nfa_calc_size == TRUE)
  	    {
! 		nstate += 2;
  		break;
  	    }
  	    e = POP();
! 	    s1 = alloc_state(NFA_END_INVISIBLE, NULL, NULL);
  	    if (s1 == NULL)
  		goto theend;
- 	    patch(e.out, s1);
  
! 	    s = alloc_state(NFA_START_INVISIBLE, e.start, s1);
  	    if (s == NULL)
  		goto theend;
  	    if (neg)
--- 2641,2655 ----
  
  	    if (nfa_calc_size == TRUE)
  	    {
! 		nstate += pattern ? 4 : 2;
  		break;
  	    }
  	    e = POP();
! 	    s1 = alloc_state(end_state, NULL, NULL);
  	    if (s1 == NULL)
  		goto theend;
  
! 	    s = alloc_state(start_state, e.start, s1);
  	    if (s == NULL)
  		goto theend;
  	    if (neg)
***************
*** 2638,2649 ****
  		s1->negated = TRUE;
  	    }
  	    if (before)
- 	    {
  		s->val = n; /* store the count */
! 		++s->c; /* NFA_START_INVISIBLE -> NFA_START_INVISIBLE_BEFORE */
  	    }
- 
- 	    PUSH(frag(s, list1(&s1->out)));
  	    break;
  	  }
  
--- 2658,2678 ----
  		s1->negated = TRUE;
  	    }
  	    if (before)
  		s->val = n; /* store the count */
! 	    if (pattern)
! 	    {
! 		/* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */
! 		skip = alloc_state(NFA_SKIP, NULL, NULL);
! 		zend = alloc_state(NFA_ZEND, s1, NULL);
! 		s1->out= skip;
! 		patch(e.out, zend);
! 		PUSH(frag(s, list1(&skip->out)));
! 	    }
! 	    else
! 	    {
! 		patch(e.out, s1);
! 		PUSH(frag(s, list1(&s1->out)));
  	    }
  	    break;
  	  }
  
***************
*** 2953,2959 ****
  
      for (j = 0; j < sub->in_use; j++)
  	if (REG_MULTI)
! 	    fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d",
  		    j,
  		    sub->list.multi[j].start.col,
  		    (int)sub->list.multi[j].start.lnum,
--- 2982,2988 ----
  
      for (j = 0; j < sub->in_use; j++)
  	if (REG_MULTI)
! 	    fprintf(log_fd, "*** group %d, start: c=%d, l=%d, end: c=%d, l=%d\n",
  		    j,
  		    sub->list.multi[j].start.col,
  		    (int)sub->list.multi[j].start.lnum,
***************
*** 2964,2975 ****
  	    char *s = (char *)sub->list.line[j].start;
  	    char *e = (char *)sub->list.line[j].end;
  
! 	    fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"",
  		    j,
  		    s == NULL ? "NULL" : s,
  		    e == NULL ? "NULL" : e);
  	}
-     fprintf(log_fd, "\n");
  }
  #endif
  
--- 2993,3003 ----
  	    char *s = (char *)sub->list.line[j].start;
  	    char *e = (char *)sub->list.line[j].end;
  
! 	    fprintf(log_fd, "*** group %d, start: \"%s\", end: \"%s\"\n",
  		    j,
  		    s == NULL ? "NULL" : s,
  		    e == NULL ? "NULL" : e);
  	}
  }
  #endif
  
***************
*** 4317,4322 ****
--- 4345,4351 ----
  	      }
  
  	    case NFA_END_INVISIBLE:
+ 	    case NFA_END_PATTERN:
  		/*
  		 * This is only encountered after a NFA_START_INVISIBLE or
  		 * NFA_START_INVISIBLE_BEFORE node.
***************
*** 4343,4349 ****
  				(int)(nfa_endp->se_u.ptr - reginput));
  		}
  #endif
! 		/* It's only a match if it ends at "nfa_endp" */
  		if (nfa_endp != NULL && (REG_MULTI
  			? (reglnum != nfa_endp->se_u.pos.lnum
  			    || (int)(reginput - regline)
--- 4372,4379 ----
  				(int)(nfa_endp->se_u.ptr - reginput));
  		}
  #endif
! 		/* If "nfa_endp" is set it's only a match if it ends at
! 		 * "nfa_endp" */
  		if (nfa_endp != NULL && (REG_MULTI
  			? (reglnum != nfa_endp->se_u.pos.lnum
  			    || (int)(reginput - regline)
***************
*** 4360,4365 ****
--- 4390,4399 ----
  			copy_sub(&m->synt, &t->subs.synt);
  #endif
  		}
+ #ifdef ENABLE_LOG
+ 		fprintf(log_fd, "Match found:\n");
+ 		log_subsexpr(m);
+ #endif
  		nfa_match = TRUE;
  		break;
  
***************
*** 4435,4440 ****
--- 4469,4531 ----
  		}
  		break;
  
+ 	    case NFA_START_PATTERN:
+ 		/* First try matching the pattern. */
+ 		result = recursive_regmatch(t->state, prog,
+ 						       submatch, m, &listids);
+ 		if (result)
+ 		{
+ 		    int bytelen;
+ 
+ #ifdef ENABLE_LOG
+ 		    fprintf(log_fd, "NFA_START_PATTERN matches:\n");
+ 		    log_subsexpr(m);
+ #endif
+ 		    /* Copy submatch info from the recursive call */
+ 		    copy_sub_off(&t->subs.norm, &m->norm);
+ #ifdef FEAT_SYN_HL
+ 		    copy_sub_off(&t->subs.synt, &m->synt);
+ #endif
+ 		    /* Now we need to skip over the matched text and then
+ 		     * continue with what follows. */
+ 		    if (REG_MULTI)
+ 			/* TODO: multi-line match */
+ 			bytelen = m->norm.list.multi[0].end.col
+ 						  - (int)(reginput - regline);
+ 		    else
+ 			bytelen = (int)(m->norm.list.line[0].end - reginput);
+ 
+ #ifdef ENABLE_LOG
+ 		    fprintf(log_fd, "NFA_START_PATTERN length: %d\n", bytelen);
+ #endif
+ 		    if (bytelen == 0)
+ 		    {
+ 			/* empty match, output of corresponding
+ 			 * NFA_END_PATTERN/NFA_SKIP to be used at current
+ 			 * position */
+ 			addstate_here(thislist, t->state->out1->out->out,
+ 						  &t->subs, t->pim, &listidx);
+ 		    }
+ 		    else if (bytelen <= clen)
+ 		    {
+ 			/* match current character, output of corresponding
+ 			 * NFA_END_PATTERN to be used at next position. */
+ 			ll = nextlist;
+ 			add_state = t->state->out1->out->out;
+ 			add_off = clen;
+ 		    }
+ 		    else
+ 		    {
+ 			/* skip over the matched characters, set character
+ 			 * count in NFA_SKIP */
+ 			ll = nextlist;
+ 			add_state = t->state->out1->out;
+ 			add_off = bytelen;
+ 			add_count = bytelen - clen;
+ 		    }
+ 		}
+ 		break;
+ 
  	    case NFA_BOL:
  		if (reginput == regline)
  		    addstate_here(thislist, t->state->out, &t->subs,
***************
*** 4846,4854 ****
  			ll = nextlist;
  			add_state = t->state->out->out;
  			add_off = clen;
- #ifdef ENABLE_LOG
- 			log_subsexpr(&nextlist->t[nextlist->n - 1].subs);
- #endif
  		    }
  		    else
  		    {
--- 4937,4942 ----
***************
*** 4858,4866 ****
  			add_state = t->state->out;
  			add_off = bytelen;
  			add_count = bytelen - clen;
- #ifdef ENABLE_LOG
- 			log_subsexpr(&nextlist->t[nextlist->n - 1].subs);
- #endif
  		    }
  		}
  		break;
--- 4946,4951 ----
***************
*** 4873,4881 ****
  		  ll = nextlist;
  		  add_state = t->state->out;
  		  add_off = clen;
- #ifdef ENABLE_LOG
- 		  log_subsexpr(&nextlist->t[nextlist->n - 1].subs);
- #endif
  	      }
  	      else
  	      {
--- 4958,4963 ----
***************
*** 4884,4892 ****
  		  add_state = t->state;
  		  add_off = 0;
  		  add_count = t->count - clen;
- #ifdef ENABLE_LOG
- 		  log_subsexpr(&nextlist->t[nextlist->n - 1].subs);
- #endif
  	      }
  	      break;
  
--- 4966,4971 ----
***************
*** 5158,5170 ****
      f = fopen(NFA_REGEXP_RUN_LOG, "a");
      if (f != NULL)
      {
! 	fprintf(f, "\n\n\n\n\n\n\t\t=======================================================\n");
! 	fprintf(f, "		=======================================================\n");
  #ifdef DEBUG
  	fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr);
  #endif
  	fprintf(f, "\tInput text is \"%s\" \n", reginput);
! 	fprintf(f, "		=======================================================\n\n");
  	nfa_print_state(f, start);
  	fprintf(f, "\n\n");
  	fclose(f);
--- 5237,5248 ----
      f = fopen(NFA_REGEXP_RUN_LOG, "a");
      if (f != NULL)
      {
! 	fprintf(f, "\n\n\t=======================================================\n");
  #ifdef DEBUG
  	fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr);
  #endif
  	fprintf(f, "\tInput text is \"%s\" \n", reginput);
! 	fprintf(f, "\t=======================================================\n\n");
  	nfa_print_state(f, start);
  	fprintf(f, "\n\n");
  	fclose(f);
*** ../vim-7.3.1121/src/testdir/test64.in	2013-06-05 11:05:12.000000000 +0200
--- src/testdir/test64.in	2013-06-05 18:45:28.000000000 +0200
***************
*** 385,390 ****
--- 385,396 ----
  :call add(tl, [2, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
  :call add(tl, [2, '\(foo\)\@<!bar.', 'xx foobar1 xbar2 xx', 'bar2'])
  :"
+ :""""" \@>
+ :call add(tl, [2, '\(a*\)\@>a', 'aaaa'])
+ :call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa'])
+ :" TODO: BT engine does not restore submatch after failure
+ :call add(tl, [1, '\(a*\)\@>a\|a\+', 'aaaa', 'aaaa'])
+ :"
  :"""" "\_" prepended negated collection matches EOL
  :call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"])
  :call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"])
***************
*** 401,407 ****
  :  let text = t[2]
  :  let matchidx = 3
  :  for engine in [0, 1, 2]
! :    if engine == 2 && !re
  :      continue
  :    endif
  :    let &regexpengine = engine
--- 407,413 ----
  :  let text = t[2]
  :  let matchidx = 3
  :  for engine in [0, 1, 2]
! :    if engine == 2 && re == 0 || engine == 1 && re ==1
  :      continue
  :    endif
  :    let &regexpengine = engine
*** ../vim-7.3.1121/src/testdir/test64.ok	2013-06-05 11:05:12.000000000 +0200
--- src/testdir/test64.ok	2013-06-05 18:47:54.000000000 +0200
***************
*** 872,877 ****
--- 872,885 ----
  OK 0 - \(foo\)\@<!bar.
  OK 1 - \(foo\)\@<!bar.
  OK 2 - \(foo\)\@<!bar.
+ OK 0 - \(a*\)\@>a
+ OK 1 - \(a*\)\@>a
+ OK 2 - \(a*\)\@>a
+ OK 0 - \(a*\)\@>b
+ OK 1 - \(a*\)\@>b
+ OK 2 - \(a*\)\@>b
+ OK 0 - \(a*\)\@>a\|a\+
+ OK 2 - \(a*\)\@>a\|a\+
  OK 0 - \_[^8-9]\+
  OK 1 - \_[^8-9]\+
  OK 2 - \_[^8-9]\+
*** ../vim-7.3.1121/src/version.c	2013-06-05 16:51:53.000000000 +0200
--- src/version.c	2013-06-05 18:51:35.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1122,
  /**/

-- 
From "know your smileys":
 :q	vi user saying, "How do I get out of this damn emacs editor?"

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///