Karsten Hopp 2ef055
To: vim_dev@googlegroups.com
Karsten Hopp 2ef055
Subject: Patch 7.3.1094
Karsten Hopp 2ef055
Fcc: outbox
Karsten Hopp 2ef055
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp 2ef055
Mime-Version: 1.0
Karsten Hopp 2ef055
Content-Type: text/plain; charset=UTF-8
Karsten Hopp 2ef055
Content-Transfer-Encoding: 8bit
Karsten Hopp 2ef055
------------
Karsten Hopp 2ef055
Karsten Hopp 2ef055
Patch 7.3.1094
Karsten Hopp 2ef055
Problem:    New regexp engine: Attempts to match "^" at every character.
Karsten Hopp 2ef055
Solution:   Only try "^" at the start of a line.
Karsten Hopp 2ef055
Files:	    src/regexp_nfa.c
Karsten Hopp 2ef055
Karsten Hopp 2ef055
Karsten Hopp 2ef055
*** ../vim-7.3.1093/src/regexp_nfa.c	2013-06-02 16:07:05.000000000 +0200
Karsten Hopp 2ef055
--- src/regexp_nfa.c	2013-06-02 16:24:04.000000000 +0200
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 249,254 ****
Karsten Hopp 2ef055
--- 249,256 ----
Karsten Hopp 2ef055
  			 * executing. */
Karsten Hopp 2ef055
  static int istate;	/* Index in the state vector, used in new_state() */
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
+ /* If not NULL match must end at this position */
Karsten Hopp 2ef055
+ static save_se_T *nfa_endp = NULL;
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
  static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags));
Karsten Hopp 2ef055
  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 3080,3085 ****
Karsten Hopp 2ef055
--- 3082,3099 ----
Karsten Hopp 2ef055
  	    state->lastlist = l->id;
Karsten Hopp 2ef055
  	    break;
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
+ 	case NFA_BOL:
Karsten Hopp 2ef055
+ 	case NFA_BOF:
Karsten Hopp 2ef055
+ 	    /* "^" won't match past end-of-line, don't bother trying.
Karsten Hopp 2ef055
+ 	     * Except when we are going to the next line for a look-behind
Karsten Hopp 2ef055
+ 	     * match. */
Karsten Hopp 2ef055
+ 	    if (reginput > regline
Karsten Hopp 2ef055
+ 		    && (nfa_endp == NULL
Karsten Hopp 2ef055
+ 			|| !REG_MULTI
Karsten Hopp 2ef055
+ 			|| reglnum == nfa_endp->se_u.pos.lnum))
Karsten Hopp 2ef055
+ 		goto skip_add;
Karsten Hopp 2ef055
+ 	    /* FALLTHROUGH */
Karsten Hopp 2ef055
+ 
Karsten Hopp 2ef055
  	default:
Karsten Hopp 2ef055
  	    if (state->lastlist == l->id)
Karsten Hopp 2ef055
  	    {
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 3659,3682 ****
Karsten Hopp 2ef055
      return val == pos;
Karsten Hopp 2ef055
  }
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
! static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m, save_se_T *endp));
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
  /*
Karsten Hopp 2ef055
   * Main matching routine.
Karsten Hopp 2ef055
   *
Karsten Hopp 2ef055
   * Run NFA to determine whether it matches reginput.
Karsten Hopp 2ef055
   *
Karsten Hopp 2ef055
!  * When "endp" is not NULL it is a required end-of-match position.
Karsten Hopp 2ef055
   *
Karsten Hopp 2ef055
   * Return TRUE if there is a match, FALSE otherwise.
Karsten Hopp 2ef055
   * Note: Caller must ensure that: start != NULL.
Karsten Hopp 2ef055
   */
Karsten Hopp 2ef055
      static int
Karsten Hopp 2ef055
! nfa_regmatch(start, submatch, m, endp)
Karsten Hopp 2ef055
      nfa_state_T		*start;
Karsten Hopp 2ef055
      regsubs_T		*submatch;
Karsten Hopp 2ef055
      regsubs_T		*m;
Karsten Hopp 2ef055
-     save_se_T		*endp;
Karsten Hopp 2ef055
  {
Karsten Hopp 2ef055
      int		result;
Karsten Hopp 2ef055
      int		size = 0;
Karsten Hopp 2ef055
--- 3673,3695 ----
Karsten Hopp 2ef055
      return val == pos;
Karsten Hopp 2ef055
  }
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
! static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m));
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
  /*
Karsten Hopp 2ef055
   * Main matching routine.
Karsten Hopp 2ef055
   *
Karsten Hopp 2ef055
   * Run NFA to determine whether it matches reginput.
Karsten Hopp 2ef055
   *
Karsten Hopp 2ef055
!  * When "nfa_endp" is not NULL it is a required end-of-match position.
Karsten Hopp 2ef055
   *
Karsten Hopp 2ef055
   * Return TRUE if there is a match, FALSE otherwise.
Karsten Hopp 2ef055
   * Note: Caller must ensure that: start != NULL.
Karsten Hopp 2ef055
   */
Karsten Hopp 2ef055
      static int
Karsten Hopp 2ef055
! nfa_regmatch(start, submatch, m)
Karsten Hopp 2ef055
      nfa_state_T		*start;
Karsten Hopp 2ef055
      regsubs_T		*submatch;
Karsten Hopp 2ef055
      regsubs_T		*m;
Karsten Hopp 2ef055
  {
Karsten Hopp 2ef055
      int		result;
Karsten Hopp 2ef055
      int		size = 0;
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 3888,3913 ****
Karsten Hopp 2ef055
  		else
Karsten Hopp 2ef055
  		{
Karsten Hopp 2ef055
  #ifdef ENABLE_LOG
Karsten Hopp 2ef055
! 		    if (endp != NULL)
Karsten Hopp 2ef055
  		    {
Karsten Hopp 2ef055
  			if (REG_MULTI)
Karsten Hopp 2ef055
  			    fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
Karsten Hopp 2ef055
  				    (int)reglnum,
Karsten Hopp 2ef055
! 				    (int)endp->se_u.pos.lnum,
Karsten Hopp 2ef055
  				    (int)(reginput - regline),
Karsten Hopp 2ef055
! 				    endp->se_u.pos.col);
Karsten Hopp 2ef055
  			else
Karsten Hopp 2ef055
  			    fprintf(log_fd, "Current col: %d, endp col: %d\n",
Karsten Hopp 2ef055
  				    (int)(reginput - regline),
Karsten Hopp 2ef055
! 				    (int)(endp->se_u.ptr - reginput));
Karsten Hopp 2ef055
  		    }
Karsten Hopp 2ef055
  #endif
Karsten Hopp 2ef055
! 		    /* It's only a match if it ends at "endp" */
Karsten Hopp 2ef055
! 		    if (endp != NULL && (REG_MULTI
Karsten Hopp 2ef055
! 			    ? (reglnum != endp->se_u.pos.lnum
Karsten Hopp 2ef055
  				|| (int)(reginput - regline)
Karsten Hopp 2ef055
! 							!= endp->se_u.pos.col)
Karsten Hopp 2ef055
! 			    : reginput != endp->se_u.ptr))
Karsten Hopp 2ef055
  			break;
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
  		    /* do not set submatches for \@! */
Karsten Hopp 2ef055
--- 3901,3926 ----
Karsten Hopp 2ef055
  		else
Karsten Hopp 2ef055
  		{
Karsten Hopp 2ef055
  #ifdef ENABLE_LOG
Karsten Hopp 2ef055
! 		    if (nfa_endp != NULL)
Karsten Hopp 2ef055
  		    {
Karsten Hopp 2ef055
  			if (REG_MULTI)
Karsten Hopp 2ef055
  			    fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
Karsten Hopp 2ef055
  				    (int)reglnum,
Karsten Hopp 2ef055
! 				    (int)nfa_endp->se_u.pos.lnum,
Karsten Hopp 2ef055
  				    (int)(reginput - regline),
Karsten Hopp 2ef055
! 				    nfa_endp->se_u.pos.col);
Karsten Hopp 2ef055
  			else
Karsten Hopp 2ef055
  			    fprintf(log_fd, "Current col: %d, endp col: %d\n",
Karsten Hopp 2ef055
  				    (int)(reginput - regline),
Karsten Hopp 2ef055
! 				    (int)(nfa_endp->se_u.ptr - reginput));
Karsten Hopp 2ef055
  		    }
Karsten Hopp 2ef055
  #endif
Karsten Hopp 2ef055
! 		    /* It's only a match if it ends at "nfa_endp" */
Karsten Hopp 2ef055
! 		    if (nfa_endp != NULL && (REG_MULTI
Karsten Hopp 2ef055
! 			    ? (reglnum != nfa_endp->se_u.pos.lnum
Karsten Hopp 2ef055
  				|| (int)(reginput - regline)
Karsten Hopp 2ef055
! 						    != nfa_endp->se_u.pos.col)
Karsten Hopp 2ef055
! 			    : reginput != nfa_endp->se_u.ptr))
Karsten Hopp 2ef055
  			break;
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
  		    /* do not set submatches for \@! */
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 3929,3934 ****
Karsten Hopp 2ef055
--- 3942,3948 ----
Karsten Hopp 2ef055
  		char_u	    *save_regline = regline;
Karsten Hopp 2ef055
  		int	    save_reglnum = reglnum;
Karsten Hopp 2ef055
  		int	    save_nfa_match = nfa_match;
Karsten Hopp 2ef055
+ 		save_se_T   *save_nfa_endp = nfa_endp;
Karsten Hopp 2ef055
  		save_se_T   endpos;
Karsten Hopp 2ef055
  		save_se_T   *endposp = NULL;
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 4012,4018 ****
Karsten Hopp 2ef055
  		 * recursion. */
Karsten Hopp 2ef055
  		nfa_save_listids(start, listids);
Karsten Hopp 2ef055
  		nfa_set_null_listids(start);
Karsten Hopp 2ef055
! 		result = nfa_regmatch(t->state->out, submatch, m, endposp);
Karsten Hopp 2ef055
  		nfa_set_neg_listids(start);
Karsten Hopp 2ef055
  		nfa_restore_listids(start, listids);
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
--- 4026,4033 ----
Karsten Hopp 2ef055
  		 * recursion. */
Karsten Hopp 2ef055
  		nfa_save_listids(start, listids);
Karsten Hopp 2ef055
  		nfa_set_null_listids(start);
Karsten Hopp 2ef055
! 		nfa_endp = endposp;
Karsten Hopp 2ef055
! 		result = nfa_regmatch(t->state->out, submatch, m);
Karsten Hopp 2ef055
  		nfa_set_neg_listids(start);
Karsten Hopp 2ef055
  		nfa_restore_listids(start, listids);
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 4021,4026 ****
Karsten Hopp 2ef055
--- 4036,4042 ----
Karsten Hopp 2ef055
  		regline = save_regline;
Karsten Hopp 2ef055
  		reglnum = save_reglnum;
Karsten Hopp 2ef055
  		nfa_match = save_nfa_match;
Karsten Hopp 2ef055
+ 		nfa_endp = save_nfa_endp;
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
  #ifdef ENABLE_LOG
Karsten Hopp 2ef055
  		log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 4563,4569 ****
Karsten Hopp 2ef055
  	 * matters!
Karsten Hopp 2ef055
  	 * Do not add the start state in recursive calls of nfa_regmatch(),
Karsten Hopp 2ef055
  	 * because recursive calls should only start in the first position.
Karsten Hopp 2ef055
! 	 * Unless "endp" is not NULL, then we match the end position.
Karsten Hopp 2ef055
  	 * Also don't start a match past the first line. */
Karsten Hopp 2ef055
  	if (nfa_match == FALSE
Karsten Hopp 2ef055
  		&& ((start->c == NFA_MOPEN
Karsten Hopp 2ef055
--- 4579,4585 ----
Karsten Hopp 2ef055
  	 * matters!
Karsten Hopp 2ef055
  	 * Do not add the start state in recursive calls of nfa_regmatch(),
Karsten Hopp 2ef055
  	 * because recursive calls should only start in the first position.
Karsten Hopp 2ef055
! 	 * Unless "nfa_endp" is not NULL, then we match the end position.
Karsten Hopp 2ef055
  	 * Also don't start a match past the first line. */
Karsten Hopp 2ef055
  	if (nfa_match == FALSE
Karsten Hopp 2ef055
  		&& ((start->c == NFA_MOPEN
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 4571,4583 ****
Karsten Hopp 2ef055
  			&& clen != 0
Karsten Hopp 2ef055
  			&& (ireg_maxcol == 0
Karsten Hopp 2ef055
  			    || (colnr_T)(reginput - regline) < ireg_maxcol))
Karsten Hopp 2ef055
! 		    || (endp != NULL
Karsten Hopp 2ef055
  			&& (REG_MULTI
Karsten Hopp 2ef055
! 			    ? (reglnum < endp->se_u.pos.lnum
Karsten Hopp 2ef055
! 			       || (reglnum == endp->se_u.pos.lnum
Karsten Hopp 2ef055
  			           && (int)(reginput - regline)
Karsten Hopp 2ef055
! 						       < endp->se_u.pos.col))
Karsten Hopp 2ef055
! 			    : reginput < endp->se_u.ptr))))
Karsten Hopp 2ef055
  	{
Karsten Hopp 2ef055
  #ifdef ENABLE_LOG
Karsten Hopp 2ef055
  	    fprintf(log_fd, "(---) STARTSTATE\n");
Karsten Hopp 2ef055
--- 4587,4599 ----
Karsten Hopp 2ef055
  			&& clen != 0
Karsten Hopp 2ef055
  			&& (ireg_maxcol == 0
Karsten Hopp 2ef055
  			    || (colnr_T)(reginput - regline) < ireg_maxcol))
Karsten Hopp 2ef055
! 		    || (nfa_endp != NULL
Karsten Hopp 2ef055
  			&& (REG_MULTI
Karsten Hopp 2ef055
! 			    ? (reglnum < nfa_endp->se_u.pos.lnum
Karsten Hopp 2ef055
! 			       || (reglnum == nfa_endp->se_u.pos.lnum
Karsten Hopp 2ef055
  			           && (int)(reginput - regline)
Karsten Hopp 2ef055
! 						    < nfa_endp->se_u.pos.col))
Karsten Hopp 2ef055
! 			    : reginput < nfa_endp->se_u.ptr))))
Karsten Hopp 2ef055
  	{
Karsten Hopp 2ef055
  #ifdef ENABLE_LOG
Karsten Hopp 2ef055
  	    fprintf(log_fd, "(---) STARTSTATE\n");
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 4601,4608 ****
Karsten Hopp 2ef055
  	 * finish. */
Karsten Hopp 2ef055
  	if (clen != 0)
Karsten Hopp 2ef055
  	    reginput += clen;
Karsten Hopp 2ef055
! 	else if (go_to_nextline || (endp != NULL && REG_MULTI
Karsten Hopp 2ef055
! 					    && reglnum < endp->se_u.pos.lnum))
Karsten Hopp 2ef055
  	    reg_nextline();
Karsten Hopp 2ef055
  	else
Karsten Hopp 2ef055
  	    break;
Karsten Hopp 2ef055
--- 4617,4624 ----
Karsten Hopp 2ef055
  	 * finish. */
Karsten Hopp 2ef055
  	if (clen != 0)
Karsten Hopp 2ef055
  	    reginput += clen;
Karsten Hopp 2ef055
! 	else if (go_to_nextline || (nfa_endp != NULL && REG_MULTI
Karsten Hopp 2ef055
! 					&& reglnum < nfa_endp->se_u.pos.lnum))
Karsten Hopp 2ef055
  	    reg_nextline();
Karsten Hopp 2ef055
  	else
Karsten Hopp 2ef055
  	    break;
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 4678,4684 ****
Karsten Hopp 2ef055
      clear_sub(&m.synt);
Karsten Hopp 2ef055
  #endif
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
!     if (nfa_regmatch(start, &subs, &m, NULL) == FALSE)
Karsten Hopp 2ef055
  	return 0;
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
      cleanup_subexpr();
Karsten Hopp 2ef055
--- 4694,4700 ----
Karsten Hopp 2ef055
      clear_sub(&m.synt);
Karsten Hopp 2ef055
  #endif
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
!     if (nfa_regmatch(start, &subs, &m) == FALSE)
Karsten Hopp 2ef055
  	return 0;
Karsten Hopp 2ef055
  
Karsten Hopp 2ef055
      cleanup_subexpr();
Karsten Hopp 2ef055
*** ../vim-7.3.1093/src/version.c	2013-06-02 16:07:05.000000000 +0200
Karsten Hopp 2ef055
--- src/version.c	2013-06-02 16:25:02.000000000 +0200
Karsten Hopp 2ef055
***************
Karsten Hopp 2ef055
*** 730,731 ****
Karsten Hopp 2ef055
--- 730,733 ----
Karsten Hopp 2ef055
  {   /* Add new patch number below this line */
Karsten Hopp 2ef055
+ /**/
Karsten Hopp 2ef055
+     1094,
Karsten Hopp 2ef055
  /**/
Karsten Hopp 2ef055
Karsten Hopp 2ef055
-- 
Karsten Hopp 2ef055
The future isn't what it used to be.
Karsten Hopp 2ef055
Karsten Hopp 2ef055
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp 2ef055
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp 2ef055
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp 2ef055
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///