Blob Blame History Raw
To: vim_dev@googlegroups.com
Subject: Patch 7.3.1094
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 7.3.1094
Problem:    New regexp engine: Attempts to match "^" at every character.
Solution:   Only try "^" at the start of a line.
Files:	    src/regexp_nfa.c


*** ../vim-7.3.1093/src/regexp_nfa.c	2013-06-02 16:07:05.000000000 +0200
--- src/regexp_nfa.c	2013-06-02 16:24:04.000000000 +0200
***************
*** 249,254 ****
--- 249,256 ----
  			 * executing. */
  static int istate;	/* Index in the state vector, used in new_state() */
  
+ /* If not NULL match must end at this position */
+ static save_se_T *nfa_endp = NULL;
  
  static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags));
  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
***************
*** 3080,3085 ****
--- 3082,3099 ----
  	    state->lastlist = l->id;
  	    break;
  
+ 	case NFA_BOL:
+ 	case NFA_BOF:
+ 	    /* "^" won't match past end-of-line, don't bother trying.
+ 	     * Except when we are going to the next line for a look-behind
+ 	     * match. */
+ 	    if (reginput > regline
+ 		    && (nfa_endp == NULL
+ 			|| !REG_MULTI
+ 			|| reglnum == nfa_endp->se_u.pos.lnum))
+ 		goto skip_add;
+ 	    /* FALLTHROUGH */
+ 
  	default:
  	    if (state->lastlist == l->id)
  	    {
***************
*** 3659,3682 ****
      return val == pos;
  }
  
! static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m, save_se_T *endp));
  
  /*
   * Main matching routine.
   *
   * Run NFA to determine whether it matches reginput.
   *
!  * When "endp" is not NULL it is a required end-of-match position.
   *
   * Return TRUE if there is a match, FALSE otherwise.
   * Note: Caller must ensure that: start != NULL.
   */
      static int
! nfa_regmatch(start, submatch, m, endp)
      nfa_state_T		*start;
      regsubs_T		*submatch;
      regsubs_T		*m;
-     save_se_T		*endp;
  {
      int		result;
      int		size = 0;
--- 3673,3695 ----
      return val == pos;
  }
  
! static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m));
  
  /*
   * Main matching routine.
   *
   * Run NFA to determine whether it matches reginput.
   *
!  * When "nfa_endp" is not NULL it is a required end-of-match position.
   *
   * Return TRUE if there is a match, FALSE otherwise.
   * Note: Caller must ensure that: start != NULL.
   */
      static int
! nfa_regmatch(start, submatch, m)
      nfa_state_T		*start;
      regsubs_T		*submatch;
      regsubs_T		*m;
  {
      int		result;
      int		size = 0;
***************
*** 3888,3913 ****
  		else
  		{
  #ifdef ENABLE_LOG
! 		    if (endp != NULL)
  		    {
  			if (REG_MULTI)
  			    fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
  				    (int)reglnum,
! 				    (int)endp->se_u.pos.lnum,
  				    (int)(reginput - regline),
! 				    endp->se_u.pos.col);
  			else
  			    fprintf(log_fd, "Current col: %d, endp col: %d\n",
  				    (int)(reginput - regline),
! 				    (int)(endp->se_u.ptr - reginput));
  		    }
  #endif
! 		    /* It's only a match if it ends at "endp" */
! 		    if (endp != NULL && (REG_MULTI
! 			    ? (reglnum != endp->se_u.pos.lnum
  				|| (int)(reginput - regline)
! 							!= endp->se_u.pos.col)
! 			    : reginput != endp->se_u.ptr))
  			break;
  
  		    /* do not set submatches for \@! */
--- 3901,3926 ----
  		else
  		{
  #ifdef ENABLE_LOG
! 		    if (nfa_endp != NULL)
  		    {
  			if (REG_MULTI)
  			    fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
  				    (int)reglnum,
! 				    (int)nfa_endp->se_u.pos.lnum,
  				    (int)(reginput - regline),
! 				    nfa_endp->se_u.pos.col);
  			else
  			    fprintf(log_fd, "Current col: %d, endp col: %d\n",
  				    (int)(reginput - regline),
! 				    (int)(nfa_endp->se_u.ptr - reginput));
  		    }
  #endif
! 		    /* It's only a match if it ends at "nfa_endp" */
! 		    if (nfa_endp != NULL && (REG_MULTI
! 			    ? (reglnum != nfa_endp->se_u.pos.lnum
  				|| (int)(reginput - regline)
! 						    != nfa_endp->se_u.pos.col)
! 			    : reginput != nfa_endp->se_u.ptr))
  			break;
  
  		    /* do not set submatches for \@! */
***************
*** 3929,3934 ****
--- 3942,3948 ----
  		char_u	    *save_regline = regline;
  		int	    save_reglnum = reglnum;
  		int	    save_nfa_match = nfa_match;
+ 		save_se_T   *save_nfa_endp = nfa_endp;
  		save_se_T   endpos;
  		save_se_T   *endposp = NULL;
  
***************
*** 4012,4018 ****
  		 * recursion. */
  		nfa_save_listids(start, listids);
  		nfa_set_null_listids(start);
! 		result = nfa_regmatch(t->state->out, submatch, m, endposp);
  		nfa_set_neg_listids(start);
  		nfa_restore_listids(start, listids);
  
--- 4026,4033 ----
  		 * recursion. */
  		nfa_save_listids(start, listids);
  		nfa_set_null_listids(start);
! 		nfa_endp = endposp;
! 		result = nfa_regmatch(t->state->out, submatch, m);
  		nfa_set_neg_listids(start);
  		nfa_restore_listids(start, listids);
  
***************
*** 4021,4026 ****
--- 4036,4042 ----
  		regline = save_regline;
  		reglnum = save_reglnum;
  		nfa_match = save_nfa_match;
+ 		nfa_endp = save_nfa_endp;
  
  #ifdef ENABLE_LOG
  		log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
***************
*** 4563,4569 ****
  	 * matters!
  	 * Do not add the start state in recursive calls of nfa_regmatch(),
  	 * because recursive calls should only start in the first position.
! 	 * Unless "endp" is not NULL, then we match the end position.
  	 * Also don't start a match past the first line. */
  	if (nfa_match == FALSE
  		&& ((start->c == NFA_MOPEN
--- 4579,4585 ----
  	 * matters!
  	 * Do not add the start state in recursive calls of nfa_regmatch(),
  	 * because recursive calls should only start in the first position.
! 	 * Unless "nfa_endp" is not NULL, then we match the end position.
  	 * Also don't start a match past the first line. */
  	if (nfa_match == FALSE
  		&& ((start->c == NFA_MOPEN
***************
*** 4571,4583 ****
  			&& clen != 0
  			&& (ireg_maxcol == 0
  			    || (colnr_T)(reginput - regline) < ireg_maxcol))
! 		    || (endp != NULL
  			&& (REG_MULTI
! 			    ? (reglnum < endp->se_u.pos.lnum
! 			       || (reglnum == endp->se_u.pos.lnum
  			           && (int)(reginput - regline)
! 						       < endp->se_u.pos.col))
! 			    : reginput < endp->se_u.ptr))))
  	{
  #ifdef ENABLE_LOG
  	    fprintf(log_fd, "(---) STARTSTATE\n");
--- 4587,4599 ----
  			&& clen != 0
  			&& (ireg_maxcol == 0
  			    || (colnr_T)(reginput - regline) < ireg_maxcol))
! 		    || (nfa_endp != NULL
  			&& (REG_MULTI
! 			    ? (reglnum < nfa_endp->se_u.pos.lnum
! 			       || (reglnum == nfa_endp->se_u.pos.lnum
  			           && (int)(reginput - regline)
! 						    < nfa_endp->se_u.pos.col))
! 			    : reginput < nfa_endp->se_u.ptr))))
  	{
  #ifdef ENABLE_LOG
  	    fprintf(log_fd, "(---) STARTSTATE\n");
***************
*** 4601,4608 ****
  	 * finish. */
  	if (clen != 0)
  	    reginput += clen;
! 	else if (go_to_nextline || (endp != NULL && REG_MULTI
! 					    && reglnum < endp->se_u.pos.lnum))
  	    reg_nextline();
  	else
  	    break;
--- 4617,4624 ----
  	 * finish. */
  	if (clen != 0)
  	    reginput += clen;
! 	else if (go_to_nextline || (nfa_endp != NULL && REG_MULTI
! 					&& reglnum < nfa_endp->se_u.pos.lnum))
  	    reg_nextline();
  	else
  	    break;
***************
*** 4678,4684 ****
      clear_sub(&m.synt);
  #endif
  
!     if (nfa_regmatch(start, &subs, &m, NULL) == FALSE)
  	return 0;
  
      cleanup_subexpr();
--- 4694,4700 ----
      clear_sub(&m.synt);
  #endif
  
!     if (nfa_regmatch(start, &subs, &m) == FALSE)
  	return 0;
  
      cleanup_subexpr();
*** ../vim-7.3.1093/src/version.c	2013-06-02 16:07:05.000000000 +0200
--- src/version.c	2013-06-02 16:25:02.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1094,
  /**/

-- 
The future isn't what it used to be.

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///