Karsten Hopp 3ea9d8
To: vim_dev@googlegroups.com
Karsten Hopp 3ea9d8
Subject: Patch 7.3.1147
Karsten Hopp 3ea9d8
Fcc: outbox
Karsten Hopp 3ea9d8
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp 3ea9d8
Mime-Version: 1.0
Karsten Hopp 3ea9d8
Content-Type: text/plain; charset=UTF-8
Karsten Hopp 3ea9d8
Content-Transfer-Encoding: 8bit
Karsten Hopp 3ea9d8
------------
Karsten Hopp 3ea9d8
Karsten Hopp 3ea9d8
Patch 7.3.1147
Karsten Hopp 3ea9d8
Problem:    New regexp engine: regstart is only used to find the first match.
Karsten Hopp 3ea9d8
Solution:   Use regstart whenever adding the start state.
Karsten Hopp 3ea9d8
Files:	    src/regexp_nfa.c
Karsten Hopp 3ea9d8
Karsten Hopp 3ea9d8
Karsten Hopp 3ea9d8
*** ../vim-7.3.1146/src/regexp_nfa.c	2013-06-08 13:33:32.000000000 +0200
Karsten Hopp 3ea9d8
--- src/regexp_nfa.c	2013-06-08 14:37:36.000000000 +0200
Karsten Hopp 3ea9d8
***************
Karsten Hopp 3ea9d8
*** 4153,4158 ****
Karsten Hopp 3ea9d8
--- 4153,4159 ----
Karsten Hopp 3ea9d8
  }
Karsten Hopp 3ea9d8
  
Karsten Hopp 3ea9d8
  static int failure_chance __ARGS((nfa_state_T *state, int depth));
Karsten Hopp 3ea9d8
+ static int skip_to_start __ARGS((int c, colnr_T *colp));
Karsten Hopp 3ea9d8
  
Karsten Hopp 3ea9d8
  /*
Karsten Hopp 3ea9d8
   * Estimate the chance of a match with "state" failing.
Karsten Hopp 3ea9d8
***************
Karsten Hopp 3ea9d8
*** 4305,4310 ****
Karsten Hopp 3ea9d8
--- 4306,4336 ----
Karsten Hopp 3ea9d8
  }
Karsten Hopp 3ea9d8
  
Karsten Hopp 3ea9d8
  /*
Karsten Hopp 3ea9d8
+  * Skip until the char "c" we know a match must start with.
Karsten Hopp 3ea9d8
+  */
Karsten Hopp 3ea9d8
+     static int
Karsten Hopp 3ea9d8
+ skip_to_start(c, colp)
Karsten Hopp 3ea9d8
+     int		c;
Karsten Hopp 3ea9d8
+     colnr_T	*colp;
Karsten Hopp 3ea9d8
+ {
Karsten Hopp 3ea9d8
+     char_u *s;
Karsten Hopp 3ea9d8
+ 
Karsten Hopp 3ea9d8
+     /* Used often, do some work to avoid call overhead. */
Karsten Hopp 3ea9d8
+     if (!ireg_ic
Karsten Hopp 3ea9d8
+ #ifdef FEAT_MBYTE
Karsten Hopp 3ea9d8
+ 		&& !has_mbyte
Karsten Hopp 3ea9d8
+ #endif
Karsten Hopp 3ea9d8
+ 		)
Karsten Hopp 3ea9d8
+ 	s = vim_strbyte(regline + *colp, c);
Karsten Hopp 3ea9d8
+     else
Karsten Hopp 3ea9d8
+ 	s = cstrchr(regline + *colp, c);
Karsten Hopp 3ea9d8
+     if (s == NULL)
Karsten Hopp 3ea9d8
+ 	return FAIL;
Karsten Hopp 3ea9d8
+     *colp = (int)(s - regline);
Karsten Hopp 3ea9d8
+     return OK;
Karsten Hopp 3ea9d8
+ }
Karsten Hopp 3ea9d8
+ 
Karsten Hopp 3ea9d8
+ /*
Karsten Hopp 3ea9d8
   * Main matching routine.
Karsten Hopp 3ea9d8
   *
Karsten Hopp 3ea9d8
   * Run NFA to determine whether it matches reginput.
Karsten Hopp 3ea9d8
***************
Karsten Hopp 3ea9d8
*** 5449,5460 ****
Karsten Hopp 3ea9d8
  	     * the first MOPEN. */
Karsten Hopp 3ea9d8
  	    if (toplevel)
Karsten Hopp 3ea9d8
  	    {
Karsten Hopp 3ea9d8
! 		if (REG_MULTI)
Karsten Hopp 3ea9d8
! 		    m->norm.list.multi[0].start.col =
Karsten Hopp 3ea9d8
  					 (colnr_T)(reginput - regline) + clen;
Karsten Hopp 3ea9d8
! 		else
Karsten Hopp 3ea9d8
! 		    m->norm.list.line[0].start = reginput + clen;
Karsten Hopp 3ea9d8
! 		addstate(nextlist, start->out, m, clen);
Karsten Hopp 3ea9d8
  	    }
Karsten Hopp 3ea9d8
  	    else
Karsten Hopp 3ea9d8
  		addstate(nextlist, start, m, clen);
Karsten Hopp 3ea9d8
--- 5475,5524 ----
Karsten Hopp 3ea9d8
  	     * the first MOPEN. */
Karsten Hopp 3ea9d8
  	    if (toplevel)
Karsten Hopp 3ea9d8
  	    {
Karsten Hopp 3ea9d8
! 		int add = TRUE;
Karsten Hopp 3ea9d8
! 		int c;
Karsten Hopp 3ea9d8
! 
Karsten Hopp 3ea9d8
! 		if (prog->regstart != NUL && clen != 0)
Karsten Hopp 3ea9d8
! 		{
Karsten Hopp 3ea9d8
! 		    if (nextlist->n == 0)
Karsten Hopp 3ea9d8
! 		    {
Karsten Hopp 3ea9d8
! 			colnr_T col = (colnr_T)(reginput - regline) + clen;
Karsten Hopp 3ea9d8
! 
Karsten Hopp 3ea9d8
! 			/* Nextlist is empty, we can skip ahead to the
Karsten Hopp 3ea9d8
! 			 * character that must appear at the start. */
Karsten Hopp 3ea9d8
! 			if (skip_to_start(prog->regstart, &col) == FAIL)
Karsten Hopp 3ea9d8
! 			    break;
Karsten Hopp 3ea9d8
! #ifdef ENABLE_LOG
Karsten Hopp 3ea9d8
! 			fprintf(log_fd, "  Skipping ahead %d bytes to regstart\n",
Karsten Hopp 3ea9d8
! 				col - ((colnr_T)(reginput - regline) + clen));
Karsten Hopp 3ea9d8
! #endif
Karsten Hopp 3ea9d8
! 			reginput = regline + col - clen;
Karsten Hopp 3ea9d8
! 		    }
Karsten Hopp 3ea9d8
! 		    else
Karsten Hopp 3ea9d8
! 		    {
Karsten Hopp 3ea9d8
! 			/* Checking if the required start character matches is
Karsten Hopp 3ea9d8
! 			 * cheaper than adding a state that won't match. */
Karsten Hopp 3ea9d8
! 			c = PTR2CHAR(reginput + clen);
Karsten Hopp 3ea9d8
! 			if (c != prog->regstart && (!ireg_ic || MB_TOLOWER(c)
Karsten Hopp 3ea9d8
! 					       != MB_TOLOWER(prog->regstart)))
Karsten Hopp 3ea9d8
! 			{
Karsten Hopp 3ea9d8
! #ifdef ENABLE_LOG
Karsten Hopp 3ea9d8
! 			    fprintf(log_fd, "  Skipping start state, regstart does not match\n");
Karsten Hopp 3ea9d8
! #endif
Karsten Hopp 3ea9d8
! 			    add = FALSE;
Karsten Hopp 3ea9d8
! 			}
Karsten Hopp 3ea9d8
! 		    }
Karsten Hopp 3ea9d8
! 		}
Karsten Hopp 3ea9d8
! 
Karsten Hopp 3ea9d8
! 		if (add)
Karsten Hopp 3ea9d8
! 		{
Karsten Hopp 3ea9d8
! 		    if (REG_MULTI)
Karsten Hopp 3ea9d8
! 			m->norm.list.multi[0].start.col =
Karsten Hopp 3ea9d8
  					 (colnr_T)(reginput - regline) + clen;
Karsten Hopp 3ea9d8
! 		    else
Karsten Hopp 3ea9d8
! 			m->norm.list.line[0].start = reginput + clen;
Karsten Hopp 3ea9d8
! 		    addstate(nextlist, start->out, m, clen);
Karsten Hopp 3ea9d8
! 		}
Karsten Hopp 3ea9d8
  	    }
Karsten Hopp 3ea9d8
  	    else
Karsten Hopp 3ea9d8
  		addstate(nextlist, start, m, clen);
Karsten Hopp 3ea9d8
***************
Karsten Hopp 3ea9d8
*** 5701,5723 ****
Karsten Hopp 3ea9d8
  	return 0L;
Karsten Hopp 3ea9d8
  
Karsten Hopp 3ea9d8
      if (prog->regstart != NUL)
Karsten Hopp 3ea9d8
!     {
Karsten Hopp 3ea9d8
! 	char_u *s;
Karsten Hopp 3ea9d8
! 
Karsten Hopp 3ea9d8
! 	/* Skip until the char we know it must start with.
Karsten Hopp 3ea9d8
! 	 * Used often, do some work to avoid call overhead. */
Karsten Hopp 3ea9d8
! 	if (!ireg_ic
Karsten Hopp 3ea9d8
! #ifdef FEAT_MBYTE
Karsten Hopp 3ea9d8
! 		    && !has_mbyte
Karsten Hopp 3ea9d8
! #endif
Karsten Hopp 3ea9d8
! 		    )
Karsten Hopp 3ea9d8
! 	    s = vim_strbyte(regline + col, prog->regstart);
Karsten Hopp 3ea9d8
! 	else
Karsten Hopp 3ea9d8
! 	    s = cstrchr(regline + col, prog->regstart);
Karsten Hopp 3ea9d8
! 	if (s == NULL)
Karsten Hopp 3ea9d8
  	    return 0L;
Karsten Hopp 3ea9d8
- 	col = (int)(s - regline);
Karsten Hopp 3ea9d8
-     }
Karsten Hopp 3ea9d8
  
Karsten Hopp 3ea9d8
      /* If the start column is past the maximum column: no need to try. */
Karsten Hopp 3ea9d8
      if (ireg_maxcol > 0 && col >= ireg_maxcol)
Karsten Hopp 3ea9d8
--- 5765,5774 ----
Karsten Hopp 3ea9d8
  	return 0L;
Karsten Hopp 3ea9d8
  
Karsten Hopp 3ea9d8
      if (prog->regstart != NUL)
Karsten Hopp 3ea9d8
! 	/* Skip ahead until a character we know the match must start with.
Karsten Hopp 3ea9d8
! 	 * When there is none there is no match. */
Karsten Hopp 3ea9d8
! 	if (skip_to_start(prog->regstart, &col) == FAIL)
Karsten Hopp 3ea9d8
  	    return 0L;
Karsten Hopp 3ea9d8
  
Karsten Hopp 3ea9d8
      /* If the start column is past the maximum column: no need to try. */
Karsten Hopp 3ea9d8
      if (ireg_maxcol > 0 && col >= ireg_maxcol)
Karsten Hopp 3ea9d8
*** ../vim-7.3.1146/src/version.c	2013-06-08 13:33:32.000000000 +0200
Karsten Hopp 3ea9d8
--- src/version.c	2013-06-08 14:35:54.000000000 +0200
Karsten Hopp 3ea9d8
***************
Karsten Hopp 3ea9d8
*** 730,731 ****
Karsten Hopp 3ea9d8
--- 730,733 ----
Karsten Hopp 3ea9d8
  {   /* Add new patch number below this line */
Karsten Hopp 3ea9d8
+ /**/
Karsten Hopp 3ea9d8
+     1147,
Karsten Hopp 3ea9d8
  /**/
Karsten Hopp 3ea9d8
Karsten Hopp 3ea9d8
-- 
Karsten Hopp 3ea9d8
Nobody will ever need more than 640 kB RAM.
Karsten Hopp 3ea9d8
		-- Bill Gates, 1983
Karsten Hopp 3ea9d8
Windows 98 requires 16 MB RAM.
Karsten Hopp 3ea9d8
		-- Bill Gates, 1999
Karsten Hopp 3ea9d8
Logical conclusion: Nobody will ever need Windows 98.
Karsten Hopp 3ea9d8
Karsten Hopp 3ea9d8
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp 3ea9d8
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp 3ea9d8
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp 3ea9d8
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///