Karsten Hopp be765d
To: vim_dev@googlegroups.com
Karsten Hopp be765d
Subject: Patch 7.3.1133
Karsten Hopp be765d
Fcc: outbox
Karsten Hopp be765d
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp be765d
Mime-Version: 1.0
Karsten Hopp be765d
Content-Type: text/plain; charset=UTF-8
Karsten Hopp be765d
Content-Transfer-Encoding: 8bit
Karsten Hopp be765d
------------
Karsten Hopp be765d
Karsten Hopp be765d
Patch 7.3.1133
Karsten Hopp be765d
Problem:    New regexp engine is a bit slow.
Karsten Hopp be765d
Solution:   Skip ahead to a character that must match.  Don't try matching a
Karsten Hopp be765d
	    "^" patter past the start of line.
Karsten Hopp be765d
Files:	    src/regexp_nfa.c, src/regexp.h
Karsten Hopp be765d
Karsten Hopp be765d
Karsten Hopp be765d
*** ../vim-7.3.1132/src/regexp_nfa.c	2013-06-06 18:04:47.000000000 +0200
Karsten Hopp be765d
--- src/regexp_nfa.c	2013-06-06 18:37:23.000000000 +0200
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 257,262 ****
Karsten Hopp be765d
--- 257,264 ----
Karsten Hopp be765d
  static int nfa_ll_index = 0;
Karsten Hopp be765d
  
Karsten Hopp be765d
  static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags));
Karsten Hopp be765d
+ static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
Karsten Hopp be765d
+ static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
Karsten Hopp be765d
  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
Karsten Hopp be765d
  static int nfa_emit_equi_class __ARGS((int c, int neg));
Karsten Hopp be765d
  static int nfa_regatom __ARGS((void));
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 331,336 ****
Karsten Hopp be765d
--- 333,487 ----
Karsten Hopp be765d
  }
Karsten Hopp be765d
  
Karsten Hopp be765d
  /*
Karsten Hopp be765d
+  * Figure out if the NFA state list starts with an anchor, must match at start
Karsten Hopp be765d
+  * of the line.
Karsten Hopp be765d
+  */
Karsten Hopp be765d
+     static int
Karsten Hopp be765d
+ nfa_get_reganch(start, depth)
Karsten Hopp be765d
+     nfa_state_T *start;
Karsten Hopp be765d
+     int		depth;
Karsten Hopp be765d
+ {
Karsten Hopp be765d
+     nfa_state_T *p = start;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+     if (depth > 4)
Karsten Hopp be765d
+ 	return 0;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+     while (p != NULL)
Karsten Hopp be765d
+     {
Karsten Hopp be765d
+ 	switch (p->c)
Karsten Hopp be765d
+ 	{
Karsten Hopp be765d
+ 	    case NFA_BOL:
Karsten Hopp be765d
+ 	    case NFA_BOF:
Karsten Hopp be765d
+ 		return 1; /* yes! */
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 	    case NFA_ZSTART:
Karsten Hopp be765d
+ 	    case NFA_ZEND:
Karsten Hopp be765d
+ 	    case NFA_CURSOR:
Karsten Hopp be765d
+ 	    case NFA_VISUAL:
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 	    case NFA_MOPEN:
Karsten Hopp be765d
+ 	    case NFA_MOPEN1:
Karsten Hopp be765d
+ 	    case NFA_MOPEN2:
Karsten Hopp be765d
+ 	    case NFA_MOPEN3:
Karsten Hopp be765d
+ 	    case NFA_MOPEN4:
Karsten Hopp be765d
+ 	    case NFA_MOPEN5:
Karsten Hopp be765d
+ 	    case NFA_MOPEN6:
Karsten Hopp be765d
+ 	    case NFA_MOPEN7:
Karsten Hopp be765d
+ 	    case NFA_MOPEN8:
Karsten Hopp be765d
+ 	    case NFA_MOPEN9:
Karsten Hopp be765d
+ 	    case NFA_NOPEN:
Karsten Hopp be765d
+ #ifdef FEAT_SYN_HL
Karsten Hopp be765d
+ 	    case NFA_ZOPEN:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN1:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN2:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN3:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN4:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN5:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN6:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN7:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN8:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN9:
Karsten Hopp be765d
+ #endif
Karsten Hopp be765d
+ 		p = p->out;
Karsten Hopp be765d
+ 		break;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 	    case NFA_SPLIT:
Karsten Hopp be765d
+ 		return nfa_get_reganch(p->out, depth + 1)
Karsten Hopp be765d
+ 				       && nfa_get_reganch(p->out1, depth + 1);
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 	    default:
Karsten Hopp be765d
+ 		return 0; /* noooo */
Karsten Hopp be765d
+ 	}
Karsten Hopp be765d
+     }
Karsten Hopp be765d
+     return 0;
Karsten Hopp be765d
+ }
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ /*
Karsten Hopp be765d
+  * Figure out if the NFA state list starts with a character which must match
Karsten Hopp be765d
+  * at start of the match.
Karsten Hopp be765d
+  */
Karsten Hopp be765d
+     static int
Karsten Hopp be765d
+ nfa_get_regstart(start, depth)
Karsten Hopp be765d
+     nfa_state_T *start;
Karsten Hopp be765d
+     int		depth;
Karsten Hopp be765d
+ {
Karsten Hopp be765d
+     nfa_state_T *p = start;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+     if (depth > 4)
Karsten Hopp be765d
+ 	return 0;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+     while (p != NULL)
Karsten Hopp be765d
+     {
Karsten Hopp be765d
+ 	switch (p->c)
Karsten Hopp be765d
+ 	{
Karsten Hopp be765d
+ 	    /* all kinds of zero-width matches */
Karsten Hopp be765d
+ 	    case NFA_BOL:
Karsten Hopp be765d
+ 	    case NFA_BOF:
Karsten Hopp be765d
+ 	    case NFA_BOW:
Karsten Hopp be765d
+ 	    case NFA_EOW:
Karsten Hopp be765d
+ 	    case NFA_ZSTART:
Karsten Hopp be765d
+ 	    case NFA_ZEND:
Karsten Hopp be765d
+ 	    case NFA_CURSOR:
Karsten Hopp be765d
+ 	    case NFA_VISUAL:
Karsten Hopp be765d
+ 	    case NFA_LNUM:
Karsten Hopp be765d
+ 	    case NFA_LNUM_GT:
Karsten Hopp be765d
+ 	    case NFA_LNUM_LT:
Karsten Hopp be765d
+ 	    case NFA_COL:
Karsten Hopp be765d
+ 	    case NFA_COL_GT:
Karsten Hopp be765d
+ 	    case NFA_COL_LT:
Karsten Hopp be765d
+ 	    case NFA_VCOL:
Karsten Hopp be765d
+ 	    case NFA_VCOL_GT:
Karsten Hopp be765d
+ 	    case NFA_VCOL_LT:
Karsten Hopp be765d
+ 	    case NFA_MARK:
Karsten Hopp be765d
+ 	    case NFA_MARK_GT:
Karsten Hopp be765d
+ 	    case NFA_MARK_LT:
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 	    case NFA_MOPEN:
Karsten Hopp be765d
+ 	    case NFA_MOPEN1:
Karsten Hopp be765d
+ 	    case NFA_MOPEN2:
Karsten Hopp be765d
+ 	    case NFA_MOPEN3:
Karsten Hopp be765d
+ 	    case NFA_MOPEN4:
Karsten Hopp be765d
+ 	    case NFA_MOPEN5:
Karsten Hopp be765d
+ 	    case NFA_MOPEN6:
Karsten Hopp be765d
+ 	    case NFA_MOPEN7:
Karsten Hopp be765d
+ 	    case NFA_MOPEN8:
Karsten Hopp be765d
+ 	    case NFA_MOPEN9:
Karsten Hopp be765d
+ 	    case NFA_NOPEN:
Karsten Hopp be765d
+ #ifdef FEAT_SYN_HL
Karsten Hopp be765d
+ 	    case NFA_ZOPEN:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN1:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN2:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN3:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN4:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN5:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN6:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN7:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN8:
Karsten Hopp be765d
+ 	    case NFA_ZOPEN9:
Karsten Hopp be765d
+ #endif
Karsten Hopp be765d
+ 		p = p->out;
Karsten Hopp be765d
+ 		break;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 	    case NFA_SPLIT:
Karsten Hopp be765d
+ 	    {
Karsten Hopp be765d
+ 		int c1 = nfa_get_regstart(p->out, depth + 1);
Karsten Hopp be765d
+ 		int c2 = nfa_get_regstart(p->out1, depth + 1);
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 		if (c1 == c2)
Karsten Hopp be765d
+ 		    return c1; /* yes! */
Karsten Hopp be765d
+ 		return 0;
Karsten Hopp be765d
+ 	    }
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 	    default:
Karsten Hopp be765d
+ 		if (p->c > 0 && !p->negated)
Karsten Hopp be765d
+ 		    return p->c; /* yes! */
Karsten Hopp be765d
+ 		return 0;
Karsten Hopp be765d
+ 	}
Karsten Hopp be765d
+     }
Karsten Hopp be765d
+     return 0;
Karsten Hopp be765d
+ }
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ /*
Karsten Hopp be765d
   * Allocate more space for post_start.  Called when
Karsten Hopp be765d
   * running above the estimated number of states.
Karsten Hopp be765d
   */
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 2121,2126 ****
Karsten Hopp be765d
--- 2272,2281 ----
Karsten Hopp be765d
      if (debugf != NULL)
Karsten Hopp be765d
      {
Karsten Hopp be765d
  	nfa_print_state(debugf, prog->start);
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 	fprintf(debugf, "reganch: %d\n", prog->reganch);
Karsten Hopp be765d
+ 	fprintf(debugf, "regstart: %d\n", prog->regstart);
Karsten Hopp be765d
+ 
Karsten Hopp be765d
  	fclose(debugf);
Karsten Hopp be765d
      }
Karsten Hopp be765d
  }
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 4248,4253 ****
Karsten Hopp be765d
--- 4403,4412 ----
Karsten Hopp be765d
  		t = &neglist->t[0];
Karsten Hopp be765d
  		neglist->n--;
Karsten Hopp be765d
  		listidx--;
Karsten Hopp be765d
+ #ifdef ENABLE_LOG
Karsten Hopp be765d
+ 		fprintf(log_fd, "     using neglist entry, %d remaining\n",
Karsten Hopp be765d
+ 			neglist->n);
Karsten Hopp be765d
+ #endif
Karsten Hopp be765d
  	    }
Karsten Hopp be765d
  	    else
Karsten Hopp be765d
  		t = &thislist->t[listidx];
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 4688,4694 ****
Karsten Hopp be765d
  
Karsten Hopp be765d
  	    case NFA_END_NEG_RANGE:
Karsten Hopp be765d
  		/* This follows a series of negated nodes, like:
Karsten Hopp be765d
! 		 * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */
Karsten Hopp be765d
  		if (curc > 0)
Karsten Hopp be765d
  		{
Karsten Hopp be765d
  		    ll = nextlist;
Karsten Hopp be765d
--- 4847,4853 ----
Karsten Hopp be765d
  
Karsten Hopp be765d
  	    case NFA_END_NEG_RANGE:
Karsten Hopp be765d
  		/* This follows a series of negated nodes, like:
Karsten Hopp be765d
! 		 * NOT CHAR(x), NOT CHAR(y), etc. */
Karsten Hopp be765d
  		if (curc > 0)
Karsten Hopp be765d
  		{
Karsten Hopp be765d
  		    ll = nextlist;
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 5304,5316 ****
Karsten Hopp be765d
   * Returns 0 for failure, number of lines contained in the match otherwise.
Karsten Hopp be765d
   */
Karsten Hopp be765d
      static long
Karsten Hopp be765d
! nfa_regexec_both(line, col)
Karsten Hopp be765d
      char_u	*line;
Karsten Hopp be765d
!     colnr_T	col;		/* column to start looking for match */
Karsten Hopp be765d
  {
Karsten Hopp be765d
      nfa_regprog_T   *prog;
Karsten Hopp be765d
      long	    retval = 0L;
Karsten Hopp be765d
      int		    i;
Karsten Hopp be765d
  
Karsten Hopp be765d
      if (REG_MULTI)
Karsten Hopp be765d
      {
Karsten Hopp be765d
--- 5463,5476 ----
Karsten Hopp be765d
   * Returns 0 for failure, number of lines contained in the match otherwise.
Karsten Hopp be765d
   */
Karsten Hopp be765d
      static long
Karsten Hopp be765d
! nfa_regexec_both(line, startcol)
Karsten Hopp be765d
      char_u	*line;
Karsten Hopp be765d
!     colnr_T	startcol;	/* column to start looking for match */
Karsten Hopp be765d
  {
Karsten Hopp be765d
      nfa_regprog_T   *prog;
Karsten Hopp be765d
      long	    retval = 0L;
Karsten Hopp be765d
      int		    i;
Karsten Hopp be765d
+     colnr_T	    col = startcol;
Karsten Hopp be765d
  
Karsten Hopp be765d
      if (REG_MULTI)
Karsten Hopp be765d
      {
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 5333,5342 ****
Karsten Hopp be765d
  	goto theend;
Karsten Hopp be765d
      }
Karsten Hopp be765d
  
Karsten Hopp be765d
-     /* If the start column is past the maximum column: no need to try. */
Karsten Hopp be765d
-     if (ireg_maxcol > 0 && col >= ireg_maxcol)
Karsten Hopp be765d
- 	goto theend;
Karsten Hopp be765d
- 
Karsten Hopp be765d
      /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
Karsten Hopp be765d
      if (prog->regflags & RF_ICASE)
Karsten Hopp be765d
  	ireg_ic = TRUE;
Karsten Hopp be765d
--- 5493,5498 ----
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 5361,5366 ****
Karsten Hopp be765d
--- 5517,5548 ----
Karsten Hopp be765d
      nfa_regengine.expr = prog->pattern;
Karsten Hopp be765d
  #endif
Karsten Hopp be765d
  
Karsten Hopp be765d
+     if (prog->reganch && col > 0)
Karsten Hopp be765d
+ 	return 0L;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+     if (prog->regstart != NUL)
Karsten Hopp be765d
+     {
Karsten Hopp be765d
+ 	char_u *s;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+ 	/* Skip until the char we know it must start with.
Karsten Hopp be765d
+ 	 * Used often, do some work to avoid call overhead. */
Karsten Hopp be765d
+ 	if (!ireg_ic
Karsten Hopp be765d
+ #ifdef FEAT_MBYTE
Karsten Hopp be765d
+ 		    && !has_mbyte
Karsten Hopp be765d
+ #endif
Karsten Hopp be765d
+ 		    )
Karsten Hopp be765d
+ 	    s = vim_strbyte(regline + col, prog->regstart);
Karsten Hopp be765d
+ 	else
Karsten Hopp be765d
+ 	    s = cstrchr(regline + col, prog->regstart);
Karsten Hopp be765d
+ 	if (s == NULL)
Karsten Hopp be765d
+ 	    return 0L;
Karsten Hopp be765d
+ 	col = (int)(s - regline);
Karsten Hopp be765d
+     }
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+     /* If the start column is past the maximum column: no need to try. */
Karsten Hopp be765d
+     if (ireg_maxcol > 0 && col >= ireg_maxcol)
Karsten Hopp be765d
+ 	goto theend;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
      nstate = prog->nstate;
Karsten Hopp be765d
      for (i = 0; i < nstate; ++i)
Karsten Hopp be765d
      {
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 5459,5464 ****
Karsten Hopp be765d
--- 5641,5650 ----
Karsten Hopp be765d
      prog->has_zend = nfa_has_zend;
Karsten Hopp be765d
      prog->has_backref = nfa_has_backref;
Karsten Hopp be765d
      prog->nsubexp = regnpar;
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+     prog->reganch = nfa_get_reganch(prog->start, 0);
Karsten Hopp be765d
+     prog->regstart = nfa_get_regstart(prog->start, 0);
Karsten Hopp be765d
+ 
Karsten Hopp be765d
  #ifdef ENABLE_LOG
Karsten Hopp be765d
      nfa_postfix_dump(expr, OK);
Karsten Hopp be765d
      nfa_dump(prog);
Karsten Hopp be765d
*** ../vim-7.3.1132/src/regexp.h	2013-06-03 12:17:00.000000000 +0200
Karsten Hopp be765d
--- src/regexp.h	2013-06-06 17:19:23.000000000 +0200
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 87,92 ****
Karsten Hopp be765d
--- 87,96 ----
Karsten Hopp be765d
      unsigned		regflags;
Karsten Hopp be765d
  
Karsten Hopp be765d
      nfa_state_T		*start;		/* points into state[] */
Karsten Hopp be765d
+ 
Karsten Hopp be765d
+     int			reganch;	/* pattern starts with ^ */
Karsten Hopp be765d
+     int			regstart;	/* char at start of pattern */
Karsten Hopp be765d
+ 
Karsten Hopp be765d
      int			has_zend;	/* pattern contains \ze */
Karsten Hopp be765d
      int			has_backref;	/* pattern contains \1 .. \9 */
Karsten Hopp be765d
  #ifdef FEAT_SYN_HL
Karsten Hopp be765d
*** ../vim-7.3.1132/src/version.c	2013-06-06 18:04:47.000000000 +0200
Karsten Hopp be765d
--- src/version.c	2013-06-06 18:43:55.000000000 +0200
Karsten Hopp be765d
***************
Karsten Hopp be765d
*** 730,731 ****
Karsten Hopp be765d
--- 730,733 ----
Karsten Hopp be765d
  {   /* Add new patch number below this line */
Karsten Hopp be765d
+ /**/
Karsten Hopp be765d
+     1133,
Karsten Hopp be765d
  /**/
Karsten Hopp be765d
Karsten Hopp be765d
-- 
Karsten Hopp be765d
From "know your smileys":
Karsten Hopp be765d
 %	Bike accident.  A bit far-fetched, I suppose; although...
Karsten Hopp be765d
             o      _     _         _
Karsten Hopp be765d
     _o     /\_   _ \\o  (_)\__/o  (_)
Karsten Hopp be765d
   _< \_   _>(_) (_)/<_    \_| \   _|/' \/
Karsten Hopp be765d
  (_)>(_) (_)        (_)   (_)    (_)'  _\o_
Karsten Hopp be765d
Karsten Hopp be765d
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp be765d
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp be765d
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp be765d
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///