Karsten Hopp c80075
To: vim_dev@googlegroups.com
Karsten Hopp c80075
Subject: Patch 7.3.1029
Karsten Hopp c80075
Fcc: outbox
Karsten Hopp c80075
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp c80075
Mime-Version: 1.0
Karsten Hopp c80075
Content-Type: text/plain; charset=UTF-8
Karsten Hopp c80075
Content-Transfer-Encoding: 8bit
Karsten Hopp c80075
------------
Karsten Hopp c80075
Karsten Hopp c80075
Patch 7.3.1029
Karsten Hopp c80075
Problem:    New regexp performance: Unused position state being copied.
Karsten Hopp c80075
Solution:   Keep track of which positions are actually valid.
Karsten Hopp c80075
Files:	    src/regexp_nfa.c
Karsten Hopp c80075
Karsten Hopp c80075
Karsten Hopp c80075
*** ../vim-7.3.1028/src/regexp_nfa.c	2013-05-26 21:47:22.000000000 +0200
Karsten Hopp c80075
--- src/regexp_nfa.c	2013-05-26 22:45:26.000000000 +0200
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 1649,1670 ****
Karsten Hopp c80075
      return OK;
Karsten Hopp c80075
  }
Karsten Hopp c80075
  
Karsten Hopp c80075
- typedef union
Karsten Hopp c80075
- {
Karsten Hopp c80075
-     struct multipos
Karsten Hopp c80075
-     {
Karsten Hopp c80075
- 	lpos_T	start;
Karsten Hopp c80075
- 	lpos_T	end;
Karsten Hopp c80075
-     } multilist[NSUBEXP];
Karsten Hopp c80075
-     struct linepos
Karsten Hopp c80075
-     {
Karsten Hopp c80075
- 	char_u	*start;
Karsten Hopp c80075
- 	char_u	*end;
Karsten Hopp c80075
-     } linelist[NSUBEXP];
Karsten Hopp c80075
- } regsub_T;
Karsten Hopp c80075
- 
Karsten Hopp c80075
- static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m));
Karsten Hopp c80075
- 
Karsten Hopp c80075
  #ifdef DEBUG
Karsten Hopp c80075
  static char_u code[50];
Karsten Hopp c80075
  
Karsten Hopp c80075
--- 1649,1654 ----
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2489,2494 ****
Karsten Hopp c80075
--- 2473,2498 ----
Karsten Hopp c80075
   * NFA execution code.
Karsten Hopp c80075
   ****************************************************************/
Karsten Hopp c80075
  
Karsten Hopp c80075
+ typedef struct
Karsten Hopp c80075
+ {
Karsten Hopp c80075
+     int	    in_use; /* number of subexpr with useful info */
Karsten Hopp c80075
+ 
Karsten Hopp c80075
+     /* When REG_MULTI is TRUE multilist is used, otherwise linelist. */
Karsten Hopp c80075
+     union
Karsten Hopp c80075
+     {
Karsten Hopp c80075
+ 	struct multipos
Karsten Hopp c80075
+ 	{
Karsten Hopp c80075
+ 	    lpos_T	start;
Karsten Hopp c80075
+ 	    lpos_T	end;
Karsten Hopp c80075
+ 	} multilist[NSUBEXP];
Karsten Hopp c80075
+ 	struct linepos
Karsten Hopp c80075
+ 	{
Karsten Hopp c80075
+ 	    char_u	*start;
Karsten Hopp c80075
+ 	    char_u	*end;
Karsten Hopp c80075
+ 	} linelist[NSUBEXP];
Karsten Hopp c80075
+     };
Karsten Hopp c80075
+ } regsub_T;
Karsten Hopp c80075
+ 
Karsten Hopp c80075
  /* nfa_thread_T contains execution information of a NFA state */
Karsten Hopp c80075
  typedef struct
Karsten Hopp c80075
  {
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2507,2513 ****
Karsten Hopp c80075
  static int nfa_match;
Karsten Hopp c80075
  
Karsten Hopp c80075
  static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid));
Karsten Hopp c80075
- 
Karsten Hopp c80075
  static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *ip));
Karsten Hopp c80075
  
Karsten Hopp c80075
      static void
Karsten Hopp c80075
--- 2511,2516 ----
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2521,2527 ****
Karsten Hopp c80075
--- 2524,2532 ----
Karsten Hopp c80075
      int			subidx;
Karsten Hopp c80075
      nfa_thread_T	*lastthread;
Karsten Hopp c80075
      lpos_T		save_lpos;
Karsten Hopp c80075
+     int			save_in_use;
Karsten Hopp c80075
      char_u		*save_ptr;
Karsten Hopp c80075
+     int			i;
Karsten Hopp c80075
  
Karsten Hopp c80075
      if (l == NULL || state == NULL)
Karsten Hopp c80075
  	return;
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2557,2572 ****
Karsten Hopp c80075
  		state->lastlist = lid;
Karsten Hopp c80075
  		lastthread = &l->t[l->n++];
Karsten Hopp c80075
  		lastthread->state = state;
Karsten Hopp c80075
! 
Karsten Hopp c80075
! 		/* Copy the match start and end positions. */
Karsten Hopp c80075
! 		if (REG_MULTI)
Karsten Hopp c80075
! 		    mch_memmove(&lastthread->sub.multilist[0],
Karsten Hopp c80075
! 			        &m->multilist[0],
Karsten Hopp c80075
! 				sizeof(struct multipos) * nfa_nsubexpr);
Karsten Hopp c80075
! 		else
Karsten Hopp c80075
! 		    mch_memmove(&lastthread->sub.linelist[0],
Karsten Hopp c80075
! 			        &m->linelist[0],
Karsten Hopp c80075
! 			        sizeof(struct linepos) * nfa_nsubexpr);
Karsten Hopp c80075
  	    }
Karsten Hopp c80075
      }
Karsten Hopp c80075
  
Karsten Hopp c80075
--- 2562,2580 ----
Karsten Hopp c80075
  		state->lastlist = lid;
Karsten Hopp c80075
  		lastthread = &l->t[l->n++];
Karsten Hopp c80075
  		lastthread->state = state;
Karsten Hopp c80075
! 		lastthread->sub.in_use = m->in_use;
Karsten Hopp c80075
! 		if (m->in_use > 0)
Karsten Hopp c80075
! 		{
Karsten Hopp c80075
! 		    /* Copy the match start and end positions. */
Karsten Hopp c80075
! 		    if (REG_MULTI)
Karsten Hopp c80075
! 			mch_memmove(&lastthread->sub.multilist[0],
Karsten Hopp c80075
! 				    &m->multilist[0],
Karsten Hopp c80075
! 				    sizeof(struct multipos) * m->in_use);
Karsten Hopp c80075
! 		    else
Karsten Hopp c80075
! 			mch_memmove(&lastthread->sub.linelist[0],
Karsten Hopp c80075
! 				    &m->linelist[0],
Karsten Hopp c80075
! 				    sizeof(struct linepos) * m->in_use);
Karsten Hopp c80075
! 		}
Karsten Hopp c80075
  	    }
Karsten Hopp c80075
      }
Karsten Hopp c80075
  
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2636,2644 ****
Karsten Hopp c80075
  	    else
Karsten Hopp c80075
  		subidx = state->c - NFA_MOPEN;
Karsten Hopp c80075
  
Karsten Hopp c80075
  	    if (REG_MULTI)
Karsten Hopp c80075
  	    {
Karsten Hopp c80075
! 		save_lpos = m->multilist[subidx].start;
Karsten Hopp c80075
  		if (off == -1)
Karsten Hopp c80075
  		{
Karsten Hopp c80075
  		    m->multilist[subidx].start.lnum = reglnum + 1;
Karsten Hopp c80075
--- 2644,2668 ----
Karsten Hopp c80075
  	    else
Karsten Hopp c80075
  		subidx = state->c - NFA_MOPEN;
Karsten Hopp c80075
  
Karsten Hopp c80075
+ 	    /* Set the position (with "off") in the subexpression.  Save and
Karsten Hopp c80075
+ 	     * restore it when it was in use.  Otherwise fill any gap. */
Karsten Hopp c80075
  	    if (REG_MULTI)
Karsten Hopp c80075
  	    {
Karsten Hopp c80075
! 		if (subidx < m->in_use)
Karsten Hopp c80075
! 		{
Karsten Hopp c80075
! 		    save_lpos = m->multilist[subidx].start;
Karsten Hopp c80075
! 		    save_in_use = -1;
Karsten Hopp c80075
! 		}
Karsten Hopp c80075
! 		else
Karsten Hopp c80075
! 		{
Karsten Hopp c80075
! 		    save_in_use = m->in_use;
Karsten Hopp c80075
! 		    for (i = m->in_use; i < subidx; ++i)
Karsten Hopp c80075
! 		    {
Karsten Hopp c80075
! 			m->multilist[i].start.lnum = -1;
Karsten Hopp c80075
! 			m->multilist[i].end.lnum = -1;
Karsten Hopp c80075
! 		    }
Karsten Hopp c80075
! 		    m->in_use = subidx + 1;
Karsten Hopp c80075
! 		}
Karsten Hopp c80075
  		if (off == -1)
Karsten Hopp c80075
  		{
Karsten Hopp c80075
  		    m->multilist[subidx].start.lnum = reglnum + 1;
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2653,2668 ****
Karsten Hopp c80075
  	    }
Karsten Hopp c80075
  	    else
Karsten Hopp c80075
  	    {
Karsten Hopp c80075
! 		save_ptr = m->linelist[subidx].start;
Karsten Hopp c80075
  		m->linelist[subidx].start = reginput + off;
Karsten Hopp c80075
  	    }
Karsten Hopp c80075
  
Karsten Hopp c80075
  	    addstate(l, state->out, m, off, lid);
Karsten Hopp c80075
  
Karsten Hopp c80075
! 	    if (REG_MULTI)
Karsten Hopp c80075
! 		m->multilist[subidx].start = save_lpos;
Karsten Hopp c80075
  	    else
Karsten Hopp c80075
! 		m->linelist[subidx].start = save_ptr;
Karsten Hopp c80075
  	    break;
Karsten Hopp c80075
  
Karsten Hopp c80075
  	case NFA_MCLOSE + 0:
Karsten Hopp c80075
--- 2677,2711 ----
Karsten Hopp c80075
  	    }
Karsten Hopp c80075
  	    else
Karsten Hopp c80075
  	    {
Karsten Hopp c80075
! 		if (subidx < m->in_use)
Karsten Hopp c80075
! 		{
Karsten Hopp c80075
! 		    save_ptr = m->linelist[subidx].start;
Karsten Hopp c80075
! 		    save_in_use = -1;
Karsten Hopp c80075
! 		}
Karsten Hopp c80075
! 		else
Karsten Hopp c80075
! 		{
Karsten Hopp c80075
! 		    save_in_use = m->in_use;
Karsten Hopp c80075
! 		    for (i = m->in_use; i < subidx; ++i)
Karsten Hopp c80075
! 		    {
Karsten Hopp c80075
! 			m->linelist[i].start = NULL;
Karsten Hopp c80075
! 			m->linelist[i].end = NULL;
Karsten Hopp c80075
! 		    }
Karsten Hopp c80075
! 		    m->in_use = subidx + 1;
Karsten Hopp c80075
! 		}
Karsten Hopp c80075
  		m->linelist[subidx].start = reginput + off;
Karsten Hopp c80075
  	    }
Karsten Hopp c80075
  
Karsten Hopp c80075
  	    addstate(l, state->out, m, off, lid);
Karsten Hopp c80075
  
Karsten Hopp c80075
! 	    if (save_in_use == -1)
Karsten Hopp c80075
! 	    {
Karsten Hopp c80075
! 		if (REG_MULTI)
Karsten Hopp c80075
! 		    m->multilist[subidx].start = save_lpos;
Karsten Hopp c80075
! 		else
Karsten Hopp c80075
! 		    m->linelist[subidx].start = save_ptr;
Karsten Hopp c80075
! 	    }
Karsten Hopp c80075
  	    else
Karsten Hopp c80075
! 		m->in_use = save_in_use;
Karsten Hopp c80075
  	    break;
Karsten Hopp c80075
  
Karsten Hopp c80075
  	case NFA_MCLOSE + 0:
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2686,2691 ****
Karsten Hopp c80075
--- 2729,2739 ----
Karsten Hopp c80075
  	    else
Karsten Hopp c80075
  		subidx = state->c - NFA_MCLOSE;
Karsten Hopp c80075
  
Karsten Hopp c80075
+ 	    /* We don't fill in gaps here, there must have been an MOPEN that
Karsten Hopp c80075
+ 	     * has done that. */
Karsten Hopp c80075
+ 	    save_in_use = m->in_use;
Karsten Hopp c80075
+ 	    if (m->in_use <= subidx)
Karsten Hopp c80075
+ 		m->in_use = subidx + 1;
Karsten Hopp c80075
  	    if (REG_MULTI)
Karsten Hopp c80075
  	    {
Karsten Hopp c80075
  		save_lpos = m->multilist[subidx].end;
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2713,2718 ****
Karsten Hopp c80075
--- 2761,2767 ----
Karsten Hopp c80075
  		m->multilist[subidx].end = save_lpos;
Karsten Hopp c80075
  	    else
Karsten Hopp c80075
  		m->linelist[subidx].end = save_ptr;
Karsten Hopp c80075
+ 	    m->in_use = save_in_use;
Karsten Hopp c80075
  	    break;
Karsten Hopp c80075
      }
Karsten Hopp c80075
  }
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2917,2922 ****
Karsten Hopp c80075
--- 2966,2973 ----
Karsten Hopp c80075
      }
Karsten Hopp c80075
  }
Karsten Hopp c80075
  
Karsten Hopp c80075
+ static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m));
Karsten Hopp c80075
+ 
Karsten Hopp c80075
  /*
Karsten Hopp c80075
   * Main matching routine.
Karsten Hopp c80075
   *
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 2960,2966 ****
Karsten Hopp c80075
  #endif
Karsten Hopp c80075
      nfa_match = FALSE;
Karsten Hopp c80075
  
Karsten Hopp c80075
!     /* Allocate memory for the lists of nodes */
Karsten Hopp c80075
      size = (nstate + 1) * sizeof(nfa_thread_T);
Karsten Hopp c80075
      list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
Karsten Hopp c80075
      list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
Karsten Hopp c80075
--- 3011,3017 ----
Karsten Hopp c80075
  #endif
Karsten Hopp c80075
      nfa_match = FALSE;
Karsten Hopp c80075
  
Karsten Hopp c80075
!     /* Allocate memory for the lists of nodes. */
Karsten Hopp c80075
      size = (nstate + 1) * sizeof(nfa_thread_T);
Karsten Hopp c80075
      list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
Karsten Hopp c80075
      list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 3099,3105 ****
Karsten Hopp c80075
  	    {
Karsten Hopp c80075
  	    case NFA_MATCH:
Karsten Hopp c80075
  		nfa_match = TRUE;
Karsten Hopp c80075
! 		*submatch = t->sub;
Karsten Hopp c80075
  #ifdef ENABLE_LOG
Karsten Hopp c80075
  		for (j = 0; j < 4; j++)
Karsten Hopp c80075
  		    if (REG_MULTI)
Karsten Hopp c80075
--- 3150,3168 ----
Karsten Hopp c80075
  	    {
Karsten Hopp c80075
  	    case NFA_MATCH:
Karsten Hopp c80075
  		nfa_match = TRUE;
Karsten Hopp c80075
! 		submatch->in_use = t->sub.in_use;
Karsten Hopp c80075
! 		if (REG_MULTI)
Karsten Hopp c80075
! 		    for (j = 0; j < submatch->in_use; j++)
Karsten Hopp c80075
! 		    {
Karsten Hopp c80075
! 			submatch->multilist[j].start = t->sub.multilist[j].start;
Karsten Hopp c80075
! 			submatch->multilist[j].end = t->sub.multilist[j].end;
Karsten Hopp c80075
! 		    }
Karsten Hopp c80075
! 		else
Karsten Hopp c80075
! 		    for (j = 0; j < submatch->in_use; j++)
Karsten Hopp c80075
! 		    {
Karsten Hopp c80075
! 			submatch->linelist[j].start = t->sub.linelist[j].start;
Karsten Hopp c80075
! 			submatch->linelist[j].end = t->sub.linelist[j].end;
Karsten Hopp c80075
! 		    }
Karsten Hopp c80075
  #ifdef ENABLE_LOG
Karsten Hopp c80075
  		for (j = 0; j < 4; j++)
Karsten Hopp c80075
  		    if (REG_MULTI)
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 3194,3210 ****
Karsten Hopp c80075
  		    reglnum = old_reglnum;
Karsten Hopp c80075
  		    /* Copy submatch info from the recursive call */
Karsten Hopp c80075
  		    if (REG_MULTI)
Karsten Hopp c80075
! 			for (j = 1; j < nfa_nsubexpr; j++)
Karsten Hopp c80075
  			{
Karsten Hopp c80075
  			    t->sub.multilist[j].start = m->multilist[j].start;
Karsten Hopp c80075
  			    t->sub.multilist[j].end = m->multilist[j].end;
Karsten Hopp c80075
  			}
Karsten Hopp c80075
  		    else
Karsten Hopp c80075
! 			for (j = 1; j < nfa_nsubexpr; j++)
Karsten Hopp c80075
  			{
Karsten Hopp c80075
  			    t->sub.linelist[j].start = m->linelist[j].start;
Karsten Hopp c80075
  			    t->sub.linelist[j].end = m->linelist[j].end;
Karsten Hopp c80075
  			}
Karsten Hopp c80075
  		    /* t->state->out1 is the corresponding END_INVISIBLE node */
Karsten Hopp c80075
  		    addstate_here(thislist, t->state->out1->out, &t->sub,
Karsten Hopp c80075
  							    listid, &listidx);
Karsten Hopp c80075
--- 3257,3275 ----
Karsten Hopp c80075
  		    reglnum = old_reglnum;
Karsten Hopp c80075
  		    /* Copy submatch info from the recursive call */
Karsten Hopp c80075
  		    if (REG_MULTI)
Karsten Hopp c80075
! 			for (j = 1; j < m->in_use; j++)
Karsten Hopp c80075
  			{
Karsten Hopp c80075
  			    t->sub.multilist[j].start = m->multilist[j].start;
Karsten Hopp c80075
  			    t->sub.multilist[j].end = m->multilist[j].end;
Karsten Hopp c80075
  			}
Karsten Hopp c80075
  		    else
Karsten Hopp c80075
! 			for (j = 1; j < m->in_use; j++)
Karsten Hopp c80075
  			{
Karsten Hopp c80075
  			    t->sub.linelist[j].start = m->linelist[j].start;
Karsten Hopp c80075
  			    t->sub.linelist[j].end = m->linelist[j].end;
Karsten Hopp c80075
  			}
Karsten Hopp c80075
+ 		    t->sub.in_use = m->in_use;
Karsten Hopp c80075
+ 
Karsten Hopp c80075
  		    /* t->state->out1 is the corresponding END_INVISIBLE node */
Karsten Hopp c80075
  		    addstate_here(thislist, t->state->out1->out, &t->sub,
Karsten Hopp c80075
  							    listid, &listidx);
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 3703,3708 ****
Karsten Hopp c80075
--- 3768,3775 ----
Karsten Hopp c80075
  	vim_memset(sub.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr);
Karsten Hopp c80075
  	vim_memset(m.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr);
Karsten Hopp c80075
      }
Karsten Hopp c80075
+     sub.in_use = 0;
Karsten Hopp c80075
+     m.in_use = 0;
Karsten Hopp c80075
  
Karsten Hopp c80075
      if (nfa_regmatch(start, &sub, &m) == FALSE)
Karsten Hopp c80075
  	return 0;
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 3710,3716 ****
Karsten Hopp c80075
      cleanup_subexpr();
Karsten Hopp c80075
      if (REG_MULTI)
Karsten Hopp c80075
      {
Karsten Hopp c80075
! 	for (i = 0; i < nfa_nsubexpr; i++)
Karsten Hopp c80075
  	{
Karsten Hopp c80075
  	    reg_startpos[i] = sub.multilist[i].start;
Karsten Hopp c80075
  	    reg_endpos[i] = sub.multilist[i].end;
Karsten Hopp c80075
--- 3777,3783 ----
Karsten Hopp c80075
      cleanup_subexpr();
Karsten Hopp c80075
      if (REG_MULTI)
Karsten Hopp c80075
      {
Karsten Hopp c80075
! 	for (i = 0; i < sub.in_use; i++)
Karsten Hopp c80075
  	{
Karsten Hopp c80075
  	    reg_startpos[i] = sub.multilist[i].start;
Karsten Hopp c80075
  	    reg_endpos[i] = sub.multilist[i].end;
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 3732,3738 ****
Karsten Hopp c80075
      }
Karsten Hopp c80075
      else
Karsten Hopp c80075
      {
Karsten Hopp c80075
! 	for (i = 0; i < nfa_nsubexpr; i++)
Karsten Hopp c80075
  	{
Karsten Hopp c80075
  	    reg_startp[i] = sub.linelist[i].start;
Karsten Hopp c80075
  	    reg_endp[i] = sub.linelist[i].end;
Karsten Hopp c80075
--- 3799,3805 ----
Karsten Hopp c80075
      }
Karsten Hopp c80075
      else
Karsten Hopp c80075
      {
Karsten Hopp c80075
! 	for (i = 0; i < sub.in_use; i++)
Karsten Hopp c80075
  	{
Karsten Hopp c80075
  	    reg_startp[i] = sub.linelist[i].start;
Karsten Hopp c80075
  	    reg_endp[i] = sub.linelist[i].end;
Karsten Hopp c80075
*** ../vim-7.3.1028/src/version.c	2013-05-26 21:47:22.000000000 +0200
Karsten Hopp c80075
--- src/version.c	2013-05-26 22:53:55.000000000 +0200
Karsten Hopp c80075
***************
Karsten Hopp c80075
*** 730,731 ****
Karsten Hopp c80075
--- 730,733 ----
Karsten Hopp c80075
  {   /* Add new patch number below this line */
Karsten Hopp c80075
+ /**/
Karsten Hopp c80075
+     1029,
Karsten Hopp c80075
  /**/
Karsten Hopp c80075
Karsten Hopp c80075
-- 
Karsten Hopp c80075
I used to be indecisive, now I'm not sure.
Karsten Hopp c80075
Karsten Hopp c80075
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp c80075
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp c80075
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp c80075
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///