Blob Blame History Raw
To: vim_dev@googlegroups.com
Subject: Patch 7.3.1029
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 7.3.1029
Problem:    New regexp performance: Unused position state being copied.
Solution:   Keep track of which positions are actually valid.
Files:	    src/regexp_nfa.c


*** ../vim-7.3.1028/src/regexp_nfa.c	2013-05-26 21:47:22.000000000 +0200
--- src/regexp_nfa.c	2013-05-26 22:45:26.000000000 +0200
***************
*** 1649,1670 ****
      return OK;
  }
  
- typedef union
- {
-     struct multipos
-     {
- 	lpos_T	start;
- 	lpos_T	end;
-     } multilist[NSUBEXP];
-     struct linepos
-     {
- 	char_u	*start;
- 	char_u	*end;
-     } linelist[NSUBEXP];
- } regsub_T;
- 
- static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m));
- 
  #ifdef DEBUG
  static char_u code[50];
  
--- 1649,1654 ----
***************
*** 2489,2494 ****
--- 2473,2498 ----
   * NFA execution code.
   ****************************************************************/
  
+ typedef struct
+ {
+     int	    in_use; /* number of subexpr with useful info */
+ 
+     /* When REG_MULTI is TRUE multilist is used, otherwise linelist. */
+     union
+     {
+ 	struct multipos
+ 	{
+ 	    lpos_T	start;
+ 	    lpos_T	end;
+ 	} multilist[NSUBEXP];
+ 	struct linepos
+ 	{
+ 	    char_u	*start;
+ 	    char_u	*end;
+ 	} linelist[NSUBEXP];
+     };
+ } regsub_T;
+ 
  /* nfa_thread_T contains execution information of a NFA state */
  typedef struct
  {
***************
*** 2507,2513 ****
  static int nfa_match;
  
  static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid));
- 
  static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *ip));
  
      static void
--- 2511,2516 ----
***************
*** 2521,2527 ****
--- 2524,2532 ----
      int			subidx;
      nfa_thread_T	*lastthread;
      lpos_T		save_lpos;
+     int			save_in_use;
      char_u		*save_ptr;
+     int			i;
  
      if (l == NULL || state == NULL)
  	return;
***************
*** 2557,2572 ****
  		state->lastlist = lid;
  		lastthread = &l->t[l->n++];
  		lastthread->state = state;
! 
! 		/* Copy the match start and end positions. */
! 		if (REG_MULTI)
! 		    mch_memmove(&lastthread->sub.multilist[0],
! 			        &m->multilist[0],
! 				sizeof(struct multipos) * nfa_nsubexpr);
! 		else
! 		    mch_memmove(&lastthread->sub.linelist[0],
! 			        &m->linelist[0],
! 			        sizeof(struct linepos) * nfa_nsubexpr);
  	    }
      }
  
--- 2562,2580 ----
  		state->lastlist = lid;
  		lastthread = &l->t[l->n++];
  		lastthread->state = state;
! 		lastthread->sub.in_use = m->in_use;
! 		if (m->in_use > 0)
! 		{
! 		    /* Copy the match start and end positions. */
! 		    if (REG_MULTI)
! 			mch_memmove(&lastthread->sub.multilist[0],
! 				    &m->multilist[0],
! 				    sizeof(struct multipos) * m->in_use);
! 		    else
! 			mch_memmove(&lastthread->sub.linelist[0],
! 				    &m->linelist[0],
! 				    sizeof(struct linepos) * m->in_use);
! 		}
  	    }
      }
  
***************
*** 2636,2644 ****
  	    else
  		subidx = state->c - NFA_MOPEN;
  
  	    if (REG_MULTI)
  	    {
! 		save_lpos = m->multilist[subidx].start;
  		if (off == -1)
  		{
  		    m->multilist[subidx].start.lnum = reglnum + 1;
--- 2644,2668 ----
  	    else
  		subidx = state->c - NFA_MOPEN;
  
+ 	    /* Set the position (with "off") in the subexpression.  Save and
+ 	     * restore it when it was in use.  Otherwise fill any gap. */
  	    if (REG_MULTI)
  	    {
! 		if (subidx < m->in_use)
! 		{
! 		    save_lpos = m->multilist[subidx].start;
! 		    save_in_use = -1;
! 		}
! 		else
! 		{
! 		    save_in_use = m->in_use;
! 		    for (i = m->in_use; i < subidx; ++i)
! 		    {
! 			m->multilist[i].start.lnum = -1;
! 			m->multilist[i].end.lnum = -1;
! 		    }
! 		    m->in_use = subidx + 1;
! 		}
  		if (off == -1)
  		{
  		    m->multilist[subidx].start.lnum = reglnum + 1;
***************
*** 2653,2668 ****
  	    }
  	    else
  	    {
! 		save_ptr = m->linelist[subidx].start;
  		m->linelist[subidx].start = reginput + off;
  	    }
  
  	    addstate(l, state->out, m, off, lid);
  
! 	    if (REG_MULTI)
! 		m->multilist[subidx].start = save_lpos;
  	    else
! 		m->linelist[subidx].start = save_ptr;
  	    break;
  
  	case NFA_MCLOSE + 0:
--- 2677,2711 ----
  	    }
  	    else
  	    {
! 		if (subidx < m->in_use)
! 		{
! 		    save_ptr = m->linelist[subidx].start;
! 		    save_in_use = -1;
! 		}
! 		else
! 		{
! 		    save_in_use = m->in_use;
! 		    for (i = m->in_use; i < subidx; ++i)
! 		    {
! 			m->linelist[i].start = NULL;
! 			m->linelist[i].end = NULL;
! 		    }
! 		    m->in_use = subidx + 1;
! 		}
  		m->linelist[subidx].start = reginput + off;
  	    }
  
  	    addstate(l, state->out, m, off, lid);
  
! 	    if (save_in_use == -1)
! 	    {
! 		if (REG_MULTI)
! 		    m->multilist[subidx].start = save_lpos;
! 		else
! 		    m->linelist[subidx].start = save_ptr;
! 	    }
  	    else
! 		m->in_use = save_in_use;
  	    break;
  
  	case NFA_MCLOSE + 0:
***************
*** 2686,2691 ****
--- 2729,2739 ----
  	    else
  		subidx = state->c - NFA_MCLOSE;
  
+ 	    /* We don't fill in gaps here, there must have been an MOPEN that
+ 	     * has done that. */
+ 	    save_in_use = m->in_use;
+ 	    if (m->in_use <= subidx)
+ 		m->in_use = subidx + 1;
  	    if (REG_MULTI)
  	    {
  		save_lpos = m->multilist[subidx].end;
***************
*** 2713,2718 ****
--- 2761,2767 ----
  		m->multilist[subidx].end = save_lpos;
  	    else
  		m->linelist[subidx].end = save_ptr;
+ 	    m->in_use = save_in_use;
  	    break;
      }
  }
***************
*** 2917,2922 ****
--- 2966,2973 ----
      }
  }
  
+ static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m));
+ 
  /*
   * Main matching routine.
   *
***************
*** 2960,2966 ****
  #endif
      nfa_match = FALSE;
  
!     /* Allocate memory for the lists of nodes */
      size = (nstate + 1) * sizeof(nfa_thread_T);
      list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
      list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
--- 3011,3017 ----
  #endif
      nfa_match = FALSE;
  
!     /* Allocate memory for the lists of nodes. */
      size = (nstate + 1) * sizeof(nfa_thread_T);
      list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
      list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
***************
*** 3099,3105 ****
  	    {
  	    case NFA_MATCH:
  		nfa_match = TRUE;
! 		*submatch = t->sub;
  #ifdef ENABLE_LOG
  		for (j = 0; j < 4; j++)
  		    if (REG_MULTI)
--- 3150,3168 ----
  	    {
  	    case NFA_MATCH:
  		nfa_match = TRUE;
! 		submatch->in_use = t->sub.in_use;
! 		if (REG_MULTI)
! 		    for (j = 0; j < submatch->in_use; j++)
! 		    {
! 			submatch->multilist[j].start = t->sub.multilist[j].start;
! 			submatch->multilist[j].end = t->sub.multilist[j].end;
! 		    }
! 		else
! 		    for (j = 0; j < submatch->in_use; j++)
! 		    {
! 			submatch->linelist[j].start = t->sub.linelist[j].start;
! 			submatch->linelist[j].end = t->sub.linelist[j].end;
! 		    }
  #ifdef ENABLE_LOG
  		for (j = 0; j < 4; j++)
  		    if (REG_MULTI)
***************
*** 3194,3210 ****
  		    reglnum = old_reglnum;
  		    /* Copy submatch info from the recursive call */
  		    if (REG_MULTI)
! 			for (j = 1; j < nfa_nsubexpr; j++)
  			{
  			    t->sub.multilist[j].start = m->multilist[j].start;
  			    t->sub.multilist[j].end = m->multilist[j].end;
  			}
  		    else
! 			for (j = 1; j < nfa_nsubexpr; j++)
  			{
  			    t->sub.linelist[j].start = m->linelist[j].start;
  			    t->sub.linelist[j].end = m->linelist[j].end;
  			}
  		    /* t->state->out1 is the corresponding END_INVISIBLE node */
  		    addstate_here(thislist, t->state->out1->out, &t->sub,
  							    listid, &listidx);
--- 3257,3275 ----
  		    reglnum = old_reglnum;
  		    /* Copy submatch info from the recursive call */
  		    if (REG_MULTI)
! 			for (j = 1; j < m->in_use; j++)
  			{
  			    t->sub.multilist[j].start = m->multilist[j].start;
  			    t->sub.multilist[j].end = m->multilist[j].end;
  			}
  		    else
! 			for (j = 1; j < m->in_use; j++)
  			{
  			    t->sub.linelist[j].start = m->linelist[j].start;
  			    t->sub.linelist[j].end = m->linelist[j].end;
  			}
+ 		    t->sub.in_use = m->in_use;
+ 
  		    /* t->state->out1 is the corresponding END_INVISIBLE node */
  		    addstate_here(thislist, t->state->out1->out, &t->sub,
  							    listid, &listidx);
***************
*** 3703,3708 ****
--- 3768,3775 ----
  	vim_memset(sub.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr);
  	vim_memset(m.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr);
      }
+     sub.in_use = 0;
+     m.in_use = 0;
  
      if (nfa_regmatch(start, &sub, &m) == FALSE)
  	return 0;
***************
*** 3710,3716 ****
      cleanup_subexpr();
      if (REG_MULTI)
      {
! 	for (i = 0; i < nfa_nsubexpr; i++)
  	{
  	    reg_startpos[i] = sub.multilist[i].start;
  	    reg_endpos[i] = sub.multilist[i].end;
--- 3777,3783 ----
      cleanup_subexpr();
      if (REG_MULTI)
      {
! 	for (i = 0; i < sub.in_use; i++)
  	{
  	    reg_startpos[i] = sub.multilist[i].start;
  	    reg_endpos[i] = sub.multilist[i].end;
***************
*** 3732,3738 ****
      }
      else
      {
! 	for (i = 0; i < nfa_nsubexpr; i++)
  	{
  	    reg_startp[i] = sub.linelist[i].start;
  	    reg_endp[i] = sub.linelist[i].end;
--- 3799,3805 ----
      }
      else
      {
! 	for (i = 0; i < sub.in_use; i++)
  	{
  	    reg_startp[i] = sub.linelist[i].start;
  	    reg_endp[i] = sub.linelist[i].end;
*** ../vim-7.3.1028/src/version.c	2013-05-26 21:47:22.000000000 +0200
--- src/version.c	2013-05-26 22:53:55.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1029,
  /**/

-- 
I used to be indecisive, now I'm not sure.

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///