To: vim_dev@googlegroups.com
Subject: Patch 7.3.1103
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 7.3.1103
Problem:    New regexp engine: overhead in saving and restoring.
Solution:   Make saving and restoring list IDs faster.  Don't copy or check \z
	    subexpressions when they are not used.
Files:	    src/regexp_nfa.c


*** ../vim-7.3.1102/src/regexp_nfa.c	2013-06-02 16:40:44.000000000 +0200
--- src/regexp_nfa.c	2013-06-02 21:00:41.000000000 +0200
***************
*** 237,242 ****
--- 237,245 ----
  /* NFA regexp \1 .. \9 encountered. */
  static int nfa_has_backref;
  
+ /* NFA regexp has \z( ), set zsubexpr. */
+ static int nfa_has_zsubexpr;
+ 
  /* Number of sub expressions actually being used during execution. 1 if only
   * the whole match (subexpr 0) is used. */
  static int nfa_nsubexpr;
***************
*** 272,281 ****
  static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size));
  static int check_char_class __ARGS((int class, int c));
  static void st_error __ARGS((int *postfix, int *end, int *p));
! static void nfa_set_neg_listids __ARGS((nfa_state_T *start));
! static void nfa_set_null_listids __ARGS((nfa_state_T *start));
! static void nfa_save_listids __ARGS((nfa_state_T *start, int *list));
! static void nfa_restore_listids __ARGS((nfa_state_T *start, int *list));
  static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos));
  static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col));
  static long nfa_regexec_both __ARGS((char_u *line, colnr_T col));
--- 275,282 ----
  static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size));
  static int check_char_class __ARGS((int class, int c));
  static void st_error __ARGS((int *postfix, int *end, int *p));
! static void nfa_save_listids __ARGS((nfa_regprog_T *prog, int *list));
! static void nfa_restore_listids __ARGS((nfa_regprog_T *prog, int *list));
  static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos));
  static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col));
  static long nfa_regexec_both __ARGS((char_u *line, colnr_T col));
***************
*** 3000,3005 ****
--- 3001,3024 ----
      return TRUE;
  }
  
+ #ifdef ENABLE_LOG
+     static void
+ report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid);
+ {
+     int col;
+ 
+     if (sub->in_use <= 0)
+ 	col = -1;
+     else if (REG_MULTI)
+ 	col = sub->list.multi[0].start.col;
+     else
+ 	col = (int)(sub->list.line[0].start - regline);
+     nfa_set_code(state->c);
+     fprintf(log_fd, "> %s state %d to list %d. char %d: %s (start col %d)\n",
+ 	    action, abs(state->id), lid, state->c, code, col);
+ }
+ #endif
+ 
      static void
  addstate(l, state, subs, off)
      nfa_list_T		*l;	/* runtime state list */
***************
*** 3118,3124 ****
  		    if (thread->state->id == state->id
  			    && sub_equal(&thread->subs.norm, &subs->norm)
  #ifdef FEAT_SYN_HL
! 			    && sub_equal(&thread->subs.synt, &subs->synt)
  #endif
  					  )
  			goto skip_add;
--- 3137,3144 ----
  		    if (thread->state->id == state->id
  			    && sub_equal(&thread->subs.norm, &subs->norm)
  #ifdef FEAT_SYN_HL
! 			    && (!nfa_has_zsubexpr ||
! 				   sub_equal(&thread->subs.synt, &subs->synt))
  #endif
  					  )
  			goto skip_add;
***************
*** 3141,3181 ****
  	    thread->state = state;
  	    copy_sub(&thread->subs.norm, &subs->norm);
  #ifdef FEAT_SYN_HL
! 	    copy_sub(&thread->subs.synt, &subs->synt);
  #endif
  #ifdef ENABLE_LOG
! 	    {
! 		int col;
! 
! 		if (thread->subs.norm.in_use <= 0)
! 		    col = -1;
! 		else if (REG_MULTI)
! 		    col = thread->subs.norm.list.multi[0].start.col;
! 		else
! 		    col = (int)(thread->subs.norm.list.line[0].start - regline);
! 		nfa_set_code(state->c);
! 		fprintf(log_fd, "> Adding state %d to list %d. char %d: %s (start col %d)\n",
! 		        abs(state->id), l->id, state->c, code, col);
! 		did_print = TRUE;
! 	    }
  #endif
      }
  
  #ifdef ENABLE_LOG
      if (!did_print)
!     {
! 	int col;
! 
! 	if (subs->norm.in_use <= 0)
! 	    col = -1;
! 	else if (REG_MULTI)
! 	    col = subs->norm.list.multi[0].start.col;
! 	else
! 	    col = (int)(subs->norm.list.line[0].start - regline);
! 	nfa_set_code(state->c);
! 	fprintf(log_fd, "> Processing state %d for list %d. char %d: %s (start col %d)\n",
! 		abs(state->id), l->id, state->c, code, col);
!     }
  #endif
      switch (state->c)
      {
--- 3161,3178 ----
  	    thread->state = state;
  	    copy_sub(&thread->subs.norm, &subs->norm);
  #ifdef FEAT_SYN_HL
! 	    if (nfa_has_zsubexpr)
! 		copy_sub(&thread->subs.synt, &subs->synt);
  #endif
  #ifdef ENABLE_LOG
! 	    report_state("Adding", &thread->subs.norm, state, l->id);
! 	    did_print = TRUE;
  #endif
      }
  
  #ifdef ENABLE_LOG
      if (!did_print)
! 	report_state("Processing", &subs->norm, state, l->id);
  #endif
      switch (state->c)
      {
***************
*** 3600,3648 ****
  #endif
  
  /*
!  * Set all NFA nodes' list ID equal to -1.
   */
      static void
! nfa_set_neg_listids(start)
!     nfa_state_T	    *start;
! {
!     if (start != NULL && start->lastlist >= 0)
!     {
! 	start->lastlist = -1;
! 	nfa_set_neg_listids(start->out);
! 	nfa_set_neg_listids(start->out1);
!     }
! }
! 
! /*
!  * Set all NFA nodes' list ID equal to 0.
!  */
!     static void
! nfa_set_null_listids(start)
!     nfa_state_T	    *start;
! {
!     if (start != NULL && start->lastlist == -1)
!     {
! 	start->lastlist = 0;
! 	nfa_set_null_listids(start->out);
! 	nfa_set_null_listids(start->out1);
!     }
! }
! 
! /*
!  * Save list IDs for all NFA states in "list".
!  */
!     static void
! nfa_save_listids(start, list)
!     nfa_state_T	    *start;
      int		    *list;
  {
!     if (start != NULL && start->lastlist != -1)
!     {
! 	list[abs(start->id)] = start->lastlist;
! 	start->lastlist = -1;
! 	nfa_save_listids(start->out, list);
! 	nfa_save_listids(start->out1, list);
      }
  }
  
--- 3597,3620 ----
  #endif
  
  /*
!  * Save list IDs for all NFA states of "prog" into "list".
!  * Also reset the IDs to zero.
   */
      static void
! nfa_save_listids(prog, list)
!     nfa_regprog_T   *prog;
      int		    *list;
  {
!     int		    i;
!     nfa_state_T	    *p;
! 
!     /* Order in the list is reverse, it's a bit faster that way. */
!     p = &prog->state[0];
!     for (i = prog->nstate; --i >= 0; )
!     {
! 	list[i] = p->lastlist;
! 	p->lastlist = 0;
! 	++p;
      }
  }
  
***************
*** 3650,3664 ****
   * Restore list IDs from "list" to all NFA states.
   */
      static void
! nfa_restore_listids(start, list)
!     nfa_state_T	    *start;
      int		    *list;
  {
!     if (start != NULL && start->lastlist == -1)
      {
! 	start->lastlist = list[abs(start->id)];
! 	nfa_restore_listids(start->out, list);
! 	nfa_restore_listids(start->out1, list);
      }
  }
  
--- 3622,3639 ----
   * Restore list IDs from "list" to all NFA states.
   */
      static void
! nfa_restore_listids(prog, list)
!     nfa_regprog_T   *prog;
      int		    *list;
  {
!     int		    i;
!     nfa_state_T	    *p;
! 
!     p = &prog->state[0];
!     for (i = prog->nstate; --i >= 0; )
      {
! 	p->lastlist = list[i];
! 	++p;
      }
  }
  
***************
*** 3673,3679 ****
      return val == pos;
  }
  
! static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m));
  
  /*
   * Main matching routine.
--- 3648,3654 ----
      return val == pos;
  }
  
! static int nfa_regmatch __ARGS((nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m));
  
  /*
   * Main matching routine.
***************
*** 3686,3692 ****
   * Note: Caller must ensure that: start != NULL.
   */
      static int
! nfa_regmatch(start, submatch, m)
      nfa_state_T		*start;
      regsubs_T		*submatch;
      regsubs_T		*m;
--- 3661,3668 ----
   * Note: Caller must ensure that: start != NULL.
   */
      static int
! nfa_regmatch(prog, start, submatch, m)
!     nfa_regprog_T	*prog;
      nfa_state_T		*start;
      regsubs_T		*submatch;
      regsubs_T		*m;
***************
*** 3872,3878 ****
  		nfa_match = TRUE;
  		copy_sub(&submatch->norm, &t->subs.norm);
  #ifdef FEAT_SYN_HL
! 		copy_sub(&submatch->synt, &t->subs.synt);
  #endif
  #ifdef ENABLE_LOG
  		log_subsexpr(&t->subs);
--- 3848,3855 ----
  		nfa_match = TRUE;
  		copy_sub(&submatch->norm, &t->subs.norm);
  #ifdef FEAT_SYN_HL
! 		if (nfa_has_zsubexpr)
! 		    copy_sub(&submatch->synt, &t->subs.synt);
  #endif
  #ifdef ENABLE_LOG
  		log_subsexpr(&t->subs);
***************
*** 3928,3934 ****
  		    {
  			copy_sub(&m->norm, &t->subs.norm);
  #ifdef FEAT_SYN_HL
! 			copy_sub(&m->synt, &t->subs.synt);
  #endif
  		    }
  		    nfa_match = TRUE;
--- 3905,3912 ----
  		    {
  			copy_sub(&m->norm, &t->subs.norm);
  #ifdef FEAT_SYN_HL
! 			if (nfa_has_zsubexpr)
! 			    copy_sub(&m->synt, &t->subs.synt);
  #endif
  		    }
  		    nfa_match = TRUE;
***************
*** 4024,4035 ****
  		/* Have to clear the listid field of the NFA nodes, so that
  		 * nfa_regmatch() and addstate() can run properly after
  		 * recursion. */
! 		nfa_save_listids(start, listids);
! 		nfa_set_null_listids(start);
  		nfa_endp = endposp;
! 		result = nfa_regmatch(t->state->out, submatch, m);
! 		nfa_set_neg_listids(start);
! 		nfa_restore_listids(start, listids);
  
  		/* restore position in input text */
  		reginput = save_reginput;
--- 4002,4011 ----
  		/* Have to clear the listid field of the NFA nodes, so that
  		 * nfa_regmatch() and addstate() can run properly after
  		 * recursion. */
! 		nfa_save_listids(prog, listids);
  		nfa_endp = endposp;
! 		result = nfa_regmatch(prog, t->state->out, submatch, m);
! 		nfa_restore_listids(prog, listids);
  
  		/* restore position in input text */
  		reginput = save_reginput;
***************
*** 4665,4671 ****
--- 4641,4652 ----
  #ifdef FEAT_SYN_HL
      /* Clear the external match subpointers if necessary. */
      if (prog->reghasz == REX_SET)
+     {
+ 	nfa_has_zsubexpr = TRUE;
  	need_clear_zsubexpr = TRUE;
+     }
+     else
+ 	nfa_has_zsubexpr = FALSE;
  #endif
  
  #ifdef ENABLE_LOG
***************
*** 4694,4700 ****
      clear_sub(&m.synt);
  #endif
  
!     if (nfa_regmatch(start, &subs, &m) == FALSE)
  	return 0;
  
      cleanup_subexpr();
--- 4675,4681 ----
      clear_sub(&m.synt);
  #endif
  
!     if (nfa_regmatch(prog, start, &subs, &m) == FALSE)
  	return 0;
  
      cleanup_subexpr();
*** ../vim-7.3.1102/src/version.c	2013-06-02 19:22:05.000000000 +0200
--- src/version.c	2013-06-02 21:24:50.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1103,
  /**/

-- 
hundred-and-one symptoms of being an internet addict:
53. To find out what time it is, you send yourself an e-mail and check the
    "Date:" field.

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///