Blob Blame History Raw
To: vim_dev@googlegroups.com
Subject: Patch 7.3.1105
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 7.3.1105
Problem:    New regexp engine: too much code in one function.  Dead code.
Solution:   Move the recursive nfa_regmatch call to a separate function.
	    Remove the dead code.
Files:	    src/regexp_nfa.c


*** ../vim-7.3.1104/src/regexp_nfa.c	2013-06-02 22:07:57.000000000 +0200
--- src/regexp_nfa.c	2013-06-02 22:35:43.000000000 +0200
***************
*** 3665,3673 ****
--- 3665,3802 ----
      return val == pos;
  }
  
+ static int recursive_regmatch __ARGS((nfa_state_T *state, nfa_regprog_T *prog, regsubs_T *submatch, regsubs_T *m, int **listids));
  static int nfa_regmatch __ARGS((nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m));
  
  /*
+  * Recursively call nfa_regmatch()
+  */
+     static int
+ recursive_regmatch(state, prog, submatch, m, listids)
+     nfa_state_T	    *state;
+     nfa_regprog_T   *prog;
+     regsubs_T	    *submatch;
+     regsubs_T	    *m;
+     int		    **listids;
+ {
+     char_u	*save_reginput = reginput;
+     char_u	*save_regline = regline;
+     int		save_reglnum = reglnum;
+     int		save_nfa_match = nfa_match;
+     save_se_T   *save_nfa_endp = nfa_endp;
+     save_se_T   endpos;
+     save_se_T   *endposp = NULL;
+     int		result;
+ 
+     if (state->c == NFA_START_INVISIBLE_BEFORE)
+     {
+ 	/* The recursive match must end at the current position. */
+ 	endposp = &endpos;
+ 	if (REG_MULTI)
+ 	{
+ 	    endpos.se_u.pos.col = (int)(reginput - regline);
+ 	    endpos.se_u.pos.lnum = reglnum;
+ 	}
+ 	else
+ 	    endpos.se_u.ptr = reginput;
+ 
+ 	/* Go back the specified number of bytes, or as far as the
+ 	 * start of the previous line, to try matching "\@<=" or
+ 	 * not matching "\@<!".
+ 	 * TODO: This is very inefficient! Would be better to
+ 	 * first check for a match with what follows. */
+ 	if (state->val <= 0)
+ 	{
+ 	    if (REG_MULTI)
+ 	    {
+ 		regline = reg_getline(--reglnum);
+ 		if (regline == NULL)
+ 		    /* can't go before the first line */
+ 		    regline = reg_getline(++reglnum);
+ 	    }
+ 	    reginput = regline;
+ 	}
+ 	else
+ 	{
+ 	    if (REG_MULTI && (int)(reginput - regline) < state->val)
+ 	    {
+ 		/* Not enough bytes in this line, go to end of
+ 		 * previous line. */
+ 		regline = reg_getline(--reglnum);
+ 		if (regline == NULL)
+ 		{
+ 		    /* can't go before the first line */
+ 		    regline = reg_getline(++reglnum);
+ 		    reginput = regline;
+ 		}
+ 		else
+ 		    reginput = regline + STRLEN(regline);
+ 	    }
+ 	    if ((int)(reginput - regline) >= state->val)
+ 	    {
+ 		reginput -= state->val;
+ #ifdef FEAT_MBYTE
+ 		if (has_mbyte)
+ 		    reginput -= mb_head_off(regline, reginput);
+ #endif
+ 	    }
+ 	    else
+ 		reginput = regline;
+ 	}
+     }
+ 
+     /* Call nfa_regmatch() to check if the current concat matches
+      * at this position. The concat ends with the node
+      * NFA_END_INVISIBLE */
+     if (*listids == NULL)
+     {
+ 	*listids = (int *)lalloc(sizeof(int) * nstate, TRUE);
+ 	if (*listids == NULL)
+ 	{
+ 	    EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!"));
+ 	    return 0;
+ 	}
+     }
+ #ifdef ENABLE_LOG
+     if (log_fd != stderr)
+ 	fclose(log_fd);
+     log_fd = NULL;
+ #endif
+     /* Have to clear the listid field of the NFA nodes, so that
+      * nfa_regmatch() and addstate() can run properly after
+      * recursion. */
+     nfa_save_listids(prog, *listids);
+     nfa_endp = endposp;
+     result = nfa_regmatch(prog, state->out, submatch, m);
+     nfa_restore_listids(prog, *listids);
+ 
+     /* restore position in input text */
+     reginput = save_reginput;
+     regline = save_regline;
+     reglnum = save_reglnum;
+     nfa_match = save_nfa_match;
+     nfa_endp = save_nfa_endp;
+ 
+ #ifdef ENABLE_LOG
+     log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
+     if (log_fd != NULL)
+     {
+ 	fprintf(log_fd, "****************************\n");
+ 	fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n");
+ 	fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE");
+ 	fprintf(log_fd, "****************************\n");
+     }
+     else
+     {
+ 	EMSG(_("Could not open temporary log file for writing, displaying on stderr ... "));
+ 	log_fd = stderr;
+     }
+ #endif
+ 
+     return result;
+ }
+ 
+ /*
   * Main matching routine.
   *
   * Run NFA to determine whether it matches reginput.
***************
*** 3881,4051 ****
  	      }
  
  	    case NFA_END_INVISIBLE:
! 		/* This is only encountered after a NFA_START_INVISIBLE or
  		 * NFA_START_INVISIBLE_BEFORE node.
  		 * They surround a zero-width group, used with "\@=", "\&",
  		 * "\@!", "\@<=" and "\@<!".
  		 * If we got here, it means that the current "invisible" group
  		 * finished successfully, so return control to the parent
! 		 * nfa_regmatch().  Submatches are stored in *m, and used in
! 		 * the parent call. */
! 		if (start->c == NFA_MOPEN)
! 		    /* TODO: do we ever get here? */
! 		    addstate_here(thislist, t->state->out, &t->subs, &listidx);
! 		else
! 		{
  #ifdef ENABLE_LOG
! 		    if (nfa_endp != NULL)
! 		    {
! 			if (REG_MULTI)
! 			    fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
! 				    (int)reglnum,
! 				    (int)nfa_endp->se_u.pos.lnum,
! 				    (int)(reginput - regline),
! 				    nfa_endp->se_u.pos.col);
! 			else
! 			    fprintf(log_fd, "Current col: %d, endp col: %d\n",
! 				    (int)(reginput - regline),
! 				    (int)(nfa_endp->se_u.ptr - reginput));
! 		    }
! #endif
! 		    /* It's only a match if it ends at "nfa_endp" */
! 		    if (nfa_endp != NULL && (REG_MULTI
! 			    ? (reglnum != nfa_endp->se_u.pos.lnum
! 				|| (int)(reginput - regline)
! 						    != nfa_endp->se_u.pos.col)
! 			    : reginput != nfa_endp->se_u.ptr))
! 			break;
! 
! 		    /* do not set submatches for \@! */
! 		    if (!t->state->negated)
! 		    {
! 			copy_sub(&m->norm, &t->subs.norm);
! #ifdef FEAT_SYN_HL
! 			if (nfa_has_zsubexpr)
! 			    copy_sub(&m->synt, &t->subs.synt);
! #endif
! 		    }
! 		    nfa_match = TRUE;
! 		}
! 		break;
! 
! 	    case NFA_START_INVISIBLE:
! 	    case NFA_START_INVISIBLE_BEFORE:
! 	      {
! 		char_u	    *save_reginput = reginput;
! 		char_u	    *save_regline = regline;
! 		int	    save_reglnum = reglnum;
! 		int	    save_nfa_match = nfa_match;
! 		save_se_T   *save_nfa_endp = nfa_endp;
! 		save_se_T   endpos;
! 		save_se_T   *endposp = NULL;
! 
! 		if (t->state->c == NFA_START_INVISIBLE_BEFORE)
  		{
- 		    /* The recursive match must end at the current position. */
- 		    endposp = &endpos;
  		    if (REG_MULTI)
! 		    {
! 			endpos.se_u.pos.col = (int)(reginput - regline);
! 			endpos.se_u.pos.lnum = reglnum;
! 		    }
  		    else
! 			endpos.se_u.ptr = reginput;
! 
! 		    /* Go back the specified number of bytes, or as far as the
! 		     * start of the previous line, to try matching "\@<=" or
! 		     * not matching "\@<!".
! 		     * TODO: This is very inefficient! Would be better to
! 		     * first check for a match with what follows. */
! 		    if (t->state->val <= 0)
! 		    {
! 			if (REG_MULTI)
! 			{
! 			    regline = reg_getline(--reglnum);
! 			    if (regline == NULL)
! 				/* can't go before the first line */
! 				regline = reg_getline(++reglnum);
! 			}
! 			reginput = regline;
! 		    }
! 		    else
! 		    {
! 			if (REG_MULTI
! 				&& (int)(reginput - regline) < t->state->val)
! 			{
! 			    /* Not enough bytes in this line, go to end of
! 			     * previous line. */
! 			    regline = reg_getline(--reglnum);
! 			    if (regline == NULL)
! 			    {
! 				/* can't go before the first line */
! 				regline = reg_getline(++reglnum);
! 				reginput = regline;
! 			    }
! 			    else
! 				reginput = regline + STRLEN(regline);
! 			}
! 			if ((int)(reginput - regline) >= t->state->val)
! 			{
! 			    reginput -= t->state->val;
! #ifdef FEAT_MBYTE
! 			    if (has_mbyte)
! 				reginput -= mb_head_off(regline, reginput);
! #endif
! 			}
! 			else
! 			    reginput = regline;
! 		    }
  		}
  
! 		/* Call nfa_regmatch() to check if the current concat matches
! 		 * at this position. The concat ends with the node
! 		 * NFA_END_INVISIBLE */
! 		if (listids == NULL)
  		{
! 		    listids = (int *)lalloc(sizeof(int) * nstate, TRUE);
! 		    if (listids == NULL)
! 		    {
! 			EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!"));
! 			return 0;
! 		    }
  		}
! #ifdef ENABLE_LOG
! 		if (log_fd != stderr)
! 		    fclose(log_fd);
! 		log_fd = NULL;
! #endif
! 		/* Have to clear the listid field of the NFA nodes, so that
! 		 * nfa_regmatch() and addstate() can run properly after
! 		 * recursion. */
! 		nfa_save_listids(prog, listids);
! 		nfa_endp = endposp;
! 		result = nfa_regmatch(prog, t->state->out, submatch, m);
! 		nfa_restore_listids(prog, listids);
! 
! 		/* restore position in input text */
! 		reginput = save_reginput;
! 		regline = save_regline;
! 		reglnum = save_reglnum;
! 		nfa_match = save_nfa_match;
! 		nfa_endp = save_nfa_endp;
  
- #ifdef ENABLE_LOG
- 		log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
- 		if (log_fd != NULL)
- 		{
- 		    fprintf(log_fd, "****************************\n");
- 		    fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n");
- 		    fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE");
- 		    fprintf(log_fd, "****************************\n");
- 		}
- 		else
- 		{
- 		    EMSG(_("Could not open temporary log file for writing, displaying on stderr ... "));
- 		    log_fd = stderr;
- 		}
- #endif
  		/* for \@! it is a match when result is FALSE */
  		if (result != t->state->negated)
  		{
--- 4010,4066 ----
  	      }
  
  	    case NFA_END_INVISIBLE:
! 		/*
! 		 * This is only encountered after a NFA_START_INVISIBLE or
  		 * NFA_START_INVISIBLE_BEFORE node.
  		 * They surround a zero-width group, used with "\@=", "\&",
  		 * "\@!", "\@<=" and "\@<!".
  		 * If we got here, it means that the current "invisible" group
  		 * finished successfully, so return control to the parent
! 		 * nfa_regmatch().  For a look-behind match only when it ends
! 		 * in the position in "nfa_endp".
! 		 * Submatches are stored in *m, and used in the parent call.
! 		 */
  #ifdef ENABLE_LOG
! 		if (nfa_endp != NULL)
  		{
  		    if (REG_MULTI)
! 			fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
! 				(int)reglnum,
! 				(int)nfa_endp->se_u.pos.lnum,
! 				(int)(reginput - regline),
! 				nfa_endp->se_u.pos.col);
  		    else
! 			fprintf(log_fd, "Current col: %d, endp col: %d\n",
! 				(int)(reginput - regline),
! 				(int)(nfa_endp->se_u.ptr - reginput));
  		}
+ #endif
+ 		/* It's only a match if it ends at "nfa_endp" */
+ 		if (nfa_endp != NULL && (REG_MULTI
+ 			? (reglnum != nfa_endp->se_u.pos.lnum
+ 			    || (int)(reginput - regline)
+ 						!= nfa_endp->se_u.pos.col)
+ 			: reginput != nfa_endp->se_u.ptr))
+ 		    break;
  
! 		/* do not set submatches for \@! */
! 		if (!t->state->negated)
  		{
! 		    copy_sub(&m->norm, &t->subs.norm);
! #ifdef FEAT_SYN_HL
! 		    if (nfa_has_zsubexpr)
! 			copy_sub(&m->synt, &t->subs.synt);
! #endif
  		}
! 		nfa_match = TRUE;
! 		break;
! 
! 	    case NFA_START_INVISIBLE:
! 	    case NFA_START_INVISIBLE_BEFORE:
! 		result = recursive_regmatch(t->state, prog, submatch, m,
! 								    &listids);
  
  		/* for \@! it is a match when result is FALSE */
  		if (result != t->state->negated)
  		{
***************
*** 4056,4067 ****
  #endif
  
  		    /* t->state->out1 is the corresponding END_INVISIBLE node;
! 		     * Add it to the current list (zero-width match). */
  		    addstate_here(thislist, t->state->out1->out, &t->subs,
  								    &listidx);
  		}
  		break;
- 	      }
  
  	    case NFA_BOL:
  		if (reginput == regline)
--- 4071,4081 ----
  #endif
  
  		    /* t->state->out1 is the corresponding END_INVISIBLE node;
! 		     * Add its out to the current list (zero-width match). */
  		    addstate_here(thislist, t->state->out1->out, &t->subs,
  								    &listidx);
  		}
  		break;
  
  	    case NFA_BOL:
  		if (reginput == regline)
*** ../vim-7.3.1104/src/version.c	2013-06-02 22:07:57.000000000 +0200
--- src/version.c	2013-06-02 22:37:02.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1105,
  /**/

-- 
"I can't complain, but sometimes I still do."   (Joe Walsh)

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///