To: vim_dev@googlegroups.com
Subject: Patch 7.3.1139
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 7.3.1139
Problem:    New regexp engine: negated flag is hardly used.
Solution:   Add separate _NEG states, remove negated flag.
Files:	    src/regexp_nfa.c, src/regexp.h


*** ../vim-7.3.1138/src/regexp_nfa.c	2013-06-07 14:59:14.000000000 +0200
--- src/regexp_nfa.c	2013-06-07 16:31:29.000000000 +0200
***************
*** 64,72 ****
--- 64,75 ----
      NFA_NOPEN,			    /* Start of subexpression marked with \%( */
      NFA_NCLOSE,			    /* End of subexpr. marked with \%( ... \) */
      NFA_START_INVISIBLE,
+     NFA_START_INVISIBLE_NEG,
      NFA_START_INVISIBLE_BEFORE,
+     NFA_START_INVISIBLE_BEFORE_NEG,
      NFA_START_PATTERN,
      NFA_END_INVISIBLE,
+     NFA_END_INVISIBLE_NEG,
      NFA_END_PATTERN,
      NFA_COMPOSING,		    /* Next nodes in NFA are part of the
  				       composing multibyte char */
***************
*** 481,487 ****
  	    }
  
  	    default:
! 		if (p->c > 0 && !p->negated)
  		    return p->c; /* yes! */
  		return 0;
  	}
--- 484,490 ----
  	    }
  
  	    default:
! 		if (p->c > 0)
  		    return p->c; /* yes! */
  		return 0;
  	}
***************
*** 1991,2000 ****
--- 1994,2008 ----
  	case NFA_NOPEN:		    STRCPY(code, "NFA_NOPEN"); break;
  	case NFA_NCLOSE:	    STRCPY(code, "NFA_NCLOSE"); break;
  	case NFA_START_INVISIBLE:   STRCPY(code, "NFA_START_INVISIBLE"); break;
+ 	case NFA_START_INVISIBLE_NEG:
+ 			       STRCPY(code, "NFA_START_INVISIBLE_NEG"); break;
  	case NFA_START_INVISIBLE_BEFORE:
  			    STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break;
+ 	case NFA_START_INVISIBLE_BEFORE_NEG:
+ 			STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG"); break;
  	case NFA_START_PATTERN:   STRCPY(code, "NFA_START_PATTERN"); break;
  	case NFA_END_INVISIBLE:	    STRCPY(code, "NFA_END_INVISIBLE"); break;
+ 	case NFA_END_INVISIBLE_NEG: STRCPY(code, "NFA_END_INVISIBLE_NEG"); break;
  	case NFA_END_PATTERN:	    STRCPY(code, "NFA_END_PATTERN"); break;
  
  	case NFA_COMPOSING:	    STRCPY(code, "NFA_COMPOSING"); break;
***************
*** 2227,2234 ****
  	fprintf(debugf, " %s", p);
  
      nfa_set_code(state->c);
!     fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n",
! 		 state->negated ? "NOT " : "",
  		 code,
  		 state->c,
  		 abs(state->id),
--- 2235,2241 ----
  	fprintf(debugf, " %s", p);
  
      nfa_set_code(state->c);
!     fprintf(debugf, "%s (%d) (id=%d) val=%d\n",
  		 code,
  		 state->c,
  		 abs(state->id),
***************
*** 2330,2336 ****
      s->id   = istate;
      s->lastlist[0] = 0;
      s->lastlist[1] = 0;
-     s->negated = FALSE;
  
      return s;
  }
--- 2337,2342 ----
***************
*** 2741,2763 ****
  	case NFA_PREV_ATOM_JUST_BEFORE_NEG:
  	case NFA_PREV_ATOM_LIKE_PATTERN:
  	  {
- 	    int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG
- 				      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG);
  	    int before = (*p == NFA_PREV_ATOM_JUST_BEFORE
  				      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG);
  	    int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN);
! 	    int start_state = NFA_START_INVISIBLE;
! 	    int end_state = NFA_END_INVISIBLE;
  	    int n = 0;
  	    nfa_state_T *zend;
  	    nfa_state_T *skip;
  
! 	    if (before)
! 		start_state = NFA_START_INVISIBLE_BEFORE;
! 	    else if (pattern)
  	    {
! 		start_state = NFA_START_PATTERN;
! 		end_state = NFA_END_PATTERN;
  	    }
  
  	    if (before)
--- 2747,2783 ----
  	case NFA_PREV_ATOM_JUST_BEFORE_NEG:
  	case NFA_PREV_ATOM_LIKE_PATTERN:
  	  {
  	    int before = (*p == NFA_PREV_ATOM_JUST_BEFORE
  				      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG);
  	    int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN);
! 	    int start_state;
! 	    int end_state;
  	    int n = 0;
  	    nfa_state_T *zend;
  	    nfa_state_T *skip;
  
! 	    switch (*p)
  	    {
! 		case NFA_PREV_ATOM_NO_WIDTH:
! 		    start_state = NFA_START_INVISIBLE;
! 		    end_state = NFA_END_INVISIBLE;
! 		    break;
! 		case NFA_PREV_ATOM_NO_WIDTH_NEG:
! 		    start_state = NFA_START_INVISIBLE_NEG;
! 		    end_state = NFA_END_INVISIBLE_NEG;
! 		    break;
! 		case NFA_PREV_ATOM_JUST_BEFORE:
! 		    start_state = NFA_START_INVISIBLE_BEFORE;
! 		    end_state = NFA_END_INVISIBLE;
! 		    break;
! 		case NFA_PREV_ATOM_JUST_BEFORE_NEG:
! 		    start_state = NFA_START_INVISIBLE_BEFORE_NEG;
! 		    end_state = NFA_END_INVISIBLE_NEG;
! 		    break;
! 		case NFA_PREV_ATOM_LIKE_PATTERN:
! 		    start_state = NFA_START_PATTERN;
! 		    end_state = NFA_END_PATTERN;
! 		    break;
  	    }
  
  	    if (before)
***************
*** 2783,2793 ****
  	    s = alloc_state(start_state, e.start, s1);
  	    if (s == NULL)
  		goto theend;
- 	    if (neg)
- 	    {
- 		s->negated = TRUE;
- 		s1->negated = TRUE;
- 	    }
  	    if (before)
  		s->val = n; /* store the count */
  	    if (pattern)
--- 2803,2808 ----
***************
*** 3009,3015 ****
      matchstate = &state_ptr[istate++]; /* the match state */
      matchstate->c = NFA_MATCH;
      matchstate->out = matchstate->out1 = NULL;
-     matchstate->negated = FALSE;
      matchstate->id = 0;
  
      patch(e.out, matchstate);
--- 3024,3029 ----
***************
*** 3772,3778 ****
  		return OK;
  	    break;
  	case NFA_CLASS_SPACE:
! 	    if ((c >=9 && c <= 13) || (c == ' '))
  		return OK;
  	    break;
  	case NFA_CLASS_UPPER:
--- 3786,3792 ----
  		return OK;
  	    break;
  	case NFA_CLASS_SPACE:
! 	    if ((c >= 9 && c <= 13) || (c == ' '))
  		return OK;
  	    break;
  	case NFA_CLASS_UPPER:
***************
*** 3971,3977 ****
      int		result;
      int		need_restore = FALSE;
  
!     if (state->c == NFA_START_INVISIBLE_BEFORE)
      {
  	/* The recursive match must end at the current position. */
  	endposp = &endpos;
--- 3985,3992 ----
      int		result;
      int		need_restore = FALSE;
  
!     if (state->c == NFA_START_INVISIBLE_BEFORE
!         || state->c == NFA_START_INVISIBLE_BEFORE_NEG)
      {
  	/* The recursive match must end at the current position. */
  	endposp = &endpos;
***************
*** 4452,4457 ****
--- 4467,4473 ----
  	      }
  
  	    case NFA_END_INVISIBLE:
+ 	    case NFA_END_INVISIBLE_NEG:
  	    case NFA_END_PATTERN:
  		/*
  		 * This is only encountered after a NFA_START_INVISIBLE or
***************
*** 4489,4495 ****
  		    break;
  
  		/* do not set submatches for \@! */
! 		if (!t->state->negated)
  		{
  		    copy_sub(&m->norm, &t->subs.norm);
  #ifdef FEAT_SYN_HL
--- 4505,4511 ----
  		    break;
  
  		/* do not set submatches for \@! */
! 		if (t->state->c != NFA_END_INVISIBLE_NEG)
  		{
  		    copy_sub(&m->norm, &t->subs.norm);
  #ifdef FEAT_SYN_HL
***************
*** 4505,4511 ****
--- 4521,4529 ----
  		break;
  
  	    case NFA_START_INVISIBLE:
+ 	    case NFA_START_INVISIBLE_NEG:
  	    case NFA_START_INVISIBLE_BEFORE:
+ 	    case NFA_START_INVISIBLE_BEFORE_NEG:
  		{
  		    nfa_pim_T *pim;
  		    int cout = t->state->out1->out->c;
***************
*** 4524,4529 ****
--- 4542,4548 ----
  			    || cout == NFA_NCLOSE
  			    || t->pim != NULL
  			    || (t->state->c != NFA_START_INVISIBLE_BEFORE
+ 			        && t->state->c != NFA_START_INVISIBLE_BEFORE_NEG
  				&& failure_chance(t->state->out1->out, 0)
  					  < failure_chance(t->state->out, 0)))
  		    {
***************
*** 4534,4541 ****
  			result = recursive_regmatch(t->state, prog,
  						       submatch, m, &listids);
  
! 			/* for \@! it is a match when result is FALSE */
! 			if (result != t->state->negated)
  			{
  			    /* Copy submatch info from the recursive call */
  			    copy_sub_off(&t->subs.norm, &m->norm);
--- 4553,4563 ----
  			result = recursive_regmatch(t->state, prog,
  						       submatch, m, &listids);
  
! 			/* for \@! and \@<! it is a match when the result is
! 			 * FALSE */
! 			if (result != (t->state->c == NFA_START_INVISIBLE_NEG
! 			            || t->state->c
! 					   == NFA_START_INVISIBLE_BEFORE_NEG))
  			{
  			    /* Copy submatch info from the recursive call */
  			    copy_sub_off(&t->subs.norm, &m->norm);
***************
*** 4646,4656 ****
  		break;
  
  	    case NFA_BOW:
! 	    {
! 		int bow = TRUE;
  
  		if (curc == NUL)
! 		    bow = FALSE;
  #ifdef FEAT_MBYTE
  		else if (has_mbyte)
  		{
--- 4668,4677 ----
  		break;
  
  	    case NFA_BOW:
! 		result = TRUE;
  
  		if (curc == NUL)
! 		    result = FALSE;
  #ifdef FEAT_MBYTE
  		else if (has_mbyte)
  		{
***************
*** 4659,4685 ****
  		    /* Get class of current and previous char (if it exists). */
  		    this_class = mb_get_class_buf(reginput, reg_buf);
  		    if (this_class <= 1)
! 			bow = FALSE;
  		    else if (reg_prev_class() == this_class)
! 			bow = FALSE;
  		}
  #endif
  		else if (!vim_iswordc_buf(curc, reg_buf)
  			   || (reginput > regline
  				   && vim_iswordc_buf(reginput[-1], reg_buf)))
! 		    bow = FALSE;
! 		if (bow)
  		    addstate_here(thislist, t->state->out, &t->subs,
  							    t->pim, &listidx);
  		break;
- 	    }
  
  	    case NFA_EOW:
! 	    {
! 		int eow = TRUE;
! 
  		if (reginput == regline)
! 		    eow = FALSE;
  #ifdef FEAT_MBYTE
  		else if (has_mbyte)
  		{
--- 4680,4703 ----
  		    /* Get class of current and previous char (if it exists). */
  		    this_class = mb_get_class_buf(reginput, reg_buf);
  		    if (this_class <= 1)
! 			result = FALSE;
  		    else if (reg_prev_class() == this_class)
! 			result = FALSE;
  		}
  #endif
  		else if (!vim_iswordc_buf(curc, reg_buf)
  			   || (reginput > regline
  				   && vim_iswordc_buf(reginput[-1], reg_buf)))
! 		    result = FALSE;
! 		if (result)
  		    addstate_here(thislist, t->state->out, &t->subs,
  							    t->pim, &listidx);
  		break;
  
  	    case NFA_EOW:
! 		result = TRUE;
  		if (reginput == regline)
! 		    result = FALSE;
  #ifdef FEAT_MBYTE
  		else if (has_mbyte)
  		{
***************
*** 4690,4707 ****
  		    prev_class = reg_prev_class();
  		    if (this_class == prev_class
  					|| prev_class == 0 || prev_class == 1)
! 			eow = FALSE;
  		}
  #endif
  		else if (!vim_iswordc_buf(reginput[-1], reg_buf)
  			|| (reginput[0] != NUL
  					   && vim_iswordc_buf(curc, reg_buf)))
! 		    eow = FALSE;
! 		if (eow)
  		    addstate_here(thislist, t->state->out, &t->subs,
  							    t->pim, &listidx);
  		break;
- 	    }
  
  	    case NFA_BOF:
  		if (reglnum == 0 && reginput == regline
--- 4708,4724 ----
  		    prev_class = reg_prev_class();
  		    if (this_class == prev_class
  					|| prev_class == 0 || prev_class == 1)
! 			result = FALSE;
  		}
  #endif
  		else if (!vim_iswordc_buf(reginput[-1], reg_buf)
  			|| (reginput[0] != NUL
  					   && vim_iswordc_buf(curc, reg_buf)))
! 		    result = FALSE;
! 		if (result)
  		    addstate_here(thislist, t->state->out, &t->subs,
  							    t->pim, &listidx);
  		break;
  
  	    case NFA_BOF:
  		if (reglnum == 0 && reginput == regline
***************
*** 4740,4746 ****
  		{
  		    /* If \Z was present, then ignore composing characters.
  		     * When ignoring the base character this always matches. */
- 		    /* TODO: How about negated? */
  		    if (len == 0 && sta->c != curc)
  			result = FAIL;
  		    else
--- 4757,4762 ----
***************
*** 4813,4838 ****
  		}
  		break;
  
- 	    case NFA_CLASS_ALNUM:
- 	    case NFA_CLASS_ALPHA:
- 	    case NFA_CLASS_BLANK:
- 	    case NFA_CLASS_CNTRL:
- 	    case NFA_CLASS_DIGIT:
- 	    case NFA_CLASS_GRAPH:
- 	    case NFA_CLASS_LOWER:
- 	    case NFA_CLASS_PRINT:
- 	    case NFA_CLASS_PUNCT:
- 	    case NFA_CLASS_SPACE:
- 	    case NFA_CLASS_UPPER:
- 	    case NFA_CLASS_XDIGIT:
- 	    case NFA_CLASS_TAB:
- 	    case NFA_CLASS_RETURN:
- 	    case NFA_CLASS_BACKSPACE:
- 	    case NFA_CLASS_ESCAPE:
- 		result = check_char_class(t->state->c, curc);
- 		ADD_STATE_IF_MATCH(t->state);
- 		break;
- 
  	    case NFA_START_COLL:
  	    case NFA_START_NEG_COLL:
  	      {
--- 4829,4834 ----
***************
*** 5212,5221 ****
  		int c = t->state->c;
  
  		/* TODO: put this in #ifdef later */
! 		if (c < -256)
  		    EMSGN("INTERNAL: Negative state char: %ld", c);
- 		if (is_Magic(c))
- 		    c = un_Magic(c);
  		result = (c == curc);
  
  		if (!result && ireg_ic)
--- 5208,5215 ----
  		int c = t->state->c;
  
  		/* TODO: put this in #ifdef later */
! 		if (c < 0)
  		    EMSGN("INTERNAL: Negative state char: %ld", c);
  		result = (c == curc);
  
  		if (!result && ireg_ic)
***************
*** 5252,5259 ****
  						 prog, submatch, m, &listids);
  			t->pim->result = result ? NFA_PIM_MATCH
  							    : NFA_PIM_NOMATCH;
! 			/* for \@! it is a match when result is FALSE */
! 			if (result != t->pim->state->negated)
  			{
  			    /* Copy submatch info from the recursive call */
  			    copy_sub_off(&t->pim->subs.norm, &m->norm);
--- 5246,5257 ----
  						 prog, submatch, m, &listids);
  			t->pim->result = result ? NFA_PIM_MATCH
  							    : NFA_PIM_NOMATCH;
! 			/* for \@! and \@<! it is a match when the result is
! 			 * FALSE */
! 			if (result != (t->pim->state->c
! 						    == NFA_START_INVISIBLE_NEG
! 			            || t->pim->state->c
! 					   == NFA_START_INVISIBLE_BEFORE_NEG))
  			{
  			    /* Copy submatch info from the recursive call */
  			    copy_sub_off(&t->pim->subs.norm, &m->norm);
***************
*** 5274,5281 ****
  #endif
  		    }
  
! 		    /* for \@! it is a match when result is FALSE */
! 		    if (result != t->pim->state->negated)
  		    {
  			/* Copy submatch info from the recursive call */
  			copy_sub_off(&t->subs.norm, &t->pim->subs.norm);
--- 5272,5281 ----
  #endif
  		    }
  
! 		    /* for \@! and \@<! it is a match when result is FALSE */
! 		    if (result != (t->pim->state->c == NFA_START_INVISIBLE_NEG
! 			        || t->pim->state->c
! 					   == NFA_START_INVISIBLE_BEFORE_NEG))
  		    {
  			/* Copy submatch info from the recursive call */
  			copy_sub_off(&t->subs.norm, &t->pim->subs.norm);
*** ../vim-7.3.1138/src/regexp.h	2013-06-06 18:46:00.000000000 +0200
--- src/regexp.h	2013-06-07 16:11:12.000000000 +0200
***************
*** 73,79 ****
      nfa_state_T		*out1;
      int			id;
      int			lastlist[2]; /* 0: normal, 1: recursive */
-     int			negated;
      int			val;
  };
  
--- 73,78 ----
*** ../vim-7.3.1138/src/version.c	2013-06-07 14:59:14.000000000 +0200
--- src/version.c	2013-06-07 16:11:59.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1139,
  /**/

-- 
Common sense is what tells you that the world is flat.

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///