To: vim_dev@googlegroups.com
Subject: Patch 7.3.1137
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 7.3.1137
Problem:    New regexp engine: collections are slow.
Solution:   Handle all characters in one go.
Files:	    src/regexp_nfa.c


*** ../vim-7.3.1136/src/regexp_nfa.c	2013-06-06 18:46:00.000000000 +0200
--- src/regexp_nfa.c	2013-06-07 13:40:58.000000000 +0200
***************
*** 34,48 ****
      NFA_SPLIT = -1024,
      NFA_MATCH,
      NFA_SKIP_CHAR,		    /* matches a 0-length char */
-     NFA_END_NEG_RANGE,		    /* Used when expanding [^ab] */
  
!     NFA_CONCAT,
      NFA_OR,
      NFA_STAR,			    /* greedy * */
      NFA_STAR_NONGREEDY,		    /* non-greedy * */
      NFA_QUEST,			    /* greedy \? */
      NFA_QUEST_NONGREEDY,	    /* non-greedy \? */
-     NFA_NOT,			    /* used for [^ab] negated char ranges */
  
      NFA_BOL,			    /* ^    Begin line */
      NFA_EOL,			    /* $    End line */
--- 34,56 ----
      NFA_SPLIT = -1024,
      NFA_MATCH,
      NFA_SKIP_CHAR,		    /* matches a 0-length char */
  
!     NFA_START_COLL,		    /* [abc] start */
!     NFA_END_COLL,		    /* [abc] end */
!     NFA_START_NEG_COLL,		    /* [^abc] start */
!     NFA_END_NEG_COLL,		    /* [^abc] end (only used in postfix) */
!     NFA_RANGE,			    /* range of the two previous items (only
! 				     * used in postfix) */
!     NFA_RANGE_MIN,		    /* low end of a range  */
!     NFA_RANGE_MAX,		    /* high end of a range  */
! 
!     NFA_CONCAT,			    /* concatenate two previous items (only
! 				     * used in postfix) */
      NFA_OR,
      NFA_STAR,			    /* greedy * */
      NFA_STAR_NONGREEDY,		    /* non-greedy * */
      NFA_QUEST,			    /* greedy \? */
      NFA_QUEST_NONGREEDY,	    /* non-greedy \? */
  
      NFA_BOL,			    /* ^    Begin line */
      NFA_EOL,			    /* $    End line */
***************
*** 260,266 ****
  static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
  static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
! static int nfa_emit_equi_class __ARGS((int c, int neg));
  static int nfa_regatom __ARGS((void));
  static int nfa_regpiece __ARGS((void));
  static int nfa_regconcat __ARGS((void));
--- 268,274 ----
  static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
  static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
! static int nfa_emit_equi_class __ARGS((int c));
  static int nfa_regatom __ARGS((void));
  static int nfa_regpiece __ARGS((void));
  static int nfa_regconcat __ARGS((void));
***************
*** 664,684 ****
   * NOTE! When changing this function, also update reg_equi_class()
   */
      static int
! nfa_emit_equi_class(c, neg)
      int	    c;
-     int	    neg;
  {
!     int	first = TRUE;
!     int	glue = neg == TRUE ? NFA_CONCAT : NFA_OR;
! #define EMIT2(c)		\
! 	EMIT(c);		\
! 	if (neg == TRUE) {	\
! 	    EMIT(NFA_NOT);	\
! 	}			\
! 	if (first == FALSE)	\
! 	    EMIT(glue);		\
! 	else			\
! 	    first = FALSE;	\
  
  #ifdef FEAT_MBYTE
      if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
--- 672,681 ----
   * NOTE! When changing this function, also update reg_equi_class()
   */
      static int
! nfa_emit_equi_class(c)
      int	    c;
  {
! #define EMIT2(c)   EMIT(c); EMIT(NFA_CONCAT);
  
  #ifdef FEAT_MBYTE
      if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
***************
*** 687,770 ****
      {
  	switch (c)
  	{
! 	    case 'A': case '\300': case '\301': case '\302':
! 	    case '\303': case '\304': case '\305':
! 		    EMIT2('A');	    EMIT2('\300');  EMIT2('\301');
! 		    EMIT2('\302');  EMIT2('\303');  EMIT2('\304');
! 		    EMIT2('\305');
  		    return OK;
  
! 	    case 'C': case '\307':
! 		    EMIT2('C');	    EMIT2('\307');
  		    return OK;
  
! 	    case 'E': case '\310': case '\311': case '\312': case '\313':
! 		    EMIT2('E');	    EMIT2('\310');  EMIT2('\311');
! 		    EMIT2('\312');  EMIT2('\313');
  		    return OK;
  
! 	    case 'I': case '\314': case '\315': case '\316': case '\317':
! 		    EMIT2('I');	    EMIT2('\314');  EMIT2('\315');
! 		    EMIT2('\316');  EMIT2('\317');
  		    return OK;
  
! 	    case 'N': case '\321':
! 		    EMIT2('N');	    EMIT2('\321');
  		    return OK;
  
! 	    case 'O': case '\322': case '\323': case '\324': case '\325':
! 	    case '\326':
! 		    EMIT2('O');	    EMIT2('\322');  EMIT2('\323');
! 		    EMIT2('\324');  EMIT2('\325');  EMIT2('\326');
  		    return OK;
  
! 	    case 'U': case '\331': case '\332': case '\333': case '\334':
! 		    EMIT2('U');	    EMIT2('\331');  EMIT2('\332');
! 		    EMIT2('\333');  EMIT2('\334');
  		    return OK;
  
! 	    case 'Y': case '\335':
! 		    EMIT2('Y');	    EMIT2('\335');
  		    return OK;
  
! 	    case 'a': case '\340': case '\341': case '\342':
! 	    case '\343': case '\344': case '\345':
! 		    EMIT2('a');	    EMIT2('\340');  EMIT2('\341');
! 		    EMIT2('\342');  EMIT2('\343');  EMIT2('\344');
! 		    EMIT2('\345');
  		    return OK;
  
! 	    case 'c': case '\347':
! 		    EMIT2('c');	    EMIT2('\347');
  		    return OK;
  
! 	    case 'e': case '\350': case '\351': case '\352': case '\353':
! 		    EMIT2('e');	    EMIT2('\350');  EMIT2('\351');
! 		    EMIT2('\352');  EMIT2('\353');
  		    return OK;
  
! 	    case 'i': case '\354': case '\355': case '\356': case '\357':
! 		    EMIT2('i');	    EMIT2('\354');  EMIT2('\355');
! 		    EMIT2('\356');  EMIT2('\357');
  		    return OK;
  
! 	    case 'n': case '\361':
! 		    EMIT2('n');	    EMIT2('\361');
  		    return OK;
  
! 	    case 'o': case '\362': case '\363': case '\364': case '\365':
! 	    case '\366':
! 		    EMIT2('o');	    EMIT2('\362');  EMIT2('\363');
! 		    EMIT2('\364');  EMIT2('\365');  EMIT2('\366');
  		    return OK;
  
! 	    case 'u': case '\371': case '\372': case '\373': case '\374':
! 		    EMIT2('u');	    EMIT2('\371');  EMIT2('\372');
! 		    EMIT2('\373');  EMIT2('\374');
  		    return OK;
  
! 	    case 'y': case '\375': case '\377':
! 		    EMIT2('y');	    EMIT2('\375');  EMIT2('\377');
  		    return OK;
  
  	    default:
--- 684,767 ----
      {
  	switch (c)
  	{
! 	    case 'A': case 0300: case 0301: case 0302:
! 	    case 0303: case 0304: case 0305:
! 		    EMIT2('A');	    EMIT2(0300);  EMIT2(0301);
! 		    EMIT2(0302);  EMIT2(0303);  EMIT2(0304);
! 		    EMIT2(0305);
  		    return OK;
  
! 	    case 'C': case 0307:
! 		    EMIT2('C');	    EMIT2(0307);
  		    return OK;
  
! 	    case 'E': case 0310: case 0311: case 0312: case 0313:
! 		    EMIT2('E');	    EMIT2(0310);  EMIT2(0311);
! 		    EMIT2(0312);  EMIT2(0313);
  		    return OK;
  
! 	    case 'I': case 0314: case 0315: case 0316: case 0317:
! 		    EMIT2('I');	    EMIT2(0314);  EMIT2(0315);
! 		    EMIT2(0316);  EMIT2(0317);
  		    return OK;
  
! 	    case 'N': case 0321:
! 		    EMIT2('N');	    EMIT2(0321);
  		    return OK;
  
! 	    case 'O': case 0322: case 0323: case 0324: case 0325:
! 	    case 0326:
! 		    EMIT2('O');	    EMIT2(0322);  EMIT2(0323);
! 		    EMIT2(0324);  EMIT2(0325);  EMIT2(0326);
  		    return OK;
  
! 	    case 'U': case 0331: case 0332: case 0333: case 0334:
! 		    EMIT2('U');	    EMIT2(0331);  EMIT2(0332);
! 		    EMIT2(0333);  EMIT2(0334);
  		    return OK;
  
! 	    case 'Y': case 0335:
! 		    EMIT2('Y');	    EMIT2(0335);
  		    return OK;
  
! 	    case 'a': case 0340: case 0341: case 0342:
! 	    case 0343: case 0344: case 0345:
! 		    EMIT2('a');	    EMIT2(0340);  EMIT2(0341);
! 		    EMIT2(0342);  EMIT2(0343);  EMIT2(0344);
! 		    EMIT2(0345);
  		    return OK;
  
! 	    case 'c': case 0347:
! 		    EMIT2('c');	    EMIT2(0347);
  		    return OK;
  
! 	    case 'e': case 0350: case 0351: case 0352: case 0353:
! 		    EMIT2('e');	    EMIT2(0350);  EMIT2(0351);
! 		    EMIT2(0352);  EMIT2(0353);
  		    return OK;
  
! 	    case 'i': case 0354: case 0355: case 0356: case 0357:
! 		    EMIT2('i');	    EMIT2(0354);  EMIT2(0355);
! 		    EMIT2(0356);  EMIT2(0357);
  		    return OK;
  
! 	    case 'n': case 0361:
! 		    EMIT2('n');	    EMIT2(0361);
  		    return OK;
  
! 	    case 'o': case 0362: case 0363: case 0364: case 0365:
! 	    case 0366:
! 		    EMIT2('o');	    EMIT2(0362);  EMIT2(0363);
! 		    EMIT2(0364);  EMIT2(0365);  EMIT2(0366);
  		    return OK;
  
! 	    case 'u': case 0371: case 0372: case 0373: case 0374:
! 		    EMIT2('u');	    EMIT2(0371);  EMIT2(0372);
! 		    EMIT2(0373);  EMIT2(0374);
  		    return OK;
  
! 	    case 'y': case 0375: case 0377:
! 		    EMIT2('y');	    EMIT2(0375);  EMIT2(0377);
  		    return OK;
  
  	    default:
***************
*** 811,824 ****
      char_u	*old_regparse = regparse;
  #endif
      int		extra = 0;
-     int		first;
      int		emit_range;
      int		negated;
      int		result;
      int		startc = -1;
      int		endc = -1;
      int		oldstartc = -1;
-     int		glue;		/* ID that will "glue" nodes together */
  
      c = getchr();
      switch (c)
--- 808,819 ----
***************
*** 927,934 ****
  
  	case Magic('n'):
  	    if (reg_string)
! 	    /* In a string "\n" matches a newline character. */
! 	    EMIT(NL);
  	    else
  	    {
  		/* In buffer text "\n" matches the end of a line. */
--- 922,929 ----
  
  	case Magic('n'):
  	    if (reg_string)
! 		/* In a string "\n" matches a newline character. */
! 		EMIT(NL);
  	    else
  	    {
  		/* In buffer text "\n" matches the end of a line. */
***************
*** 1160,1191 ****
  	case Magic('['):
  collection:
  	    /*
! 	     * Glue is emitted between several atoms from the [].
! 	     * It is either NFA_OR, or NFA_CONCAT.
! 	     *
! 	     * [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation)
! 	     * [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT
! 	     *		NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix
! 	     *		notation)
! 	     *
  	     */
  
- 
- /* Emit negation atoms, if needed.
-  * The CONCAT below merges the NOT with the previous node. */
- #define TRY_NEG()		    \
- 	    if (negated == TRUE)    \
- 	    {			    \
- 		EMIT(NFA_NOT);	    \
- 	    }
- 
- /* Emit glue between important nodes : CONCAT or OR. */
- #define EMIT_GLUE()		    \
- 	    if (first == FALSE)	    \
- 		EMIT(glue);	    \
- 	    else		    \
- 		first = FALSE;
- 
  	    p = regparse;
  	    endp = skip_anyof(p);
  	    if (*endp == ']')
--- 1155,1169 ----
  	case Magic('['):
  collection:
  	    /*
! 	     * [abc]  uses NFA_START_COLL - NFA_END_COLL
! 	     * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
! 	     * Each character is produced as a regular state, using
! 	     * NFA_CONCAT to bind them together.
! 	     * Besides normal characters there can be:
! 	     * - character classes  NFA_CLASS_*
! 	     * - ranges, two characters followed by NFA_RANGE.
  	     */
  
  	    p = regparse;
  	    endp = skip_anyof(p);
  	    if (*endp == ']')
***************
*** 1216,1236 ****
  		 * version that turns [abc] into 'a' OR 'b' OR 'c'
  		 */
  		startc = endc = oldstartc = -1;
- 		first = TRUE;	    /* Emitting first atom in this sequence? */
  		negated = FALSE;
- 		glue = NFA_OR;
  		if (*regparse == '^')			/* negated range */
  		{
  		    negated = TRUE;
- 		    glue = NFA_CONCAT;
  		    mb_ptr_adv(regparse);
  		}
  		if (*regparse == '-')
  		{
  		    startc = '-';
  		    EMIT(startc);
! 		    TRY_NEG();
! 		    EMIT_GLUE();
  		    mb_ptr_adv(regparse);
  		}
  		/* Emit the OR branches for each character in the [] */
--- 1194,1213 ----
  		 * version that turns [abc] into 'a' OR 'b' OR 'c'
  		 */
  		startc = endc = oldstartc = -1;
  		negated = FALSE;
  		if (*regparse == '^')			/* negated range */
  		{
  		    negated = TRUE;
  		    mb_ptr_adv(regparse);
+ 		    EMIT(NFA_START_NEG_COLL);
  		}
+ 		else
+ 		    EMIT(NFA_START_COLL);
  		if (*regparse == '-')
  		{
  		    startc = '-';
  		    EMIT(startc);
! 		    EMIT(NFA_CONCAT);
  		    mb_ptr_adv(regparse);
  		}
  		/* Emit the OR branches for each character in the [] */
***************
*** 1306,1325 ****
  				    EMIT(NFA_CLASS_ESCAPE);
  				    break;
  			    }
! 			    TRY_NEG();
! 			    EMIT_GLUE();
  			    continue;
  			}
  			/* Try equivalence class [=a=] and the like */
  			if (equiclass != 0)
  			{
! 			    result = nfa_emit_equi_class(equiclass, negated);
  			    if (result == FAIL)
  			    {
  				/* should never happen */
  				EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
  			    }
- 			    EMIT_GLUE();
  			    continue;
  			}
  			/* Try collating class like [. .]  */
--- 1283,1300 ----
  				    EMIT(NFA_CLASS_ESCAPE);
  				    break;
  			    }
! 			    EMIT(NFA_CONCAT);
  			    continue;
  			}
  			/* Try equivalence class [=a=] and the like */
  			if (equiclass != 0)
  			{
! 			    result = nfa_emit_equi_class(equiclass);
  			    if (result == FAIL)
  			    {
  				/* should never happen */
  				EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
  			    }
  			    continue;
  			}
  			/* Try collating class like [. .]  */
***************
*** 1391,1409 ****
  			startc = oldstartc;
  			if (startc > endc)
  			    EMSG_RET_FAIL(_(e_invrange));
  #ifdef FEAT_MBYTE
! 			if (has_mbyte && ((*mb_char2len)(startc) > 1
  				    || (*mb_char2len)(endc) > 1))
  			{
! 			    if (endc > startc + 256)
! 				EMSG_RET_FAIL(_(e_invrange));
! 			    /* Emit the range. "startc" was already emitted, so
! 			     * skip it. */
  			    for (c = startc + 1; c <= endc; c++)
  			    {
  				EMIT(c);
! 				TRY_NEG();
! 				EMIT_GLUE();
  			    }
  			}
  			else
--- 1366,1397 ----
  			startc = oldstartc;
  			if (startc > endc)
  			    EMSG_RET_FAIL(_(e_invrange));
+ 
+ 			if (endc > startc + 2)
+ 			{
+ 			    /* Emit a range instead of the sequence of
+ 			     * individual characters. */
+ 			    if (startc == 0)
+ 				/* \x00 is translated to \x0a, start at \x01. */
+ 				EMIT(1);
+ 			    else
+ 				--post_ptr; /* remove NFA_CONCAT */
+ 			    EMIT(endc);
+ 			    EMIT(NFA_RANGE);
+ 			    EMIT(NFA_CONCAT);
+ 			}
+ 			else
  #ifdef FEAT_MBYTE
! 			     if (has_mbyte && ((*mb_char2len)(startc) > 1
  				    || (*mb_char2len)(endc) > 1))
  			{
! 			    /* Emit the characters in the range.
! 			     * "startc" was already emitted, so skip it.
! 			     * */
  			    for (c = startc + 1; c <= endc; c++)
  			    {
  				EMIT(c);
! 				EMIT(NFA_CONCAT);
  			    }
  			}
  			else
***************
*** 1425,1432 ****
  #endif
  				{
  				    EMIT(c);
! 				    TRY_NEG();
! 				    EMIT_GLUE();
  				}
  			}
  			emit_range = FALSE;
--- 1413,1419 ----
  #endif
  				{
  				    EMIT(c);
! 				    EMIT(NFA_CONCAT);
  				}
  			}
  			emit_range = FALSE;
***************
*** 1434,1456 ****
  		    }
  		    else
  		    {
! 			/*
! 			 * This char (startc) is not part of a range. Just
  			 * emit it.
- 			 *
  			 * Normally, simply emit startc. But if we get char
  			 * code=0 from a collating char, then replace it with
  			 * 0x0a.
- 			 *
  			 * This is needed to completely mimic the behaviour of
! 			 * the backtracking engine.
! 			 */
! 			if (got_coll_char == TRUE && startc == 0)
! 			    EMIT(0x0a);
  			else
! 			    EMIT(startc);
! 			TRY_NEG();
! 			EMIT_GLUE();
  		    }
  
  		    mb_ptr_adv(regparse);
--- 1421,1449 ----
  		    }
  		    else
  		    {
! 			/* This char (startc) is not part of a range. Just
  			 * emit it.
  			 * Normally, simply emit startc. But if we get char
  			 * code=0 from a collating char, then replace it with
  			 * 0x0a.
  			 * This is needed to completely mimic the behaviour of
! 			 * the backtracking engine. */
! 			if (startc == NFA_NEWL)
! 			{
! 			    /* Line break can't be matched as part of the
! 			     * collection, add an OR below. But not for negated
! 			     * range. */
! 			    if (!negated)
! 				extra = ADD_NL;
! 			}
  			else
! 			{
! 			    if (got_coll_char == TRUE && startc == 0)
! 				EMIT(0x0a);
! 			    else
! 				EMIT(startc);
! 			    EMIT(NFA_CONCAT);
! 			}
  		    }
  
  		    mb_ptr_adv(regparse);
***************
*** 1460,1479 ****
  		if (*regparse == '-')	    /* if last, '-' is just a char */
  		{
  		    EMIT('-');
! 		    TRY_NEG();
! 		    EMIT_GLUE();
  		}
  		mb_ptr_adv(regparse);
  
  		/* skip the trailing ] */
  		regparse = endp;
  		mb_ptr_adv(regparse);
  		if (negated == TRUE)
! 		{
! 		    /* Mark end of negated char range */
! 		    EMIT(NFA_END_NEG_RANGE);
! 		    EMIT(NFA_CONCAT);
! 		}
  
  		/* \_[] also matches \n but it's not negated */
  		if (extra == ADD_NL)
--- 1453,1471 ----
  		if (*regparse == '-')	    /* if last, '-' is just a char */
  		{
  		    EMIT('-');
! 		    EMIT(NFA_CONCAT);
  		}
  		mb_ptr_adv(regparse);
  
  		/* skip the trailing ] */
  		regparse = endp;
  		mb_ptr_adv(regparse);
+ 
+ 		/* Mark end of the collection. */
  		if (negated == TRUE)
! 		    EMIT(NFA_END_NEG_COLL);
! 		else
! 		    EMIT(NFA_END_COLL);
  
  		/* \_[] also matches \n but it's not negated */
  		if (extra == ADD_NL)
***************
*** 1532,1540 ****
  	    }
      }
  
- #undef TRY_NEG
- #undef EMIT_GLUE
- 
      return OK;
  }
  
--- 1524,1529 ----
***************
*** 2091,2100 ****
  	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
  	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
  	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
- 	case NFA_NOT:		STRCPY(code, "NFA_NOT "); break;
  	case NFA_SKIP_CHAR:	STRCPY(code, "NFA_SKIP_CHAR"); break;
  	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
! 	case NFA_END_NEG_RANGE:	STRCPY(code, "NFA_END_NEG_RANGE"); break;
  	case NFA_CLASS_ALNUM:	STRCPY(code, "NFA_CLASS_ALNUM"); break;
  	case NFA_CLASS_ALPHA:	STRCPY(code, "NFA_CLASS_ALPHA"); break;
  	case NFA_CLASS_BLANK:	STRCPY(code, "NFA_CLASS_BLANK"); break;
--- 2080,2096 ----
  	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
  	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
  	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
  	case NFA_SKIP_CHAR:	STRCPY(code, "NFA_SKIP_CHAR"); break;
  	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
! 
! 	case NFA_START_COLL:	STRCPY(code, "NFA_START_COLL"); break;
! 	case NFA_END_COLL:	STRCPY(code, "NFA_END_COLL"); break;
! 	case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break;
! 	case NFA_END_NEG_COLL:	STRCPY(code, "NFA_END_NEG_COLL"); break;
! 	case NFA_RANGE:		STRCPY(code, "NFA_RANGE"); break;
! 	case NFA_RANGE_MIN:	STRCPY(code, "NFA_RANGE_MIN"); break;
! 	case NFA_RANGE_MAX:	STRCPY(code, "NFA_RANGE_MAX"); break;
! 
  	case NFA_CLASS_ALNUM:	STRCPY(code, "NFA_CLASS_ALNUM"); break;
  	case NFA_CLASS_ALPHA:	STRCPY(code, "NFA_CLASS_ALPHA"); break;
  	case NFA_CLASS_BLANK:	STRCPY(code, "NFA_CLASS_BLANK"); break;
***************
*** 2231,2238 ****
  	fprintf(debugf, " %s", p);
  
      nfa_set_code(state->c);
!     fprintf(debugf, "%s%s (%d) (id=%d)\n",
! 		 state->negated ? "NOT " : "", code, state->c, abs(state->id));
      if (state->id < 0)
  	return;
  
--- 2227,2238 ----
  	fprintf(debugf, " %s", p);
  
      nfa_set_code(state->c);
!     fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n",
! 		 state->negated ? "NOT " : "",
! 		 code,
! 		 state->c,
! 		 abs(state->id),
! 		 state->val);
      if (state->id < 0)
  	return;
  
***************
*** 2325,2330 ****
--- 2325,2331 ----
      s->c    = c;
      s->out  = out;
      s->out1 = out1;
+     s->val  = 0;
  
      s->id   = istate;
      s->lastlist[0] = 0;
***************
*** 2565,2577 ****
  	switch (*p)
  	{
  	case NFA_CONCAT:
! 	    /* Catenation.
! 	     * Pay attention: this operator does not exist
! 	     * in the r.e. itself (it is implicit, really).
! 	     * It is added when r.e. is translated to postfix
! 	     * form in re2post().
! 	     *
! 	     * No new state added here. */
  	    if (nfa_calc_size == TRUE)
  	    {
  		/* nstate += 0; */
--- 2566,2575 ----
  	switch (*p)
  	{
  	case NFA_CONCAT:
! 	    /* Concatenation.
! 	     * Pay attention: this operator does not exist in the r.e. itself
! 	     * (it is implicit, really).  It is added when r.e. is translated
! 	     * to postfix form in re2post(). */
  	    if (nfa_calc_size == TRUE)
  	    {
  		/* nstate += 0; */
***************
*** 2583,2604 ****
  	    PUSH(frag(e1.start, e2.out));
  	    break;
  
- 	case NFA_NOT:
- 	    /* Negation of a character */
- 	    if (nfa_calc_size == TRUE)
- 	    {
- 		/* nstate += 0; */
- 		break;
- 	    }
- 	    e1 = POP();
- 	    e1.start->negated = TRUE;
- #ifdef FEAT_MBYTE
- 	    if (e1.start->c == NFA_COMPOSING)
- 		e1.start->out1->negated = TRUE;
- #endif
- 	    PUSH(e1);
- 	    break;
- 
  	case NFA_OR:
  	    /* Alternation */
  	    if (nfa_calc_size == TRUE)
--- 2581,2586 ----
***************
*** 2672,2677 ****
--- 2654,2696 ----
  	    PUSH(frag(s, append(e.out, list1(&s->out))));
  	    break;
  
+ 	case NFA_END_COLL:
+ 	case NFA_END_NEG_COLL:
+ 	    /* On the stack is the sequence starting with NFA_START_COLL or
+ 	     * NFA_START_NEG_COLL and all possible characters. Patch it to
+ 	     * add the output to the start. */
+ 	    if (nfa_calc_size == TRUE)
+ 	    {
+ 		nstate++;
+ 		break;
+ 	    }
+ 	    e = POP();
+ 	    s = alloc_state(NFA_END_COLL, NULL, NULL);
+ 	    if (s == NULL)
+ 		goto theend;
+ 	    patch(e.out, s);
+ 	    e.start->out1 = s;
+ 	    PUSH(frag(e.start, list1(&s->out)));
+ 	    break;
+ 
+ 	case NFA_RANGE:
+ 	    /* Before this are two characters, the low and high end of a
+ 	     * range.  Turn them into two states with MIN and MAX. */
+ 	    if (nfa_calc_size == TRUE)
+ 	    {
+ 		/* nstate += 0; */
+ 		break;
+ 	    }
+ 	    e2 = POP();
+ 	    e1 = POP();
+ 	    e2.start->val = e2.start->c;
+ 	    e2.start->c = NFA_RANGE_MAX;
+ 	    e1.start->val = e1.start->c;
+ 	    e1.start->c = NFA_RANGE_MIN;
+ 	    patch(e1.out, e2.start);
+ 	    PUSH(frag(e1.start, e2.out));
+ 	    break;
+ 
  	case NFA_SKIP_CHAR:
  	    /* Symbol of 0-length, Used in a repetition
  	     * with max/min count of 0 */
***************
*** 2990,2995 ****
--- 3009,3016 ----
      matchstate = &state_ptr[istate++]; /* the match state */
      matchstate->c = NFA_MATCH;
      matchstate->out = matchstate->out1 = NULL;
+     matchstate->negated = FALSE;
+     matchstate->id = 0;
  
      patch(e.out, matchstate);
      ret = e.start;
***************
*** 3308,3314 ****
      switch (state->c)
      {
  	case NFA_SPLIT:
- 	case NFA_NOT:
  	case NFA_NOPEN:
  	case NFA_SKIP_CHAR:
  	case NFA_NCLOSE:
--- 3329,3334 ----
***************
*** 3782,3788 ****
  
  	default:
  	    /* should not be here :P */
! 	    EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class "));
      }
      return FAIL;
  }
--- 3802,3809 ----
  
  	default:
  	    /* should not be here :P */
! 	    EMSGN("E877: (NFA regexp) Invalid character class: %ld", class);
! 	    return FAIL;
      }
      return FAIL;
  }
***************
*** 4320,4327 ****
      addstate(thislist, start, m, 0);
  
      /* There are two cases when the NFA advances: 1. input char matches the
!      * NFA node and 2. input char does not match the NFA node, but the next
!      * node is NFA_NOT. The following macro calls addstate() according to
       * these rules. It is used A LOT, so use the "listtbl" table for speed */
      listtbl[0][0] = NULL;
      listtbl[0][1] = neglist;
--- 4341,4348 ----
      addstate(thislist, start, m, 0);
  
      /* There are two cases when the NFA advances: 1. input char matches the
!      * NFA node and 2. input char does not match the NFA node and the state
!      * has the negated flag. The following macro calls addstate() according to
       * these rules. It is used A LOT, so use the "listtbl" table for speed */
      listtbl[0][0] = NULL;
      listtbl[0][1] = neglist;
***************
*** 4845,4860 ****
  		ADD_POS_NEG_STATE(t->state);
  		break;
  
! 	    case NFA_END_NEG_RANGE:
! 		/* This follows a series of negated nodes, like:
! 		 * NOT CHAR(x), NOT CHAR(y), etc. */
! 		if (curc > 0)
  		{
  		    ll = nextlist;
! 		    add_state = t->state->out;
  		    add_off = clen;
  		}
  		break;
  
  	    case NFA_ANY:
  		/* Any char except '\0', (end of input) does not match. */
--- 4866,4944 ----
  		ADD_POS_NEG_STATE(t->state);
  		break;
  
! 	    case NFA_START_COLL:
! 	    case NFA_START_NEG_COLL:
! 	      {
! 		/* What follows is a list of characters, until NFA_END_COLL.
! 		 * One of them must match or none of them must match. */
! 		nfa_state_T	*state;
! 		int		result_if_matched;
! 		int		c1, c2;
! 
! 		/* Never match EOL. If it's part of the collection it is added
! 		 * as a separate state with an OR. */
! 		if (curc == NUL)
! 		    break;
! 
! 		state = t->state->out;
! 		result_if_matched = (t->state->c == NFA_START_COLL);
! 		for (;;)
  		{
+ 		    if (state->c == NFA_END_COLL)
+ 		    {
+ 			result = !result_if_matched;
+ 			break;
+ 		    }
+ 		    if (state->c == NFA_RANGE_MIN)
+ 		    {
+ 			c1 = state->val;
+ 			state = state->out; /* advance to NFA_RANGE_MAX */
+ 			c2 = state->val;
+ #ifdef ENABLE_LOG
+ 			fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n",
+ 				curc, c1, c2);
+ #endif
+ 			if (curc >= c1 && curc <= c2)
+ 			{
+ 			    result = result_if_matched;
+ 			    break;
+ 			}
+ 			if (ireg_ic)
+ 			{
+ 			    int curc_low = MB_TOLOWER(curc);
+ 			    int done = FALSE;
+ 
+ 			    for ( ; c1 <= c2; ++c1)
+ 				if (MB_TOLOWER(c1) == curc_low)
+ 				{
+ 				    result = result_if_matched;
+ 				    done = TRUE;
+ 				    break;
+ 				}
+ 			    if (done)
+ 				break;
+ 			}
+ 		    }
+ 		    else if (state->c < 0 ? check_char_class(state->c, curc)
+ 			        : (curc == state->c
+ 				   || (ireg_ic && MB_TOLOWER(curc)
+ 						    == MB_TOLOWER(state->c))))
+ 		    {
+ 			result = result_if_matched;
+ 			break;
+ 		    }
+ 		    state = state->out;
+ 		}
+ 		if (result)
+ 		{
+ 		    /* next state is in out of the NFA_END_COLL, out1 of
+ 		     * START points to the END state */
  		    ll = nextlist;
! 		    add_state = t->state->out1->out;
  		    add_off = clen;
  		}
  		break;
+ 	      }
  
  	    case NFA_ANY:
  		/* Any char except '\0', (end of input) does not match. */
*** ../vim-7.3.1136/src/version.c	2013-06-06 21:31:02.000000000 +0200
--- src/version.c	2013-06-07 13:21:57.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1137,
  /**/

-- 
From "know your smileys":
 :.-(	Crying

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///