Karsten Hopp ca9c19
To: vim_dev@googlegroups.com
Karsten Hopp ca9c19
Subject: Patch 7.3.1011
Karsten Hopp ca9c19
Fcc: outbox
Karsten Hopp ca9c19
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp ca9c19
Mime-Version: 1.0
Karsten Hopp ca9c19
Content-Type: text/plain; charset=UTF-8
Karsten Hopp ca9c19
Content-Transfer-Encoding: 8bit
Karsten Hopp ca9c19
------------
Karsten Hopp ca9c19
Karsten Hopp ca9c19
Patch 7.3.1011
Karsten Hopp ca9c19
Problem:    New regexp engine is inefficient with multi-byte characters.
Karsten Hopp ca9c19
Solution:   Handle a character at a time instead of a byte at a time.  Also
Karsten Hopp ca9c19
            make \Z partly work.
Karsten Hopp ca9c19
Files:      src/regexp_nfa.c, src/testdir/test95.in, src/testdir/test95.ok
Karsten Hopp ca9c19
Karsten Hopp ca9c19
Karsten Hopp ca9c19
*** ../vim-7.3.1010/src/regexp_nfa.c	2013-05-24 20:25:28.000000000 +0200
Karsten Hopp ca9c19
--- src/regexp_nfa.c	2013-05-24 21:49:43.000000000 +0200
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 46,54 ****
Karsten Hopp ca9c19
      NFA_NCLOSE,			    /* End of subexpr. marked with \%( ... \) */
Karsten Hopp ca9c19
      NFA_START_INVISIBLE,
Karsten Hopp ca9c19
      NFA_END_INVISIBLE,
Karsten Hopp ca9c19
-     NFA_MULTIBYTE,		    /* Next nodes in NFA are part of the same
Karsten Hopp ca9c19
- 				       multibyte char */
Karsten Hopp ca9c19
-     NFA_END_MULTIBYTE,		    /* End of multibyte char in the NFA */
Karsten Hopp ca9c19
      NFA_COMPOSING,		    /* Next nodes in NFA are part of the
Karsten Hopp ca9c19
  				       composing multibyte char */
Karsten Hopp ca9c19
      NFA_END_COMPOSING,		    /* End of a composing char in the NFA */
Karsten Hopp ca9c19
--- 46,51 ----
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 195,220 ****
Karsten Hopp ca9c19
  		    *post_ptr++ = c;		\
Karsten Hopp ca9c19
  		} while (0)
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
- #define EMIT_MBYTE(c)					    \
Karsten Hopp ca9c19
- 			len = (*mb_char2bytes)(c, buf);	    \
Karsten Hopp ca9c19
- 			EMIT(buf[0]);			    \
Karsten Hopp ca9c19
- 			for (i = 1; i < len; i++)	    \
Karsten Hopp ca9c19
- 			{				    \
Karsten Hopp ca9c19
- 			    EMIT(buf[i]);		    \
Karsten Hopp ca9c19
- 			    EMIT(NFA_CONCAT);		    \
Karsten Hopp ca9c19
- 			}				    \
Karsten Hopp ca9c19
- 			EMIT(NFA_MULTIBYTE);
Karsten Hopp ca9c19
- 
Karsten Hopp ca9c19
- #define EMIT_COMPOSING_UTF(input)			    \
Karsten Hopp ca9c19
- 			len = utfc_ptr2len(input);	    \
Karsten Hopp ca9c19
- 			EMIT(input[0]);			    \
Karsten Hopp ca9c19
- 			for (i = 1; i < len; i++)	    \
Karsten Hopp ca9c19
- 			{				    \
Karsten Hopp ca9c19
- 			    EMIT(input[i]);		    \
Karsten Hopp ca9c19
- 			    EMIT(NFA_CONCAT);		    \
Karsten Hopp ca9c19
- 			}				    \
Karsten Hopp ca9c19
- 			EMIT(NFA_COMPOSING);
Karsten Hopp ca9c19
- 
Karsten Hopp ca9c19
  /*
Karsten Hopp ca9c19
   * Initialize internal variables before NFA compilation.
Karsten Hopp ca9c19
   * Return OK on success, FAIL otherwise.
Karsten Hopp ca9c19
--- 192,197 ----
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 611,618 ****
Karsten Hopp ca9c19
  #ifdef FEAT_MBYTE
Karsten Hopp ca9c19
      char_u	*old_regparse = regparse;
Karsten Hopp ca9c19
      int		clen;
Karsten Hopp ca9c19
-     int		len;
Karsten Hopp ca9c19
-     static char_u	buf[30];
Karsten Hopp ca9c19
      int		i;
Karsten Hopp ca9c19
  #endif
Karsten Hopp ca9c19
      int		extra = 0;
Karsten Hopp ca9c19
--- 588,593 ----
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 845,858 ****
Karsten Hopp ca9c19
  		    return FAIL;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  		    c = coll_get_char();
Karsten Hopp ca9c19
! #ifdef FEAT_MBYTE
Karsten Hopp ca9c19
! 		    if ((*mb_char2len)(c) > 1)
Karsten Hopp ca9c19
! 		    {
Karsten Hopp ca9c19
! 			EMIT_MBYTE(c);
Karsten Hopp ca9c19
! 		    }
Karsten Hopp ca9c19
! 		    else
Karsten Hopp ca9c19
! #endif
Karsten Hopp ca9c19
! 			EMIT(c);
Karsten Hopp ca9c19
  		    break;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  		/* Catch \%^ and \%$ regardless of where they appear in the
Karsten Hopp ca9c19
--- 820,826 ----
Karsten Hopp ca9c19
  		    return FAIL;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  		    c = coll_get_char();
Karsten Hopp ca9c19
! 		    EMIT(c);
Karsten Hopp ca9c19
  		    break;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  		/* Catch \%^ and \%$ regardless of where they appear in the
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 1135,1146 ****
Karsten Hopp ca9c19
  			     * skip it. */
Karsten Hopp ca9c19
  			    for (c = startc + 1; c <= endc; c++)
Karsten Hopp ca9c19
  			    {
Karsten Hopp ca9c19
! 				if ((*mb_char2len)(c) > 1)
Karsten Hopp ca9c19
! 				{
Karsten Hopp ca9c19
! 				    EMIT_MBYTE(c);
Karsten Hopp ca9c19
! 				}
Karsten Hopp ca9c19
! 				else
Karsten Hopp ca9c19
! 				    EMIT(c);
Karsten Hopp ca9c19
  				TRY_NEG();
Karsten Hopp ca9c19
  				EMIT_GLUE();
Karsten Hopp ca9c19
  			    }
Karsten Hopp ca9c19
--- 1103,1109 ----
Karsten Hopp ca9c19
  			     * skip it. */
Karsten Hopp ca9c19
  			    for (c = startc + 1; c <= endc; c++)
Karsten Hopp ca9c19
  			    {
Karsten Hopp ca9c19
! 				EMIT(c);
Karsten Hopp ca9c19
  				TRY_NEG();
Karsten Hopp ca9c19
  				EMIT_GLUE();
Karsten Hopp ca9c19
  			    }
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 1187,1200 ****
Karsten Hopp ca9c19
  			if (got_coll_char == TRUE && startc == 0)
Karsten Hopp ca9c19
  			    EMIT(0x0a);
Karsten Hopp ca9c19
  			else
Karsten Hopp ca9c19
! #ifdef FEAT_MBYTE
Karsten Hopp ca9c19
! 			    if ((*mb_char2len)(startc) > 1)
Karsten Hopp ca9c19
! 			    {
Karsten Hopp ca9c19
! 				EMIT_MBYTE(startc);
Karsten Hopp ca9c19
! 			    }
Karsten Hopp ca9c19
! 			    else
Karsten Hopp ca9c19
! #endif
Karsten Hopp ca9c19
! 				EMIT(startc);
Karsten Hopp ca9c19
  			TRY_NEG();
Karsten Hopp ca9c19
  			EMIT_GLUE();
Karsten Hopp ca9c19
  		    }
Karsten Hopp ca9c19
--- 1150,1156 ----
Karsten Hopp ca9c19
  			if (got_coll_char == TRUE && startc == 0)
Karsten Hopp ca9c19
  			    EMIT(0x0a);
Karsten Hopp ca9c19
  			else
Karsten Hopp ca9c19
! 			    EMIT(startc);
Karsten Hopp ca9c19
  			TRY_NEG();
Karsten Hopp ca9c19
  			EMIT_GLUE();
Karsten Hopp ca9c19
  		    }
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 1242,1271 ****
Karsten Hopp ca9c19
  		int	plen;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  nfa_do_multibyte:
Karsten Hopp ca9c19
! 		/* length of current char, with composing chars,
Karsten Hopp ca9c19
! 		 * from pointer */
Karsten Hopp ca9c19
! 		plen = (*mb_ptr2len)(old_regparse);
Karsten Hopp ca9c19
! 		if (enc_utf8 && clen != plen)
Karsten Hopp ca9c19
! 		{
Karsten Hopp ca9c19
! 		    /* A composing character is always handled as a
Karsten Hopp ca9c19
! 		     * separate atom, surrounded by NFA_COMPOSING and
Karsten Hopp ca9c19
! 		     * NFA_END_COMPOSING. Note that right now we are
Karsten Hopp ca9c19
  		     * building the postfix form, not the NFA itself;
Karsten Hopp ca9c19
  		     * a composing char could be: a, b, c, NFA_COMPOSING
Karsten Hopp ca9c19
! 		     * where 'a', 'b', 'c' are chars with codes > 256.
Karsten Hopp ca9c19
! 		     */
Karsten Hopp ca9c19
! 		    EMIT_COMPOSING_UTF(old_regparse);
Karsten Hopp ca9c19
  		    regparse = old_regparse + plen;
Karsten Hopp ca9c19
  		}
Karsten Hopp ca9c19
  		else
Karsten Hopp ca9c19
- 		    /* A multi-byte character is always handled as a
Karsten Hopp ca9c19
- 		     * separate atom, surrounded by NFA_MULTIBYTE and
Karsten Hopp ca9c19
- 		     * NFA_END_MULTIBYTE */
Karsten Hopp ca9c19
- 		    if (plen > 1)
Karsten Hopp ca9c19
- 		    {
Karsten Hopp ca9c19
- 			EMIT_MBYTE(c);
Karsten Hopp ca9c19
- 		    }
Karsten Hopp ca9c19
- 		    else
Karsten Hopp ca9c19
  #endif
Karsten Hopp ca9c19
  		{
Karsten Hopp ca9c19
  		    c = no_Magic(c);
Karsten Hopp ca9c19
--- 1198,1227 ----
Karsten Hopp ca9c19
  		int	plen;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  nfa_do_multibyte:
Karsten Hopp ca9c19
! 		/* Length of current char with composing chars. */
Karsten Hopp ca9c19
! 		if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse)))
Karsten Hopp ca9c19
! 		{
Karsten Hopp ca9c19
! 		    /* A base character plus composing characters.
Karsten Hopp ca9c19
! 		     * This requires creating a separate atom as if enclosing
Karsten Hopp ca9c19
! 		     * the characters in (), where NFA_COMPOSING is the ( and
Karsten Hopp ca9c19
! 		     * NFA_END_COMPOSING is the ). Note that right now we are
Karsten Hopp ca9c19
  		     * building the postfix form, not the NFA itself;
Karsten Hopp ca9c19
  		     * a composing char could be: a, b, c, NFA_COMPOSING
Karsten Hopp ca9c19
! 		     * where 'b' and 'c' are chars with codes > 256. */
Karsten Hopp ca9c19
! 		    i = 0;
Karsten Hopp ca9c19
! 		    for (;;)
Karsten Hopp ca9c19
! 		    {
Karsten Hopp ca9c19
! 			EMIT(c);
Karsten Hopp ca9c19
! 			if (i > 0)
Karsten Hopp ca9c19
! 			    EMIT(NFA_CONCAT);
Karsten Hopp ca9c19
! 			if ((i += utf_char2len(c)) >= plen)
Karsten Hopp ca9c19
! 			    break;
Karsten Hopp ca9c19
! 			c = utf_ptr2char(old_regparse + i);
Karsten Hopp ca9c19
! 		    }
Karsten Hopp ca9c19
! 		    EMIT(NFA_COMPOSING);
Karsten Hopp ca9c19
  		    regparse = old_regparse + plen;
Karsten Hopp ca9c19
  		}
Karsten Hopp ca9c19
  		else
Karsten Hopp ca9c19
  #endif
Karsten Hopp ca9c19
  		{
Karsten Hopp ca9c19
  		    c = no_Magic(c);
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 1702,1710 ****
Karsten Hopp ca9c19
  	case NFA_START_INVISIBLE:   STRCPY(code, "NFA_START_INVISIBLE"); break;
Karsten Hopp ca9c19
  	case NFA_END_INVISIBLE:	    STRCPY(code, "NFA_END_INVISIBLE"); break;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
- 	case NFA_MULTIBYTE:	    STRCPY(code, "NFA_MULTIBYTE"); break;
Karsten Hopp ca9c19
- 	case NFA_END_MULTIBYTE:	    STRCPY(code, "NFA_END_MULTIBYTE"); break;
Karsten Hopp ca9c19
- 
Karsten Hopp ca9c19
  	case NFA_COMPOSING:	    STRCPY(code, "NFA_COMPOSING"); break;
Karsten Hopp ca9c19
  	case NFA_END_COMPOSING:	    STRCPY(code, "NFA_END_COMPOSING"); break;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
--- 1658,1663 ----
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 2194,2200 ****
Karsten Hopp ca9c19
  	    }
Karsten Hopp ca9c19
  	    e1 = POP();
Karsten Hopp ca9c19
  	    e1.start->negated = TRUE;
Karsten Hopp ca9c19
! 	    if (e1.start->c == NFA_MULTIBYTE || e1.start->c == NFA_COMPOSING)
Karsten Hopp ca9c19
  		e1.start->out1->negated = TRUE;
Karsten Hopp ca9c19
  	    PUSH(e1);
Karsten Hopp ca9c19
  	    break;
Karsten Hopp ca9c19
--- 2147,2153 ----
Karsten Hopp ca9c19
  	    }
Karsten Hopp ca9c19
  	    e1 = POP();
Karsten Hopp ca9c19
  	    e1.start->negated = TRUE;
Karsten Hopp ca9c19
! 	    if (e1.start->c == NFA_COMPOSING)
Karsten Hopp ca9c19
  		e1.start->out1->negated = TRUE;
Karsten Hopp ca9c19
  	    PUSH(e1);
Karsten Hopp ca9c19
  	    break;
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 2311,2316 ****
Karsten Hopp ca9c19
--- 2264,2279 ----
Karsten Hopp ca9c19
  	    PUSH(frag(s, list1(&s1->out)));
Karsten Hopp ca9c19
  	    break;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
+ 	case NFA_COMPOSING:	/* char with composing char */
Karsten Hopp ca9c19
+ #if 0
Karsten Hopp ca9c19
+ 	    /* TODO */
Karsten Hopp ca9c19
+ 	    if (regflags & RF_ICOMBINE)
Karsten Hopp ca9c19
+ 	    {
Karsten Hopp ca9c19
+ 		goto normalchar;
Karsten Hopp ca9c19
+ 	    }
Karsten Hopp ca9c19
+ #endif
Karsten Hopp ca9c19
+ 	    /* FALLTHROUGH */
Karsten Hopp ca9c19
+ 
Karsten Hopp ca9c19
  	case NFA_MOPEN + 0:	/* Submatch */
Karsten Hopp ca9c19
  	case NFA_MOPEN + 1:
Karsten Hopp ca9c19
  	case NFA_MOPEN + 2:
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 2322,2329 ****
Karsten Hopp ca9c19
  	case NFA_MOPEN + 8:
Karsten Hopp ca9c19
  	case NFA_MOPEN + 9:
Karsten Hopp ca9c19
  	case NFA_NOPEN:		/* \%( "Invisible Submatch" */
Karsten Hopp ca9c19
- 	case NFA_MULTIBYTE:	/* mbyte char */
Karsten Hopp ca9c19
- 	case NFA_COMPOSING:	/* composing char */
Karsten Hopp ca9c19
  	    if (nfa_calc_size == TRUE)
Karsten Hopp ca9c19
  	    {
Karsten Hopp ca9c19
  		nstate += 2;
Karsten Hopp ca9c19
--- 2285,2290 ----
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 2336,2344 ****
Karsten Hopp ca9c19
  		case NFA_NOPEN:
Karsten Hopp ca9c19
  		    mclose = NFA_NCLOSE;
Karsten Hopp ca9c19
  		    break;
Karsten Hopp ca9c19
- 		case NFA_MULTIBYTE:
Karsten Hopp ca9c19
- 		    mclose = NFA_END_MULTIBYTE;
Karsten Hopp ca9c19
- 		    break;
Karsten Hopp ca9c19
  		case NFA_COMPOSING:
Karsten Hopp ca9c19
  		    mclose = NFA_END_COMPOSING;
Karsten Hopp ca9c19
  		    break;
Karsten Hopp ca9c19
--- 2297,2302 ----
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 2377,2385 ****
Karsten Hopp ca9c19
  		goto theend;
Karsten Hopp ca9c19
  	    patch(e.out, s1);
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
! 	    if (mopen == NFA_MULTIBYTE || mopen == NFA_COMPOSING)
Karsten Hopp ca9c19
! 		/* MULTIBYTE->out1 = END_MULTIBYTE
Karsten Hopp ca9c19
! 		* COMPOSING->out1 = END_COMPOSING */
Karsten Hopp ca9c19
  		patch(list1(&s->out1), s1);
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  	    PUSH(frag(s, list1(&s1->out)));
Karsten Hopp ca9c19
--- 2335,2342 ----
Karsten Hopp ca9c19
  		goto theend;
Karsten Hopp ca9c19
  	    patch(e.out, s1);
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
! 	    if (mopen == NFA_COMPOSING)
Karsten Hopp ca9c19
! 		/* COMPOSING->out1 = END_COMPOSING */
Karsten Hopp ca9c19
  		patch(list1(&s->out1), s1);
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  	    PUSH(frag(s, list1(&s1->out)));
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 2540,2556 ****
Karsten Hopp ca9c19
  	case NFA_COMPOSING:
Karsten Hopp ca9c19
  	    /* nfa_regmatch() will match all the bytes of this composing char. */
Karsten Hopp ca9c19
  	    break;
Karsten Hopp ca9c19
- 
Karsten Hopp ca9c19
- 	case NFA_MULTIBYTE:
Karsten Hopp ca9c19
- 	    /* nfa_regmatch() will match all the bytes of this multibyte char. */
Karsten Hopp ca9c19
- 	    break;
Karsten Hopp ca9c19
  #endif
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
- 	case NFA_END_MULTIBYTE:
Karsten Hopp ca9c19
- 	    /* Successfully matched this mbyte char */
Karsten Hopp ca9c19
- 	    addstate(l, state->out, m, off, lid, match);
Karsten Hopp ca9c19
- 	    break;
Karsten Hopp ca9c19
- 
Karsten Hopp ca9c19
  	case NFA_NOPEN:
Karsten Hopp ca9c19
  	case NFA_NCLOSE:
Karsten Hopp ca9c19
  	    addstate(l, state->out, m, off, lid, match);
Karsten Hopp ca9c19
--- 2497,2504 ----
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 2841,2847 ****
Karsten Hopp ca9c19
      regsub_T		*submatch;
Karsten Hopp ca9c19
      regsub_T		*m;
Karsten Hopp ca9c19
  {
Karsten Hopp ca9c19
!     int		c = -1;
Karsten Hopp ca9c19
      int		n;
Karsten Hopp ca9c19
      int		i = 0;
Karsten Hopp ca9c19
      int		result;
Karsten Hopp ca9c19
--- 2789,2795 ----
Karsten Hopp ca9c19
      regsub_T		*submatch;
Karsten Hopp ca9c19
      regsub_T		*m;
Karsten Hopp ca9c19
  {
Karsten Hopp ca9c19
!     int		c;
Karsten Hopp ca9c19
      int		n;
Karsten Hopp ca9c19
      int		i = 0;
Karsten Hopp ca9c19
      int		result;
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 2859,2865 ****
Karsten Hopp ca9c19
      List	*listtbl[2][2];
Karsten Hopp ca9c19
      List	*ll;
Karsten Hopp ca9c19
      int		listid = 1;
Karsten Hopp ca9c19
-     int		endnode;
Karsten Hopp ca9c19
      List	*thislist;
Karsten Hopp ca9c19
      List	*nextlist;
Karsten Hopp ca9c19
      List	*neglist;
Karsten Hopp ca9c19
--- 2807,2812 ----
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 3190,3222 ****
Karsten Hopp ca9c19
  		break;
Karsten Hopp ca9c19
  	    }
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
! 	    case NFA_MULTIBYTE:
Karsten Hopp ca9c19
  	    case NFA_COMPOSING:
Karsten Hopp ca9c19
! 	        endnode = t->state->c + 1;
Karsten Hopp ca9c19
  		result = OK;
Karsten Hopp ca9c19
  		sta = t->state->out;
Karsten Hopp ca9c19
! 		len = 1;
Karsten Hopp ca9c19
! 		while (sta->c != endnode && len <= n)
Karsten Hopp ca9c19
  		{
Karsten Hopp ca9c19
! 		    if (reginput[len-1] != sta->c)
Karsten Hopp ca9c19
! 		    {
Karsten Hopp ca9c19
! 			result = FAIL;
Karsten Hopp ca9c19
  			break;
Karsten Hopp ca9c19
! 		    }
Karsten Hopp ca9c19
! 		    len++;
Karsten Hopp ca9c19
  		    sta = sta->out;
Karsten Hopp ca9c19
  		}
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  		/* if input char length doesn't match regexp char length */
Karsten Hopp ca9c19
! 		if (len -1 < n || sta->c != endnode)
Karsten Hopp ca9c19
  		    result = FAIL;
Karsten Hopp ca9c19
! 		end = t->state->out1;	    /* NFA_END_MULTIBYTE or
Karsten Hopp ca9c19
! 					       NFA_END_COMPOSING */
Karsten Hopp ca9c19
  		/* If \Z was present, then ignore composing characters */
Karsten Hopp ca9c19
! 		if (ireg_icombine && endnode == NFA_END_COMPOSING)
Karsten Hopp ca9c19
  		    result = 1 ^ sta->negated;
Karsten Hopp ca9c19
  		ADD_POS_NEG_STATE(end);
Karsten Hopp ca9c19
  		break;
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  	    case NFA_NEWL:
Karsten Hopp ca9c19
  		if (!reg_line_lbr && REG_MULTI
Karsten Hopp ca9c19
--- 3137,3171 ----
Karsten Hopp ca9c19
  		break;
Karsten Hopp ca9c19
  	    }
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
! #ifdef FEAT_MBYTE
Karsten Hopp ca9c19
  	    case NFA_COMPOSING:
Karsten Hopp ca9c19
! 	    {
Karsten Hopp ca9c19
! 		int mc = c;
Karsten Hopp ca9c19
! 
Karsten Hopp ca9c19
  		result = OK;
Karsten Hopp ca9c19
  		sta = t->state->out;
Karsten Hopp ca9c19
! 		len = 0;
Karsten Hopp ca9c19
! 		while (sta->c != NFA_END_COMPOSING && len < n)
Karsten Hopp ca9c19
  		{
Karsten Hopp ca9c19
! 		    if (len > 0)
Karsten Hopp ca9c19
! 			mc = mb_ptr2char(reginput + len);
Karsten Hopp ca9c19
! 		    if (mc != sta->c)
Karsten Hopp ca9c19
  			break;
Karsten Hopp ca9c19
! 		    len += mb_char2len(mc);
Karsten Hopp ca9c19
  		    sta = sta->out;
Karsten Hopp ca9c19
  		}
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  		/* if input char length doesn't match regexp char length */
Karsten Hopp ca9c19
! 		if (len < n || sta->c != NFA_END_COMPOSING)
Karsten Hopp ca9c19
  		    result = FAIL;
Karsten Hopp ca9c19
! 		end = t->state->out1;	    /* NFA_END_COMPOSING */
Karsten Hopp ca9c19
  		/* If \Z was present, then ignore composing characters */
Karsten Hopp ca9c19
! 		if (ireg_icombine)
Karsten Hopp ca9c19
  		    result = 1 ^ sta->negated;
Karsten Hopp ca9c19
  		ADD_POS_NEG_STATE(end);
Karsten Hopp ca9c19
  		break;
Karsten Hopp ca9c19
+ 	    }
Karsten Hopp ca9c19
+ #endif
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
  	    case NFA_NEWL:
Karsten Hopp ca9c19
  		if (!reg_line_lbr && REG_MULTI
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 3425,3430 ****
Karsten Hopp ca9c19
--- 3374,3387 ----
Karsten Hopp ca9c19
  		if (!result)
Karsten Hopp ca9c19
  		    result = ireg_ic == TRUE
Karsten Hopp ca9c19
  				&& MB_TOLOWER(t->state->c) == MB_TOLOWER(c);
Karsten Hopp ca9c19
+ #ifdef FEAT_MBYTE
Karsten Hopp ca9c19
+ 		/* If there is a composing character which is not being
Karsten Hopp ca9c19
+ 		 * ignored there can be no match. Match with composing
Karsten Hopp ca9c19
+ 		 * character uses NFA_COMPOSING above. */
Karsten Hopp ca9c19
+ 		if (result && enc_utf8 && !ireg_icombine
Karsten Hopp ca9c19
+ 						      && n != utf_char2len(c))
Karsten Hopp ca9c19
+ 		    result = FALSE;
Karsten Hopp ca9c19
+ #endif
Karsten Hopp ca9c19
  		ADD_POS_NEG_STATE(t->state);
Karsten Hopp ca9c19
  		break;
Karsten Hopp ca9c19
  	    }
Karsten Hopp ca9c19
*** ../vim-7.3.1010/src/testdir/test95.in	2013-05-24 20:25:28.000000000 +0200
Karsten Hopp ca9c19
--- src/testdir/test95.in	2013-05-24 20:45:08.000000000 +0200
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 35,40 ****
Karsten Hopp ca9c19
--- 35,44 ----
Karsten Hopp ca9c19
  :call add(tl, ['\f\+', '&*Ÿfname ', 'fname'])
Karsten Hopp ca9c19
  :call add(tl, ['\%#=1\f\+', '&*Ÿfname ', 'fname'])
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
+ :"""" Test composing character matching
Karsten Hopp ca9c19
+ :call add(tl, ['.ม', 'xม่x yมy', 'yม'])
Karsten Hopp ca9c19
+ :call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
Karsten Hopp ca9c19
+ 
Karsten Hopp ca9c19
  :"""" Test \Z
Karsten Hopp ca9c19
  :call add(tl, ['ú\Z', 'x'])
Karsten Hopp ca9c19
  
Karsten Hopp ca9c19
*** ../vim-7.3.1010/src/testdir/test95.ok	2013-05-24 20:25:28.000000000 +0200
Karsten Hopp ca9c19
--- src/testdir/test95.ok	2013-05-24 20:44:41.000000000 +0200
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 9,13 ****
Karsten Hopp ca9c19
--- 9,15 ----
Karsten Hopp ca9c19
  OK - \%#=1\i\+
Karsten Hopp ca9c19
  OK - \f\+
Karsten Hopp ca9c19
  OK - \%#=1\f\+
Karsten Hopp ca9c19
+ OK - .ม
Karsten Hopp ca9c19
+ OK - .ม่
Karsten Hopp ca9c19
  OK - ú\Z
Karsten Hopp ca9c19
  OK - [^[=a=]]\+
Karsten Hopp ca9c19
*** ../vim-7.3.1010/src/version.c	2013-05-24 20:25:28.000000000 +0200
Karsten Hopp ca9c19
--- src/version.c	2013-05-24 21:56:02.000000000 +0200
Karsten Hopp ca9c19
***************
Karsten Hopp ca9c19
*** 730,731 ****
Karsten Hopp ca9c19
--- 730,733 ----
Karsten Hopp ca9c19
  {   /* Add new patch number below this line */
Karsten Hopp ca9c19
+ /**/
Karsten Hopp ca9c19
+     1011,
Karsten Hopp ca9c19
  /**/
Karsten Hopp ca9c19
Karsten Hopp ca9c19
-- 
Karsten Hopp ca9c19
If you had to identify, in one word, the reason why the
Karsten Hopp ca9c19
human race has not achieved, and never will achieve, its
Karsten Hopp ca9c19
full potential, that word would be "meetings."
Karsten Hopp ca9c19
Karsten Hopp ca9c19
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp ca9c19
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp ca9c19
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp ca9c19
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///