Karsten Hopp b9a68f
To: vim_dev@googlegroups.com
Karsten Hopp b9a68f
Subject: Patch 7.4.293
Karsten Hopp b9a68f
Fcc: outbox
Karsten Hopp b9a68f
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp b9a68f
Mime-Version: 1.0
Karsten Hopp b9a68f
Content-Type: text/plain; charset=UTF-8
Karsten Hopp b9a68f
Content-Transfer-Encoding: 8bit
Karsten Hopp b9a68f
------------
Karsten Hopp b9a68f
Karsten Hopp b9a68f
Patch 7.4.293
Karsten Hopp b9a68f
Problem:    It is not possible to ignore composing characters at a specific
Karsten Hopp b9a68f
	    point in a pattern.
Karsten Hopp b9a68f
Solution:   Add the \%C item.
Karsten Hopp b9a68f
Files:	    src/regexp.c, src/regexp_nfa.c, src/testdir/test95.in,
Karsten Hopp b9a68f
	    src/testdir/test95.ok, runtime/doc/pattern.txt
Karsten Hopp b9a68f
Karsten Hopp b9a68f
Karsten Hopp b9a68f
*** ../vim-7.4.292/src/regexp.c	2014-05-13 18:03:55.729737466 +0200
Karsten Hopp b9a68f
--- src/regexp.c	2014-05-13 18:27:08.725749659 +0200
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 244,249 ****
Karsten Hopp b9a68f
--- 244,250 ----
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  #define RE_MARK		207	/* mark cmp  Match mark position */
Karsten Hopp b9a68f
  #define RE_VISUAL	208	/*	Match Visual area */
Karsten Hopp b9a68f
+ #define RE_COMPOSING	209	/* any composing characters */
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  /*
Karsten Hopp b9a68f
   * Magic characters have a special meaning, they don't match literally.
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 2208,2213 ****
Karsten Hopp b9a68f
--- 2209,2218 ----
Karsten Hopp b9a68f
  		    ret = regnode(RE_VISUAL);
Karsten Hopp b9a68f
  		    break;
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
+ 		case 'C':
Karsten Hopp b9a68f
+ 		    ret = regnode(RE_COMPOSING);
Karsten Hopp b9a68f
+ 		    break;
Karsten Hopp b9a68f
+ 
Karsten Hopp b9a68f
  		/* \%[abc]: Emit as a list of branches, all ending at the last
Karsten Hopp b9a68f
  		 * branch which matches nothing. */
Karsten Hopp b9a68f
  		case '[':
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 4710,4720 ****
Karsten Hopp b9a68f
  			    status = RA_NOMATCH;
Karsten Hopp b9a68f
  		    }
Karsten Hopp b9a68f
  #ifdef FEAT_MBYTE
Karsten Hopp b9a68f
! 		    /* Check for following composing character. */
Karsten Hopp b9a68f
  		    if (status != RA_NOMATCH
Karsten Hopp b9a68f
  			    && enc_utf8
Karsten Hopp b9a68f
  			    && UTF_COMPOSINGLIKE(reginput, reginput + len)
Karsten Hopp b9a68f
! 			    && !ireg_icombine)
Karsten Hopp b9a68f
  		    {
Karsten Hopp b9a68f
  			/* raaron: This code makes a composing character get
Karsten Hopp b9a68f
  			 * ignored, which is the correct behavior (sometimes)
Karsten Hopp b9a68f
--- 4715,4727 ----
Karsten Hopp b9a68f
  			    status = RA_NOMATCH;
Karsten Hopp b9a68f
  		    }
Karsten Hopp b9a68f
  #ifdef FEAT_MBYTE
Karsten Hopp b9a68f
! 		    /* Check for following composing character, unless %C
Karsten Hopp b9a68f
! 		     * follows (skips over all composing chars). */
Karsten Hopp b9a68f
  		    if (status != RA_NOMATCH
Karsten Hopp b9a68f
  			    && enc_utf8
Karsten Hopp b9a68f
  			    && UTF_COMPOSINGLIKE(reginput, reginput + len)
Karsten Hopp b9a68f
! 			    && !ireg_icombine
Karsten Hopp b9a68f
! 			    && OP(next) != RE_COMPOSING)
Karsten Hopp b9a68f
  		    {
Karsten Hopp b9a68f
  			/* raaron: This code makes a composing character get
Karsten Hopp b9a68f
  			 * ignored, which is the correct behavior (sometimes)
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 4791,4796 ****
Karsten Hopp b9a68f
--- 4798,4813 ----
Karsten Hopp b9a68f
  		status = RA_NOMATCH;
Karsten Hopp b9a68f
  	    break;
Karsten Hopp b9a68f
  #endif
Karsten Hopp b9a68f
+ 	  case RE_COMPOSING:
Karsten Hopp b9a68f
+ #ifdef FEAT_MBYTE
Karsten Hopp b9a68f
+ 	    if (enc_utf8)
Karsten Hopp b9a68f
+ 	    {
Karsten Hopp b9a68f
+ 		/* Skip composing characters. */
Karsten Hopp b9a68f
+ 		while (utf_iscomposing(utf_ptr2char(reginput)))
Karsten Hopp b9a68f
+ 		    mb_cptr_adv(reginput);
Karsten Hopp b9a68f
+ 	    }
Karsten Hopp b9a68f
+ #endif
Karsten Hopp b9a68f
+ 	    break;
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  	  case NOTHING:
Karsten Hopp b9a68f
  	    break;
Karsten Hopp b9a68f
*** ../vim-7.4.292/src/regexp_nfa.c	2014-05-13 16:44:25.633695709 +0200
Karsten Hopp b9a68f
--- src/regexp_nfa.c	2014-05-13 19:25:58.285780556 +0200
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 81,86 ****
Karsten Hopp b9a68f
--- 81,87 ----
Karsten Hopp b9a68f
      NFA_COMPOSING,		    /* Next nodes in NFA are part of the
Karsten Hopp b9a68f
  				       composing multibyte char */
Karsten Hopp b9a68f
      NFA_END_COMPOSING,		    /* End of a composing char in the NFA */
Karsten Hopp b9a68f
+     NFA_ANY_COMPOSING,		    /* \%C: Any composing characters. */
Karsten Hopp b9a68f
      NFA_OPT_CHARS,		    /* \%[abc] */
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
      /* The following are used only in the postfix form, not in the NFA */
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 1418,1423 ****
Karsten Hopp b9a68f
--- 1419,1428 ----
Karsten Hopp b9a68f
  		    EMIT(NFA_VISUAL);
Karsten Hopp b9a68f
  		    break;
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
+ 		case 'C':
Karsten Hopp b9a68f
+ 		    EMIT(NFA_ANY_COMPOSING);
Karsten Hopp b9a68f
+ 		    break;
Karsten Hopp b9a68f
+ 
Karsten Hopp b9a68f
  		case '[':
Karsten Hopp b9a68f
  		    {
Karsten Hopp b9a68f
  			int	    n;
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 2429,2434 ****
Karsten Hopp b9a68f
--- 2434,2440 ----
Karsten Hopp b9a68f
  	case NFA_MARK_LT:	STRCPY(code, "NFA_MARK_LT "); break;
Karsten Hopp b9a68f
  	case NFA_CURSOR:	STRCPY(code, "NFA_CURSOR "); break;
Karsten Hopp b9a68f
  	case NFA_VISUAL:	STRCPY(code, "NFA_VISUAL "); break;
Karsten Hopp b9a68f
+ 	case NFA_ANY_COMPOSING:	STRCPY(code, "NFA_ANY_COMPOSING "); break;
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  	case NFA_STAR:		STRCPY(code, "NFA_STAR "); break;
Karsten Hopp b9a68f
  	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 2967,2972 ****
Karsten Hopp b9a68f
--- 2973,2979 ----
Karsten Hopp b9a68f
  	    case NFA_NLOWER_IC:
Karsten Hopp b9a68f
  	    case NFA_UPPER_IC:
Karsten Hopp b9a68f
  	    case NFA_NUPPER_IC:
Karsten Hopp b9a68f
+ 	    case NFA_ANY_COMPOSING:
Karsten Hopp b9a68f
  		/* possibly non-ascii */
Karsten Hopp b9a68f
  #ifdef FEAT_MBYTE
Karsten Hopp b9a68f
  		if (has_mbyte)
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 4152,4157 ****
Karsten Hopp b9a68f
--- 4159,4165 ----
Karsten Hopp b9a68f
  		continue;
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  	    case NFA_ANY:
Karsten Hopp b9a68f
+ 	    case NFA_ANY_COMPOSING:
Karsten Hopp b9a68f
  	    case NFA_IDENT:
Karsten Hopp b9a68f
  	    case NFA_SIDENT:
Karsten Hopp b9a68f
  	    case NFA_KWORD:
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 4395,4401 ****
Karsten Hopp b9a68f
      switch (state->c)
Karsten Hopp b9a68f
      {
Karsten Hopp b9a68f
  	case NFA_MATCH:
Karsten Hopp b9a68f
! 	    nfa_match = TRUE;
Karsten Hopp b9a68f
  	    break;
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  	case NFA_SPLIT:
Karsten Hopp b9a68f
--- 4403,4409 ----
Karsten Hopp b9a68f
      switch (state->c)
Karsten Hopp b9a68f
      {
Karsten Hopp b9a68f
  	case NFA_MATCH:
Karsten Hopp b9a68f
! //	    nfa_match = TRUE;
Karsten Hopp b9a68f
  	    break;
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  	case NFA_SPLIT:
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 5151,5156 ****
Karsten Hopp b9a68f
--- 5159,5165 ----
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  	case NFA_MATCH:
Karsten Hopp b9a68f
  	case NFA_MCLOSE:
Karsten Hopp b9a68f
+ 	case NFA_ANY_COMPOSING:
Karsten Hopp b9a68f
  	    /* empty match works always */
Karsten Hopp b9a68f
  	    return 0;
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 5573,5578 ****
Karsten Hopp b9a68f
--- 5582,5593 ----
Karsten Hopp b9a68f
  	    {
Karsten Hopp b9a68f
  	    case NFA_MATCH:
Karsten Hopp b9a68f
  	      {
Karsten Hopp b9a68f
+ #ifdef FEAT_MBYTE
Karsten Hopp b9a68f
+ 		/* If the match ends before a composing characters and
Karsten Hopp b9a68f
+ 		 * ireg_icombine is not set, that is not really a match. */
Karsten Hopp b9a68f
+ 		if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc))
Karsten Hopp b9a68f
+ 		    break;
Karsten Hopp b9a68f
+ #endif
Karsten Hopp b9a68f
  		nfa_match = TRUE;
Karsten Hopp b9a68f
  		copy_sub(&submatch->norm, &t->subs.norm);
Karsten Hopp b9a68f
  #ifdef FEAT_SYN_HL
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 6120,6125 ****
Karsten Hopp b9a68f
--- 6135,6157 ----
Karsten Hopp b9a68f
  		}
Karsten Hopp b9a68f
  		break;
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
+ 	    case NFA_ANY_COMPOSING:
Karsten Hopp b9a68f
+ 		/* On a composing character skip over it.  Otherwise do
Karsten Hopp b9a68f
+ 		 * nothing.  Always matches. */
Karsten Hopp b9a68f
+ #ifdef FEAT_MBYTE
Karsten Hopp b9a68f
+ 		if (enc_utf8 && utf_iscomposing(curc))
Karsten Hopp b9a68f
+ 		{
Karsten Hopp b9a68f
+ 		    add_off = clen;
Karsten Hopp b9a68f
+ 		}
Karsten Hopp b9a68f
+ 		else
Karsten Hopp b9a68f
+ #endif
Karsten Hopp b9a68f
+ 		{
Karsten Hopp b9a68f
+ 		    add_here = TRUE;
Karsten Hopp b9a68f
+ 		    add_off = 0;
Karsten Hopp b9a68f
+ 		}
Karsten Hopp b9a68f
+ 		add_state = t->state->out;
Karsten Hopp b9a68f
+ 		break;
Karsten Hopp b9a68f
+ 
Karsten Hopp b9a68f
  	    /*
Karsten Hopp b9a68f
  	     * Character classes like \a for alpha, \d for digit etc.
Karsten Hopp b9a68f
  	     */
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 6484,6495 ****
Karsten Hopp b9a68f
  		if (!result && ireg_ic)
Karsten Hopp b9a68f
  		    result = MB_TOLOWER(c) == MB_TOLOWER(curc);
Karsten Hopp b9a68f
  #ifdef FEAT_MBYTE
Karsten Hopp b9a68f
! 		/* If there is a composing character which is not being
Karsten Hopp b9a68f
! 		 * ignored there can be no match. Match with composing
Karsten Hopp b9a68f
! 		 * character uses NFA_COMPOSING above. */
Karsten Hopp b9a68f
! 		if (result && enc_utf8 && !ireg_icombine
Karsten Hopp b9a68f
! 						&& clen != utf_char2len(curc))
Karsten Hopp b9a68f
! 		    result = FALSE;
Karsten Hopp b9a68f
  #endif
Karsten Hopp b9a68f
  		ADD_STATE_IF_MATCH(t->state);
Karsten Hopp b9a68f
  		break;
Karsten Hopp b9a68f
--- 6516,6525 ----
Karsten Hopp b9a68f
  		if (!result && ireg_ic)
Karsten Hopp b9a68f
  		    result = MB_TOLOWER(c) == MB_TOLOWER(curc);
Karsten Hopp b9a68f
  #ifdef FEAT_MBYTE
Karsten Hopp b9a68f
! 		/* If ireg_icombine is not set only skip over the character
Karsten Hopp b9a68f
! 		 * itself.  When it is set skip over composing characters. */
Karsten Hopp b9a68f
! 		if (result && enc_utf8 && !ireg_icombine)
Karsten Hopp b9a68f
! 		    clen = utf_char2len(curc);
Karsten Hopp b9a68f
  #endif
Karsten Hopp b9a68f
  		ADD_STATE_IF_MATCH(t->state);
Karsten Hopp b9a68f
  		break;
Karsten Hopp b9a68f
diff: ../vim-7.4.292/src/testdir/test95.insrc/testdir/test95.ok,: No such file or directory
Karsten Hopp b9a68f
diff: src/testdir/test95.insrc/testdir/test95.ok,: No such file or directory
Karsten Hopp b9a68f
*** ../vim-7.4.292/runtime/doc/pattern.txt	2013-08-10 13:24:59.000000000 +0200
Karsten Hopp b9a68f
--- runtime/doc/pattern.txt	2014-05-13 18:59:57.621766895 +0200
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 545,550 ****
Karsten Hopp b9a68f
--- 545,551 ----
Karsten Hopp b9a68f
  |/\%u|	\%u	\%u	match specified multibyte character (eg \%u20ac)
Karsten Hopp b9a68f
  |/\%U|	\%U	\%U	match specified large multibyte character (eg
Karsten Hopp b9a68f
  			\%U12345678)
Karsten Hopp b9a68f
+ |/\%C|	\%C	\%C	match any composing characters
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  Example			matches ~
Karsten Hopp b9a68f
  \<\I\i*		or
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 1207,1218 ****
Karsten Hopp b9a68f
  8. Composing characters					*patterns-composing*
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  							*/\Z*
Karsten Hopp b9a68f
! When "\Z" appears anywhere in the pattern, composing characters are ignored.
Karsten Hopp b9a68f
! Thus only the base characters need to match, the composing characters may be
Karsten Hopp b9a68f
! different and the number of composing characters may differ.  Only relevant
Karsten Hopp b9a68f
! when 'encoding' is "utf-8".
Karsten Hopp b9a68f
  Exception: If the pattern starts with one or more composing characters, these
Karsten Hopp b9a68f
  must match.
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  When a composing character appears at the start of the pattern of after an
Karsten Hopp b9a68f
  item that doesn't include the composing character, a match is found at any
Karsten Hopp b9a68f
--- 1208,1225 ----
Karsten Hopp b9a68f
  8. Composing characters					*patterns-composing*
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  							*/\Z*
Karsten Hopp b9a68f
! When "\Z" appears anywhere in the pattern, all composing characters are
Karsten Hopp b9a68f
! ignored.  Thus only the base characters need to match, the composing
Karsten Hopp b9a68f
! characters may be different and the number of composing characters may differ.
Karsten Hopp b9a68f
! Only relevant when 'encoding' is "utf-8".
Karsten Hopp b9a68f
  Exception: If the pattern starts with one or more composing characters, these
Karsten Hopp b9a68f
  must match.
Karsten Hopp b9a68f
+ 							*/\%C*
Karsten Hopp b9a68f
+ Use "\%C" to skip any composing characters.  For example, the pattern "a" does
Karsten Hopp b9a68f
+ not match in "càt" (where the a has the composing character 0x0300), but
Karsten Hopp b9a68f
+ "a\%C" does.  Note that this does not match "cát" (where the á is character
Karsten Hopp b9a68f
+ 0xe1, it does not have a composing character).  It does match "cat" (where
Karsten Hopp b9a68f
+ the a is just an a).
Karsten Hopp b9a68f
  
Karsten Hopp b9a68f
  When a composing character appears at the start of the pattern of after an
Karsten Hopp b9a68f
  item that doesn't include the composing character, a match is found at any
Karsten Hopp b9a68f
*** ../vim-7.4.292/src/version.c	2014-05-13 18:03:55.729737466 +0200
Karsten Hopp b9a68f
--- src/version.c	2014-05-13 18:28:45.885750510 +0200
Karsten Hopp b9a68f
***************
Karsten Hopp b9a68f
*** 736,737 ****
Karsten Hopp b9a68f
--- 736,739 ----
Karsten Hopp b9a68f
  {   /* Add new patch number below this line */
Karsten Hopp b9a68f
+ /**/
Karsten Hopp b9a68f
+     293,
Karsten Hopp b9a68f
  /**/
Karsten Hopp b9a68f
Karsten Hopp b9a68f
-- 
Karsten Hopp b9a68f
hundred-and-one symptoms of being an internet addict:
Karsten Hopp b9a68f
155. You forget to eat because you're too busy surfing the net.
Karsten Hopp b9a68f
Karsten Hopp b9a68f
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp b9a68f
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp b9a68f
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp b9a68f
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///