diff --git a/7.4.293 b/7.4.293 new file mode 100644 index 0000000..5cc60a1 --- /dev/null +++ b/7.4.293 @@ -0,0 +1,301 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.4.293 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.4.293 +Problem: It is not possible to ignore composing characters at a specific + point in a pattern. +Solution: Add the %C item. +Files: src/regexp.c, src/regexp_nfa.c, src/testdir/test95.in, + src/testdir/test95.ok, runtime/doc/pattern.txt + + +*** ../vim-7.4.292/src/regexp.c 2014-05-13 18:03:55.729737466 +0200 +--- src/regexp.c 2014-05-13 18:27:08.725749659 +0200 +*************** +*** 244,249 **** +--- 244,250 ---- + + #define RE_MARK 207 /* mark cmp Match mark position */ + #define RE_VISUAL 208 /* Match Visual area */ ++ #define RE_COMPOSING 209 /* any composing characters */ + + /* + * Magic characters have a special meaning, they don't match literally. +*************** +*** 2208,2213 **** +--- 2209,2218 ---- + ret = regnode(RE_VISUAL); + break; + ++ case 'C': ++ ret = regnode(RE_COMPOSING); ++ break; ++ + /* \%[abc]: Emit as a list of branches, all ending at the last + * branch which matches nothing. */ + case '[': +*************** +*** 4710,4720 **** + status = RA_NOMATCH; + } + #ifdef FEAT_MBYTE +! /* Check for following composing character. */ + if (status != RA_NOMATCH + && enc_utf8 + && UTF_COMPOSINGLIKE(reginput, reginput + len) +! && !ireg_icombine) + { + /* raaron: This code makes a composing character get + * ignored, which is the correct behavior (sometimes) +--- 4715,4727 ---- + status = RA_NOMATCH; + } + #ifdef FEAT_MBYTE +! /* Check for following composing character, unless %C +! * follows (skips over all composing chars). */ + if (status != RA_NOMATCH + && enc_utf8 + && UTF_COMPOSINGLIKE(reginput, reginput + len) +! && !ireg_icombine +! 
&& OP(next) != RE_COMPOSING) + { + /* raaron: This code makes a composing character get + * ignored, which is the correct behavior (sometimes) +*************** +*** 4791,4796 **** +--- 4798,4813 ---- + status = RA_NOMATCH; + break; + #endif ++ case RE_COMPOSING: ++ #ifdef FEAT_MBYTE ++ if (enc_utf8) ++ { ++ /* Skip composing characters. */ ++ while (utf_iscomposing(utf_ptr2char(reginput))) ++ mb_cptr_adv(reginput); ++ } ++ #endif ++ break; + + case NOTHING: + break; +*** ../vim-7.4.292/src/regexp_nfa.c 2014-05-13 16:44:25.633695709 +0200 +--- src/regexp_nfa.c 2014-05-13 19:25:58.285780556 +0200 +*************** +*** 81,86 **** +--- 81,87 ---- + NFA_COMPOSING, /* Next nodes in NFA are part of the + composing multibyte char */ + NFA_END_COMPOSING, /* End of a composing char in the NFA */ ++ NFA_ANY_COMPOSING, /* \%C: Any composing characters. */ + NFA_OPT_CHARS, /* \%[abc] */ + + /* The following are used only in the postfix form, not in the NFA */ +*************** +*** 1418,1423 **** +--- 1419,1428 ---- + EMIT(NFA_VISUAL); + break; + ++ case 'C': ++ EMIT(NFA_ANY_COMPOSING); ++ break; ++ + case '[': + { + int n; +*************** +*** 2429,2434 **** +--- 2434,2440 ---- + case NFA_MARK_LT: STRCPY(code, "NFA_MARK_LT "); break; + case NFA_CURSOR: STRCPY(code, "NFA_CURSOR "); break; + case NFA_VISUAL: STRCPY(code, "NFA_VISUAL "); break; ++ case NFA_ANY_COMPOSING: STRCPY(code, "NFA_ANY_COMPOSING "); break; + + case NFA_STAR: STRCPY(code, "NFA_STAR "); break; + case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; +*************** +*** 2967,2972 **** +--- 2973,2979 ---- + case NFA_NLOWER_IC: + case NFA_UPPER_IC: + case NFA_NUPPER_IC: ++ case NFA_ANY_COMPOSING: + /* possibly non-ascii */ + #ifdef FEAT_MBYTE + if (has_mbyte) +*************** +*** 4152,4157 **** +--- 4159,4165 ---- + continue; + + case NFA_ANY: ++ case NFA_ANY_COMPOSING: + case NFA_IDENT: + case NFA_SIDENT: + case NFA_KWORD: +*************** +*** 4395,4401 **** + switch (state->c) + { + case 
NFA_MATCH: +! nfa_match = TRUE; + break; + + case NFA_SPLIT: +--- 4403,4409 ---- + switch (state->c) + { + case NFA_MATCH: +! // nfa_match = TRUE; + break; + + case NFA_SPLIT: +*************** +*** 5151,5156 **** +--- 5159,5165 ---- + + case NFA_MATCH: + case NFA_MCLOSE: ++ case NFA_ANY_COMPOSING: + /* empty match works always */ + return 0; + +*************** +*** 5573,5578 **** +--- 5582,5593 ---- + { + case NFA_MATCH: + { ++ #ifdef FEAT_MBYTE ++ /* If the match ends before a composing characters and ++ * ireg_icombine is not set, that is not really a match. */ ++ if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) ++ break; ++ #endif + nfa_match = TRUE; + copy_sub(&submatch->norm, &t->subs.norm); + #ifdef FEAT_SYN_HL +*************** +*** 6120,6125 **** +--- 6135,6157 ---- + } + break; + ++ case NFA_ANY_COMPOSING: ++ /* On a composing character skip over it. Otherwise do ++ * nothing. Always matches. */ ++ #ifdef FEAT_MBYTE ++ if (enc_utf8 && utf_iscomposing(curc)) ++ { ++ add_off = clen; ++ } ++ else ++ #endif ++ { ++ add_here = TRUE; ++ add_off = 0; ++ } ++ add_state = t->state->out; ++ break; ++ + /* + * Character classes like \a for alpha, \d for digit etc. + */ +*************** +*** 6484,6495 **** + if (!result && ireg_ic) + result = MB_TOLOWER(c) == MB_TOLOWER(curc); + #ifdef FEAT_MBYTE +! /* If there is a composing character which is not being +! * ignored there can be no match. Match with composing +! * character uses NFA_COMPOSING above. */ +! if (result && enc_utf8 && !ireg_icombine +! && clen != utf_char2len(curc)) +! result = FALSE; + #endif + ADD_STATE_IF_MATCH(t->state); + break; +--- 6516,6525 ---- + if (!result && ireg_ic) + result = MB_TOLOWER(c) == MB_TOLOWER(curc); + #ifdef FEAT_MBYTE +! /* If ireg_icombine is not set only skip over the character +! * itself. When it is set skip over composing characters. */ +! if (result && enc_utf8 && !ireg_icombine) +! 
clen = utf_char2len(curc); + #endif + ADD_STATE_IF_MATCH(t->state); + break; +diff: ../vim-7.4.292/src/testdir/test95.insrc/testdir/test95.ok,: No such file or directory +diff: src/testdir/test95.insrc/testdir/test95.ok,: No such file or directory +*** ../vim-7.4.292/runtime/doc/pattern.txt 2013-08-10 13:24:59.000000000 +0200 +--- runtime/doc/pattern.txt 2014-05-13 18:59:57.621766895 +0200 +*************** +*** 545,550 **** +--- 545,551 ---- + |/\%u| \%u \%u match specified multibyte character (eg \%u20ac) + |/\%U| \%U \%U match specified large multibyte character (eg + \%U12345678) ++ |/\%C| \%C \%C match any composing characters + + Example matches ~ + \<\I\i* or +*************** +*** 1207,1218 **** + 8. Composing characters *patterns-composing* + + */\Z* +! When "\Z" appears anywhere in the pattern, composing characters are ignored. +! Thus only the base characters need to match, the composing characters may be +! different and the number of composing characters may differ. Only relevant +! when 'encoding' is "utf-8". + Exception: If the pattern starts with one or more composing characters, these + must match. + + When a composing character appears at the start of the pattern of after an + item that doesn't include the composing character, a match is found at any +--- 1208,1225 ---- + 8. Composing characters *patterns-composing* + + */\Z* +! When "\Z" appears anywhere in the pattern, all composing characters are +! ignored. Thus only the base characters need to match, the composing +! characters may be different and the number of composing characters may differ. +! Only relevant when 'encoding' is "utf-8". + Exception: If the pattern starts with one or more composing characters, these + must match. ++ */\%C* ++ Use "\%C" to skip any composing characters. For example, the pattern "a" does ++ not match in "càt" (where the a has the composing character 0x0300), but ++ "a\%C" does. 
Note that this does not match "cát" (where the á is character ++ 0xe1, it does not have a composing character). It does match "cat" (where ++ the a is just an a). + + When a composing character appears at the start of the pattern of after an + item that doesn't include the composing character, a match is found at any +*** ../vim-7.4.292/src/version.c 2014-05-13 18:03:55.729737466 +0200 +--- src/version.c 2014-05-13 18:28:45.885750510 +0200 +*************** +*** 736,737 **** +--- 736,739 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 293, + /**/ + +-- +hundred-and-one symptoms of being an internet addict: +155. You forget to eat because you're too busy surfing the net. + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ an exciting new programming language -- http://www.Zimbu.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///