| To: vim_dev@googlegroups.com |
| Subject: Patch 7.3.1015 |
| Fcc: outbox |
| From: Bram Moolenaar <Bram@moolenaar.net> |
| Mime-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| |
| Patch 7.3.1015 |
| Problem: New regexp engine: Matching composing characters is wrong. |
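| Solution: Fix matching composing characters: handle a pattern item that consists only of composing characters, or of "." followed by composing characters, as a composing atom, and ignore the base character when matching it. |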
| Files: src/regexp_nfa.c, src/testdir/test95.in, src/testdir/test95.ok |
| |
| |
| |
| |
| |
| *** 716,721 **** |
| --- 716,722 ---- |
| * the composing char is matched here. */ |
| if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) |
| { |
| + old_regparse = regparse; |
| c = getchr(); |
| goto nfa_do_multibyte; |
| } |
| |
| *** 1217,1225 **** |
| |
| nfa_do_multibyte: |
| /* Length of current char with composing chars. */ |
| ! if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse))) |
| { |
| ! /* A base character plus composing characters. |
| * This requires creating a separate atom as if enclosing |
| * the characters in (), where NFA_COMPOSING is the ( and |
| * NFA_END_COMPOSING is the ). Note that right now we are |
| --- 1218,1228 ---- |
| |
| nfa_do_multibyte: |
| /* Length of current char with composing chars. */ |
| ! if (enc_utf8 && (clen != (plen = (*mb_ptr2len)(old_regparse)) |
| ! || utf_iscomposing(c))) |
| { |
| ! /* A base character plus composing characters, or just one |
| ! * or more composing characters. |
| * This requires creating a separate atom as if enclosing |
| * the characters in (), where NFA_COMPOSING is the ( and |
| * NFA_END_COMPOSING is the ). Note that right now we are |
| |
| *** 1400,1406 **** |
| /* Save pos after the repeated atom and the \{} */ |
| new_regparse = regparse; |
| |
| - new_regparse = regparse; |
| quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY); |
| for (i = 0; i < maxval; i++) |
| { |
| --- 1403,1408 ---- |
| |
| *** 3218,3228 **** |
| result = OK; |
| sta = t->state->out; |
| len = 0; |
| if (ireg_icombine) |
| { |
| ! /* If \Z was present, then ignore composing characters. */ |
| /* TODO: How about negated? */ |
| ! if (sta->c != c) |
| result = FAIL; |
| len = n; |
| while (sta->c != NFA_END_COMPOSING) |
| --- 3220,3238 ---- |
| result = OK; |
| sta = t->state->out; |
| len = 0; |
| + if (utf_iscomposing(sta->c)) |
| + { |
| + /* Only match composing character(s), ignore base |
| + * character. Used for ".{composing}" and "{composing}" |
| + * (no preceding character). */ |
| + len += mb_char2len(c); |
| + } |
| if (ireg_icombine) |
| { |
| ! /* If \Z was present, then ignore composing characters. |
| ! * When ignoring the base character this always matches. */ |
| /* TODO: How about negated? */ |
| ! if (len == 0 && sta->c != c) |
| result = FAIL; |
| len = n; |
| while (sta->c != NFA_END_COMPOSING) |
| |
| |
| |
| *** 38,43 **** |
| --- 38,52 ---- |
| :"""" Test composing character matching |
| :call add(tl, ['.ม', 'xม่x yมy', 'yม']) |
| :call add(tl, ['.ม่', 'xม่x yมy', 'xม่']) |
| + :call add(tl, ["\u05b9", " x\u05b9 ", "x\u05b9"]) |
| + :call add(tl, [".\u05b9", " x\u05b9 ", "x\u05b9"]) |
| + :call add(tl, ["\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"]) |
| + :call add(tl, [".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"]) |
| + :call add(tl, ["\u05bb\u05b9", " x\u05b9\u05bb "]) |
| + :call add(tl, [".\u05bb\u05b9", " x\u05b9\u05bb "]) |
| + :call add(tl, ["\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"]) |
| + :call add(tl, [".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"]) |
| + |
| |
| :"""" Test \Z |
| :call add(tl, ['ú\Z', 'x']) |
| |
| *** 50,55 **** |
| --- 59,66 ---- |
| :call add(tl, ["ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"]) |
| :call add(tl, ["ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"]) |
| :call add(tl, ["ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"]) |
| + :call add(tl, ["\u05b9\\+\\Z", "xyz", "xyz"]) |
| + :call add(tl, ["\\Z\u05b9\\+", "xyz", "xyz"]) |
| |
| :"""" Combining different tests and features |
| :call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd']) |
| |
| |
| |
| *** 11,16 **** |
| --- 11,24 ---- |
| OK - \%#=1\f\+ |
| OK - .ม |
| OK - .ม่ |
| + OK - ֹ |
| + OK - .ֹ |
| + OK - ֹֻ |
| + OK - .ֹֻ |
| + OK - ֹֻ |
| + OK - .ֹֻ |
| + OK - ֹ |
| + OK - .ֹ |
| OK - ú\Z |
| OK - יהוה\Z |
| OK - יְהוָה\Z |
| |
| *** 21,24 **** |
| --- 29,34 ---- |
| OK - קֹx\Z |
| OK - קx\Z |
| OK - קx\Z |
| + OK - ֹ\+\Z |
| + OK - \Zֹ\+ |
| OK - [^[=a=]]\+ |
| |
| |
| |
| *** 730,731 **** |
| --- 730,733 ---- |
| { /* Add new patch number below this line */ |
| + /**/ |
| + 1015, |
| /**/ |
| |
| -- |
| If your company is not involved in something called "ISO 9000" you probably |
| have no idea what it is. If your company _is_ involved in ISO 9000 then you |
| definitely have no idea what it is. |
| (Scott Adams - The Dilbert principle) |
| |
| /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
| /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
| \\\ an exciting new programming language -- http://www.Zimbu.org /// |
| \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |