|
Karsten Hopp |
ca9c19 |
To: vim_dev@googlegroups.com
|
|
Karsten Hopp |
ca9c19 |
Subject: Patch 7.3.1011
|
|
Karsten Hopp |
ca9c19 |
Fcc: outbox
|
|
Karsten Hopp |
ca9c19 |
From: Bram Moolenaar <Bram@moolenaar.net>
|
|
Karsten Hopp |
ca9c19 |
Mime-Version: 1.0
|
|
Karsten Hopp |
ca9c19 |
Content-Type: text/plain; charset=UTF-8
|
|
Karsten Hopp |
ca9c19 |
Content-Transfer-Encoding: 8bit
|
|
Karsten Hopp |
ca9c19 |
------------
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
Patch 7.3.1011
|
|
Karsten Hopp |
ca9c19 |
Problem: New regexp engine is inefficient with multi-byte characters.
|
|
Karsten Hopp |
ca9c19 |
Solution: Handle a character at a time instead of a byte at a time. Also
|
|
Karsten Hopp |
ca9c19 |
make \Z partly work.
|
|
Karsten Hopp |
ca9c19 |
Files: src/regexp_nfa.c, src/testdir/test95.in, src/testdir/test95.ok
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
*** ../vim-7.3.1010/src/regexp_nfa.c 2013-05-24 20:25:28.000000000 +0200
|
|
Karsten Hopp |
ca9c19 |
--- src/regexp_nfa.c 2013-05-24 21:49:43.000000000 +0200
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 46,54 ****
|
|
Karsten Hopp |
ca9c19 |
NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */
|
|
Karsten Hopp |
ca9c19 |
NFA_START_INVISIBLE,
|
|
Karsten Hopp |
ca9c19 |
NFA_END_INVISIBLE,
|
|
Karsten Hopp |
ca9c19 |
- NFA_MULTIBYTE, /* Next nodes in NFA are part of the same
|
|
Karsten Hopp |
ca9c19 |
- multibyte char */
|
|
Karsten Hopp |
ca9c19 |
- NFA_END_MULTIBYTE, /* End of multibyte char in the NFA */
|
|
Karsten Hopp |
ca9c19 |
NFA_COMPOSING, /* Next nodes in NFA are part of the
|
|
Karsten Hopp |
ca9c19 |
composing multibyte char */
|
|
Karsten Hopp |
ca9c19 |
NFA_END_COMPOSING, /* End of a composing char in the NFA */
|
|
Karsten Hopp |
ca9c19 |
--- 46,51 ----
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 195,220 ****
|
|
Karsten Hopp |
ca9c19 |
*post_ptr++ = c; \
|
|
Karsten Hopp |
ca9c19 |
} while (0)
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
- #define EMIT_MBYTE(c) \
|
|
Karsten Hopp |
ca9c19 |
- len = (*mb_char2bytes)(c, buf); \
|
|
Karsten Hopp |
ca9c19 |
- EMIT(buf[0]); \
|
|
Karsten Hopp |
ca9c19 |
- for (i = 1; i < len; i++) \
|
|
Karsten Hopp |
ca9c19 |
- { \
|
|
Karsten Hopp |
ca9c19 |
- EMIT(buf[i]); \
|
|
Karsten Hopp |
ca9c19 |
- EMIT(NFA_CONCAT); \
|
|
Karsten Hopp |
ca9c19 |
- } \
|
|
Karsten Hopp |
ca9c19 |
- EMIT(NFA_MULTIBYTE);
|
|
Karsten Hopp |
ca9c19 |
-
|
|
Karsten Hopp |
ca9c19 |
- #define EMIT_COMPOSING_UTF(input) \
|
|
Karsten Hopp |
ca9c19 |
- len = utfc_ptr2len(input); \
|
|
Karsten Hopp |
ca9c19 |
- EMIT(input[0]); \
|
|
Karsten Hopp |
ca9c19 |
- for (i = 1; i < len; i++) \
|
|
Karsten Hopp |
ca9c19 |
- { \
|
|
Karsten Hopp |
ca9c19 |
- EMIT(input[i]); \
|
|
Karsten Hopp |
ca9c19 |
- EMIT(NFA_CONCAT); \
|
|
Karsten Hopp |
ca9c19 |
- } \
|
|
Karsten Hopp |
ca9c19 |
- EMIT(NFA_COMPOSING);
|
|
Karsten Hopp |
ca9c19 |
-
|
|
Karsten Hopp |
ca9c19 |
/*
|
|
Karsten Hopp |
ca9c19 |
* Initialize internal variables before NFA compilation.
|
|
Karsten Hopp |
ca9c19 |
* Return OK on success, FAIL otherwise.
|
|
Karsten Hopp |
ca9c19 |
--- 192,197 ----
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 611,618 ****
|
|
Karsten Hopp |
ca9c19 |
#ifdef FEAT_MBYTE
|
|
Karsten Hopp |
ca9c19 |
char_u *old_regparse = regparse;
|
|
Karsten Hopp |
ca9c19 |
int clen;
|
|
Karsten Hopp |
ca9c19 |
- int len;
|
|
Karsten Hopp |
ca9c19 |
- static char_u buf[30];
|
|
Karsten Hopp |
ca9c19 |
int i;
|
|
Karsten Hopp |
ca9c19 |
#endif
|
|
Karsten Hopp |
ca9c19 |
int extra = 0;
|
|
Karsten Hopp |
ca9c19 |
--- 588,593 ----
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 845,858 ****
|
|
Karsten Hopp |
ca9c19 |
return FAIL;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
c = coll_get_char();
|
|
Karsten Hopp |
ca9c19 |
! #ifdef FEAT_MBYTE
|
|
Karsten Hopp |
ca9c19 |
! if ((*mb_char2len)(c) > 1)
|
|
Karsten Hopp |
ca9c19 |
! {
|
|
Karsten Hopp |
ca9c19 |
! EMIT_MBYTE(c);
|
|
Karsten Hopp |
ca9c19 |
! }
|
|
Karsten Hopp |
ca9c19 |
! else
|
|
Karsten Hopp |
ca9c19 |
! #endif
|
|
Karsten Hopp |
ca9c19 |
! EMIT(c);
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
/* Catch \%^ and \%$ regardless of where they appear in the
|
|
Karsten Hopp |
ca9c19 |
--- 820,826 ----
|
|
Karsten Hopp |
ca9c19 |
return FAIL;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
c = coll_get_char();
|
|
Karsten Hopp |
ca9c19 |
! EMIT(c);
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
/* Catch \%^ and \%$ regardless of where they appear in the
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 1135,1146 ****
|
|
Karsten Hopp |
ca9c19 |
* skip it. */
|
|
Karsten Hopp |
ca9c19 |
for (c = startc + 1; c <= endc; c++)
|
|
Karsten Hopp |
ca9c19 |
{
|
|
Karsten Hopp |
ca9c19 |
! if ((*mb_char2len)(c) > 1)
|
|
Karsten Hopp |
ca9c19 |
! {
|
|
Karsten Hopp |
ca9c19 |
! EMIT_MBYTE(c);
|
|
Karsten Hopp |
ca9c19 |
! }
|
|
Karsten Hopp |
ca9c19 |
! else
|
|
Karsten Hopp |
ca9c19 |
! EMIT(c);
|
|
Karsten Hopp |
ca9c19 |
TRY_NEG();
|
|
Karsten Hopp |
ca9c19 |
EMIT_GLUE();
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
--- 1103,1109 ----
|
|
Karsten Hopp |
ca9c19 |
* skip it. */
|
|
Karsten Hopp |
ca9c19 |
for (c = startc + 1; c <= endc; c++)
|
|
Karsten Hopp |
ca9c19 |
{
|
|
Karsten Hopp |
ca9c19 |
! EMIT(c);
|
|
Karsten Hopp |
ca9c19 |
TRY_NEG();
|
|
Karsten Hopp |
ca9c19 |
EMIT_GLUE();
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 1187,1200 ****
|
|
Karsten Hopp |
ca9c19 |
if (got_coll_char == TRUE && startc == 0)
|
|
Karsten Hopp |
ca9c19 |
EMIT(0x0a);
|
|
Karsten Hopp |
ca9c19 |
else
|
|
Karsten Hopp |
ca9c19 |
! #ifdef FEAT_MBYTE
|
|
Karsten Hopp |
ca9c19 |
! if ((*mb_char2len)(startc) > 1)
|
|
Karsten Hopp |
ca9c19 |
! {
|
|
Karsten Hopp |
ca9c19 |
! EMIT_MBYTE(startc);
|
|
Karsten Hopp |
ca9c19 |
! }
|
|
Karsten Hopp |
ca9c19 |
! else
|
|
Karsten Hopp |
ca9c19 |
! #endif
|
|
Karsten Hopp |
ca9c19 |
! EMIT(startc);
|
|
Karsten Hopp |
ca9c19 |
TRY_NEG();
|
|
Karsten Hopp |
ca9c19 |
EMIT_GLUE();
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
--- 1150,1156 ----
|
|
Karsten Hopp |
ca9c19 |
if (got_coll_char == TRUE && startc == 0)
|
|
Karsten Hopp |
ca9c19 |
EMIT(0x0a);
|
|
Karsten Hopp |
ca9c19 |
else
|
|
Karsten Hopp |
ca9c19 |
! EMIT(startc);
|
|
Karsten Hopp |
ca9c19 |
TRY_NEG();
|
|
Karsten Hopp |
ca9c19 |
EMIT_GLUE();
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 1242,1271 ****
|
|
Karsten Hopp |
ca9c19 |
int plen;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
nfa_do_multibyte:
|
|
Karsten Hopp |
ca9c19 |
! /* length of current char, with composing chars,
|
|
Karsten Hopp |
ca9c19 |
! * from pointer */
|
|
Karsten Hopp |
ca9c19 |
! plen = (*mb_ptr2len)(old_regparse);
|
|
Karsten Hopp |
ca9c19 |
! if (enc_utf8 && clen != plen)
|
|
Karsten Hopp |
ca9c19 |
! {
|
|
Karsten Hopp |
ca9c19 |
! /* A composing character is always handled as a
|
|
Karsten Hopp |
ca9c19 |
! * separate atom, surrounded by NFA_COMPOSING and
|
|
Karsten Hopp |
ca9c19 |
! * NFA_END_COMPOSING. Note that right now we are
|
|
Karsten Hopp |
ca9c19 |
* building the postfix form, not the NFA itself;
|
|
Karsten Hopp |
ca9c19 |
* a composing char could be: a, b, c, NFA_COMPOSING
|
|
Karsten Hopp |
ca9c19 |
! * where 'a', 'b', 'c' are chars with codes > 256.
|
|
Karsten Hopp |
ca9c19 |
! */
|
|
Karsten Hopp |
ca9c19 |
! EMIT_COMPOSING_UTF(old_regparse);
|
|
Karsten Hopp |
ca9c19 |
regparse = old_regparse + plen;
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
else
|
|
Karsten Hopp |
ca9c19 |
- /* A multi-byte character is always handled as a
|
|
Karsten Hopp |
ca9c19 |
- * separate atom, surrounded by NFA_MULTIBYTE and
|
|
Karsten Hopp |
ca9c19 |
- * NFA_END_MULTIBYTE */
|
|
Karsten Hopp |
ca9c19 |
- if (plen > 1)
|
|
Karsten Hopp |
ca9c19 |
- {
|
|
Karsten Hopp |
ca9c19 |
- EMIT_MBYTE(c);
|
|
Karsten Hopp |
ca9c19 |
- }
|
|
Karsten Hopp |
ca9c19 |
- else
|
|
Karsten Hopp |
ca9c19 |
#endif
|
|
Karsten Hopp |
ca9c19 |
{
|
|
Karsten Hopp |
ca9c19 |
c = no_Magic(c);
|
|
Karsten Hopp |
ca9c19 |
--- 1198,1227 ----
|
|
Karsten Hopp |
ca9c19 |
int plen;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
nfa_do_multibyte:
|
|
Karsten Hopp |
ca9c19 |
! /* Length of current char with composing chars. */
|
|
Karsten Hopp |
ca9c19 |
! if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse)))
|
|
Karsten Hopp |
ca9c19 |
! {
|
|
Karsten Hopp |
ca9c19 |
! /* A base character plus composing characters.
|
|
Karsten Hopp |
ca9c19 |
! * This requires creating a separate atom as if enclosing
|
|
Karsten Hopp |
ca9c19 |
! * the characters in (), where NFA_COMPOSING is the ( and
|
|
Karsten Hopp |
ca9c19 |
! * NFA_END_COMPOSING is the ). Note that right now we are
|
|
Karsten Hopp |
ca9c19 |
* building the postfix form, not the NFA itself;
|
|
Karsten Hopp |
ca9c19 |
* a composing char could be: a, b, c, NFA_COMPOSING
|
|
Karsten Hopp |
ca9c19 |
! * where 'b' and 'c' are chars with codes > 256. */
|
|
Karsten Hopp |
ca9c19 |
! i = 0;
|
|
Karsten Hopp |
ca9c19 |
! for (;;)
|
|
Karsten Hopp |
ca9c19 |
! {
|
|
Karsten Hopp |
ca9c19 |
! EMIT(c);
|
|
Karsten Hopp |
ca9c19 |
! if (i > 0)
|
|
Karsten Hopp |
ca9c19 |
! EMIT(NFA_CONCAT);
|
|
Karsten Hopp |
ca9c19 |
! if (i += utf_char2len(c) >= plen)
|
|
Karsten Hopp |
ca9c19 |
! break;
|
|
Karsten Hopp |
ca9c19 |
! c = utf_ptr2char(old_regparse + i);
|
|
Karsten Hopp |
ca9c19 |
! }
|
|
Karsten Hopp |
ca9c19 |
! EMIT(NFA_COMPOSING);
|
|
Karsten Hopp |
ca9c19 |
regparse = old_regparse + plen;
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
else
|
|
Karsten Hopp |
ca9c19 |
#endif
|
|
Karsten Hopp |
ca9c19 |
{
|
|
Karsten Hopp |
ca9c19 |
c = no_Magic(c);
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 1702,1710 ****
|
|
Karsten Hopp |
ca9c19 |
case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break;
|
|
Karsten Hopp |
ca9c19 |
case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
- case NFA_MULTIBYTE: STRCPY(code, "NFA_MULTIBYTE"); break;
|
|
Karsten Hopp |
ca9c19 |
- case NFA_END_MULTIBYTE: STRCPY(code, "NFA_END_MULTIBYTE"); break;
|
|
Karsten Hopp |
ca9c19 |
-
|
|
Karsten Hopp |
ca9c19 |
case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break;
|
|
Karsten Hopp |
ca9c19 |
case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
--- 1658,1663 ----
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 2194,2200 ****
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
e1 = POP();
|
|
Karsten Hopp |
ca9c19 |
e1.start->negated = TRUE;
|
|
Karsten Hopp |
ca9c19 |
! if (e1.start->c == NFA_MULTIBYTE || e1.start->c == NFA_COMPOSING)
|
|
Karsten Hopp |
ca9c19 |
e1.start->out1->negated = TRUE;
|
|
Karsten Hopp |
ca9c19 |
PUSH(e1);
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
--- 2147,2153 ----
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
e1 = POP();
|
|
Karsten Hopp |
ca9c19 |
e1.start->negated = TRUE;
|
|
Karsten Hopp |
ca9c19 |
! if (e1.start->c == NFA_COMPOSING)
|
|
Karsten Hopp |
ca9c19 |
e1.start->out1->negated = TRUE;
|
|
Karsten Hopp |
ca9c19 |
PUSH(e1);
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 2311,2316 ****
|
|
Karsten Hopp |
ca9c19 |
--- 2264,2279 ----
|
|
Karsten Hopp |
ca9c19 |
PUSH(frag(s, list1(&s1->out)));
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
+ case NFA_COMPOSING: /* char with composing char */
|
|
Karsten Hopp |
ca9c19 |
+ #if 0
|
|
Karsten Hopp |
ca9c19 |
+ /* TODO */
|
|
Karsten Hopp |
ca9c19 |
+ if (regflags & RF_ICOMBINE)
|
|
Karsten Hopp |
ca9c19 |
+ {
|
|
Karsten Hopp |
ca9c19 |
+ goto normalchar;
|
|
Karsten Hopp |
ca9c19 |
+ }
|
|
Karsten Hopp |
ca9c19 |
+ #endif
|
|
Karsten Hopp |
ca9c19 |
+ /* FALLTHROUGH */
|
|
Karsten Hopp |
ca9c19 |
+
|
|
Karsten Hopp |
ca9c19 |
case NFA_MOPEN + 0: /* Submatch */
|
|
Karsten Hopp |
ca9c19 |
case NFA_MOPEN + 1:
|
|
Karsten Hopp |
ca9c19 |
case NFA_MOPEN + 2:
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 2322,2329 ****
|
|
Karsten Hopp |
ca9c19 |
case NFA_MOPEN + 8:
|
|
Karsten Hopp |
ca9c19 |
case NFA_MOPEN + 9:
|
|
Karsten Hopp |
ca9c19 |
case NFA_NOPEN: /* \%( "Invisible Submatch" */
|
|
Karsten Hopp |
ca9c19 |
- case NFA_MULTIBYTE: /* mbyte char */
|
|
Karsten Hopp |
ca9c19 |
- case NFA_COMPOSING: /* composing char */
|
|
Karsten Hopp |
ca9c19 |
if (nfa_calc_size == TRUE)
|
|
Karsten Hopp |
ca9c19 |
{
|
|
Karsten Hopp |
ca9c19 |
nstate += 2;
|
|
Karsten Hopp |
ca9c19 |
--- 2285,2290 ----
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 2336,2344 ****
|
|
Karsten Hopp |
ca9c19 |
case NFA_NOPEN:
|
|
Karsten Hopp |
ca9c19 |
mclose = NFA_NCLOSE;
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
- case NFA_MULTIBYTE:
|
|
Karsten Hopp |
ca9c19 |
- mclose = NFA_END_MULTIBYTE;
|
|
Karsten Hopp |
ca9c19 |
- break;
|
|
Karsten Hopp |
ca9c19 |
case NFA_COMPOSING:
|
|
Karsten Hopp |
ca9c19 |
mclose = NFA_END_COMPOSING;
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
--- 2297,2302 ----
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 2377,2385 ****
|
|
Karsten Hopp |
ca9c19 |
goto theend;
|
|
Karsten Hopp |
ca9c19 |
patch(e.out, s1);
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
! if (mopen == NFA_MULTIBYTE || mopen == NFA_COMPOSING)
|
|
Karsten Hopp |
ca9c19 |
! /* MULTIBYTE->out1 = END_MULTIBYTE
|
|
Karsten Hopp |
ca9c19 |
! * COMPOSING->out1 = END_COMPOSING */
|
|
Karsten Hopp |
ca9c19 |
patch(list1(&s->out1), s1);
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
PUSH(frag(s, list1(&s1->out)));
|
|
Karsten Hopp |
ca9c19 |
--- 2335,2342 ----
|
|
Karsten Hopp |
ca9c19 |
goto theend;
|
|
Karsten Hopp |
ca9c19 |
patch(e.out, s1);
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
! if (mopen == NFA_COMPOSING)
|
|
Karsten Hopp |
ca9c19 |
! /* COMPOSING->out1 = END_COMPOSING */
|
|
Karsten Hopp |
ca9c19 |
patch(list1(&s->out1), s1);
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
PUSH(frag(s, list1(&s1->out)));
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 2540,2556 ****
|
|
Karsten Hopp |
ca9c19 |
case NFA_COMPOSING:
|
|
Karsten Hopp |
ca9c19 |
/* nfa_regmatch() will match all the bytes of this composing char. */
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
-
|
|
Karsten Hopp |
ca9c19 |
- case NFA_MULTIBYTE:
|
|
Karsten Hopp |
ca9c19 |
- /* nfa_regmatch() will match all the bytes of this multibyte char. */
|
|
Karsten Hopp |
ca9c19 |
- break;
|
|
Karsten Hopp |
ca9c19 |
#endif
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
- case NFA_END_MULTIBYTE:
|
|
Karsten Hopp |
ca9c19 |
- /* Successfully matched this mbyte char */
|
|
Karsten Hopp |
ca9c19 |
- addstate(l, state->out, m, off, lid, match);
|
|
Karsten Hopp |
ca9c19 |
- break;
|
|
Karsten Hopp |
ca9c19 |
-
|
|
Karsten Hopp |
ca9c19 |
case NFA_NOPEN:
|
|
Karsten Hopp |
ca9c19 |
case NFA_NCLOSE:
|
|
Karsten Hopp |
ca9c19 |
addstate(l, state->out, m, off, lid, match);
|
|
Karsten Hopp |
ca9c19 |
--- 2497,2504 ----
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 2841,2847 ****
|
|
Karsten Hopp |
ca9c19 |
regsub_T *submatch;
|
|
Karsten Hopp |
ca9c19 |
regsub_T *m;
|
|
Karsten Hopp |
ca9c19 |
{
|
|
Karsten Hopp |
ca9c19 |
! int c = -1;
|
|
Karsten Hopp |
ca9c19 |
int n;
|
|
Karsten Hopp |
ca9c19 |
int i = 0;
|
|
Karsten Hopp |
ca9c19 |
int result;
|
|
Karsten Hopp |
ca9c19 |
--- 2789,2795 ----
|
|
Karsten Hopp |
ca9c19 |
regsub_T *submatch;
|
|
Karsten Hopp |
ca9c19 |
regsub_T *m;
|
|
Karsten Hopp |
ca9c19 |
{
|
|
Karsten Hopp |
ca9c19 |
! int c;
|
|
Karsten Hopp |
ca9c19 |
int n;
|
|
Karsten Hopp |
ca9c19 |
int i = 0;
|
|
Karsten Hopp |
ca9c19 |
int result;
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 2859,2865 ****
|
|
Karsten Hopp |
ca9c19 |
List *listtbl[2][2];
|
|
Karsten Hopp |
ca9c19 |
List *ll;
|
|
Karsten Hopp |
ca9c19 |
int listid = 1;
|
|
Karsten Hopp |
ca9c19 |
- int endnode;
|
|
Karsten Hopp |
ca9c19 |
List *thislist;
|
|
Karsten Hopp |
ca9c19 |
List *nextlist;
|
|
Karsten Hopp |
ca9c19 |
List *neglist;
|
|
Karsten Hopp |
ca9c19 |
--- 2807,2812 ----
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 3190,3222 ****
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
! case NFA_MULTIBYTE:
|
|
Karsten Hopp |
ca9c19 |
case NFA_COMPOSING:
|
|
Karsten Hopp |
ca9c19 |
! endnode = t->state->c + 1;
|
|
Karsten Hopp |
ca9c19 |
result = OK;
|
|
Karsten Hopp |
ca9c19 |
sta = t->state->out;
|
|
Karsten Hopp |
ca9c19 |
! len = 1;
|
|
Karsten Hopp |
ca9c19 |
! while (sta->c != endnode && len <= n)
|
|
Karsten Hopp |
ca9c19 |
{
|
|
Karsten Hopp |
ca9c19 |
! if (reginput[len-1] != sta->c)
|
|
Karsten Hopp |
ca9c19 |
! {
|
|
Karsten Hopp |
ca9c19 |
! result = FAIL;
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
! }
|
|
Karsten Hopp |
ca9c19 |
! len++;
|
|
Karsten Hopp |
ca9c19 |
sta = sta->out;
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
/* if input char length doesn't match regexp char length */
|
|
Karsten Hopp |
ca9c19 |
! if (len -1 < n || sta->c != endnode)
|
|
Karsten Hopp |
ca9c19 |
result = FAIL;
|
|
Karsten Hopp |
ca9c19 |
! end = t->state->out1; /* NFA_END_MULTIBYTE or
|
|
Karsten Hopp |
ca9c19 |
! NFA_END_COMPOSING */
|
|
Karsten Hopp |
ca9c19 |
/* If \Z was present, then ignore composing characters */
|
|
Karsten Hopp |
ca9c19 |
! if (ireg_icombine && endnode == NFA_END_COMPOSING)
|
|
Karsten Hopp |
ca9c19 |
result = 1 ^ sta->negated;
|
|
Karsten Hopp |
ca9c19 |
ADD_POS_NEG_STATE(end);
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
case NFA_NEWL:
|
|
Karsten Hopp |
ca9c19 |
if (!reg_line_lbr && REG_MULTI
|
|
Karsten Hopp |
ca9c19 |
--- 3137,3171 ----
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
! #ifdef FEAT_MBYTE
|
|
Karsten Hopp |
ca9c19 |
case NFA_COMPOSING:
|
|
Karsten Hopp |
ca9c19 |
! {
|
|
Karsten Hopp |
ca9c19 |
! int mc = c;
|
|
Karsten Hopp |
ca9c19 |
!
|
|
Karsten Hopp |
ca9c19 |
result = OK;
|
|
Karsten Hopp |
ca9c19 |
sta = t->state->out;
|
|
Karsten Hopp |
ca9c19 |
! len = 0;
|
|
Karsten Hopp |
ca9c19 |
! while (sta->c != NFA_END_COMPOSING && len < n)
|
|
Karsten Hopp |
ca9c19 |
{
|
|
Karsten Hopp |
ca9c19 |
! if (len > 0)
|
|
Karsten Hopp |
ca9c19 |
! mc = mb_ptr2char(reginput + len);
|
|
Karsten Hopp |
ca9c19 |
! if (mc != sta->c)
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
! len += mb_char2len(mc);
|
|
Karsten Hopp |
ca9c19 |
sta = sta->out;
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
/* if input char length doesn't match regexp char length */
|
|
Karsten Hopp |
ca9c19 |
! if (len < n || sta->c != NFA_END_COMPOSING)
|
|
Karsten Hopp |
ca9c19 |
result = FAIL;
|
|
Karsten Hopp |
ca9c19 |
! end = t->state->out1; /* NFA_END_COMPOSING */
|
|
Karsten Hopp |
ca9c19 |
/* If \Z was present, then ignore composing characters */
|
|
Karsten Hopp |
ca9c19 |
! if (ireg_icombine)
|
|
Karsten Hopp |
ca9c19 |
result = 1 ^ sta->negated;
|
|
Karsten Hopp |
ca9c19 |
ADD_POS_NEG_STATE(end);
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
+ }
|
|
Karsten Hopp |
ca9c19 |
+ #endif
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
case NFA_NEWL:
|
|
Karsten Hopp |
ca9c19 |
if (!reg_line_lbr && REG_MULTI
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 3425,3430 ****
|
|
Karsten Hopp |
ca9c19 |
--- 3374,3387 ----
|
|
Karsten Hopp |
ca9c19 |
if (!result)
|
|
Karsten Hopp |
ca9c19 |
result = ireg_ic == TRUE
|
|
Karsten Hopp |
ca9c19 |
&& MB_TOLOWER(t->state->c) == MB_TOLOWER(c);
|
|
Karsten Hopp |
ca9c19 |
+ #ifdef FEAT_MBYTE
|
|
Karsten Hopp |
ca9c19 |
+ /* If there is a composing character which is not being
|
|
Karsten Hopp |
ca9c19 |
+ * ignored there can be no match. Match with composing
|
|
Karsten Hopp |
ca9c19 |
+ * character uses NFA_COMPOSING above. */
|
|
Karsten Hopp |
ca9c19 |
+ if (result && enc_utf8 && !ireg_icombine
|
|
Karsten Hopp |
ca9c19 |
+ && n != utf_char2len(c))
|
|
Karsten Hopp |
ca9c19 |
+ result = FALSE;
|
|
Karsten Hopp |
ca9c19 |
+ #endif
|
|
Karsten Hopp |
ca9c19 |
ADD_POS_NEG_STATE(t->state);
|
|
Karsten Hopp |
ca9c19 |
break;
|
|
Karsten Hopp |
ca9c19 |
}
|
|
Karsten Hopp |
ca9c19 |
*** ../vim-7.3.1010/src/testdir/test95.in 2013-05-24 20:25:28.000000000 +0200
|
|
Karsten Hopp |
ca9c19 |
--- src/testdir/test95.in 2013-05-24 20:45:08.000000000 +0200
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 35,40 ****
|
|
Karsten Hopp |
ca9c19 |
--- 35,44 ----
|
|
Karsten Hopp |
ca9c19 |
:call add(tl, ['\f\+', '&*fname ', 'fname'])
|
|
Karsten Hopp |
ca9c19 |
:call add(tl, ['\%#=1\f\+', '&*fname ', 'fname'])
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
+ :"""" Test composing character matching
|
|
Karsten Hopp |
ca9c19 |
+ :call add(tl, ['.ม', 'xม่x yมy', 'yม'])
|
|
Karsten Hopp |
ca9c19 |
+ :call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
|
|
Karsten Hopp |
ca9c19 |
+
|
|
Karsten Hopp |
ca9c19 |
:"""" Test \Z
|
|
Karsten Hopp |
ca9c19 |
:call add(tl, ['ú\Z', 'x'])
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
*** ../vim-7.3.1010/src/testdir/test95.ok 2013-05-24 20:25:28.000000000 +0200
|
|
Karsten Hopp |
ca9c19 |
--- src/testdir/test95.ok 2013-05-24 20:44:41.000000000 +0200
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 9,13 ****
|
|
Karsten Hopp |
ca9c19 |
--- 9,15 ----
|
|
Karsten Hopp |
ca9c19 |
OK - \%#=1\i\+
|
|
Karsten Hopp |
ca9c19 |
OK - \f\+
|
|
Karsten Hopp |
ca9c19 |
OK - \%#=1\f\+
|
|
Karsten Hopp |
ca9c19 |
+ OK - .ม
|
|
Karsten Hopp |
ca9c19 |
+ OK - .ม่
|
|
Karsten Hopp |
ca9c19 |
OK - ú\Z
|
|
Karsten Hopp |
ca9c19 |
OK - [^[=a=]]\+
|
|
Karsten Hopp |
ca9c19 |
*** ../vim-7.3.1010/src/version.c 2013-05-24 20:25:28.000000000 +0200
|
|
Karsten Hopp |
ca9c19 |
--- src/version.c 2013-05-24 21:56:02.000000000 +0200
|
|
Karsten Hopp |
ca9c19 |
***************
|
|
Karsten Hopp |
ca9c19 |
*** 730,731 ****
|
|
Karsten Hopp |
ca9c19 |
--- 730,733 ----
|
|
Karsten Hopp |
ca9c19 |
{ /* Add new patch number below this line */
|
|
Karsten Hopp |
ca9c19 |
+ /**/
|
|
Karsten Hopp |
ca9c19 |
+ 1011,
|
|
Karsten Hopp |
ca9c19 |
/**/
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
--
|
|
Karsten Hopp |
ca9c19 |
If you had to identify, in one word, the reason why the
|
|
Karsten Hopp |
ca9c19 |
human race has not achieved, and never will achieve, its
|
|
Karsten Hopp |
ca9c19 |
full potential, that word would be "meetings."
|
|
Karsten Hopp |
ca9c19 |
|
|
Karsten Hopp |
ca9c19 |
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
|
|
Karsten Hopp |
ca9c19 |
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
|
|
Karsten Hopp |
ca9c19 |
\\\ an exciting new programming language -- http://www.Zimbu.org ///
|
|
Karsten Hopp |
ca9c19 |
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///
|