Patch 7.3.1111
Problem: nfa_recognize_char_class() implementation is inefficient.
Solution: Use bits in an int instead of chars in a string. (Dominique Pelle)
Files: src/regexp_nfa.c, src/testdir/test36.in, src/testdir/test36.ok
Comment: Karsten Hopp: manually redone patch as it didn't apply
diff -up vim73/src/regexp_nfa.c.1111 vim73/src/regexp_nfa.c
--- vim73/src/regexp_nfa.c.1111 2013-07-09 14:56:29.355057017 +0200
+++ vim73/src/regexp_nfa.c 2013-07-09 15:04:06.283217016 +0200
@@ -380,38 +380,18 @@ nfa_recognize_char_class(start, end, ext
char_u *end;
int extra_newl;
{
- int i;
- /* Each of these variables takes up a char in "config[]",
- * in the order they are here. */
- int not = FALSE, af = FALSE, AF = FALSE, az = FALSE, AZ = FALSE,
- o7 = FALSE, o9 = FALSE, underscore = FALSE, newl = FALSE;
+# define CLASS_not 0x80
+# define CLASS_af 0x40
+# define CLASS_AF 0x20
+# define CLASS_az 0x10
+# define CLASS_AZ 0x08
+# define CLASS_o7 0x04
+# define CLASS_o9 0x02
+# define CLASS_underscore 0x01
+
+ int newl = FALSE;
char_u *p;
-#define NCONFIGS 16
- int classid[NCONFIGS] = {
- NFA_DIGIT, NFA_NDIGIT, NFA_HEX, NFA_NHEX,
- NFA_OCTAL, NFA_NOCTAL, NFA_WORD, NFA_NWORD,
- NFA_HEAD, NFA_NHEAD, NFA_ALPHA, NFA_NALPHA,
- NFA_LOWER, NFA_NLOWER, NFA_UPPER, NFA_NUPPER
- };
- char_u myconfig[10];
- char_u config[NCONFIGS][9] = {
- "000000100", /* digit */
- "100000100", /* non digit */
- "011000100", /* hex-digit */
- "111000100", /* non hex-digit */
- "000001000", /* octal-digit */
- "100001000", /* [^0-7] */
- "000110110", /* [0-9A-Za-z_] */
- "100110110", /* [^0-9A-Za-z_] */
- "000110010", /* head of word */
- "100110010", /* not head of word */
- "000110000", /* alphabetic char a-z */
- "100110000", /* non alphabetic char */
- "000100000", /* lowercase letter */
- "100100000", /* non lowercase */
- "000010000", /* uppercase */
- "100010000" /* non uppercase */
- };
+ int config = 0;
if (extra_newl == TRUE)
newl = TRUE;
@@ -421,7 +401,7 @@ nfa_recognize_char_class(start, end, ext
p = start;
if (*p == '^')
{
- not = TRUE;
+ config |= CLASS_not;
p++;
}
@@ -434,37 +414,37 @@ nfa_recognize_char_class(start, end, ext
case '0':
if (*(p + 2) == '9')
{
- o9 = TRUE;
+ config |= CLASS_o9;
break;
}
else
if (*(p + 2) == '7')
{
- o7 = TRUE;
+ config |= CLASS_o7;
break;
}
case 'a':
if (*(p + 2) == 'z')
{
- az = TRUE;
+ config |= CLASS_az;
break;
}
else
if (*(p + 2) == 'f')
{
- af = TRUE;
+ config |= CLASS_af;
break;
}
case 'A':
if (*(p + 2) == 'Z')
{
- AZ = TRUE;
+ config |= CLASS_AZ;
break;
}
else
if (*(p + 2) == 'F')
{
- AF = TRUE;
+ config |= CLASS_AF;
break;
}
/* FALLTHROUGH */
@@ -480,7 +460,7 @@ nfa_recognize_char_class(start, end, ext
}
else if (*p == '_')
{
- underscore = TRUE;
+ config |= CLASS_underscore;
p ++;
}
else if (*p == '\n')
@@ -495,38 +475,45 @@ nfa_recognize_char_class(start, end, ext
if (p != end)
return FAIL;
- /* build the config that represents the ranges we gathered */
- STRCPY(myconfig, "000000000");
- if (not == TRUE)
- myconfig[0] = '1';
- if (af == TRUE)
- myconfig[1] = '1';
- if (AF == TRUE)
- myconfig[2] = '1';
- if (az == TRUE)
- myconfig[3] = '1';
- if (AZ == TRUE)
- myconfig[4] = '1';
- if (o7 == TRUE)
- myconfig[5] = '1';
- if (o9 == TRUE)
- myconfig[6] = '1';
- if (underscore == TRUE)
- myconfig[7] = '1';
if (newl == TRUE)
- {
- myconfig[8] = '1';
extra_newl = ADD_NL;
- }
- /* try to recognize character classes */
- for (i = 0; i < NCONFIGS; i++)
- if (STRNCMP(myconfig, config[i], 8) == 0)
- return classid[i] + extra_newl;
- /* fallthrough => no success so far */
+ switch (config)
+ {
+ case CLASS_o9:
+ return extra_newl + NFA_DIGIT;
+ case CLASS_not | CLASS_o9:
+ return extra_newl + NFA_NDIGIT;
+ case CLASS_af | CLASS_AF | CLASS_o9:
+ return extra_newl + NFA_HEX;
+ case CLASS_not | CLASS_af | CLASS_AF | CLASS_o9:
+ return extra_newl + NFA_NHEX;
+ case CLASS_o7:
+ return extra_newl + NFA_OCTAL;
+ case CLASS_not | CLASS_o7:
+ return extra_newl + NFA_NOCTAL;
+ case CLASS_az | CLASS_AZ | CLASS_o9 | CLASS_underscore:
+ return extra_newl + NFA_WORD;
+ case CLASS_not | CLASS_az | CLASS_AZ | CLASS_o9 | CLASS_underscore:
+ return extra_newl + NFA_NWORD;
+ case CLASS_az | CLASS_AZ | CLASS_underscore:
+ return extra_newl + NFA_HEAD;
+ case CLASS_not | CLASS_az | CLASS_AZ | CLASS_underscore:
+ return extra_newl + NFA_NHEAD;
+ case CLASS_az | CLASS_AZ:
+ return extra_newl + NFA_ALPHA;
+ case CLASS_not | CLASS_az | CLASS_AZ:
+ return extra_newl + NFA_NALPHA;
+ case CLASS_az:
+ return extra_newl + NFA_LOWER;
+ case CLASS_not | CLASS_az:
+ return extra_newl + NFA_NLOWER;
+ case CLASS_AZ:
+ return extra_newl + NFA_UPPER;
+ case CLASS_not | CLASS_AZ:
+ return extra_newl + NFA_NUPPER;
+ }
return FAIL;
-
-#undef NCONFIGS
}
/*
@@ -900,7 +887,7 @@ nfa_regatom()
EMSG_RET_FAIL(_(e_z1_not_allowed));
EMIT(NFA_ZREF1 + (no_Magic(c) - '1'));
/* No need to set nfa_has_backref, the sub-matches don't
- * change when \z1 .. \z9 maches or not. */
+ * change when \z1 .. \z9 matches or not. */
re_has_z = REX_USE;
break;
case '(':
@@ -4658,7 +4645,7 @@ nfa_regmatch(prog, start, submatch, m)
}
else
{
- /* skip ofer the matched characters, set character
+ /* skip over the matched characters, set character
* count in NFA_SKIP */
ll = nextlist;
add_state = t->state->out;
diff -up vim73/src/testdir/test36.in.1111 vim73/src/testdir/test36.in
--- vim73/src/testdir/test36.in.1111 2013-07-09 15:04:36.243178056 +0200
+++ vim73/src/testdir/test36.in 2013-07-09 15:06:54.077397744 +0200
@@ -1,40 +1,105 @@
-Test character classes in regexp
+Test character classes in regexp using regexpengine 0, 1, 2.
STARTTEST
-/^start-here
-j:s/\d//g
-j:s/\D//g
-j:s/\o//g
-j:s/\O//g
-j:s/\x//g
-j:s/\X//g
-j:s/\w//g
-j:s/\W//g
-j:s/\h//g
-j:s/\H//g
-j:s/\a//g
-j:s/\A//g
-j:s/\l//g
-j:s/\L//g
-j:s/\u//g
-j:s/\U//g
+/^start-here/+1
+Y:s/\%#=0\d//g
+p:s/\%#=1\d//g
+p:s/\%#=2\d//g
+p:s/\%#=0[0-9]//g
+p:s/\%#=1[0-9]//g
+p:s/\%#=2[0-9]//g
+p:s/\%#=0\D//g
+p:s/\%#=1\D//g
+p:s/\%#=2\D//g
+p:s/\%#=0[^0-9]//g
+p:s/\%#=1[^0-9]//g
+p:s/\%#=2[^0-9]//g
+p:s/\%#=0\o//g
+p:s/\%#=1\o//g
+p:s/\%#=2\o//g
+p:s/\%#=0[0-7]//g
+p:s/\%#=1[0-7]//g
+p:s/\%#=2[0-7]//g
+p:s/\%#=0\O//g
+p:s/\%#=1\O//g
+p:s/\%#=2\O//g
+p:s/\%#=0[^0-7]//g
+p:s/\%#=1[^0-7]//g
+p:s/\%#=2[^0-7]//g
+p:s/\%#=0\x//g
+p:s/\%#=1\x//g
+p:s/\%#=2\x//g
+p:s/\%#=0[0-9A-Fa-f]//g
+p:s/\%#=1[0-9A-Fa-f]//g
+p:s/\%#=2[0-9A-Fa-f]//g
+p:s/\%#=0\X//g
+p:s/\%#=1\X//g
+p:s/\%#=2\X//g
+p:s/\%#=0[^0-9A-Fa-f]//g
+p:s/\%#=1[^0-9A-Fa-f]//g
+p:s/\%#=2[^0-9A-Fa-f]//g
+p:s/\%#=0\w//g
+p:s/\%#=1\w//g
+p:s/\%#=2\w//g
+p:s/\%#=0[0-9A-Za-z_]//g
+p:s/\%#=1[0-9A-Za-z_]//g
+p:s/\%#=2[0-9A-Za-z_]//g
+p:s/\%#=0\W//g
+p:s/\%#=1\W//g
+p:s/\%#=2\W//g
+p:s/\%#=0[^0-9A-Za-z_]//g
+p:s/\%#=1[^0-9A-Za-z_]//g
+p:s/\%#=2[^0-9A-Za-z_]//g
+p:s/\%#=0\h//g
+p:s/\%#=1\h//g
+p:s/\%#=2\h//g
+p:s/\%#=0[A-Za-z_]//g
+p:s/\%#=1[A-Za-z_]//g
+p:s/\%#=2[A-Za-z_]//g
+p:s/\%#=0\H//g
+p:s/\%#=1\H//g
+p:s/\%#=2\H//g
+p:s/\%#=0[^A-Za-z_]//g
+p:s/\%#=1[^A-Za-z_]//g
+p:s/\%#=2[^A-Za-z_]//g
+p:s/\%#=0\a//g
+p:s/\%#=1\a//g
+p:s/\%#=2\a//g
+p:s/\%#=0[A-Za-z]//g
+p:s/\%#=1[A-Za-z]//g
+p:s/\%#=2[A-Za-z]//g
+p:s/\%#=0\A//g
+p:s/\%#=1\A//g
+p:s/\%#=2\A//g
+p:s/\%#=0[^A-Za-z]//g
+p:s/\%#=1[^A-Za-z]//g
+p:s/\%#=2[^A-Za-z]//g
+p:s/\%#=0\l//g
+p:s/\%#=1\l//g
+p:s/\%#=2\l//g
+p:s/\%#=0[a-z]//g
+p:s/\%#=1[a-z]//g
+p:s/\%#=2[a-z]//g
+p:s/\%#=0\L//g
+p:s/\%#=1\L//g
+p:s/\%#=2\L//g
+p:s/\%#=0[^a-z]//g
+p:s/\%#=1[^a-z]//g
+p:s/\%#=2[^a-z]//g
+p:s/\%#=0\u//g
+p:s/\%#=1\u//g
+p:s/\%#=2\u//g
+p:s/\%#=0[A-Z]//g
+p:s/\%#=1[A-Z]//g
+p:s/\%#=2[A-Z]//g
+p:s/\%#=0\U//g
+p:s/\%#=1\U//g
+p:s/\%#=2\U//g
+p:s/\%#=0[^A-Z]//g
+p:s/\%#=1[^A-Z]//g
+p:s/\%#=2[^A-Z]//g
:/^start-here/+1,$wq! test.out
ENDTEST
start-here
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
-
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
diff -up vim73/src/testdir/test36.ok.1111 vim73/src/testdir/test36.ok
--- vim73/src/testdir/test36.ok.1111 2013-07-09 15:07:04.737028021 +0200
+++ vim73/src/testdir/test36.ok 2013-07-09 15:10:07.822677205 +0200
@@ -1,16 +1,96 @@
!"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+0123456789
+0123456789
+0123456789
+0123456789
+0123456789
0123456789
!"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+01234567
+01234567
+01234567
+01234567
+01234567
01234567
!"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~¦±¼ÇÓé
+0123456789ABCDEFabcdef
+0123456789ABCDEFabcdef
+0123456789ABCDEFabcdef
+0123456789ABCDEFabcdef
+0123456789ABCDEFabcdef
0123456789ABCDEFabcdef
!"#$%&'()#+'-./:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./:;<=>?@[\]^`{|}~¦±¼ÇÓé
+0123456789ABCDEFGHIXYZ_abcdefghiwxyz
+0123456789ABCDEFGHIXYZ_abcdefghiwxyz
+0123456789ABCDEFGHIXYZ_abcdefghiwxyz
+0123456789ABCDEFGHIXYZ_abcdefghiwxyz
+0123456789ABCDEFGHIXYZ_abcdefghiwxyz
0123456789ABCDEFGHIXYZ_abcdefghiwxyz
!"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~¦±¼ÇÓé
+ABCDEFGHIXYZ_abcdefghiwxyz
+ABCDEFGHIXYZ_abcdefghiwxyz
+ABCDEFGHIXYZ_abcdefghiwxyz
+ABCDEFGHIXYZ_abcdefghiwxyz
+ABCDEFGHIXYZ_abcdefghiwxyz
ABCDEFGHIXYZ_abcdefghiwxyz
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~¦±¼ÇÓé
+ABCDEFGHIXYZabcdefghiwxyz
+ABCDEFGHIXYZabcdefghiwxyz
+ABCDEFGHIXYZabcdefghiwxyz
+ABCDEFGHIXYZabcdefghiwxyz
+ABCDEFGHIXYZabcdefghiwxyz
ABCDEFGHIXYZabcdefghiwxyz
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~¦±¼ÇÓé
+abcdefghiwxyz
+abcdefghiwxyz
+abcdefghiwxyz
+abcdefghiwxyz
+abcdefghiwxyz
abcdefghiwxyz
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+
!"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~¦±¼ÇÓé
+ABCDEFGHIXYZ
+ABCDEFGHIXYZ
+ABCDEFGHIXYZ
+ABCDEFGHIXYZ
+ABCDEFGHIXYZ
ABCDEFGHIXYZ
diff -up vim73/src/version.c.1111 vim73/src/version.c
--- vim73/src/version.c.1111 2013-07-09 15:10:34.038767743 +0200
+++ vim73/src/version.c 2013-07-09 15:11:04.404714295 +0200
@@ -729,6 +729,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 1111,
+/**/
1110,
/**/
1109,