To: vim_dev@googlegroups.com
Subject: Patch 7.3.1073
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------
Patch 7.3.1073
Problem: New regexp engine may run out of states.
Solution: Allocate states dynamically. Also make the test report errors.
Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok,
src/testdir/test95.in
*** ../vim-7.3.1072/src/regexp_nfa.c 2013-05-30 17:49:19.000000000 +0200
--- src/regexp_nfa.c 2013-05-30 18:36:12.000000000 +0200
***************
*** 233,239 ****
/* helper functions used when doing re2post() ... regatom() parsing */
#define EMIT(c) do { \
! if (post_ptr >= post_end) \
return FAIL; \
*post_ptr++ = c; \
} while (0)
--- 233,239 ----
/* helper functions used when doing re2post() ... regatom() parsing */
#define EMIT(c) do { \
! if (post_ptr >= post_end && realloc_post_list() == FAIL) \
return FAIL; \
*post_ptr++ = c; \
} while (0)
***************
*** 256,266 ****
nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
/* Some items blow up in size, such as [A-z]. Add more space for that.
! * TODO: some patterns may still fail. */
nstate_max += 1000;
/* Size for postfix representation of expr. */
! postfix_size = sizeof(*post_start) * nstate_max;
post_start = (int *)lalloc(postfix_size, TRUE);
if (post_start == NULL)
--- 256,266 ----
nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
/* Some items blow up in size, such as [A-z]. Add more space for that.
! * When it is still not enough realloc_post_list() will be used. */
nstate_max += 1000;
/* Size for postfix representation of expr. */
! postfix_size = sizeof(int) * nstate_max;
post_start = (int *)lalloc(postfix_size, TRUE);
if (post_start == NULL)
***************
*** 277,282 ****
--- 277,307 ----
}
/*
+ * Allocate more space for post_start. Called when
+ * running above the estimated number of states.
+ */
+ static int
+ realloc_post_list()
+ {
+ int nstate_max = post_end - post_start;
+ int new_max = nstate_max + 1000;
+ int *new_start;
+ int *old_start;
+
+ new_start = (int *)lalloc(new_max * sizeof(int), TRUE);
+ if (new_start == NULL)
+ return FAIL;
+ mch_memmove(new_start, post_start, nstate_max * sizeof(int));
+ vim_memset(new_start + nstate_max, 0, 1000 * sizeof(int));
+ old_start = post_start;
+ post_start = new_start;
+ post_ptr = new_start + (post_ptr - old_start);
+ post_end = post_start + new_max;
+ vim_free(old_start);
+ return OK;
+ }
+
+ /*
* Search between "start" and "end" and try to recognize a
* character class in expanded form. For example [0-9].
* On success, return the id the character class to be emitted.
***************
*** 1306,1312 ****
int greedy = TRUE; /* Braces are prefixed with '-' ? */
char_u *old_regparse, *new_regparse;
int c2;
! int *old_post_ptr, *my_post_start;
int old_regnpar;
int quest;
--- 1331,1338 ----
int greedy = TRUE; /* Braces are prefixed with '-' ? */
char_u *old_regparse, *new_regparse;
int c2;
! int old_post_pos;
! int my_post_start;
int old_regnpar;
int quest;
***************
*** 1317,1323 ****
* <atom>{m,n} is next */
old_regnpar = regnpar;
/* store current pos in the postfix form, for \{m,n} involving 0s */
! my_post_start = post_ptr;
ret = nfa_regatom();
if (ret == FAIL)
--- 1343,1349 ----
* <atom>{m,n} is next */
old_regnpar = regnpar;
/* store current pos in the postfix form, for \{m,n} involving 0s */
! my_post_start = (int)(post_ptr - post_start);
ret = nfa_regatom();
if (ret == FAIL)
***************
*** 1430,1443 ****
if (maxval == 0)
{
/* Ignore result of previous call to nfa_regatom() */
! post_ptr = my_post_start;
/* NFA_SKIP_CHAR has 0-length and works everywhere */
EMIT(NFA_SKIP_CHAR);
return OK;
}
/* Ignore previous call to nfa_regatom() */
! post_ptr = my_post_start;
/* Save pos after the repeated atom and the \{} */
new_regparse = regparse;
--- 1456,1469 ----
if (maxval == 0)
{
/* Ignore result of previous call to nfa_regatom() */
! post_ptr = post_start + my_post_start;
/* NFA_SKIP_CHAR has 0-length and works everywhere */
EMIT(NFA_SKIP_CHAR);
return OK;
}
/* Ignore previous call to nfa_regatom() */
! post_ptr = post_start + my_post_start;
/* Save pos after the repeated atom and the \{} */
new_regparse = regparse;
***************
*** 1449,1461 ****
curchr = -1;
/* Restore count of parenthesis */
regnpar = old_regnpar;
! old_post_ptr = post_ptr;
if (nfa_regatom() == FAIL)
return FAIL;
/* after "minval" times, atoms are optional */
if (i + 1 > minval)
EMIT(quest);
! if (old_post_ptr != my_post_start)
EMIT(NFA_CONCAT);
}
--- 1475,1487 ----
curchr = -1;
/* Restore count of parenthesis */
regnpar = old_regnpar;
! old_post_pos = (int)(post_ptr - post_start);
if (nfa_regatom() == FAIL)
return FAIL;
/* after "minval" times, atoms are optional */
if (i + 1 > minval)
EMIT(quest);
! if (old_post_pos != my_post_start)
EMIT(NFA_CONCAT);
}
***************
*** 1572,1580 ****
nfa_regbranch()
{
int ch;
! int *old_post_ptr;
! old_post_ptr = post_ptr;
/* First branch, possibly the only one */
if (nfa_regconcat() == FAIL)
--- 1598,1606 ----
nfa_regbranch()
{
int ch;
! int old_post_pos;
! old_post_pos = (int)(post_ptr - post_start);
/* First branch, possibly the only one */
if (nfa_regconcat() == FAIL)
***************
*** 1587,1604 ****
skipchr();
EMIT(NFA_NOPEN);
EMIT(NFA_PREV_ATOM_NO_WIDTH);
! old_post_ptr = post_ptr;
if (nfa_regconcat() == FAIL)
return FAIL;
/* if concat is empty, skip a input char. But do emit a node */
! if (old_post_ptr == post_ptr)
EMIT(NFA_SKIP_CHAR);
EMIT(NFA_CONCAT);
ch = peekchr();
}
/* Even if a branch is empty, emit one node for it */
! if (old_post_ptr == post_ptr)
EMIT(NFA_SKIP_CHAR);
return OK;
--- 1613,1630 ----
skipchr();
EMIT(NFA_NOPEN);
EMIT(NFA_PREV_ATOM_NO_WIDTH);
! old_post_pos = (int)(post_ptr - post_start);
if (nfa_regconcat() == FAIL)
return FAIL;
/* if concat is empty, skip a input char. But do emit a node */
! if (old_post_pos == (int)(post_ptr - post_start))
EMIT(NFA_SKIP_CHAR);
EMIT(NFA_CONCAT);
ch = peekchr();
}
/* Even if a branch is empty, emit one node for it */
! if (old_post_pos == (int)(post_ptr - post_start))
EMIT(NFA_SKIP_CHAR);
return OK;
*** ../vim-7.3.1072/src/testdir/test64.in 2013-05-30 17:05:34.000000000 +0200
--- src/testdir/test64.in 2013-05-30 18:38:49.000000000 +0200
***************
*** 348,353 ****
--- 348,356 ----
:call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"])
:call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"])
:"
+ :"""" Requiring lots of states.
+ :call add(tl, [0, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"])
+ :"
:"
:"""" Run the tests
:"
***************
*** 361,367 ****
: continue
: endif
: let ®expengine = engine
! : let l = matchlist(text, pat)
:" check the match itself
: if len(l) == 0 && len(t) > matchidx
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
--- 364,374 ----
: continue
: endif
: let ®expengine = engine
! : try
! : let l = matchlist(text, pat)
! : catch
! : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
! : endtry
:" check the match itself
: if len(l) == 0 && len(t) > matchidx
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
*** ../vim-7.3.1072/src/testdir/test64.ok 2013-05-30 17:05:34.000000000 +0200
--- src/testdir/test64.ok 2013-05-30 18:42:43.000000000 +0200
***************
*** 740,745 ****
--- 740,747 ----
OK 0 - \_[^a]\+
OK 1 - \_[^a]\+
OK 2 - \_[^a]\+
+ OK 0 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
+ OK 1 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
192.168.0.1
192.168.0.1
192.168.0.1
*** ../vim-7.3.1072/src/testdir/test95.in 2013-05-26 15:14:49.000000000 +0200
--- src/testdir/test95.in 2013-05-30 18:13:59.000000000 +0200
***************
*** 85,91 ****
: continue
: endif
: let ®expengine = engine
! : let l = matchlist(text, pat)
:" check the match itself
: if len(l) == 0 && len(t) > matchidx
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
--- 85,95 ----
: continue
: endif
: let ®expengine = engine
! : try
! : let l = matchlist(text, pat)
! : catch
! : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
! : endtry
:" check the match itself
: if len(l) == 0 && len(t) > matchidx
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
*** ../vim-7.3.1072/src/version.c 2013-05-30 17:49:19.000000000 +0200
--- src/version.c 2013-05-30 18:43:08.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
{ /* Add new patch number below this line */
+ /**/
+ 1073,
/**/
--
How To Keep A Healthy Level Of Insanity:
17. When the money comes out the ATM, scream "I won!, I won! 3rd
time this week!!!!!"
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///