diff --git a/7.3.1073 b/7.3.1073 new file mode 100644 index 0000000..30ffa94 --- /dev/null +++ b/7.3.1073 @@ -0,0 +1,339 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.1073 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.1073 +Problem: New regexp engine may run out of states. +Solution: Allocate states dynamically. Also make the test report errors. +Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok, + src/testdir/test95.in + + +*** ../vim-7.3.1072/src/regexp_nfa.c 2013-05-30 17:49:19.000000000 +0200 +--- src/regexp_nfa.c 2013-05-30 18:36:12.000000000 +0200 +*************** +*** 233,239 **** + + /* helper functions used when doing re2post() ... regatom() parsing */ + #define EMIT(c) do { \ +! if (post_ptr >= post_end) \ + return FAIL; \ + *post_ptr++ = c; \ + } while (0) +--- 233,239 ---- + + /* helper functions used when doing re2post() ... regatom() parsing */ + #define EMIT(c) do { \ +! if (post_ptr >= post_end && realloc_post_list() == FAIL) \ + return FAIL; \ + *post_ptr++ = c; \ + } while (0) +*************** +*** 256,266 **** + nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER; + + /* Some items blow up in size, such as [A-z]. Add more space for that. +! * TODO: some patterns may still fail. */ + nstate_max += 1000; + + /* Size for postfix representation of expr. */ +! postfix_size = sizeof(*post_start) * nstate_max; + + post_start = (int *)lalloc(postfix_size, TRUE); + if (post_start == NULL) +--- 256,266 ---- + nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER; + + /* Some items blow up in size, such as [A-z]. Add more space for that. +! * When it is still not enough realloc_post_list() will be used. */ + nstate_max += 1000; + + /* Size for postfix representation of expr. */ +! postfix_size = sizeof(int) * nstate_max; + + post_start = (int *)lalloc(postfix_size, TRUE); + if (post_start == NULL) +*************** +*** 277,282 **** +--- 277,307 ---- + } + + /* ++ * Allocate more space for post_start. Called when ++ * running above the estimated number of states. ++ */ ++ static int ++ realloc_post_list() ++ { ++ int nstate_max = post_end - post_start; ++ int new_max = nstate_max + 1000; ++ int *new_start; ++ int *old_start; ++ ++ new_start = (int *)lalloc(new_max * sizeof(int), TRUE); ++ if (new_start == NULL) ++ return FAIL; ++ mch_memmove(new_start, post_start, nstate_max * sizeof(int)); ++ vim_memset(new_start + nstate_max, 0, 1000 * sizeof(int)); ++ old_start = post_start; ++ post_start = new_start; ++ post_ptr = new_start + (post_ptr - old_start); ++ post_end = post_start + new_max; ++ vim_free(old_start); ++ return OK; ++ } ++ ++ /* + * Search between "start" and "end" and try to recognize a + * character class in expanded form. For example [0-9]. + * On success, return the id the character class to be emitted. +*************** +*** 1306,1312 **** + int greedy = TRUE; /* Braces are prefixed with '-' ? */ + char_u *old_regparse, *new_regparse; + int c2; +! int *old_post_ptr, *my_post_start; + int old_regnpar; + int quest; + +--- 1331,1338 ---- + int greedy = TRUE; /* Braces are prefixed with '-' ? */ + char_u *old_regparse, *new_regparse; + int c2; +! int old_post_pos; +! int my_post_start; + int old_regnpar; + int quest; + +*************** +*** 1317,1323 **** + * {m,n} is next */ + old_regnpar = regnpar; + /* store current pos in the postfix form, for \{m,n} involving 0s */ +! my_post_start = post_ptr; + + ret = nfa_regatom(); + if (ret == FAIL) +--- 1343,1349 ---- + * {m,n} is next */ + old_regnpar = regnpar; + /* store current pos in the postfix form, for \{m,n} involving 0s */ +! my_post_start = (int)(post_ptr - post_start); + + ret = nfa_regatom(); + if (ret == FAIL) +*************** +*** 1430,1443 **** + if (maxval == 0) + { + /* Ignore result of previous call to nfa_regatom() */ +! post_ptr = my_post_start; + /* NFA_SKIP_CHAR has 0-length and works everywhere */ + EMIT(NFA_SKIP_CHAR); + return OK; + } + + /* Ignore previous call to nfa_regatom() */ +! post_ptr = my_post_start; + /* Save pos after the repeated atom and the \{} */ + new_regparse = regparse; + +--- 1456,1469 ---- + if (maxval == 0) + { + /* Ignore result of previous call to nfa_regatom() */ +! post_ptr = post_start + my_post_start; + /* NFA_SKIP_CHAR has 0-length and works everywhere */ + EMIT(NFA_SKIP_CHAR); + return OK; + } + + /* Ignore previous call to nfa_regatom() */ +! post_ptr = post_start + my_post_start; + /* Save pos after the repeated atom and the \{} */ + new_regparse = regparse; + +*************** +*** 1449,1461 **** + curchr = -1; + /* Restore count of parenthesis */ + regnpar = old_regnpar; +! old_post_ptr = post_ptr; + if (nfa_regatom() == FAIL) + return FAIL; + /* after "minval" times, atoms are optional */ + if (i + 1 > minval) + EMIT(quest); +! if (old_post_ptr != my_post_start) + EMIT(NFA_CONCAT); + } + +--- 1475,1487 ---- + curchr = -1; + /* Restore count of parenthesis */ + regnpar = old_regnpar; +! old_post_pos = (int)(post_ptr - post_start); + if (nfa_regatom() == FAIL) + return FAIL; + /* after "minval" times, atoms are optional */ + if (i + 1 > minval) + EMIT(quest); +! if (old_post_pos != my_post_start) + EMIT(NFA_CONCAT); + } + +*************** +*** 1572,1580 **** + nfa_regbranch() + { + int ch; +! int *old_post_ptr; + +! old_post_ptr = post_ptr; + + /* First branch, possibly the only one */ + if (nfa_regconcat() == FAIL) +--- 1598,1606 ---- + nfa_regbranch() + { + int ch; +! int old_post_pos; + +! old_post_pos = (int)(post_ptr - post_start); + + /* First branch, possibly the only one */ + if (nfa_regconcat() == FAIL) +*************** +*** 1587,1604 **** + skipchr(); + EMIT(NFA_NOPEN); + EMIT(NFA_PREV_ATOM_NO_WIDTH); +! old_post_ptr = post_ptr; + if (nfa_regconcat() == FAIL) + return FAIL; + /* if concat is empty, skip a input char. But do emit a node */ +! if (old_post_ptr == post_ptr) + EMIT(NFA_SKIP_CHAR); + EMIT(NFA_CONCAT); + ch = peekchr(); + } + + /* Even if a branch is empty, emit one node for it */ +! if (old_post_ptr == post_ptr) + EMIT(NFA_SKIP_CHAR); + + return OK; +--- 1613,1630 ---- + skipchr(); + EMIT(NFA_NOPEN); + EMIT(NFA_PREV_ATOM_NO_WIDTH); +! old_post_pos = (int)(post_ptr - post_start); + if (nfa_regconcat() == FAIL) + return FAIL; + /* if concat is empty, skip a input char. But do emit a node */ +! if (old_post_pos == (int)(post_ptr - post_start)) + EMIT(NFA_SKIP_CHAR); + EMIT(NFA_CONCAT); + ch = peekchr(); + } + + /* Even if a branch is empty, emit one node for it */ +! if (old_post_pos == (int)(post_ptr - post_start)) + EMIT(NFA_SKIP_CHAR); + + return OK; +*** ../vim-7.3.1072/src/testdir/test64.in 2013-05-30 17:05:34.000000000 +0200 +--- src/testdir/test64.in 2013-05-30 18:38:49.000000000 +0200 +*************** +*** 348,353 **** +--- 348,356 ---- + :call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"]) + :call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"]) + :" ++ :"""" Requiring lots of states. ++ :call add(tl, [0, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"]) ++ :" + :" + :"""" Run the tests + :" +*************** +*** 361,367 **** + : continue + : endif + : let ®expengine = engine +! : let l = matchlist(text, pat) + :" check the match itself + : if len(l) == 0 && len(t) > matchidx + : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' +--- 364,374 ---- + : continue + : endif + : let ®expengine = engine +! : try +! : let l = matchlist(text, pat) +! : catch +! : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"' +! : endtry + :" check the match itself + : if len(l) == 0 && len(t) > matchidx + : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' +*** ../vim-7.3.1072/src/testdir/test64.ok 2013-05-30 17:05:34.000000000 +0200 +--- src/testdir/test64.ok 2013-05-30 18:42:43.000000000 +0200 +*************** +*** 740,745 **** +--- 740,747 ---- + OK 0 - \_[^a]\+ + OK 1 - \_[^a]\+ + OK 2 - \_[^a]\+ ++ OK 0 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12} ++ OK 1 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12} + 192.168.0.1 + 192.168.0.1 + 192.168.0.1 +*** ../vim-7.3.1072/src/testdir/test95.in 2013-05-26 15:14:49.000000000 +0200 +--- src/testdir/test95.in 2013-05-30 18:13:59.000000000 +0200 +*************** +*** 85,91 **** + : continue + : endif + : let ®expengine = engine +! : let l = matchlist(text, pat) + :" check the match itself + : if len(l) == 0 && len(t) > matchidx + : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' +--- 85,95 ---- + : continue + : endif + : let ®expengine = engine +! : try +! : let l = matchlist(text, pat) +! : catch +! : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"' +! : endtry + :" check the match itself + : if len(l) == 0 && len(t) > matchidx + : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' +*** ../vim-7.3.1072/src/version.c 2013-05-30 17:49:19.000000000 +0200 +--- src/version.c 2013-05-30 18:43:08.000000000 +0200 +*************** +*** 730,731 **** +--- 730,733 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 1073, + /**/ + +-- +How To Keep A Healthy Level Of Insanity: +17. When the money comes out the ATM, scream "I won!, I won! 3rd + time this week!!!!!" + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ an exciting new programming language -- http://www.Zimbu.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///