diff --git a/7.3.1150 b/7.3.1150 new file mode 100644 index 0000000..210b083 --- /dev/null +++ b/7.3.1150 @@ -0,0 +1,409 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.1150 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.1150 +Problem: New regexpengine: Slow when a look-behind match does not have a + width specified. +Solution: Try to compute the maximum width. +Files: src/regexp_nfa.c + + +*** ../vim-7.3.1149/src/regexp_nfa.c 2013-06-08 18:19:39.000000000 +0200 +--- src/regexp_nfa.c 2013-06-08 22:29:25.000000000 +0200 +*************** +*** 38,56 **** + NFA_START_COLL, /* [abc] start */ + NFA_END_COLL, /* [abc] end */ + NFA_START_NEG_COLL, /* [^abc] start */ +! NFA_END_NEG_COLL, /* [^abc] end (only used in postfix) */ +! NFA_RANGE, /* range of the two previous items (only +! * used in postfix) */ + NFA_RANGE_MIN, /* low end of a range */ + NFA_RANGE_MAX, /* high end of a range */ + +! NFA_CONCAT, /* concatenate two previous items (only +! * used in postfix) */ +! NFA_OR, +! NFA_STAR, /* greedy * */ +! NFA_STAR_NONGREEDY, /* non-greedy * */ +! NFA_QUEST, /* greedy \? */ +! NFA_QUEST_NONGREEDY, /* non-greedy \? */ + + NFA_BOL, /* ^ Begin line */ + NFA_EOL, /* $ End line */ +--- 38,56 ---- + NFA_START_COLL, /* [abc] start */ + NFA_END_COLL, /* [abc] end */ + NFA_START_NEG_COLL, /* [^abc] start */ +! NFA_END_NEG_COLL, /* [^abc] end (postfix only) */ +! NFA_RANGE, /* range of the two previous items +! * (postfix only) */ + NFA_RANGE_MIN, /* low end of a range */ + NFA_RANGE_MAX, /* high end of a range */ + +! NFA_CONCAT, /* concatenate two previous items (postfix +! * only) */ +! NFA_OR, /* \| (postfix only) */ +! NFA_STAR, /* greedy * (posfix only) */ +! NFA_STAR_NONGREEDY, /* non-greedy * (postfix only) */ +! NFA_QUEST, /* greedy \? (postfix only) */ +! NFA_QUEST_NONGREEDY, /* non-greedy \? (postfix only) */ + + NFA_BOL, /* ^ Begin line */ + NFA_EOL, /* $ End line */ +*************** +*** 153,160 **** + + /* NFA_FIRST_NL */ + NFA_ANY, /* Match any one character. */ +- NFA_ANYOF, /* Match any character in this string. */ +- NFA_ANYBUT, /* Match any character not in this string. */ + NFA_IDENT, /* Match identifier char */ + NFA_SIDENT, /* Match identifier char but no digit */ + NFA_KWORD, /* Match keyword char */ +--- 153,158 ---- +*************** +*** 496,503 **** + + /* + * Figure out if the NFA state list contains just literal text and nothing +! * else. If so return a string with what must match after regstart. +! * Otherwise return NULL. + */ + static char_u * + nfa_get_match_text(start) +--- 494,501 ---- + + /* + * Figure out if the NFA state list contains just literal text and nothing +! * else. If so return a string in allocated memory with what must match after +! * regstart. Otherwise return NULL. + */ + static char_u * + nfa_get_match_text(start) +*************** +*** 2578,2583 **** +--- 2576,2800 ---- + } + + /* ++ * Estimate the maximum byte length of anything matching "state". ++ * When unknown or unlimited return -1. ++ */ ++ static int ++ nfa_max_width(startstate, depth) ++ nfa_state_T *startstate; ++ int depth; ++ { ++ int l, r; ++ nfa_state_T *state = startstate; ++ int len = 0; ++ ++ /* detect looping in a NFA_SPLIT */ ++ if (depth > 4) ++ return -1; ++ ++ for (;;) ++ { ++ switch (state->c) ++ { ++ case NFA_END_INVISIBLE: ++ case NFA_END_INVISIBLE_NEG: ++ /* the end, return what we have */ ++ return len; ++ ++ case NFA_SPLIT: ++ /* two alternatives, use the maximum */ ++ l = nfa_max_width(state->out, depth + 1); ++ r = nfa_max_width(state->out1, depth + 1); ++ if (l < 0 || r < 0) ++ return -1; ++ return len + (l > r ? l : r); ++ ++ case NFA_ANY: ++ case NFA_START_COLL: ++ case NFA_START_NEG_COLL: ++ /* matches some character, including composing chars */ ++ #ifdef FEAT_MBYTE ++ if (enc_utf8) ++ len += MB_MAXBYTES; ++ else if (has_mbyte) ++ len += 2; ++ else ++ #endif ++ ++len; ++ if (state->c != NFA_ANY) ++ { ++ /* skip over the characters */ ++ state = state->out1->out; ++ continue; ++ } ++ break; ++ ++ case NFA_DIGIT: ++ case NFA_WHITE: ++ case NFA_HEX: ++ case NFA_OCTAL: ++ /* ascii */ ++ ++len; ++ break; ++ ++ case NFA_IDENT: ++ case NFA_SIDENT: ++ case NFA_KWORD: ++ case NFA_SKWORD: ++ case NFA_FNAME: ++ case NFA_SFNAME: ++ case NFA_PRINT: ++ case NFA_SPRINT: ++ case NFA_NWHITE: ++ case NFA_NDIGIT: ++ case NFA_NHEX: ++ case NFA_NOCTAL: ++ case NFA_WORD: ++ case NFA_NWORD: ++ case NFA_HEAD: ++ case NFA_NHEAD: ++ case NFA_ALPHA: ++ case NFA_NALPHA: ++ case NFA_LOWER: ++ case NFA_NLOWER: ++ case NFA_UPPER: ++ case NFA_NUPPER: ++ /* possibly non-ascii */ ++ #ifdef FEAT_MBYTE ++ if (has_mbyte) ++ len += 3; ++ else ++ #endif ++ ++len; ++ break; ++ ++ case NFA_START_INVISIBLE: ++ case NFA_START_INVISIBLE_NEG: ++ case NFA_START_INVISIBLE_BEFORE: ++ case NFA_START_INVISIBLE_BEFORE_NEG: ++ /* zero-width, out1 points to the END state */ ++ state = state->out1->out; ++ continue; ++ ++ case NFA_BACKREF1: ++ case NFA_BACKREF2: ++ case NFA_BACKREF3: ++ case NFA_BACKREF4: ++ case NFA_BACKREF5: ++ case NFA_BACKREF6: ++ case NFA_BACKREF7: ++ case NFA_BACKREF8: ++ case NFA_BACKREF9: ++ #ifdef FEAT_SYN_HL ++ case NFA_ZREF1: ++ case NFA_ZREF2: ++ case NFA_ZREF3: ++ case NFA_ZREF4: ++ case NFA_ZREF5: ++ case NFA_ZREF6: ++ case NFA_ZREF7: ++ case NFA_ZREF8: ++ case NFA_ZREF9: ++ #endif ++ case NFA_NEWL: ++ case NFA_SKIP: ++ /* unknown width */ ++ return -1; ++ ++ case NFA_BOL: ++ case NFA_EOL: ++ case NFA_BOF: ++ case NFA_EOF: ++ case NFA_BOW: ++ case NFA_EOW: ++ case NFA_MOPEN: ++ case NFA_MOPEN1: ++ case NFA_MOPEN2: ++ case NFA_MOPEN3: ++ case NFA_MOPEN4: ++ case NFA_MOPEN5: ++ case NFA_MOPEN6: ++ case NFA_MOPEN7: ++ case NFA_MOPEN8: ++ case NFA_MOPEN9: ++ #ifdef FEAT_SYN_HL ++ case NFA_ZOPEN: ++ case NFA_ZOPEN1: ++ case NFA_ZOPEN2: ++ case NFA_ZOPEN3: ++ case NFA_ZOPEN4: ++ case NFA_ZOPEN5: ++ case NFA_ZOPEN6: ++ case NFA_ZOPEN7: ++ case NFA_ZOPEN8: ++ case NFA_ZOPEN9: ++ case NFA_ZCLOSE: ++ case NFA_ZCLOSE1: ++ case NFA_ZCLOSE2: ++ case NFA_ZCLOSE3: ++ case NFA_ZCLOSE4: ++ case NFA_ZCLOSE5: ++ case NFA_ZCLOSE6: ++ case NFA_ZCLOSE7: ++ case NFA_ZCLOSE8: ++ case NFA_ZCLOSE9: ++ #endif ++ case NFA_MCLOSE: ++ case NFA_MCLOSE1: ++ case NFA_MCLOSE2: ++ case NFA_MCLOSE3: ++ case NFA_MCLOSE4: ++ case NFA_MCLOSE5: ++ case NFA_MCLOSE6: ++ case NFA_MCLOSE7: ++ case NFA_MCLOSE8: ++ case NFA_MCLOSE9: ++ case NFA_NOPEN: ++ case NFA_NCLOSE: ++ ++ case NFA_LNUM_GT: ++ case NFA_LNUM_LT: ++ case NFA_COL_GT: ++ case NFA_COL_LT: ++ case NFA_VCOL_GT: ++ case NFA_VCOL_LT: ++ case NFA_MARK_GT: ++ case NFA_MARK_LT: ++ case NFA_VISUAL: ++ case NFA_LNUM: ++ case NFA_CURSOR: ++ case NFA_COL: ++ case NFA_VCOL: ++ case NFA_MARK: ++ ++ case NFA_ZSTART: ++ case NFA_ZEND: ++ case NFA_OPT_CHARS: ++ case NFA_SKIP_CHAR: ++ case NFA_START_PATTERN: ++ case NFA_END_PATTERN: ++ case NFA_COMPOSING: ++ case NFA_END_COMPOSING: ++ /* zero-width */ ++ break; ++ ++ default: ++ if (state->c < 0) ++ /* don't know what this is */ ++ return -1; ++ /* normal character */ ++ len += MB_CHAR2LEN(state->c); ++ break; ++ } ++ ++ /* normal way to continue */ ++ state = state->out; ++ } ++ ++ /* unrecognized */ ++ return -1; ++ } ++ /* + * Convert a postfix form into its equivalent NFA. + * Return the NFA start state on success, NULL otherwise. + */ +*************** +*** 2856,2863 **** + s = alloc_state(start_state, e.start, s1); + if (s == NULL) + goto theend; +- if (before) +- s->val = n; /* store the count */ + if (pattern) + { + /* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */ +--- 3073,3078 ---- +*************** +*** 2871,2876 **** +--- 3086,3099 ---- + { + patch(e.out, s1); + PUSH(frag(s, list1(&s1->out))); ++ if (before) ++ { ++ if (n <= 0) ++ /* See if we can guess the maximum width, it avoids a ++ * lot of pointless tries. */ ++ n = nfa_max_width(e.start, 0); ++ s->val = n; /* store the count */ ++ } + } + break; + } +*************** +*** 4088,4096 **** + + /* Go back the specified number of bytes, or as far as the + * start of the previous line, to try matching "\@<=" or +! * not matching "\@val <= 0) + { + if (REG_MULTI) +--- 4311,4318 ---- + + /* Go back the specified number of bytes, or as far as the + * start of the previous line, to try matching "\@<=" or +! * not matching "\@val <= 0) + { + if (REG_MULTI) +*************** +*** 4386,4392 **** + + /* + * Check for a match with match_text. +! * Called after skip_to_start() has find regstart. + * Returns zero for no match, 1 for a match. + */ + static long +--- 4608,4614 ---- + + /* + * Check for a match with match_text. +! * Called after skip_to_start() has found regstart. + * Returns zero for no match, 1 for a match. + */ + static long +*************** +*** 4736,4742 **** + #ifdef FEAT_SYN_HL + || (cout >= NFA_ZCLOSE && cout <= NFA_ZCLOSE9) + #endif +- || cout == NFA_NCLOSE + || t->pim != NULL + || (t->state->c != NFA_START_INVISIBLE_BEFORE + && t->state->c != NFA_START_INVISIBLE_BEFORE_NEG +--- 4958,4963 ---- +*** ../vim-7.3.1149/src/version.c 2013-06-08 18:19:40.000000000 +0200 +--- src/version.c 2013-06-08 22:15:27.000000000 +0200 +*************** +*** 730,731 **** +--- 730,733 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 1150, + /**/ + +-- +Amnesia is one of my favorite words, but I forgot what it means. + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ an exciting new programming language -- http://www.Zimbu.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///