diff --git a/7.3.1133 b/7.3.1133 new file mode 100644 index 0000000..5882b75 --- /dev/null +++ b/7.3.1133 @@ -0,0 +1,358 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.1133 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.1133 +Problem: New regexp engine is a bit slow. +Solution: Skip ahead to a character that must match. Don't try matching a + "^" patter past the start of line. +Files: src/regexp_nfa.c, src/regexp.h + + +*** ../vim-7.3.1132/src/regexp_nfa.c 2013-06-06 18:04:47.000000000 +0200 +--- src/regexp_nfa.c 2013-06-06 18:37:23.000000000 +0200 +*************** +*** 257,262 **** +--- 257,264 ---- + static int nfa_ll_index = 0; + + static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags)); ++ static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth)); ++ static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth)); + static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); + static int nfa_emit_equi_class __ARGS((int c, int neg)); + static int nfa_regatom __ARGS((void)); +*************** +*** 331,336 **** +--- 333,487 ---- + } + + /* ++ * Figure out if the NFA state list starts with an anchor, must match at start ++ * of the line. ++ */ ++ static int ++ nfa_get_reganch(start, depth) ++ nfa_state_T *start; ++ int depth; ++ { ++ nfa_state_T *p = start; ++ ++ if (depth > 4) ++ return 0; ++ ++ while (p != NULL) ++ { ++ switch (p->c) ++ { ++ case NFA_BOL: ++ case NFA_BOF: ++ return 1; /* yes! */ ++ ++ case NFA_ZSTART: ++ case NFA_ZEND: ++ case NFA_CURSOR: ++ case NFA_VISUAL: ++ ++ case NFA_MOPEN: ++ case NFA_MOPEN1: ++ case NFA_MOPEN2: ++ case NFA_MOPEN3: ++ case NFA_MOPEN4: ++ case NFA_MOPEN5: ++ case NFA_MOPEN6: ++ case NFA_MOPEN7: ++ case NFA_MOPEN8: ++ case NFA_MOPEN9: ++ case NFA_NOPEN: ++ #ifdef FEAT_SYN_HL ++ case NFA_ZOPEN: ++ case NFA_ZOPEN1: ++ case NFA_ZOPEN2: ++ case NFA_ZOPEN3: ++ case NFA_ZOPEN4: ++ case NFA_ZOPEN5: ++ case NFA_ZOPEN6: ++ case NFA_ZOPEN7: ++ case NFA_ZOPEN8: ++ case NFA_ZOPEN9: ++ #endif ++ p = p->out; ++ break; ++ ++ case NFA_SPLIT: ++ return nfa_get_reganch(p->out, depth + 1) ++ && nfa_get_reganch(p->out1, depth + 1); ++ ++ default: ++ return 0; /* noooo */ ++ } ++ } ++ return 0; ++ } ++ ++ /* ++ * Figure out if the NFA state list starts with a character which must match ++ * at start of the match. ++ */ ++ static int ++ nfa_get_regstart(start, depth) ++ nfa_state_T *start; ++ int depth; ++ { ++ nfa_state_T *p = start; ++ ++ if (depth > 4) ++ return 0; ++ ++ while (p != NULL) ++ { ++ switch (p->c) ++ { ++ /* all kinds of zero-width matches */ ++ case NFA_BOL: ++ case NFA_BOF: ++ case NFA_BOW: ++ case NFA_EOW: ++ case NFA_ZSTART: ++ case NFA_ZEND: ++ case NFA_CURSOR: ++ case NFA_VISUAL: ++ case NFA_LNUM: ++ case NFA_LNUM_GT: ++ case NFA_LNUM_LT: ++ case NFA_COL: ++ case NFA_COL_GT: ++ case NFA_COL_LT: ++ case NFA_VCOL: ++ case NFA_VCOL_GT: ++ case NFA_VCOL_LT: ++ case NFA_MARK: ++ case NFA_MARK_GT: ++ case NFA_MARK_LT: ++ ++ case NFA_MOPEN: ++ case NFA_MOPEN1: ++ case NFA_MOPEN2: ++ case NFA_MOPEN3: ++ case NFA_MOPEN4: ++ case NFA_MOPEN5: ++ case NFA_MOPEN6: ++ case NFA_MOPEN7: ++ case NFA_MOPEN8: ++ case NFA_MOPEN9: ++ case NFA_NOPEN: ++ #ifdef FEAT_SYN_HL ++ case NFA_ZOPEN: ++ case NFA_ZOPEN1: ++ case NFA_ZOPEN2: ++ case NFA_ZOPEN3: ++ case NFA_ZOPEN4: ++ case NFA_ZOPEN5: ++ case NFA_ZOPEN6: ++ case NFA_ZOPEN7: ++ case NFA_ZOPEN8: ++ case NFA_ZOPEN9: ++ #endif ++ p = p->out; ++ break; ++ ++ case NFA_SPLIT: ++ { ++ int c1 = nfa_get_regstart(p->out, depth + 1); ++ int c2 = nfa_get_regstart(p->out1, depth + 1); ++ ++ if (c1 == c2) ++ return c1; /* yes! */ ++ return 0; ++ } ++ ++ default: ++ if (p->c > 0 && !p->negated) ++ return p->c; /* yes! */ ++ return 0; ++ } ++ } ++ return 0; ++ } ++ ++ /* + * Allocate more space for post_start. Called when + * running above the estimated number of states. + */ +*************** +*** 2121,2126 **** +--- 2272,2281 ---- + if (debugf != NULL) + { + nfa_print_state(debugf, prog->start); ++ ++ fprintf(debugf, "reganch: %d\n", prog->reganch); ++ fprintf(debugf, "regstart: %d\n", prog->regstart); ++ + fclose(debugf); + } + } +*************** +*** 4248,4253 **** +--- 4403,4412 ---- + t = &neglist->t[0]; + neglist->n--; + listidx--; ++ #ifdef ENABLE_LOG ++ fprintf(log_fd, " using neglist entry, %d remaining\n", ++ neglist->n); ++ #endif + } + else + t = &thislist->t[listidx]; +*************** +*** 4688,4694 **** + + case NFA_END_NEG_RANGE: + /* This follows a series of negated nodes, like: +! * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */ + if (curc > 0) + { + ll = nextlist; +--- 4847,4853 ---- + + case NFA_END_NEG_RANGE: + /* This follows a series of negated nodes, like: +! * NOT CHAR(x), NOT CHAR(y), etc. */ + if (curc > 0) + { + ll = nextlist; +*************** +*** 5304,5316 **** + * Returns 0 for failure, number of lines contained in the match otherwise. + */ + static long +! nfa_regexec_both(line, col) + char_u *line; +! colnr_T col; /* column to start looking for match */ + { + nfa_regprog_T *prog; + long retval = 0L; + int i; + + if (REG_MULTI) + { +--- 5463,5476 ---- + * Returns 0 for failure, number of lines contained in the match otherwise. + */ + static long +! nfa_regexec_both(line, startcol) + char_u *line; +! colnr_T startcol; /* column to start looking for match */ + { + nfa_regprog_T *prog; + long retval = 0L; + int i; ++ colnr_T col = startcol; + + if (REG_MULTI) + { +*************** +*** 5333,5342 **** + goto theend; + } + +- /* If the start column is past the maximum column: no need to try. */ +- if (ireg_maxcol > 0 && col >= ireg_maxcol) +- goto theend; +- + /* If pattern contains "\c" or "\C": overrule value of ireg_ic */ + if (prog->regflags & RF_ICASE) + ireg_ic = TRUE; +--- 5493,5498 ---- +*************** +*** 5361,5366 **** +--- 5517,5548 ---- + nfa_regengine.expr = prog->pattern; + #endif + ++ if (prog->reganch && col > 0) ++ return 0L; ++ ++ if (prog->regstart != NUL) ++ { ++ char_u *s; ++ ++ /* Skip until the char we know it must start with. ++ * Used often, do some work to avoid call overhead. */ ++ if (!ireg_ic ++ #ifdef FEAT_MBYTE ++ && !has_mbyte ++ #endif ++ ) ++ s = vim_strbyte(regline + col, prog->regstart); ++ else ++ s = cstrchr(regline + col, prog->regstart); ++ if (s == NULL) ++ return 0L; ++ col = (int)(s - regline); ++ } ++ ++ /* If the start column is past the maximum column: no need to try. */ ++ if (ireg_maxcol > 0 && col >= ireg_maxcol) ++ goto theend; ++ + nstate = prog->nstate; + for (i = 0; i < nstate; ++i) + { +*************** +*** 5459,5464 **** +--- 5641,5650 ---- + prog->has_zend = nfa_has_zend; + prog->has_backref = nfa_has_backref; + prog->nsubexp = regnpar; ++ ++ prog->reganch = nfa_get_reganch(prog->start, 0); ++ prog->regstart = nfa_get_regstart(prog->start, 0); ++ + #ifdef ENABLE_LOG + nfa_postfix_dump(expr, OK); + nfa_dump(prog); +*** ../vim-7.3.1132/src/regexp.h 2013-06-03 12:17:00.000000000 +0200 +--- src/regexp.h 2013-06-06 17:19:23.000000000 +0200 +*************** +*** 87,92 **** +--- 87,96 ---- + unsigned regflags; + + nfa_state_T *start; /* points into state[] */ ++ ++ int reganch; /* pattern starts with ^ */ ++ int regstart; /* char at start of pattern */ ++ + int has_zend; /* pattern contains \ze */ + int has_backref; /* pattern contains \1 .. \9 */ + #ifdef FEAT_SYN_HL +*** ../vim-7.3.1132/src/version.c 2013-06-06 18:04:47.000000000 +0200 +--- src/version.c 2013-06-06 18:43:55.000000000 +0200 +*************** +*** 730,731 **** +--- 730,733 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 1133, + /**/ + +-- +From "know your smileys": + % Bike accident. A bit far-fetched, I suppose; although... + o _ _ _ + _o /\_ _ \\o (_)\__/o (_) + _< \_ _>(_) (_)/<_ \_| \ _|/' \/ + (_)>(_) (_) (_) (_) (_)' _\o_ + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ an exciting new programming language -- http://www.Zimbu.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///