diff --git a/7.3.1037 b/7.3.1037 new file mode 100644 index 0000000..994134d --- /dev/null +++ b/7.3.1037 @@ -0,0 +1,408 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.1037 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.1037 +Problem: Look-behind matching is very slow on long lines. +Solution: Add a byte limit to how far back an attempt is made. +Files: src/regexp.c, src/regexp_nfa.c, src/testdir/test64.in, + src/testdir/test64.ok + + +*** ../vim-7.3.1036/src/regexp.c 2013-05-21 21:37:01.000000000 +0200 +--- src/regexp.c 2013-05-29 14:34:51.000000000 +0200 +*************** +*** 701,706 **** +--- 701,707 ---- + # define CASEMBC(x) + #endif + static void reginsert __ARGS((int, char_u *)); ++ static void reginsert_nr __ARGS((int op, long val, char_u *opnd)); + static void reginsert_limits __ARGS((int, long, long, char_u *)); + static char_u *re_put_long __ARGS((char_u *pr, long_u val)); + static int read_limits __ARGS((long *, long *)); +*************** +*** 1781,1787 **** +--- 1782,1790 ---- + case Magic('@'): + { + int lop = END; ++ int nr; + ++ nr = getdecchrs(); + switch (no_Magic(getchr())) + { + case '=': lop = MATCH; break; /* \@= */ +*************** +*** 1803,1809 **** + *flagp |= HASLOOKBH; + } + regtail(ret, regnode(END)); /* operand ends */ +! reginsert(lop, ret); + break; + } + +--- 1806,1819 ---- + *flagp |= HASLOOKBH; + } + regtail(ret, regnode(END)); /* operand ends */ +! if (lop == BEHIND || lop == NOBEHIND) +! { +! if (nr < 0) +! nr = 0; /* no limit is same as zero limit */ +! reginsert_nr(lop, nr, ret); +! } +! else +! reginsert(lop, ret); + break; + } + +*************** +*** 2780,2785 **** +--- 2790,2827 ---- + + /* + * Insert an operator in front of already-emitted operand. ++ * Add a number to the operator. ++ */ ++ static void ++ reginsert_nr(op, val, opnd) ++ int op; ++ long val; ++ char_u *opnd; ++ { ++ char_u *src; ++ char_u *dst; ++ char_u *place; ++ ++ if (regcode == JUST_CALC_SIZE) ++ { ++ regsize += 7; ++ return; ++ } ++ src = regcode; ++ regcode += 7; ++ dst = regcode; ++ while (src > opnd) ++ *--dst = *--src; ++ ++ place = opnd; /* Op node, where operand used to be. */ ++ *place++ = op; ++ *place++ = NUL; ++ *place++ = NUL; ++ place = re_put_long(place, (long_u)val); ++ } ++ ++ /* ++ * Insert an operator in front of already-emitted operand. + * The operator has the given limit values as operands. Also set next pointer. + * + * Means relocating the operand. +*************** +*** 3182,3188 **** + } + + /* +! * get and return the value of the decimal string immediately after the + * current position. Return -1 for invalid. Consumes all digits. + */ + static int +--- 3224,3230 ---- + } + + /* +! * Get and return the value of the decimal string immediately after the + * current position. Return -1 for invalid. Consumes all digits. + */ + static int +*************** +*** 3200,3205 **** +--- 3242,3248 ---- + nr *= 10; + nr += c - '0'; + ++regparse; ++ curchr = -1; /* no longer valid */ + } + + if (i == 0) +*************** +*** 5432,5438 **** + /* save the position after the found match for next */ + reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos); + +! /* start looking for a match with operand at the current + * position. Go back one character until we find the + * result, hitting the start of the line or the previous + * line (for multi-line matching). +--- 5475,5481 ---- + /* save the position after the found match for next */ + reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos); + +! /* Start looking for a match with operand at the current + * position. Go back one character until we find the + * result, hitting the start of the line or the previous + * line (for multi-line matching). +*************** +*** 5444,5450 **** + rp->rs_state = RS_BEHIND2; + + reg_restore(&rp->rs_un.regsave, &backpos); +! scan = OPERAND(rp->rs_scan); + } + break; + +--- 5487,5493 ---- + rp->rs_state = RS_BEHIND2; + + reg_restore(&rp->rs_un.regsave, &backpos); +! scan = OPERAND(rp->rs_scan) + 4; + } + break; + +*************** +*** 5472,5480 **** +--- 5515,5526 ---- + } + else + { ++ long limit; ++ + /* No match or a match that doesn't end where we want it: Go + * back one character. May go to previous line once. */ + no = OK; ++ limit = OPERAND_MIN(rp->rs_scan); + if (REG_MULTI) + { + if (rp->rs_un.regsave.rs_u.pos.col == 0) +*************** +*** 5493,5519 **** + } + } + else + #ifdef FEAT_MBYTE +! if (has_mbyte) +! rp->rs_un.regsave.rs_u.pos.col -= +! (*mb_head_off)(regline, regline + + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; +! else + #endif +! --rp->rs_un.regsave.rs_u.pos.col; + } + else + { + if (rp->rs_un.regsave.rs_u.ptr == regline) + no = FAIL; + else +! --rp->rs_un.regsave.rs_u.ptr; + } + if (no == OK) + { + /* Advanced, prepare for finding match again. */ + reg_restore(&rp->rs_un.regsave, &backpos); +! scan = OPERAND(rp->rs_scan); + if (status == RA_MATCH) + { + /* We did match, so subexpr may have been changed, +--- 5539,5579 ---- + } + } + else ++ { + #ifdef FEAT_MBYTE +! if (has_mbyte) +! rp->rs_un.regsave.rs_u.pos.col -= +! (*mb_head_off)(regline, regline + + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; +! else + #endif +! --rp->rs_un.regsave.rs_u.pos.col; +! if (limit > 0 +! && ((rp->rs_un.regsave.rs_u.pos.lnum +! < behind_pos.rs_u.pos.lnum +! ? (colnr_T)STRLEN(regline) +! : behind_pos.rs_u.pos.col) +! - rp->rs_un.regsave.rs_u.pos.col > limit)) +! no = FAIL; +! } + } + else + { + if (rp->rs_un.regsave.rs_u.ptr == regline) + no = FAIL; + else +! { +! mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr); +! if (limit > 0 && (long)(behind_pos.rs_u.ptr +! - rp->rs_un.regsave.rs_u.ptr) > limit) +! no = FAIL; +! } + } + if (no == OK) + { + /* Advanced, prepare for finding match again. */ + reg_restore(&rp->rs_un.regsave, &backpos); +! scan = OPERAND(rp->rs_scan) + 4; + if (status == RA_MATCH) + { + /* We did match, so subexpr may have been changed, +*************** +*** 7773,7779 **** + #ifdef DEBUG + static char_u regname[][30] = { + "AUTOMATIC Regexp Engine", +! "BACKTACKING Regexp Engine", + "NFA Regexp Engine" + }; + #endif +--- 7833,7839 ---- + #ifdef DEBUG + static char_u regname[][30] = { + "AUTOMATIC Regexp Engine", +! "BACKTRACKING Regexp Engine", + "NFA Regexp Engine" + }; + #endif +*** ../vim-7.3.1036/src/regexp_nfa.c 2013-05-28 22:52:11.000000000 +0200 +--- src/regexp_nfa.c 2013-05-29 16:31:13.000000000 +0200 +*************** +*** 1331,1336 **** +--- 1331,1346 ---- + case '=': + EMIT(NFA_PREV_ATOM_NO_WIDTH); + break; ++ case '0': ++ case '1': ++ case '2': ++ case '3': ++ case '4': ++ case '5': ++ case '6': ++ case '7': ++ case '8': ++ case '9': + case '!': + case '<': + case '>': +*************** +*** 3817,3823 **** + * because recursive calls should only start in the first position. + * Also don't start a match past the first line. */ + if (nfa_match == FALSE && start->c == NFA_MOPEN + 0 +! && reglnum == 0 && clen != 0) + { + #ifdef ENABLE_LOG + fprintf(log_fd, "(---) STARTSTATE\n"); +--- 3827,3835 ---- + * because recursive calls should only start in the first position. + * Also don't start a match past the first line. */ + if (nfa_match == FALSE && start->c == NFA_MOPEN + 0 +! && reglnum == 0 && clen != 0 +! && (ireg_maxcol == 0 +! || (colnr_T)(reginput - regline) < ireg_maxcol)) + { + #ifdef ENABLE_LOG + fprintf(log_fd, "(---) STARTSTATE\n"); +*** ../vim-7.3.1036/src/testdir/test64.in 2013-05-28 22:03:13.000000000 +0200 +--- src/testdir/test64.in 2013-05-29 14:56:44.000000000 +0200 +*************** +*** 336,341 **** +--- 336,349 ---- + :"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo']) + :call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i']) + :" ++ :"""" Look-behind with limit ++ :call add(tl, [0, '<\@<=span.', 'xxspanxxTa 5 + Ac 7 + ghi ++ ++ xxxstart3 +*** ../vim-7.3.1036/src/version.c 2013-05-28 22:52:11.000000000 +0200 +--- src/version.c 2013-05-29 13:20:48.000000000 +0200 +*************** +*** 730,731 **** +--- 730,733 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 1037, + /**/ + +-- +hundred-and-one symptoms of being an internet addict: +11. You find yourself typing "com" after every period when using a word + processor.com + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ an exciting new programming language -- http://www.Zimbu.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///