To: vim_dev@googlegroups.com Subject: Patch 7.3.1094 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1094 Problem: New regexp engine: Attempts to match "^" at every character. Solution: Only try "^" at the start of a line. Files: src/regexp_nfa.c *** ../vim-7.3.1093/src/regexp_nfa.c 2013-06-02 16:07:05.000000000 +0200 --- src/regexp_nfa.c 2013-06-02 16:24:04.000000000 +0200 *************** *** 249,254 **** --- 249,256 ---- * executing. */ static int istate; /* Index in the state vector, used in new_state() */ + /* If not NULL match must end at this position */ + static save_se_T *nfa_endp = NULL; static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags)); static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); *************** *** 3080,3085 **** --- 3082,3099 ---- state->lastlist = l->id; break; + case NFA_BOL: + case NFA_BOF: + /* "^" won't match past end-of-line, don't bother trying. + * Except when we are going to the next line for a look-behind + * match. */ + if (reginput > regline + && (nfa_endp == NULL + || !REG_MULTI + || reglnum == nfa_endp->se_u.pos.lnum)) + goto skip_add; + /* FALLTHROUGH */ + default: if (state->lastlist == l->id) { *************** *** 3659,3682 **** return val == pos; } ! static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m, save_se_T *endp)); /* * Main matching routine. * * Run NFA to determine whether it matches reginput. * ! * When "endp" is not NULL it is a required end-of-match position. * * Return TRUE if there is a match, FALSE otherwise. * Note: Caller must ensure that: start != NULL. */ static int ! nfa_regmatch(start, submatch, m, endp) nfa_state_T *start; regsubs_T *submatch; regsubs_T *m; - save_se_T *endp; { int result; int size = 0; --- 3673,3695 ---- return val == pos; } ! 
static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m)); /* * Main matching routine. * * Run NFA to determine whether it matches reginput. * ! * When "nfa_endp" is not NULL it is a required end-of-match position. * * Return TRUE if there is a match, FALSE otherwise. * Note: Caller must ensure that: start != NULL. */ static int ! nfa_regmatch(start, submatch, m) nfa_state_T *start; regsubs_T *submatch; regsubs_T *m; { int result; int size = 0; *************** *** 3888,3913 **** else { #ifdef ENABLE_LOG ! if (endp != NULL) { if (REG_MULTI) fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n", (int)reglnum, ! (int)endp->se_u.pos.lnum, (int)(reginput - regline), ! endp->se_u.pos.col); else fprintf(log_fd, "Current col: %d, endp col: %d\n", (int)(reginput - regline), ! (int)(endp->se_u.ptr - reginput)); } #endif ! /* It's only a match if it ends at "endp" */ ! if (endp != NULL && (REG_MULTI ! ? (reglnum != endp->se_u.pos.lnum || (int)(reginput - regline) ! != endp->se_u.pos.col) ! : reginput != endp->se_u.ptr)) break; /* do not set submatches for \@! */ --- 3901,3926 ---- else { #ifdef ENABLE_LOG ! if (nfa_endp != NULL) { if (REG_MULTI) fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n", (int)reglnum, ! (int)nfa_endp->se_u.pos.lnum, (int)(reginput - regline), ! nfa_endp->se_u.pos.col); else fprintf(log_fd, "Current col: %d, endp col: %d\n", (int)(reginput - regline), ! (int)(nfa_endp->se_u.ptr - reginput)); } #endif ! /* It's only a match if it ends at "nfa_endp" */ ! if (nfa_endp != NULL && (REG_MULTI ! ? (reglnum != nfa_endp->se_u.pos.lnum || (int)(reginput - regline) ! != nfa_endp->se_u.pos.col) ! : reginput != nfa_endp->se_u.ptr)) break; /* do not set submatches for \@! 
*/ *************** *** 3929,3934 **** --- 3942,3948 ---- char_u *save_regline = regline; int save_reglnum = reglnum; int save_nfa_match = nfa_match; + save_se_T *save_nfa_endp = nfa_endp; save_se_T endpos; save_se_T *endposp = NULL; *************** *** 4012,4018 **** * recursion. */ nfa_save_listids(start, listids); nfa_set_null_listids(start); ! result = nfa_regmatch(t->state->out, submatch, m, endposp); nfa_set_neg_listids(start); nfa_restore_listids(start, listids); --- 4026,4033 ---- * recursion. */ nfa_save_listids(start, listids); nfa_set_null_listids(start); ! nfa_endp = endposp; ! result = nfa_regmatch(t->state->out, submatch, m); nfa_set_neg_listids(start); nfa_restore_listids(start, listids); *************** *** 4021,4026 **** --- 4036,4042 ---- regline = save_regline; reglnum = save_reglnum; nfa_match = save_nfa_match; + nfa_endp = save_nfa_endp; #ifdef ENABLE_LOG log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); *************** *** 4563,4569 **** * matters! * Do not add the start state in recursive calls of nfa_regmatch(), * because recursive calls should only start in the first position. ! * Unless "endp" is not NULL, then we match the end position. * Also don't start a match past the first line. */ if (nfa_match == FALSE && ((start->c == NFA_MOPEN --- 4579,4585 ---- * matters! * Do not add the start state in recursive calls of nfa_regmatch(), * because recursive calls should only start in the first position. ! * Unless "nfa_endp" is not NULL, then we match the end position. * Also don't start a match past the first line. */ if (nfa_match == FALSE && ((start->c == NFA_MOPEN *************** *** 4571,4583 **** && clen != 0 && (ireg_maxcol == 0 || (colnr_T)(reginput - regline) < ireg_maxcol)) ! || (endp != NULL && (REG_MULTI ! ? (reglnum < endp->se_u.pos.lnum ! || (reglnum == endp->se_u.pos.lnum && (int)(reginput - regline) ! < endp->se_u.pos.col)) ! 
: reginput < endp->se_u.ptr)))) { #ifdef ENABLE_LOG fprintf(log_fd, "(---) STARTSTATE\n"); --- 4587,4599 ---- && clen != 0 && (ireg_maxcol == 0 || (colnr_T)(reginput - regline) < ireg_maxcol)) ! || (nfa_endp != NULL && (REG_MULTI ! ? (reglnum < nfa_endp->se_u.pos.lnum ! || (reglnum == nfa_endp->se_u.pos.lnum && (int)(reginput - regline) ! < nfa_endp->se_u.pos.col)) ! : reginput < nfa_endp->se_u.ptr)))) { #ifdef ENABLE_LOG fprintf(log_fd, "(---) STARTSTATE\n"); *************** *** 4601,4608 **** * finish. */ if (clen != 0) reginput += clen; ! else if (go_to_nextline || (endp != NULL && REG_MULTI ! && reglnum < endp->se_u.pos.lnum)) reg_nextline(); else break; --- 4617,4624 ---- * finish. */ if (clen != 0) reginput += clen; ! else if (go_to_nextline || (nfa_endp != NULL && REG_MULTI ! && reglnum < nfa_endp->se_u.pos.lnum)) reg_nextline(); else break; *************** *** 4678,4684 **** clear_sub(&m.synt); #endif ! if (nfa_regmatch(start, &subs, &m, NULL) == FALSE) return 0; cleanup_subexpr(); --- 4694,4700 ---- clear_sub(&m.synt); #endif ! if (nfa_regmatch(start, &subs, &m) == FALSE) return 0; cleanup_subexpr(); *** ../vim-7.3.1093/src/version.c 2013-06-02 16:07:05.000000000 +0200 --- src/version.c 2013-06-02 16:25:02.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1094, /**/ -- The future isn't what it used to be. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///