To: vim_dev@googlegroups.com
Subject: Patch 7.3.1094
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------
Patch 7.3.1094
Problem: New regexp engine: Attempts to match "^" at every character.
Solution: Only try "^" at the start of a line.
Files: src/regexp_nfa.c
*** ../vim-7.3.1093/src/regexp_nfa.c 2013-06-02 16:07:05.000000000 +0200
--- src/regexp_nfa.c 2013-06-02 16:24:04.000000000 +0200
***************
*** 249,254 ****
--- 249,256 ----
* executing. */
static int istate; /* Index in the state vector, used in new_state() */
+ /* If not NULL match must end at this position */
+ static save_se_T *nfa_endp = NULL;
static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags));
static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
***************
*** 3080,3085 ****
--- 3082,3099 ----
state->lastlist = l->id;
break;
+ case NFA_BOL:
+ case NFA_BOF:
+ /* "^" can only match at the start of a line; when already past
+ * it don't bother trying. Except when we are going to the next
+ * line for a look-behind match. */
+ if (reginput > regline
+ && (nfa_endp == NULL
+ || !REG_MULTI
+ || reglnum == nfa_endp->se_u.pos.lnum))
+ goto skip_add;
+ /* FALLTHROUGH */
+
default:
if (state->lastlist == l->id)
{
***************
*** 3659,3682 ****
return val == pos;
}
! static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m, save_se_T *endp));
/*
* Main matching routine.
*
* Run NFA to determine whether it matches reginput.
*
! * When "endp" is not NULL it is a required end-of-match position.
*
* Return TRUE if there is a match, FALSE otherwise.
* Note: Caller must ensure that: start != NULL.
*/
static int
! nfa_regmatch(start, submatch, m, endp)
nfa_state_T *start;
regsubs_T *submatch;
regsubs_T *m;
- save_se_T *endp;
{
int result;
int size = 0;
--- 3673,3695 ----
return val == pos;
}
! static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m));
/*
* Main matching routine.
*
* Run NFA to determine whether it matches reginput.
*
! * When "nfa_endp" is not NULL it is a required end-of-match position.
*
* Return TRUE if there is a match, FALSE otherwise.
* Note: Caller must ensure that: start != NULL.
*/
static int
! nfa_regmatch(start, submatch, m)
nfa_state_T *start;
regsubs_T *submatch;
regsubs_T *m;
{
int result;
int size = 0;
***************
*** 3888,3913 ****
else
{
#ifdef ENABLE_LOG
! if (endp != NULL)
{
if (REG_MULTI)
fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
(int)reglnum,
! (int)endp->se_u.pos.lnum,
(int)(reginput - regline),
! endp->se_u.pos.col);
else
fprintf(log_fd, "Current col: %d, endp col: %d\n",
(int)(reginput - regline),
! (int)(endp->se_u.ptr - reginput));
}
#endif
! /* It's only a match if it ends at "endp" */
! if (endp != NULL && (REG_MULTI
! ? (reglnum != endp->se_u.pos.lnum
|| (int)(reginput - regline)
! != endp->se_u.pos.col)
! : reginput != endp->se_u.ptr))
break;
/* do not set submatches for \@! */
--- 3901,3926 ----
else
{
#ifdef ENABLE_LOG
! if (nfa_endp != NULL)
{
if (REG_MULTI)
fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
(int)reglnum,
! (int)nfa_endp->se_u.pos.lnum,
(int)(reginput - regline),
! nfa_endp->se_u.pos.col);
else
fprintf(log_fd, "Current col: %d, endp col: %d\n",
(int)(reginput - regline),
! (int)(nfa_endp->se_u.ptr - reginput));
}
#endif
! /* It's only a match if it ends at "nfa_endp" */
! if (nfa_endp != NULL && (REG_MULTI
! ? (reglnum != nfa_endp->se_u.pos.lnum
|| (int)(reginput - regline)
! != nfa_endp->se_u.pos.col)
! : reginput != nfa_endp->se_u.ptr))
break;
/* do not set submatches for \@! */
***************
*** 3929,3934 ****
--- 3942,3948 ----
char_u *save_regline = regline;
int save_reglnum = reglnum;
int save_nfa_match = nfa_match;
+ save_se_T *save_nfa_endp = nfa_endp;
save_se_T endpos;
save_se_T *endposp = NULL;
***************
*** 4012,4018 ****
* recursion. */
nfa_save_listids(start, listids);
nfa_set_null_listids(start);
! result = nfa_regmatch(t->state->out, submatch, m, endposp);
nfa_set_neg_listids(start);
nfa_restore_listids(start, listids);
--- 4026,4033 ----
* recursion. */
nfa_save_listids(start, listids);
nfa_set_null_listids(start);
! nfa_endp = endposp;
! result = nfa_regmatch(t->state->out, submatch, m);
nfa_set_neg_listids(start);
nfa_restore_listids(start, listids);
***************
*** 4021,4026 ****
--- 4036,4042 ----
regline = save_regline;
reglnum = save_reglnum;
nfa_match = save_nfa_match;
+ nfa_endp = save_nfa_endp;
#ifdef ENABLE_LOG
log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
***************
*** 4563,4569 ****
* matters!
* Do not add the start state in recursive calls of nfa_regmatch(),
* because recursive calls should only start in the first position.
! * Unless "endp" is not NULL, then we match the end position.
* Also don't start a match past the first line. */
if (nfa_match == FALSE
&& ((start->c == NFA_MOPEN
--- 4579,4585 ----
* matters!
* Do not add the start state in recursive calls of nfa_regmatch(),
* because recursive calls should only start in the first position.
! * Unless "nfa_endp" is not NULL, then we match the end position.
* Also don't start a match past the first line. */
if (nfa_match == FALSE
&& ((start->c == NFA_MOPEN
***************
*** 4571,4583 ****
&& clen != 0
&& (ireg_maxcol == 0
|| (colnr_T)(reginput - regline) < ireg_maxcol))
! || (endp != NULL
&& (REG_MULTI
! ? (reglnum < endp->se_u.pos.lnum
! || (reglnum == endp->se_u.pos.lnum
&& (int)(reginput - regline)
! < endp->se_u.pos.col))
! : reginput < endp->se_u.ptr))))
{
#ifdef ENABLE_LOG
fprintf(log_fd, "(---) STARTSTATE\n");
--- 4587,4599 ----
&& clen != 0
&& (ireg_maxcol == 0
|| (colnr_T)(reginput - regline) < ireg_maxcol))
! || (nfa_endp != NULL
&& (REG_MULTI
! ? (reglnum < nfa_endp->se_u.pos.lnum
! || (reglnum == nfa_endp->se_u.pos.lnum
&& (int)(reginput - regline)
! < nfa_endp->se_u.pos.col))
! : reginput < nfa_endp->se_u.ptr))))
{
#ifdef ENABLE_LOG
fprintf(log_fd, "(---) STARTSTATE\n");
***************
*** 4601,4608 ****
* finish. */
if (clen != 0)
reginput += clen;
! else if (go_to_nextline || (endp != NULL && REG_MULTI
! && reglnum < endp->se_u.pos.lnum))
reg_nextline();
else
break;
--- 4617,4624 ----
* finish. */
if (clen != 0)
reginput += clen;
! else if (go_to_nextline || (nfa_endp != NULL && REG_MULTI
! && reglnum < nfa_endp->se_u.pos.lnum))
reg_nextline();
else
break;
***************
*** 4678,4684 ****
clear_sub(&m.synt);
#endif
! if (nfa_regmatch(start, &subs, &m, NULL) == FALSE)
return 0;
cleanup_subexpr();
--- 4694,4700 ----
clear_sub(&m.synt);
#endif
! if (nfa_regmatch(start, &subs, &m) == FALSE)
return 0;
cleanup_subexpr();
*** ../vim-7.3.1093/src/version.c 2013-06-02 16:07:05.000000000 +0200
--- src/version.c 2013-06-02 16:25:02.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
{ /* Add new patch number below this line */
+ /**/
+ 1094,
/**/
--
The future isn't what it used to be.
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///