To: vim_dev@googlegroups.com
Subject: Patch 7.3.1151
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------
Patch 7.3.1151
Problem: New regexp engine: Slow when a look-behind match is followed by a
zero-width match.
Solution: Postpone the look-behind match more often.
Files: src/regexp_nfa.c
*** ../vim-7.3.1150/src/regexp_nfa.c 2013-06-08 22:29:59.000000000 +0200
--- src/regexp_nfa.c 2013-06-08 23:11:19.000000000 +0200
***************
*** 2794,2799 ****
--- 2794,2800 ----
/* unrecognized */
return -1;
}
+
/*
* Convert a postfix form into its equivalent NFA.
* Return the NFA start state on success, NULL otherwise.
***************
*** 3433,3438 ****
--- 3434,3440 ----
static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from));
static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2));
static int has_state_with_pos __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs));
+ static int match_follows __ARGS((nfa_state_T *startstate, int depth));
static int state_in_list __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs));
static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int off));
static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int *ip));
***************
*** 3626,3631 ****
--- 3628,3715 ----
}
/*
+ * Return TRUE if "state" leads to a NFA_MATCH without advancing the input.
+ */
+ static int
+ match_follows(startstate, depth)
+ nfa_state_T *startstate;
+ int depth;
+ {
+ nfa_state_T *state = startstate;
+
+ /* avoid too much recursion */
+ if (depth > 10)
+ return FALSE;
+
+ for (;;)
+ {
+ switch (state->c)
+ {
+ case NFA_MATCH:
+ return TRUE;
+
+ case NFA_SPLIT:
+ return match_follows(state->out, depth + 1)
+ || match_follows(state->out1, depth + 1);
+
+ case NFA_START_INVISIBLE:
+ case NFA_START_INVISIBLE_BEFORE:
+ case NFA_START_INVISIBLE_NEG:
+ case NFA_START_INVISIBLE_BEFORE_NEG:
+ case NFA_COMPOSING:
+ /* skip ahead to next state */
+ state = state->out1->out;
+ break;
+
+ case NFA_ANY:
+ case NFA_IDENT:
+ case NFA_SIDENT:
+ case NFA_KWORD:
+ case NFA_SKWORD:
+ case NFA_FNAME:
+ case NFA_SFNAME:
+ case NFA_PRINT:
+ case NFA_SPRINT:
+ case NFA_WHITE:
+ case NFA_NWHITE:
+ case NFA_DIGIT:
+ case NFA_NDIGIT:
+ case NFA_HEX:
+ case NFA_NHEX:
+ case NFA_OCTAL:
+ case NFA_NOCTAL:
+ case NFA_WORD:
+ case NFA_NWORD:
+ case NFA_HEAD:
+ case NFA_NHEAD:
+ case NFA_ALPHA:
+ case NFA_NALPHA:
+ case NFA_LOWER:
+ case NFA_NLOWER:
+ case NFA_UPPER:
+ case NFA_NUPPER:
+ case NFA_START_COLL:
+ case NFA_START_NEG_COLL:
+ case NFA_NEWL:
+ /* state will advance input */
+ return FALSE;
+
+ default:
+ if (state->c > 0)
+ /* state will advance input */
+ return FALSE;
+
+ /* Others: zero-width or possibly zero-width, might still find
+ * a match at the same position, keep looking. */
+ break;
+ }
+ state = state->out;
+ }
+ return FALSE;
+ }
+
+
+ /*
* Return TRUE if "state" is already in list "l".
*/
static int
***************
*** 5714,5722 ****
if (add_state != NULL)
{
! if (t->pim != NULL)
{
- /* postponed invisible match */
if (t->pim->result == NFA_PIM_TODO)
{
#ifdef ENABLE_LOG
--- 5798,5808 ----
if (add_state != NULL)
{
! /* Handle the postponed invisible match before advancing to
! * the next character and for a zero-width match if the match
! * might end without advancing. */
! if (t->pim != NULL && (!add_here || match_follows(add_state, 0)))
{
if (t->pim->result == NFA_PIM_TODO)
{
#ifdef ENABLE_LOG
***************
*** 5773,5779 ****
}
if (add_here)
! addstate_here(thislist, add_state, &t->subs, NULL, &listidx);
else
{
addstate(nextlist, add_state, &t->subs, add_off);
--- 5859,5865 ----
}
if (add_here)
! addstate_here(thislist, add_state, &t->subs, t->pim, &listidx);
else
{
addstate(nextlist, add_state, &t->subs, add_off);
*** ../vim-7.3.1150/src/version.c 2013-06-08 22:29:59.000000000 +0200
--- src/version.c 2013-06-08 23:23:53.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
{ /* Add new patch number below this line */
+ /**/
+ 1151,
/**/
--
hundred-and-one symptoms of being an internet addict:
117. You are more comfortable typing in html.
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///