To: vim_dev@googlegroups.com
Subject: Patch 7.3.1106
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------
Patch 7.3.1106
Problem: New regexp engine: saving and restoring lastlist in the states
takes a lot of time.
Solution: Use a second lastlist value for the first recursive call.
Files: src/regexp.h, src/regexp_nfa.c
*** ../vim-7.3.1105/src/regexp.h 2013-06-02 15:55:52.000000000 +0200
--- src/regexp.h 2013-06-03 11:29:26.000000000 +0200
***************
*** 72,78 ****
nfa_state_T *out;
nfa_state_T *out1;
int id;
! int lastlist;
int negated;
int val;
};
--- 72,78 ----
nfa_state_T *out;
nfa_state_T *out1;
int id;
! int lastlist[2]; /* 0: normal, 1: recursive */
int negated;
int val;
};
*** ../vim-7.3.1105/src/regexp_nfa.c 2013-06-02 22:37:39.000000000 +0200
--- src/regexp_nfa.c 2013-06-03 12:15:17.000000000 +0200
***************
*** 255,260 ****
--- 255,269 ----
/* If not NULL match must end at this position */
static save_se_T *nfa_endp = NULL;
+ /* listid is global, so that it increases on recursive calls to
+ * nfa_regmatch(), which means we don't have to clear the lastlist field of
+ * all the states. */
+ static int nfa_listid;
+ static int nfa_alt_listid;
+
+ /* 0 for first call to nfa_regmatch(), 1 for recursive call. */
+ static int nfa_ll_index = 0;
+
static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags));
static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
static int nfa_emit_equi_class __ARGS((int c, int neg));
***************
*** 2169,2175 ****
s->out1 = out1;
s->id = istate;
! s->lastlist = 0;
s->negated = FALSE;
return s;
--- 2178,2185 ----
s->out1 = out1;
s->id = istate;
! s->lastlist[0] = 0;
! s->lastlist[1] = 0;
s->negated = FALSE;
return s;
***************
*** 3113,3121 ****
#endif
/* These nodes do not need to be added, but we need to bail out
* when it was tried to be added to this list before. */
! if (state->lastlist == l->id)
goto skip_add;
! state->lastlist = l->id;
break;
case NFA_BOL:
--- 3123,3131 ----
#endif
/* These nodes do not need to be added, but we need to bail out
* when it was tried to be added to this list before. */
! if (state->lastlist[nfa_ll_index] == l->id)
goto skip_add;
! state->lastlist[nfa_ll_index] = l->id;
break;
case NFA_BOL:
***************
*** 3131,3137 ****
/* FALLTHROUGH */
default:
! if (state->lastlist == l->id)
{
/* This state is already in the list, don't add it again,
* unless it is an MOPEN that is used for a backreference. */
--- 3141,3147 ----
/* FALLTHROUGH */
default:
! if (state->lastlist[nfa_ll_index] == l->id)
{
/* This state is already in the list, don't add it again,
* unless it is an MOPEN that is used for a backreference. */
***************
*** 3173,3179 ****
}
/* add the state to the list */
! state->lastlist = l->id;
thread = &l->t[l->n++];
thread->state = state;
copy_sub(&thread->subs.norm, &subs->norm);
--- 3183,3189 ----
}
/* add the state to the list */
! state->lastlist[nfa_ll_index] = l->id;
thread = &l->t[l->n++];
thread->state = state;
copy_sub(&thread->subs.norm, &subs->norm);
***************
*** 3616,3621 ****
--- 3626,3632 ----
/*
* Save list IDs for all NFA states of "prog" into "list".
* Also reset the IDs to zero.
+ * Only used for the recursive value lastlist[1].
*/
static void
nfa_save_listids(prog, list)
***************
*** 3629,3636 ****
p = &prog->state[0];
for (i = prog->nstate; --i >= 0; )
{
! list[i] = p->lastlist;
! p->lastlist = 0;
++p;
}
}
--- 3640,3647 ----
p = &prog->state[0];
for (i = prog->nstate; --i >= 0; )
{
! list[i] = p->lastlist[1];
! p->lastlist[1] = 0;
++p;
}
}
***************
*** 3649,3655 ****
p = &prog->state[0];
for (i = prog->nstate; --i >= 0; )
{
! p->lastlist = list[i];
++p;
}
}
--- 3660,3666 ----
p = &prog->state[0];
for (i = prog->nstate; --i >= 0; )
{
! p->lastlist[1] = list[i];
++p;
}
}
***************
*** 3683,3692 ****
--- 3694,3705 ----
char_u *save_regline = regline;
int save_reglnum = reglnum;
int save_nfa_match = nfa_match;
+ int save_nfa_listid = nfa_listid;
save_se_T *save_nfa_endp = nfa_endp;
save_se_T endpos;
save_se_T *endposp = NULL;
int result;
+ int need_restore = FALSE;
if (state->c == NFA_START_INVISIBLE_BEFORE)
{
***************
*** 3745,3774 ****
}
}
- /* Call nfa_regmatch() to check if the current concat matches
- * at this position. The concat ends with the node
- * NFA_END_INVISIBLE */
- if (*listids == NULL)
- {
- *listids = (int *)lalloc(sizeof(int) * nstate, TRUE);
- if (*listids == NULL)
- {
- EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!"));
- return 0;
- }
- }
#ifdef ENABLE_LOG
if (log_fd != stderr)
fclose(log_fd);
log_fd = NULL;
#endif
! /* Have to clear the listid field of the NFA nodes, so that
! * nfa_regmatch() and addstate() can run properly after
! * recursion. */
! nfa_save_listids(prog, *listids);
nfa_endp = endposp;
result = nfa_regmatch(prog, state->out, submatch, m);
! nfa_restore_listids(prog, *listids);
/* restore position in input text */
reginput = save_reginput;
--- 3758,3809 ----
}
}
#ifdef ENABLE_LOG
if (log_fd != stderr)
fclose(log_fd);
log_fd = NULL;
#endif
! /* Have to clear the lastlist field of the NFA nodes, so that
! * nfa_regmatch() and addstate() can run properly after recursion. */
! if (nfa_ll_index == 1)
! {
! /* Already calling nfa_regmatch() recursively. Save the lastlist[1]
! * values and clear them. */
! if (*listids == NULL)
! {
! *listids = (int *)lalloc(sizeof(int) * nstate, TRUE);
! if (*listids == NULL)
! {
! EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!"));
! return 0;
! }
! }
! nfa_save_listids(prog, *listids);
! need_restore = TRUE;
! /* any value of nfa_listid will do */
! }
! else
! {
! /* First recursive nfa_regmatch() call, switch to the second lastlist
! * entry. Make sure nfa_listid is different from a previous recursive
! * call, because some states may still have this ID. */
! ++nfa_ll_index;
! if (nfa_listid <= nfa_alt_listid)
! nfa_listid = nfa_alt_listid;
! }
!
! /* Call nfa_regmatch() to check if the current concat matches at this
! * position. The concat ends with the node NFA_END_INVISIBLE */
nfa_endp = endposp;
result = nfa_regmatch(prog, state->out, submatch, m);
!
! if (need_restore)
! nfa_restore_listids(prog, *listids);
! else
! {
! --nfa_ll_index;
! nfa_alt_listid = nfa_listid;
! }
/* restore position in input text */
reginput = save_reginput;
***************
*** 3776,3781 ****
--- 3811,3817 ----
reglnum = save_reglnum;
nfa_match = save_nfa_match;
nfa_endp = save_nfa_endp;
+ nfa_listid = save_nfa_listid;
#ifdef ENABLE_LOG
log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
***************
*** 3821,3827 ****
nfa_list_T list[3];
nfa_list_T *listtbl[2][2];
nfa_list_T *ll;
- int listid = 1;
int listidx;
nfa_list_T *thislist;
nfa_list_T *nextlist;
--- 3857,3862 ----
***************
*** 3875,3881 ****
#ifdef ENABLE_LOG
fprintf(log_fd, "(---) STARTSTATE\n");
#endif
! thislist->id = listid;
addstate(thislist, start, m, 0);
/* There are two cases when the NFA advances: 1. input char matches the
--- 3910,3916 ----
#ifdef ENABLE_LOG
fprintf(log_fd, "(---) STARTSTATE\n");
#endif
! thislist->id = nfa_listid + 1;
addstate(thislist, start, m, 0);
/* There are two cases when the NFA advances: 1. input char matches the
***************
*** 3923,3932 ****
nextlist = &list[flag ^= 1];
nextlist->n = 0; /* clear nextlist */
listtbl[1][0] = nextlist;
! ++listid;
! thislist->id = listid;
! nextlist->id = listid + 1;
! neglist->id = listid + 1;
#ifdef ENABLE_LOG
fprintf(log_fd, "------------------------------------------\n");
--- 3958,3967 ----
nextlist = &list[flag ^= 1];
nextlist->n = 0; /* clear nextlist */
listtbl[1][0] = nextlist;
! ++nfa_listid;
! thislist->id = nfa_listid;
! nextlist->id = nfa_listid + 1;
! neglist->id = nfa_listid + 1;
#ifdef ENABLE_LOG
fprintf(log_fd, "------------------------------------------\n");
***************
*** 4843,4848 ****
--- 4878,4885 ----
nfa_has_zend = prog->has_zend;
nfa_has_backref = prog->has_backref;
nfa_nsubexpr = prog->nsubexp;
+ nfa_listid = 1;
+ nfa_alt_listid = 2;
#ifdef DEBUG
nfa_regengine.expr = prog->pattern;
#endif
***************
*** 4851,4857 ****
for (i = 0; i < nstate; ++i)
{
prog->state[i].id = i;
! prog->state[i].lastlist = 0;
}
retval = nfa_regtry(prog, col);
--- 4888,4895 ----
for (i = 0; i < nstate; ++i)
{
prog->state[i].id = i;
! prog->state[i].lastlist[0] = 0;
! prog->state[i].lastlist[1] = 0;
}
retval = nfa_regtry(prog, col);
*** ../vim-7.3.1105/src/version.c 2013-06-02 22:37:39.000000000 +0200
--- src/version.c 2013-06-03 12:13:15.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
{ /* Add new patch number below this line */
+ /**/
+ 1106,
/**/
--
hundred-and-one symptoms of being an internet addict:
59. Your wife says communication is important in a marriage...so you buy
another computer and install a second phone line so the two of you can
chat.
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///