To: vim_dev@googlegroups.com
Subject: Patch 7.3.1169
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------
Patch 7.3.1169
Problem: New regexp engine: some work is done while executing a pattern,
even though the result is predictable.
Solution: Do the work while compiling the pattern.
Files: src/regexp_nfa.c
*** ../vim-7.3.1168/src/regexp_nfa.c 2013-06-11 18:42:28.000000000 +0200
--- src/regexp_nfa.c 2013-06-11 22:40:12.000000000 +0200
***************
*** 64,72 ****
--- 64,76 ----
NFA_NOPEN, /* Start of subexpression marked with \%( */
NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */
NFA_START_INVISIBLE,
+ NFA_START_INVISIBLE_FIRST,
NFA_START_INVISIBLE_NEG,
+ NFA_START_INVISIBLE_NEG_FIRST,
NFA_START_INVISIBLE_BEFORE,
+ NFA_START_INVISIBLE_BEFORE_FIRST,
NFA_START_INVISIBLE_BEFORE_NEG,
+ NFA_START_INVISIBLE_BEFORE_NEG_FIRST,
NFA_START_PATTERN,
NFA_END_INVISIBLE,
NFA_END_INVISIBLE_NEG,
***************
*** 286,291 ****
--- 290,296 ----
static int *re2post __ARGS((void));
static nfa_state_T *alloc_state __ARGS((int c, nfa_state_T *out, nfa_state_T *out1));
static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size));
+ static void nfa_postprocess __ARGS((nfa_regprog_T *prog));
static int check_char_class __ARGS((int class, int c));
static void st_error __ARGS((int *postfix, int *end, int *p));
static void nfa_save_listids __ARGS((nfa_regprog_T *prog, int *list));
***************
*** 297,302 ****
--- 302,309 ----
static void nfa_regfree __ARGS((regprog_T *prog));
static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
static long nfa_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
+ static int match_follows __ARGS((nfa_state_T *startstate, int depth));
+ static int failure_chance __ARGS((nfa_state_T *state, int depth));
/* helper functions used when doing re2post() ... regatom() parsing */
#define EMIT(c) do { \
***************
*** 2040,2051 ****
--- 2047,2066 ----
case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break;
case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break;
case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break;
+ case NFA_START_INVISIBLE_FIRST:
+ STRCPY(code, "NFA_START_INVISIBLE_FIRST"); break;
case NFA_START_INVISIBLE_NEG:
STRCPY(code, "NFA_START_INVISIBLE_NEG"); break;
+ case NFA_START_INVISIBLE_NEG_FIRST:
+ STRCPY(code, "NFA_START_INVISIBLE_NEG_FIRST"); break;
case NFA_START_INVISIBLE_BEFORE:
STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break;
+ case NFA_START_INVISIBLE_BEFORE_FIRST:
+ STRCPY(code, "NFA_START_INVISIBLE_BEFORE_FIRST"); break;
case NFA_START_INVISIBLE_BEFORE_NEG:
STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG"); break;
+ case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
+ STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG_FIRST"); break;
case NFA_START_PATTERN: STRCPY(code, "NFA_START_PATTERN"); break;
case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break;
case NFA_END_INVISIBLE_NEG: STRCPY(code, "NFA_END_INVISIBLE_NEG"); break;
***************
*** 3318,3323 ****
--- 3333,3395 ----
#undef PUSH
}
+ /*
+ * After building the NFA program, inspect it to add optimization hints.
+ */
+ static void
+ nfa_postprocess(prog)
+ nfa_regprog_T *prog;
+ {
+ int i;
+ int c;
+
+ for (i = 0; i < prog->nstate; ++i)
+ {
+ c = prog->state[i].c;
+ if (c == NFA_START_INVISIBLE
+ || c == NFA_START_INVISIBLE_NEG
+ || c == NFA_START_INVISIBLE_BEFORE
+ || c == NFA_START_INVISIBLE_BEFORE_NEG)
+ {
+ int directly;
+
+ /* Do it directly when what follows is possibly the end of the
+ * match. */
+ if (match_follows(prog->state[i].out1->out, 0))
+ directly = TRUE;
+ else
+ {
+ int ch_invisible = failure_chance(prog->state[i].out, 0);
+ int ch_follows = failure_chance(prog->state[i].out1->out, 0);
+
+ /* Postpone when the invisible match is expensive or has a
+ * lower chance of failing. */
+ if (c == NFA_START_INVISIBLE_BEFORE
+ || c == NFA_START_INVISIBLE_BEFORE_NEG)
+ {
+ /* "before" matches are very expensive when
+ * unbounded, always prefer what follows then,
+ * unless what follows will always match.
+ * Otherwise strongly prefer what follows. */
+ if (prog->state[i].val <= 0 && ch_follows > 0)
+ directly = FALSE;
+ else
+ directly = ch_follows * 10 < ch_invisible;
+ }
+ else
+ {
+ /* normal invisible, first do the one with the
+ * highest failure chance */
+ directly = ch_follows < ch_invisible;
+ }
+ }
+ if (directly)
+ /* switch to the _FIRST state */
+ ++prog->state[i].c;
+ }
+ }
+ }
+
/****************************************************************
* NFA execution code.
****************************************************************/
***************
*** 3457,3463 ****
static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from));
static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2));
static int has_state_with_pos __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs));
- static int match_follows __ARGS((nfa_state_T *startstate, int depth));
static int state_in_list __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs));
static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int off));
static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int *ip));
--- 3529,3534 ----
***************
*** 3703,3711 ****
--- 3774,3786 ----
|| match_follows(state->out1, depth + 1);
case NFA_START_INVISIBLE:
+ case NFA_START_INVISIBLE_FIRST:
case NFA_START_INVISIBLE_BEFORE:
+ case NFA_START_INVISIBLE_BEFORE_FIRST:
case NFA_START_INVISIBLE_NEG:
+ case NFA_START_INVISIBLE_NEG_FIRST:
case NFA_START_INVISIBLE_BEFORE_NEG:
+ case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
case NFA_COMPOSING:
/* skip ahead to next state */
state = state->out1->out;
***************
*** 4440,4446 ****
}
if (state->c == NFA_START_INVISIBLE_BEFORE
! || state->c == NFA_START_INVISIBLE_BEFORE_NEG)
{
/* The recursive match must end at the current position. When "pim" is
* not NULL it specifies the current position. */
--- 4515,4523 ----
}
if (state->c == NFA_START_INVISIBLE_BEFORE
! || state->c == NFA_START_INVISIBLE_BEFORE_FIRST
! || state->c == NFA_START_INVISIBLE_BEFORE_NEG
! || state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)
{
/* The recursive match must end at the current position. When "pim" is
* not NULL it specifies the current position. */
***************
*** 4581,4587 ****
return result;
}
- static int failure_chance __ARGS((nfa_state_T *state, int depth));
static int skip_to_start __ARGS((int c, colnr_T *colp));
static long find_match_text __ARGS((colnr_T startcol, int regstart, char_u *match_text));
--- 4658,4663 ----
***************
*** 5093,5142 ****
break;
case NFA_START_INVISIBLE:
case NFA_START_INVISIBLE_NEG:
case NFA_START_INVISIBLE_BEFORE:
case NFA_START_INVISIBLE_BEFORE_NEG:
{
- int directly = FALSE;
-
#ifdef ENABLE_LOG
fprintf(log_fd, "Failure chance invisible: %d, what follows: %d\n",
failure_chance(t->state->out, 0),
failure_chance(t->state->out1->out, 0));
#endif
! /* Do it directly when what follows is possibly the end of
! * the match.
! * Do it directly if there already is a PIM.
! * Postpone when the invisible match is expensive or has a
! * lower chance of failing. */
! if (match_follows(t->state->out1->out, 0)
! || t->pim.result != NFA_PIM_UNUSED)
! directly = TRUE;
! else
! {
! int ch_invisible = failure_chance(t->state->out, 0);
! int ch_follows = failure_chance(t->state->out1->out, 0);
!
! if (t->state->c == NFA_START_INVISIBLE_BEFORE
! || t->state->c == NFA_START_INVISIBLE_BEFORE_NEG)
! {
! /* "before" matches are very expensive when
! * unbounded, always prefer what follows then,
! * unless what follows will always match.
! * Otherwise strongly prefer what follows. */
! if (t->state->val <= 0 && ch_follows > 0)
! directly = FALSE;
! else
! directly = ch_follows * 10 < ch_invisible;
! }
! else
! {
! /* normal invisible, first do the one with the
! * highest failure chance */
! directly = ch_follows < ch_invisible;
! }
! }
! if (directly)
{
/*
* First try matching the invisible match, then what
--- 5169,5194 ----
break;
case NFA_START_INVISIBLE:
+ case NFA_START_INVISIBLE_FIRST:
case NFA_START_INVISIBLE_NEG:
+ case NFA_START_INVISIBLE_NEG_FIRST:
case NFA_START_INVISIBLE_BEFORE:
+ case NFA_START_INVISIBLE_BEFORE_FIRST:
case NFA_START_INVISIBLE_BEFORE_NEG:
+ case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
{
#ifdef ENABLE_LOG
fprintf(log_fd, "Failure chance invisible: %d, what follows: %d\n",
failure_chance(t->state->out, 0),
failure_chance(t->state->out1->out, 0));
#endif
! /* Do it directly if there already is a PIM or when
! * nfa_postprocess() detected it will work better. */
! if (t->pim.result != NFA_PIM_UNUSED
! || t->state->c == NFA_START_INVISIBLE_FIRST
! || t->state->c == NFA_START_INVISIBLE_NEG_FIRST
! || t->state->c == NFA_START_INVISIBLE_BEFORE_FIRST
! || t->state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)
{
/*
* First try matching the invisible match, then what
***************
*** 5148,5155 ****
/* for \@! and \@<! it is a match when the result is
* FALSE */
if (result != (t->state->c == NFA_START_INVISIBLE_NEG
! || t->state->c
! == NFA_START_INVISIBLE_BEFORE_NEG))
{
/* Copy submatch info from the recursive call */
copy_sub_off(&t->subs.norm, &m->norm);
--- 5200,5210 ----
/* for \@! and \@<! it is a match when the result is
* FALSE */
if (result != (t->state->c == NFA_START_INVISIBLE_NEG
! || t->state->c == NFA_START_INVISIBLE_NEG_FIRST
! || t->state->c
! == NFA_START_INVISIBLE_BEFORE_NEG
! || t->state->c
! == NFA_START_INVISIBLE_BEFORE_NEG_FIRST))
{
/* Copy submatch info from the recursive call */
copy_sub_off(&t->subs.norm, &m->norm);
***************
*** 5920,5927 ****
/* for \@! and \@<! it is a match when the result is
* FALSE */
if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
! || pim->state->c
! == NFA_START_INVISIBLE_BEFORE_NEG))
{
/* Copy submatch info from the recursive call */
copy_sub_off(&pim->subs.norm, &m->norm);
--- 5975,5985 ----
/* for \@! and \@<! it is a match when the result is
* FALSE */
if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
! || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST
! || pim->state->c
! == NFA_START_INVISIBLE_BEFORE_NEG
! || pim->state->c
! == NFA_START_INVISIBLE_BEFORE_NEG_FIRST))
{
/* Copy submatch info from the recursive call */
copy_sub_off(&pim->subs.norm, &m->norm);
***************
*** 5944,5951 ****
/* for \@! and \@<! it is a match when result is FALSE */
if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
! || pim->state->c
! == NFA_START_INVISIBLE_BEFORE_NEG))
{
/* Copy submatch info from the recursive call */
copy_sub_off(&t->subs.norm, &pim->subs.norm);
--- 6002,6012 ----
/* for \@! and \@<! it is a match when result is FALSE */
if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
! || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST
! || pim->state->c
! == NFA_START_INVISIBLE_BEFORE_NEG
! || pim->state->c
! == NFA_START_INVISIBLE_BEFORE_NEG_FIRST))
{
/* Copy submatch info from the recursive call */
copy_sub_off(&t->subs.norm, &pim->subs.norm);
***************
*** 6413,6421 ****
prog->has_backref = nfa_has_backref;
prog->nsubexp = regnpar;
prog->reganch = nfa_get_reganch(prog->start, 0);
prog->regstart = nfa_get_regstart(prog->start, 0);
-
prog->match_text = nfa_get_match_text(prog->start);
#ifdef ENABLE_LOG
--- 6474,6483 ----
prog->has_backref = nfa_has_backref;
prog->nsubexp = regnpar;
+ nfa_postprocess(prog);
+
prog->reganch = nfa_get_reganch(prog->start, 0);
prog->regstart = nfa_get_regstart(prog->start, 0);
prog->match_text = nfa_get_match_text(prog->start);
#ifdef ENABLE_LOG
*** ../vim-7.3.1168/src/version.c 2013-06-11 20:53:24.000000000 +0200
--- src/version.c 2013-06-11 22:43:27.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
{ /* Add new patch number below this line */
+ /**/
+ 1169,
/**/
--
hundred-and-one symptoms of being an internet addict:
156. You forget your friend's name but not her e-mail address.
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///