To: vim_dev@googlegroups.com
Subject: Patch 7.3.1071
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------
Patch 7.3.1071
Problem: New regexp engine: backreferences don't work correctly.
Solution: Add every possible start/end position on the state stack.
Files: src/regexp_nfa.c, src/regexp.h, src/testdir/test64.in,
src/testdir/test64.ok
*** ../vim-7.3.1070/src/regexp_nfa.c 2013-05-30 11:51:04.000000000 +0200
--- src/regexp_nfa.c 2013-05-30 16:43:43.000000000 +0200
***************
*** 184,189 ****
--- 184,192 ----
/* NFA regexp \ze operator encountered. */
static int nfa_has_zend;
+ /* NFA regexp \1 .. \9 encountered. */
+ static int nfa_has_backref;
+
/* Number of sub expressions actually being used during execution. 1 if only
* the whole match (subexpr 0) is used. */
static int nfa_nsubexpr;
***************
*** 266,271 ****
--- 269,275 ----
post_ptr = post_start;
post_end = post_start + nstate_max;
nfa_has_zend = FALSE;
+ nfa_has_backref = FALSE;
regcomp_start(expr, re_flags);
***************
*** 750,764 ****
/* TODO: Not supported yet */
return FAIL;
! case Magic('1'): EMIT(NFA_BACKREF1); break;
! case Magic('2'): EMIT(NFA_BACKREF2); break;
! case Magic('3'): EMIT(NFA_BACKREF3); break;
! case Magic('4'): EMIT(NFA_BACKREF4); break;
! case Magic('5'): EMIT(NFA_BACKREF5); break;
! case Magic('6'): EMIT(NFA_BACKREF6); break;
! case Magic('7'): EMIT(NFA_BACKREF7); break;
! case Magic('8'): EMIT(NFA_BACKREF8); break;
! case Magic('9'): EMIT(NFA_BACKREF9); break;
case Magic('z'):
c = no_Magic(getchr());
--- 754,771 ----
/* TODO: Not supported yet */
return FAIL;
! case Magic('1'):
! case Magic('2'):
! case Magic('3'):
! case Magic('4'):
! case Magic('5'):
! case Magic('6'):
! case Magic('7'):
! case Magic('8'):
! case Magic('9'):
! EMIT(NFA_BACKREF1 + (no_Magic(c) - '1'));
! nfa_has_backref = TRUE;
! break;
case Magic('z'):
c = no_Magic(getchr());
***************
*** 2581,2587 ****
typedef struct
{
nfa_thread_T *t; /* allocated array of states */
! int n; /* nr of states in "t" */
int id; /* ID of the list */
} nfa_list_T;
--- 2588,2595 ----
typedef struct
{
nfa_thread_T *t; /* allocated array of states */
! int n; /* nr of states currently in "t" */
! int len; /* max nr of states in "t" */
int id; /* ID of the list */
} nfa_list_T;
***************
*** 2612,2620 ****
--- 2620,2711 ----
/* Used during execution: whether a match has been found. */
static int nfa_match;
+ static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2));
static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int off));
static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int *ip));
+ /*
+ * Return TRUE if "sub1" and "sub2" have the same positions.
+ */
+ static int
+ sub_equal(sub1, sub2)
+ regsub_T *sub1;
+ regsub_T *sub2;
+ {
+ int i;
+ int todo;
+ linenr_T s1, e1;
+ linenr_T s2, e2;
+ char_u *sp1, *ep1;
+ char_u *sp2, *ep2;
+
+ todo = sub1->in_use > sub2->in_use ? sub1->in_use : sub2->in_use;
+ if (REG_MULTI)
+ {
+ for (i = 0; i < todo; ++i)
+ {
+ if (i < sub1->in_use)
+ {
+ s1 = sub1->list.multi[i].start.lnum;
+ e1 = sub1->list.multi[i].end.lnum;
+ }
+ else
+ {
+ s1 = 0;
+ e1 = 0;
+ }
+ if (i < sub2->in_use)
+ {
+ s2 = sub2->list.multi[i].start.lnum;
+ e2 = sub2->list.multi[i].end.lnum;
+ }
+ else
+ {
+ s2 = 0;
+ e2 = 0;
+ }
+ if (s1 != s2 || e1 != e2)
+ return FALSE;
+ if (s1 != 0 && sub1->list.multi[i].start.col
+ != sub2->list.multi[i].start.col)
+ return FALSE;
+ if (e1 != 0 && sub1->list.multi[i].end.col
+ != sub2->list.multi[i].end.col)
+ return FALSE;
+ }
+ }
+ else
+ {
+ for (i = 0; i < todo; ++i)
+ {
+ if (i < sub1->in_use)
+ {
+ sp1 = sub1->list.line[i].start;
+ ep1 = sub1->list.line[i].end;
+ }
+ else
+ {
+ sp1 = NULL;
+ ep1 = NULL;
+ }
+ if (i < sub2->in_use)
+ {
+ sp2 = sub2->list.line[i].start;
+ ep2 = sub2->list.line[i].end;
+ }
+ else
+ {
+ sp2 = NULL;
+ ep2 = NULL;
+ }
+ if (sp1 != sp2 || ep1 != ep2)
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+ }
+
static void
addstate(l, state, sub, off)
nfa_list_T *l; /* runtime state list */
***************
*** 2623,2629 ****
int off; /* byte offset, when -1 go to next line */
{
int subidx;
! nfa_thread_T *lastthread;
lpos_T save_lpos;
int save_in_use;
char_u *save_ptr;
--- 2714,2720 ----
int off; /* byte offset, when -1 go to next line */
{
int subidx;
! nfa_thread_T *thread;
lpos_T save_lpos;
int save_in_use;
char_u *save_ptr;
***************
*** 2674,2696 ****
{
/* This state is already in the list, don't add it again,
* unless it is an MOPEN that is used for a backreference. */
! return;
}
/* add the state to the list */
state->lastlist = l->id;
! lastthread = &l->t[l->n++];
! lastthread->state = state;
! lastthread->sub.in_use = sub->in_use;
if (sub->in_use > 0)
{
/* Copy the match start and end positions. */
if (REG_MULTI)
! mch_memmove(&lastthread->sub.list.multi[0],
&sub->list.multi[0],
sizeof(struct multipos) * sub->in_use);
else
! mch_memmove(&lastthread->sub.list.line[0],
&sub->list.line[0],
sizeof(struct linepos) * sub->in_use);
}
--- 2765,2808 ----
{
/* This state is already in the list, don't add it again,
* unless it is an MOPEN that is used for a backreference. */
! if (!nfa_has_backref)
! return;
!
! /* See if the same state is already in the list with the same
! * positions. */
! for (i = 0; i < l->n; ++i)
! {
! thread = &l->t[i];
! if (thread->state->id == state->id
! && sub_equal(&thread->sub, sub))
! return;
! }
! }
!
! /* when there are backreferences the number of states may be (a
! * lot) bigger */
! if (nfa_has_backref && l->n == l->len)
! {
! int newlen = l->len * 3 / 2 + 50;
!
! l->t = vim_realloc(l->t, newlen * sizeof(nfa_thread_T));
! l->len = newlen;
}
/* add the state to the list */
state->lastlist = l->id;
! thread = &l->t[l->n++];
! thread->state = state;
! thread->sub.in_use = sub->in_use;
if (sub->in_use > 0)
{
/* Copy the match start and end positions. */
if (REG_MULTI)
! mch_memmove(&thread->sub.list.multi[0],
&sub->list.multi[0],
sizeof(struct multipos) * sub->in_use);
else
! mch_memmove(&thread->sub.list.line[0],
&sub->list.line[0],
sizeof(struct linepos) * sub->in_use);
}
***************
*** 2909,2915 ****
/* re-order to put the new state at the current position */
count = l->n - tlen;
! if (count > 1)
{
/* make space for new states, then move them from the
* end to the current position */
--- 3021,3032 ----
/* re-order to put the new state at the current position */
count = l->n - tlen;
! if (count == 1)
! {
! /* overwrite the current state */
! l->t[i] = l->t[l->n - 1];
! }
! else if (count > 1)
{
/* make space for new states, then move them from the
* end to the current position */
***************
*** 2920,2930 ****
&(l->t[l->n - 1]),
sizeof(nfa_thread_T) * count);
}
- else
- {
- /* overwrite the current state */
- l->t[i] = l->t[l->n - 1];
- }
--l->n;
*ip = i - 1;
}
--- 3037,3042 ----
***************
*** 3183,3196 ****
/* Allocate memory for the lists of nodes. */
size = (nstate + 1) * sizeof(nfa_thread_T);
! list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
! list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
! list[2].t = (nfa_thread_T *)lalloc(size, TRUE);
if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL)
goto theend;
- vim_memset(list[0].t, 0, size);
- vim_memset(list[1].t, 0, size);
- vim_memset(list[2].t, 0, size);
#ifdef ENABLE_LOG
log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
--- 3295,3308 ----
/* Allocate memory for the lists of nodes. */
size = (nstate + 1) * sizeof(nfa_thread_T);
! list[0].t = (nfa_thread_T *)lalloc_clear(size, TRUE);
! list[0].len = nstate + 1;
! list[1].t = (nfa_thread_T *)lalloc_clear(size, TRUE);
! list[1].len = nstate + 1;
! list[2].t = (nfa_thread_T *)lalloc_clear(size, TRUE);
! list[2].len = nstate + 1;
if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL)
goto theend;
#ifdef ENABLE_LOG
log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
***************
*** 3970,3976 ****
vim_free(list[0].t);
vim_free(list[1].t);
vim_free(list[2].t);
- list[0].t = list[1].t = list[2].t = NULL;
vim_free(listids);
#undef ADD_POS_NEG_STATE
#ifdef NFA_REGEXP_DEBUG_LOG
--- 4082,4087 ----
***************
*** 4131,4136 ****
--- 4242,4248 ----
reglnum = 0; /* relative to line */
nfa_has_zend = prog->has_zend;
+ nfa_has_backref = prog->has_backref;
nfa_nsubexpr = prog->nsubexp;
nstate = prog->nstate;
***************
*** 4225,4230 ****
--- 4337,4343 ----
prog->engine = &nfa_regengine;
prog->nstate = nstate;
prog->has_zend = nfa_has_zend;
+ prog->has_backref = nfa_has_backref;
prog->nsubexp = regnpar;
#ifdef ENABLE_LOG
nfa_postfix_dump(expr, OK);
*** ../vim-7.3.1070/src/regexp.h 2013-05-29 21:14:37.000000000 +0200
--- src/regexp.h 2013-05-30 15:54:53.000000000 +0200
***************
*** 87,92 ****
--- 87,93 ----
regprog_T regprog;
nfa_state_T *start;
int has_zend; /* pattern contains \ze */
+ int has_backref; /* pattern contains \1 .. \9 */
int nsubexp; /* number of () */
int nstate;
nfa_state_T state[0]; /* actually longer.. */
*** ../vim-7.3.1070/src/testdir/test64.in 2013-05-30 11:51:04.000000000 +0200
--- src/testdir/test64.in 2013-05-30 16:47:29.000000000 +0200
***************
*** 333,339 ****
:"
:"""" Back references
:call add(tl, [2, '\(\i\+\) \1', ' abc abc', 'abc abc', 'abc'])
! :"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
:call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
:"
:"""" Look-behind with limit
--- 333,339 ----
:"
:"""" Back references
:call add(tl, [2, '\(\i\+\) \1', ' abc abc', 'abc abc', 'abc'])
! :call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
:call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
:"
:"""" Look-behind with limit
*** ../vim-7.3.1070/src/testdir/test64.ok 2013-05-30 11:51:04.000000000 +0200
--- src/testdir/test64.ok 2013-05-30 17:00:27.000000000 +0200
***************
*** 716,721 ****
--- 716,724 ----
OK 0 - \(\i\+\) \1
OK 1 - \(\i\+\) \1
OK 2 - \(\i\+\) \1
+ OK 0 - \(\i\+\) \1
+ OK 1 - \(\i\+\) \1
+ OK 2 - \(\i\+\) \1
OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
*** ../vim-7.3.1070/src/version.c 2013-05-30 15:38:20.000000000 +0200
--- src/version.c 2013-05-30 17:02:40.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
{ /* Add new patch number below this line */
+ /**/
+ 1071,
/**/
--
How To Keep A Healthy Level Of Insanity:
14. Put mosquito netting around your work area. Play a tape of jungle
sounds all day.
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///