diff --git a/7.3.1071 b/7.3.1071 new file mode 100644 index 0000000..f3380b9 --- /dev/null +++ b/7.3.1071 @@ -0,0 +1,437 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.1071 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.1071 +Problem: New regexp engine: backreferences don't work correctly. +Solution: Add every possible start/end position on the state stack. +Files: src/regexp_nfa.c, src/regexp.h, src/testdir/test64.in, + src/testdir/test64.ok + + +*** ../vim-7.3.1070/src/regexp_nfa.c 2013-05-30 11:51:04.000000000 +0200 +--- src/regexp_nfa.c 2013-05-30 16:43:43.000000000 +0200 +*************** +*** 184,189 **** +--- 184,192 ---- + /* NFA regexp \ze operator encountered. */ + static int nfa_has_zend; + ++ /* NFA regexp \1 .. \9 encountered. */ ++ static int nfa_has_backref; ++ + /* Number of sub expressions actually being used during execution. 1 if only + * the whole match (subexpr 0) is used. */ + static int nfa_nsubexpr; +*************** +*** 266,271 **** +--- 269,275 ---- + post_ptr = post_start; + post_end = post_start + nstate_max; + nfa_has_zend = FALSE; ++ nfa_has_backref = FALSE; + + regcomp_start(expr, re_flags); + +*************** +*** 750,764 **** + /* TODO: Not supported yet */ + return FAIL; + +! case Magic('1'): EMIT(NFA_BACKREF1); break; +! case Magic('2'): EMIT(NFA_BACKREF2); break; +! case Magic('3'): EMIT(NFA_BACKREF3); break; +! case Magic('4'): EMIT(NFA_BACKREF4); break; +! case Magic('5'): EMIT(NFA_BACKREF5); break; +! case Magic('6'): EMIT(NFA_BACKREF6); break; +! case Magic('7'): EMIT(NFA_BACKREF7); break; +! case Magic('8'): EMIT(NFA_BACKREF8); break; +! case Magic('9'): EMIT(NFA_BACKREF9); break; + + case Magic('z'): + c = no_Magic(getchr()); +--- 754,771 ---- + /* TODO: Not supported yet */ + return FAIL; + +! case Magic('1'): +! case Magic('2'): +! case Magic('3'): +! case Magic('4'): +! case Magic('5'): +! case Magic('6'): +! case Magic('7'): +! case Magic('8'): +! case Magic('9'): +! EMIT(NFA_BACKREF1 + (no_Magic(c) - '1')); +! nfa_has_backref = TRUE; +! break; + + case Magic('z'): + c = no_Magic(getchr()); +*************** +*** 2581,2587 **** + typedef struct + { + nfa_thread_T *t; /* allocated array of states */ +! int n; /* nr of states in "t" */ + int id; /* ID of the list */ + } nfa_list_T; + +--- 2588,2595 ---- + typedef struct + { + nfa_thread_T *t; /* allocated array of states */ +! int n; /* nr of states currently in "t" */ +! int len; /* max nr of states in "t" */ + int id; /* ID of the list */ + } nfa_list_T; + +*************** +*** 2612,2620 **** +--- 2620,2711 ---- + /* Used during execution: whether a match has been found. */ + static int nfa_match; + ++ static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2)); + static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int off)); + static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int *ip)); + ++ /* ++ * Return TRUE if "sub1" and "sub2" have the same positions. ++ */ ++ static int ++ sub_equal(sub1, sub2) ++ regsub_T *sub1; ++ regsub_T *sub2; ++ { ++ int i; ++ int todo; ++ linenr_T s1, e1; ++ linenr_T s2, e2; ++ char_u *sp1, *ep1; ++ char_u *sp2, *ep2; ++ ++ todo = sub1->in_use > sub2->in_use ? sub1->in_use : sub2->in_use; ++ if (REG_MULTI) ++ { ++ for (i = 0; i < todo; ++i) ++ { ++ if (i < sub1->in_use) ++ { ++ s1 = sub1->list.multi[i].start.lnum; ++ e1 = sub1->list.multi[i].end.lnum; ++ } ++ else ++ { ++ s1 = 0; ++ e1 = 0; ++ } ++ if (i < sub2->in_use) ++ { ++ s2 = sub2->list.multi[i].start.lnum; ++ e2 = sub2->list.multi[i].end.lnum; ++ } ++ else ++ { ++ s2 = 0; ++ e2 = 0; ++ } ++ if (s1 != s2 || e1 != e2) ++ return FALSE; ++ if (s1 != 0 && sub1->list.multi[i].start.col ++ != sub2->list.multi[i].start.col) ++ return FALSE; ++ if (e1 != 0 && sub1->list.multi[i].end.col ++ != sub2->list.multi[i].end.col) ++ return FALSE; ++ } ++ } ++ else ++ { ++ for (i = 0; i < todo; ++i) ++ { ++ if (i < sub1->in_use) ++ { ++ sp1 = sub1->list.line[i].start; ++ ep1 = sub1->list.line[i].end; ++ } ++ else ++ { ++ sp1 = NULL; ++ ep1 = NULL; ++ } ++ if (i < sub2->in_use) ++ { ++ sp2 = sub2->list.line[i].start; ++ ep2 = sub2->list.line[i].end; ++ } ++ else ++ { ++ sp2 = NULL; ++ ep2 = NULL; ++ } ++ if (sp1 != sp2 || ep1 != ep2) ++ return FALSE; ++ } ++ } ++ ++ return TRUE; ++ } ++ + static void + addstate(l, state, sub, off) + nfa_list_T *l; /* runtime state list */ +*************** +*** 2623,2629 **** + int off; /* byte offset, when -1 go to next line */ + { + int subidx; +! nfa_thread_T *lastthread; + lpos_T save_lpos; + int save_in_use; + char_u *save_ptr; +--- 2714,2720 ---- + int off; /* byte offset, when -1 go to next line */ + { + int subidx; +! nfa_thread_T *thread; + lpos_T save_lpos; + int save_in_use; + char_u *save_ptr; +*************** +*** 2674,2696 **** + { + /* This state is already in the list, don't add it again, + * unless it is an MOPEN that is used for a backreference. */ +! return; + } + + /* add the state to the list */ + state->lastlist = l->id; +! lastthread = &l->t[l->n++]; +! lastthread->state = state; +! lastthread->sub.in_use = sub->in_use; + if (sub->in_use > 0) + { + /* Copy the match start and end positions. */ + if (REG_MULTI) +! mch_memmove(&lastthread->sub.list.multi[0], + &sub->list.multi[0], + sizeof(struct multipos) * sub->in_use); + else +! mch_memmove(&lastthread->sub.list.line[0], + &sub->list.line[0], + sizeof(struct linepos) * sub->in_use); + } +--- 2765,2808 ---- + { + /* This state is already in the list, don't add it again, + * unless it is an MOPEN that is used for a backreference. */ +! if (!nfa_has_backref) +! return; +! +! /* See if the same state is already in the list with the same +! * positions. */ +! for (i = 0; i < l->n; ++i) +! { +! thread = &l->t[i]; +! if (thread->state->id == state->id +! && sub_equal(&thread->sub, sub)) +! return; +! } +! } +! +! /* when there are backreferences the number of states may be (a +! * lot) bigger */ +! if (nfa_has_backref && l->n == l->len) +! { +! int newlen = l->len * 3 / 2 + 50; +! +! l->t = vim_realloc(l->t, newlen * sizeof(nfa_thread_T)); +! l->len = newlen; + } + + /* add the state to the list */ + state->lastlist = l->id; +! thread = &l->t[l->n++]; +! thread->state = state; +! thread->sub.in_use = sub->in_use; + if (sub->in_use > 0) + { + /* Copy the match start and end positions. */ + if (REG_MULTI) +! mch_memmove(&thread->sub.list.multi[0], + &sub->list.multi[0], + sizeof(struct multipos) * sub->in_use); + else +! mch_memmove(&thread->sub.list.line[0], + &sub->list.line[0], + sizeof(struct linepos) * sub->in_use); + } +*************** +*** 2909,2915 **** + + /* re-order to put the new state at the current position */ + count = l->n - tlen; +! if (count > 1) + { + /* make space for new states, then move them from the + * end to the current position */ +--- 3021,3032 ---- + + /* re-order to put the new state at the current position */ + count = l->n - tlen; +! if (count == 1) +! { +! /* overwrite the current state */ +! l->t[i] = l->t[l->n - 1]; +! } +! else if (count > 1) + { + /* make space for new states, then move them from the + * end to the current position */ +*************** +*** 2920,2930 **** + &(l->t[l->n - 1]), + sizeof(nfa_thread_T) * count); + } +- else +- { +- /* overwrite the current state */ +- l->t[i] = l->t[l->n - 1]; +- } + --l->n; + *ip = i - 1; + } +--- 3037,3042 ---- +*************** +*** 3183,3196 **** + + /* Allocate memory for the lists of nodes. */ + size = (nstate + 1) * sizeof(nfa_thread_T); +! list[0].t = (nfa_thread_T *)lalloc(size, TRUE); +! list[1].t = (nfa_thread_T *)lalloc(size, TRUE); +! list[2].t = (nfa_thread_T *)lalloc(size, TRUE); + if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) + goto theend; +- vim_memset(list[0].t, 0, size); +- vim_memset(list[1].t, 0, size); +- vim_memset(list[2].t, 0, size); + + #ifdef ENABLE_LOG + log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); +--- 3295,3308 ---- + + /* Allocate memory for the lists of nodes. */ + size = (nstate + 1) * sizeof(nfa_thread_T); +! list[0].t = (nfa_thread_T *)lalloc_clear(size, TRUE); +! list[0].len = nstate + 1; +! list[1].t = (nfa_thread_T *)lalloc_clear(size, TRUE); +! list[1].len = nstate + 1; +! list[2].t = (nfa_thread_T *)lalloc_clear(size, TRUE); +! list[2].len = nstate + 1; + if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) + goto theend; + + #ifdef ENABLE_LOG + log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); +*************** +*** 3970,3976 **** + vim_free(list[0].t); + vim_free(list[1].t); + vim_free(list[2].t); +- list[0].t = list[1].t = list[2].t = NULL; + vim_free(listids); + #undef ADD_POS_NEG_STATE + #ifdef NFA_REGEXP_DEBUG_LOG +--- 4082,4087 ---- +*************** +*** 4131,4136 **** +--- 4242,4248 ---- + reglnum = 0; /* relative to line */ + + nfa_has_zend = prog->has_zend; ++ nfa_has_backref = prog->has_backref; + nfa_nsubexpr = prog->nsubexp; + + nstate = prog->nstate; +*************** +*** 4225,4230 **** +--- 4337,4343 ---- + prog->engine = &nfa_regengine; + prog->nstate = nstate; + prog->has_zend = nfa_has_zend; ++ prog->has_backref = nfa_has_backref; + prog->nsubexp = regnpar; + #ifdef ENABLE_LOG + nfa_postfix_dump(expr, OK); +*** ../vim-7.3.1070/src/regexp.h 2013-05-29 21:14:37.000000000 +0200 +--- src/regexp.h 2013-05-30 15:54:53.000000000 +0200 +*************** +*** 87,92 **** +--- 87,93 ---- + regprog_T regprog; + nfa_state_T *start; + int has_zend; /* pattern contains \ze */ ++ int has_backref; /* pattern contains \1 .. \9 */ + int nsubexp; /* number of () */ + int nstate; + nfa_state_T state[0]; /* actually longer.. */ +*** ../vim-7.3.1070/src/testdir/test64.in 2013-05-30 11:51:04.000000000 +0200 +--- src/testdir/test64.in 2013-05-30 16:47:29.000000000 +0200 +*************** +*** 333,339 **** + :" + :"""" Back references + :call add(tl, [2, '\(\i\+\) \1', ' abc abc', 'abc abc', 'abc']) +! :"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo']) + :call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i']) + :" + :"""" Look-behind with limit +--- 333,339 ---- + :" + :"""" Back references + :call add(tl, [2, '\(\i\+\) \1', ' abc abc', 'abc abc', 'abc']) +! :call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo']) + :call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i']) + :" + :"""" Look-behind with limit +*** ../vim-7.3.1070/src/testdir/test64.ok 2013-05-30 11:51:04.000000000 +0200 +--- src/testdir/test64.ok 2013-05-30 17:00:27.000000000 +0200 +*************** +*** 716,721 **** +--- 716,724 ---- + OK 0 - \(\i\+\) \1 + OK 1 - \(\i\+\) \1 + OK 2 - \(\i\+\) \1 ++ OK 0 - \(\i\+\) \1 ++ OK 1 - \(\i\+\) \1 ++ OK 2 - \(\i\+\) \1 + OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9 + OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9 + OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9 +*** ../vim-7.3.1070/src/version.c 2013-05-30 15:38:20.000000000 +0200 +--- src/version.c 2013-05-30 17:02:40.000000000 +0200 +*************** +*** 730,731 **** +--- 730,733 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 1071, + /**/ + +-- +How To Keep A Healthy Level Of Insanity: +14. Put mosquito netting around your work area. Play a tape of jungle + sounds all day. + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ an exciting new programming language -- http://www.Zimbu.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///