| To: vim_dev@googlegroups.com |
| Subject: Patch 7.3.1071 |
| Fcc: outbox |
| From: Bram Moolenaar <Bram@moolenaar.net> |
| Mime-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| |
| Patch 7.3.1071 |
| Problem: New regexp engine: backreferences don't work correctly. |
| Solution: Add every possible start/end position on the state stack. |
| Files: src/regexp_nfa.c, src/regexp.h, src/testdir/test64.in, |
| src/testdir/test64.ok |
| |
| |
| |
| |
| |
| *** 184,189 **** |
| --- 184,192 ---- |
| /* NFA regexp \ze operator encountered. */ |
| static int nfa_has_zend; |
| |
| + /* NFA regexp \1 .. \9 encountered. */ |
| + static int nfa_has_backref; |
| + |
| /* Number of sub expressions actually being used during execution. 1 if only |
| * the whole match (subexpr 0) is used. */ |
| static int nfa_nsubexpr; |
| |
| *** 266,271 **** |
| --- 269,275 ---- |
| post_ptr = post_start; |
| post_end = post_start + nstate_max; |
| nfa_has_zend = FALSE; |
| + nfa_has_backref = FALSE; |
| |
| regcomp_start(expr, re_flags); |
| |
| |
| *** 750,764 **** |
| /* TODO: Not supported yet */ |
| return FAIL; |
| |
| ! case Magic('1'): EMIT(NFA_BACKREF1); break; |
| ! case Magic('2'): EMIT(NFA_BACKREF2); break; |
| ! case Magic('3'): EMIT(NFA_BACKREF3); break; |
| ! case Magic('4'): EMIT(NFA_BACKREF4); break; |
| ! case Magic('5'): EMIT(NFA_BACKREF5); break; |
| ! case Magic('6'): EMIT(NFA_BACKREF6); break; |
| ! case Magic('7'): EMIT(NFA_BACKREF7); break; |
| ! case Magic('8'): EMIT(NFA_BACKREF8); break; |
| ! case Magic('9'): EMIT(NFA_BACKREF9); break; |
| |
| case Magic('z'): |
| c = no_Magic(getchr()); |
| --- 754,771 ---- |
| /* TODO: Not supported yet */ |
| return FAIL; |
| |
| ! case Magic('1'): |
| ! case Magic('2'): |
| ! case Magic('3'): |
| ! case Magic('4'): |
| ! case Magic('5'): |
| ! case Magic('6'): |
| ! case Magic('7'): |
| ! case Magic('8'): |
| ! case Magic('9'): |
| ! EMIT(NFA_BACKREF1 + (no_Magic(c) - '1')); |
| ! nfa_has_backref = TRUE; |
| ! break; |
| |
| case Magic('z'): |
| c = no_Magic(getchr()); |
| |
| *** 2581,2587 **** |
| typedef struct |
| { |
| nfa_thread_T *t; /* allocated array of states */ |
| ! int n; /* nr of states in "t" */ |
| int id; /* ID of the list */ |
| } nfa_list_T; |
| |
| --- 2588,2595 ---- |
| typedef struct |
| { |
| nfa_thread_T *t; /* allocated array of states */ |
| ! int n; /* nr of states currently in "t" */ |
| ! int len; /* max nr of states in "t" */ |
| int id; /* ID of the list */ |
| } nfa_list_T; |
| |
| |
| *** 2612,2620 **** |
| --- 2620,2711 ---- |
| /* Used during execution: whether a match has been found. */ |
| static int nfa_match; |
| |
| + static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2)); |
| static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int off)); |
| static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int *ip)); |
| |
| + /* |
| + * Return TRUE if "sub1" and "sub2" have the same positions. |
| + */ |
| + static int |
| + sub_equal(sub1, sub2) |
| + regsub_T *sub1; |
| + regsub_T *sub2; |
| + { |
| + int i; |
| + int todo; |
| + linenr_T s1, e1; |
| + linenr_T s2, e2; |
| + char_u *sp1, *ep1; |
| + char_u *sp2, *ep2; |
| + |
| + todo = sub1->in_use > sub2->in_use ? sub1->in_use : sub2->in_use; |
| + if (REG_MULTI) |
| + { |
| + for (i = 0; i < todo; ++i) |
| + { |
| + if (i < sub1->in_use) |
| + { |
| + s1 = sub1->list.multi[i].start.lnum; |
| + e1 = sub1->list.multi[i].end.lnum; |
| + } |
| + else |
| + { |
| + s1 = 0; |
| + e1 = 0; |
| + } |
| + if (i < sub2->in_use) |
| + { |
| + s2 = sub2->list.multi[i].start.lnum; |
| + e2 = sub2->list.multi[i].end.lnum; |
| + } |
| + else |
| + { |
| + s2 = 0; |
| + e2 = 0; |
| + } |
| + if (s1 != s2 || e1 != e2) |
| + return FALSE; |
| + if (s1 != 0 && sub1->list.multi[i].start.col |
| + != sub2->list.multi[i].start.col) |
| + return FALSE; |
| + if (e1 != 0 && sub1->list.multi[i].end.col |
| + != sub2->list.multi[i].end.col) |
| + return FALSE; |
| + } |
| + } |
| + else |
| + { |
| + for (i = 0; i < todo; ++i) |
| + { |
| + if (i < sub1->in_use) |
| + { |
| + sp1 = sub1->list.line[i].start; |
| + ep1 = sub1->list.line[i].end; |
| + } |
| + else |
| + { |
| + sp1 = NULL; |
| + ep1 = NULL; |
| + } |
| + if (i < sub2->in_use) |
| + { |
| + sp2 = sub2->list.line[i].start; |
| + ep2 = sub2->list.line[i].end; |
| + } |
| + else |
| + { |
| + sp2 = NULL; |
| + ep2 = NULL; |
| + } |
| + if (sp1 != sp2 || ep1 != ep2) |
| + return FALSE; |
| + } |
| + } |
| + |
| + return TRUE; |
| + } |
| + |
| static void |
| addstate(l, state, sub, off) |
| nfa_list_T *l; /* runtime state list */ |
| |
| *** 2623,2629 **** |
| int off; /* byte offset, when -1 go to next line */ |
| { |
| int subidx; |
| ! nfa_thread_T *lastthread; |
| lpos_T save_lpos; |
| int save_in_use; |
| char_u *save_ptr; |
| --- 2714,2720 ---- |
| int off; /* byte offset, when -1 go to next line */ |
| { |
| int subidx; |
| ! nfa_thread_T *thread; |
| lpos_T save_lpos; |
| int save_in_use; |
| char_u *save_ptr; |
| |
| *** 2674,2696 **** |
| { |
| /* This state is already in the list, don't add it again, |
| * unless it is an MOPEN that is used for a backreference. */ |
| ! return; |
| } |
| |
| /* add the state to the list */ |
| state->lastlist = l->id; |
| ! lastthread = &l->t[l->n++]; |
| ! lastthread->state = state; |
| ! lastthread->sub.in_use = sub->in_use; |
| if (sub->in_use > 0) |
| { |
| /* Copy the match start and end positions. */ |
| if (REG_MULTI) |
| ! mch_memmove(&lastthread->sub.list.multi[0], |
| &sub->list.multi[0], |
| sizeof(struct multipos) * sub->in_use); |
| else |
| ! mch_memmove(&lastthread->sub.list.line[0], |
| &sub->list.line[0], |
| sizeof(struct linepos) * sub->in_use); |
| } |
| --- 2765,2808 ---- |
| { |
| /* This state is already in the list, don't add it again, |
| * unless it is an MOPEN that is used for a backreference. */ |
| ! if (!nfa_has_backref) |
| ! return; |
| ! |
| ! /* See if the same state is already in the list with the same |
| ! * positions. */ |
| ! for (i = 0; i < l->n; ++i) |
| ! { |
| ! thread = &l->t[i]; |
| ! if (thread->state->id == state->id |
| ! && sub_equal(&thread->sub, sub)) |
| ! return; |
| ! } |
| ! } |
| ! |
| ! /* when there are backreferences the number of states may be (a |
| ! * lot) bigger */ |
| ! if (nfa_has_backref && l->n == l->len) |
| ! { |
| ! int newlen = l->len * 3 / 2 + 50; |
| ! |
| ! l->t = vim_realloc(l->t, newlen * sizeof(nfa_thread_T)); |
| ! l->len = newlen; |
| } |
| |
| /* add the state to the list */ |
| state->lastlist = l->id; |
| ! thread = &l->t[l->n++]; |
| ! thread->state = state; |
| ! thread->sub.in_use = sub->in_use; |
| if (sub->in_use > 0) |
| { |
| /* Copy the match start and end positions. */ |
| if (REG_MULTI) |
| ! mch_memmove(&thread->sub.list.multi[0], |
| &sub->list.multi[0], |
| sizeof(struct multipos) * sub->in_use); |
| else |
| ! mch_memmove(&thread->sub.list.line[0], |
| &sub->list.line[0], |
| sizeof(struct linepos) * sub->in_use); |
| } |
| |
| *** 2909,2915 **** |
| |
| /* re-order to put the new state at the current position */ |
| count = l->n - tlen; |
| ! if (count > 1) |
| { |
| /* make space for new states, then move them from the |
| * end to the current position */ |
| --- 3021,3032 ---- |
| |
| /* re-order to put the new state at the current position */ |
| count = l->n - tlen; |
| ! if (count == 1) |
| ! { |
| ! /* overwrite the current state */ |
| ! l->t[i] = l->t[l->n - 1]; |
| ! } |
| ! else if (count > 1) |
| { |
| /* make space for new states, then move them from the |
| * end to the current position */ |
| |
| *** 2920,2930 **** |
| &(l->t[l->n - 1]), |
| sizeof(nfa_thread_T) * count); |
| } |
| - else |
| - { |
| - /* overwrite the current state */ |
| - l->t[i] = l->t[l->n - 1]; |
| - } |
| --l->n; |
| *ip = i - 1; |
| } |
| --- 3037,3042 ---- |
| |
| *** 3183,3196 **** |
| |
| /* Allocate memory for the lists of nodes. */ |
| size = (nstate + 1) * sizeof(nfa_thread_T); |
| ! list[0].t = (nfa_thread_T *)lalloc(size, TRUE); |
| ! list[1].t = (nfa_thread_T *)lalloc(size, TRUE); |
| ! list[2].t = (nfa_thread_T *)lalloc(size, TRUE); |
| if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) |
| goto theend; |
| - vim_memset(list[0].t, 0, size); |
| - vim_memset(list[1].t, 0, size); |
| - vim_memset(list[2].t, 0, size); |
| |
| #ifdef ENABLE_LOG |
| log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); |
| --- 3295,3308 ---- |
| |
| /* Allocate memory for the lists of nodes. */ |
| size = (nstate + 1) * sizeof(nfa_thread_T); |
| ! list[0].t = (nfa_thread_T *)lalloc_clear(size, TRUE); |
| ! list[0].len = nstate + 1; |
| ! list[1].t = (nfa_thread_T *)lalloc_clear(size, TRUE); |
| ! list[1].len = nstate + 1; |
| ! list[2].t = (nfa_thread_T *)lalloc_clear(size, TRUE); |
| ! list[2].len = nstate + 1; |
| if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) |
| goto theend; |
| |
| #ifdef ENABLE_LOG |
| log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); |
| |
| *** 3970,3976 **** |
| vim_free(list[0].t); |
| vim_free(list[1].t); |
| vim_free(list[2].t); |
| - list[0].t = list[1].t = list[2].t = NULL; |
| vim_free(listids); |
| #undef ADD_POS_NEG_STATE |
| #ifdef NFA_REGEXP_DEBUG_LOG |
| --- 4082,4087 ---- |
| |
| *** 4131,4136 **** |
| --- 4242,4248 ---- |
| reglnum = 0; /* relative to line */ |
| |
| nfa_has_zend = prog->has_zend; |
| + nfa_has_backref = prog->has_backref; |
| nfa_nsubexpr = prog->nsubexp; |
| |
| nstate = prog->nstate; |
| |
| *** 4225,4230 **** |
| --- 4337,4343 ---- |
| prog->engine = &nfa_regengine; |
| prog->nstate = nstate; |
| prog->has_zend = nfa_has_zend; |
| + prog->has_backref = nfa_has_backref; |
| prog->nsubexp = regnpar; |
| #ifdef ENABLE_LOG |
| nfa_postfix_dump(expr, OK); |
| |
| |
| |
| *** 87,92 **** |
| --- 87,93 ---- |
| regprog_T regprog; |
| nfa_state_T *start; |
| int has_zend; /* pattern contains \ze */ |
| + int has_backref; /* pattern contains \1 .. \9 */ |
| int nsubexp; /* number of () */ |
| int nstate; |
| nfa_state_T state[0]; /* actually longer.. */ |
| |
| |
| |
| *** 333,339 **** |
| :" |
| :"""" Back references |
| :call add(tl, [2, '\(\i\+\) \1', ' abc abc', 'abc abc', 'abc']) |
| ! :"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo']) |
| :call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i']) |
| :" |
| :"""" Look-behind with limit |
| --- 333,339 ---- |
| :" |
| :"""" Back references |
| :call add(tl, [2, '\(\i\+\) \1', ' abc abc', 'abc abc', 'abc']) |
| ! :call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo']) |
| :call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i']) |
| :" |
| :"""" Look-behind with limit |
| |
| |
| |
| *** 716,721 **** |
| --- 716,724 ---- |
| OK 0 - \(\i\+\) \1 |
| OK 1 - \(\i\+\) \1 |
| OK 2 - \(\i\+\) \1 |
| + OK 0 - \(\i\+\) \1 |
| + OK 1 - \(\i\+\) \1 |
| + OK 2 - \(\i\+\) \1 |
| OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9 |
| OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9 |
| OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9 |
| |
| |
| |
| *** 730,731 **** |
| --- 730,733 ---- |
| { /* Add new patch number below this line */ |
| + /**/ |
| + 1071, |
| /**/ |
| |
| -- |
| How To Keep A Healthy Level Of Insanity: |
| 14. Put mosquito netting around your work area. Play a tape of jungle |
| sounds all day. |
| |
| /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
| /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
| \\\ an exciting new programming language -- http://www.Zimbu.org /// |
| \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |