diff --git a/7.3.1017 b/7.3.1017 new file mode 100644 index 0000000..6e043e3 --- /dev/null +++ b/7.3.1017 @@ -0,0 +1,416 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.1017 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.1017 +Problem: Zero width match changes length of match. +Solution: For a zero width match put new states in the current position in + the state list. +Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok, + src/regexp.h + + +*** ../vim-7.3.1016/src/regexp_nfa.c 2013-05-25 15:31:02.000000000 +0200 +--- src/regexp_nfa.c 2013-05-25 20:18:25.000000000 +0200 +*************** +*** 2471,2494 **** + * NFA execution code. + ****************************************************************/ + +! /* thread_T contains runtime information of a NFA state */ +! struct thread + { + nfa_state_T *state; +! regsub_T sub; /* submatch info */ +! }; + + typedef struct + { +! thread_T *t; +! int n; +! } List; + +! static void addstate __ARGS((List *l, nfa_state_T *state, regsub_T *m, int off, int lid, int *match)); + + static void + addstate(l, state, m, off, lid, match) +! List *l; /* runtime state list */ + nfa_state_T *state; /* state to update */ + regsub_T *m; /* pointers to subexpressions */ + int off; /* byte offset, when -1 go to next line */ +--- 2471,2497 ---- + * NFA execution code. + ****************************************************************/ + +! /* nfa_thread_T contains runtime information of a NFA state */ +! typedef struct + { + nfa_state_T *state; +! regsub_T sub; /* Submatch info. TODO: expensive! */ +! } nfa_thread_T; +! + + typedef struct + { +! nfa_thread_T *t; +! int n; +! } nfa_list_T; + +! static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid, int *match)); +! +! static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *match, int *ip)); + + static void + addstate(l, state, m, off, lid, match) +! nfa_list_T *l; /* runtime state list */ + nfa_state_T *state; /* state to update */ + regsub_T *m; /* pointers to subexpressions */ + int off; /* byte offset, when -1 go to next line */ +*************** +*** 2497,2503 **** + { + regsub_T save; + int subidx = 0; +! thread_T *lastthread; + + if (l == NULL || state == NULL) + return; +--- 2500,2506 ---- + { + regsub_T save; + int subidx = 0; +! nfa_thread_T *lastthread; + + if (l == NULL || state == NULL) + return; +*************** +*** 2533,2539 **** + state->lastlist = lid; + lastthread = &l->t[l->n++]; + lastthread->state = state; +! lastthread->sub = *m; + } + } + +--- 2536,2542 ---- + state->lastlist = lid; + lastthread = &l->t[l->n++]; + lastthread->state = state; +! lastthread->sub = *m; /* TODO: expensive! */ + } + } + +*************** +*** 2698,2703 **** +--- 2701,2754 ---- + } + + /* ++ * Like addstate(), but the new state(s) are put at position "*ip". ++ * Used for zero-width matches, next state to use is the added one. ++ * This makes sure the order of states to be tried does not change, which ++ * matters for alternatives. ++ */ ++ static void ++ addstate_here(l, state, m, lid, matchp, ip) ++ nfa_list_T *l; /* runtime state list */ ++ nfa_state_T *state; /* state to update */ ++ regsub_T *m; /* pointers to subexpressions */ ++ int lid; ++ int *matchp; /* found match? */ ++ int *ip; ++ { ++ int tlen = l->n; ++ int count; ++ int i = *ip; ++ ++ /* first add the state(s) at the end, so that we know how many there are */ ++ addstate(l, state, m, 0, lid, matchp); ++ ++ /* when "*ip" was at the end of the list, nothing to do */ ++ if (i + 1 == tlen) ++ return; ++ ++ /* re-order to put the new state at the current position */ ++ count = l->n - tlen; ++ if (count > 1) ++ { ++ /* make space for new states, then move them from the ++ * end to the current position */ ++ mch_memmove(&(l->t[i + count]), ++ &(l->t[i + 1]), ++ sizeof(nfa_thread_T) * (l->n - i - 1)); ++ mch_memmove(&(l->t[i]), ++ &(l->t[l->n - 1]), ++ sizeof(nfa_thread_T) * count); ++ } ++ else ++ { ++ /* overwrite the current state */ ++ l->t[i] = l->t[l->n - 1]; ++ } ++ --l->n; ++ *ip = i - 1; ++ } ++ ++ /* + * Check character class "class" against current character c. + */ + static int +*************** +*** 2872,2888 **** + int match = FALSE; + int flag = 0; + int old_reglnum = -1; +! int go_to_nextline; +! thread_T *t; + char_u *old_reginput = NULL; + char_u *old_regline = NULL; +! List list[3]; +! List *listtbl[2][2]; +! List *ll; + int listid = 1; +! List *thislist; +! List *nextlist; +! List *neglist; + int *listids = NULL; + int j = 0; + #ifdef NFA_REGEXP_DEBUG_LOG +--- 2923,2939 ---- + int match = FALSE; + int flag = 0; + int old_reglnum = -1; +! int go_to_nextline = FALSE; +! nfa_thread_T *t; + char_u *old_reginput = NULL; + char_u *old_regline = NULL; +! nfa_list_T list[3]; +! nfa_list_T *listtbl[2][2]; +! nfa_list_T *ll; + int listid = 1; +! nfa_list_T *thislist; +! nfa_list_T *nextlist; +! nfa_list_T *neglist; + int *listids = NULL; + int j = 0; + #ifdef NFA_REGEXP_DEBUG_LOG +*************** +*** 2896,2905 **** + #endif + + /* Allocate memory for the lists of nodes */ +! size = (nstate + 1) * sizeof(thread_T); +! list[0].t = (thread_T *)lalloc(size, TRUE); +! list[1].t = (thread_T *)lalloc(size, TRUE); +! list[2].t = (thread_T *)lalloc(size, TRUE); + if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) + goto theend; + vim_memset(list[0].t, 0, size); +--- 2947,2956 ---- + #endif + + /* Allocate memory for the lists of nodes */ +! size = (nstate + 1) * sizeof(nfa_thread_T); +! list[0].t = (nfa_thread_T *)lalloc(size, TRUE); +! list[1].t = (nfa_thread_T *)lalloc(size, TRUE); +! list[2].t = (nfa_thread_T *)lalloc(size, TRUE); + if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) + goto theend; + vim_memset(list[0].t, 0, size); +*************** +*** 3056,3063 **** + * nfa_regmatch(). Submatches are stored in *m, and used in + * the parent call. */ + if (start->c == NFA_MOPEN + 0) +! addstate(thislist, t->state->out, &t->sub, 0, listid, +! &match); + else + { + *m = t->sub; +--- 3107,3114 ---- + * nfa_regmatch(). Submatches are stored in *m, and used in + * the parent call. */ + if (start->c == NFA_MOPEN + 0) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &match, &i); + else + { + *m = t->sub; +*************** +*** 3130,3137 **** + t->sub.end[j] = m->end[j]; + } + /* t->state->out1 is the corresponding END_INVISIBLE node */ +! addstate(thislist, t->state->out1->out, &t->sub, 0, listid, +! &match); + } + else + { +--- 3181,3188 ---- + t->sub.end[j] = m->end[j]; + } + /* t->state->out1 is the corresponding END_INVISIBLE node */ +! addstate_here(thislist, t->state->out1->out, &t->sub, +! listid, &match, &i); + } + else + { +*************** +*** 3142,3155 **** + + case NFA_BOL: + if (reginput == regline) +! addstate(thislist, t->state->out, &t->sub, 0, listid, +! &match); + break; + + case NFA_EOL: + if (c == NUL) +! addstate(thislist, t->state->out, &t->sub, 0, listid, +! &match); + break; + + case NFA_BOW: +--- 3193,3206 ---- + + case NFA_BOL: + if (reginput == regline) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &match, &i); + break; + + case NFA_EOL: + if (c == NUL) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &match, &i); + break; + + case NFA_BOW: +*************** +*** 3176,3183 **** + && vim_iswordc_buf(reginput[-1], reg_buf))) + bow = FALSE; + if (bow) +! addstate(thislist, t->state->out, &t->sub, 0, listid, +! &match); + break; + } + +--- 3227,3234 ---- + && vim_iswordc_buf(reginput[-1], reg_buf))) + bow = FALSE; + if (bow) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &match, &i); + break; + } + +*************** +*** 3204,3211 **** + || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) + eow = FALSE; + if (eow) +! addstate(thislist, t->state->out, &t->sub, 0, listid, +! &match); + break; + } + +--- 3255,3262 ---- + || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) + eow = FALSE; + if (eow) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &match, &i); + break; + } + +*** ../vim-7.3.1016/src/testdir/test64.in 2013-05-21 13:30:17.000000000 +0200 +--- src/testdir/test64.in 2013-05-25 19:54:40.000000000 +0200 +*************** +*** 270,275 **** +--- 270,276 ---- + :call add(tl, ['aa \zsax', ' ax']) " must match before \zs + :call add(tl, ['abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match']) + :call add(tl, ['\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last']) ++ :call add(tl, ['\>\zs.', 'aword. ', '.']) + + :"""" Tests for \@ features + :call add(tl, ['abc\@=', 'abc', 'ab']) +*************** +*** 299,304 **** +--- 300,311 ---- + :call add(tl, ['\%u0020', 'yes no', ' ']) + :call add(tl, ['\%U00000020', 'yes no', ' ']) + ++ :"""" Alternatives, must use first longest match ++ :call add(tl, ['goo\|go', 'google', 'goo']) ++ :call add(tl, ['\\zs. + OK - abc\@= + OK - abc\@=cd + OK - abc\@= +*************** +*** 231,234 **** +--- 232,238 ---- + OK - \%x20 + OK - \%u0020 + OK - \%U00000020 ++ OK - goo\|go ++ OK - \