| To: vim_dev@googlegroups.com |
| Subject: Patch 7.3.1017 |
| Fcc: outbox |
| From: Bram Moolenaar <Bram@moolenaar.net> |
| Mime-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| |
| Patch 7.3.1017 |
| Problem: Zero width match changes length of match. |
| Solution: For a zero width match put new states in the current position in |
| the state list. |
| Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok, |
| src/regexp.h |
| |
| |
| |
| |
| |
| *** 2471,2494 **** |
| * NFA execution code. |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| *** 2497,2503 **** |
| { |
| regsub_T save; |
| int subidx = 0; |
| ! thread_T *lastthread; |
| |
| if (l == NULL || state == NULL) |
| return; |
| --- 2500,2506 ---- |
| { |
| regsub_T save; |
| int subidx = 0; |
| ! nfa_thread_T *lastthread; |
| |
| if (l == NULL || state == NULL) |
| return; |
| |
| *** 2533,2539 **** |
| state->lastlist = lid; |
| lastthread = &l->t[l->n++]; |
| lastthread->state = state; |
| ! lastthread->sub = *m; |
| } |
| } |
| |
| --- 2536,2542 ---- |
| state->lastlist = lid; |
| lastthread = &l->t[l->n++]; |
| lastthread->state = state; |
| ! lastthread->sub = *m; /* TODO: expensive! */ |
| } |
| } |
| |
| |
| *** 2698,2703 **** |
| --- 2701,2754 ---- |
| } |
| |
| /* |
| + * Like addstate(), but the new state(s) take the place of entry "*ip". |
| + * Used for zero-width matches, next state to use is the added one ("*ip" is |
| + * set back by one).  This keeps the order of states to be tried unchanged, |
| + * which matters for alternatives.  If nothing is added nothing is changed. |
| + */ |
| + static void |
| + addstate_here(l, state, m, lid, matchp, ip) |
| + nfa_list_T *l; /* runtime state list */ |
| + nfa_state_T *state; /* state to update */ |
| + regsub_T *m; /* pointers to subexpressions */ |
| + int lid; /* passed on to addstate() */ |
| + int *matchp; /* found match? */ |
| + int *ip; /* in/out: index in "l" of the current entry */ |
| + { |
| + int tlen = l->n; |
| + int count; |
| + int i = *ip; |
| + |
| + /* first add the state(s) at the end, so that we know how many there are */ |
| + addstate(l, state, m, 0, lid, matchp); |
| + |
| + /* nothing to move when "*ip" was at the end or when no state was added */ |
| + if (i + 1 == tlen || l->n == tlen) |
| + return; |
| + |
| + /* re-order to put the new state at the current position */ |
| + count = l->n - tlen; |
| + if (count > 1) |
| + { |
| + /* make space for new states, then move them from the end to the |
| + * current position; NOTE(review): writes t[l->n + count - 2], check size */ |
| + mch_memmove(&(l->t[i + count]), |
| + &(l->t[i + 1]), |
| + sizeof(nfa_thread_T) * (l->n - i - 1)); |
| + mch_memmove(&(l->t[i]), |
| + &(l->t[l->n - 1]), |
| + sizeof(nfa_thread_T) * count); |
| + } |
| + else |
| + { |
| + /* exactly one new state: overwrite the current state */ |
| + l->t[i] = l->t[l->n - 1]; |
| + } |
| + --l->n; |
| + *ip = i - 1; |
| + } |
| + |
| + /* |
| * Check character class "class" against current character c. |
| */ |
| static int |
| |
| *** 2872,2888 **** |
| int match = FALSE; |
| int flag = 0; |
| int old_reglnum = -1; |
| ! int go_to_nextline; |
| ! thread_T *t; |
| char_u *old_reginput = NULL; |
| char_u *old_regline = NULL; |
| ! List list[3]; |
| ! List *listtbl[2][2]; |
| ! List *ll; |
| int listid = 1; |
| ! List *thislist; |
| ! List *nextlist; |
| ! List *neglist; |
| int *listids = NULL; |
| int j = 0; |
| #ifdef NFA_REGEXP_DEBUG_LOG |
| --- 2923,2939 ---- |
| int match = FALSE; |
| int flag = 0; |
| int old_reglnum = -1; |
| ! int go_to_nextline = FALSE; |
| ! nfa_thread_T *t; |
| char_u *old_reginput = NULL; |
| char_u *old_regline = NULL; |
| ! nfa_list_T list[3]; |
| ! nfa_list_T *listtbl[2][2]; |
| ! nfa_list_T *ll; |
| int listid = 1; |
| ! nfa_list_T *thislist; |
| ! nfa_list_T *nextlist; |
| ! nfa_list_T *neglist; |
| int *listids = NULL; |
| int j = 0; |
| #ifdef NFA_REGEXP_DEBUG_LOG |
| |
| *** 2896,2905 **** |
| #endif |
| |
| /* Allocate memory for the lists of nodes */ |
| ! size = (nstate + 1) * sizeof(thread_T); |
| ! list[0].t = (thread_T *)lalloc(size, TRUE); |
| ! list[1].t = (thread_T *)lalloc(size, TRUE); |
| ! list[2].t = (thread_T *)lalloc(size, TRUE); |
| if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) |
| goto theend; |
| vim_memset(list[0].t, 0, size); |
| --- 2947,2956 ---- |
| #endif |
| |
| /* Allocate memory for the lists of nodes */ |
| ! size = (nstate + 1) * sizeof(nfa_thread_T); |
| ! list[0].t = (nfa_thread_T *)lalloc(size, TRUE); |
| ! list[1].t = (nfa_thread_T *)lalloc(size, TRUE); |
| ! list[2].t = (nfa_thread_T *)lalloc(size, TRUE); |
| if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) |
| goto theend; |
| vim_memset(list[0].t, 0, size); |
| |
| *** 3056,3063 **** |
| * nfa_regmatch(). Submatches are stored in *m, and used in |
| * the parent call. */ |
| if (start->c == NFA_MOPEN + 0) |
| ! addstate(thislist, t->state->out, &t->sub, 0, listid, |
| ! &match); |
| else |
| { |
| *m = t->sub; |
| --- 3107,3114 ---- |
| * nfa_regmatch(). Submatches are stored in *m, and used in |
| * the parent call. */ |
| if (start->c == NFA_MOPEN + 0) |
| ! addstate_here(thislist, t->state->out, &t->sub, listid, |
| ! &match, &i); |
| else |
| { |
| *m = t->sub; |
| |
| *** 3130,3137 **** |
| t->sub.end[j] = m->end[j]; |
| } |
| /* t->state->out1 is the corresponding END_INVISIBLE node */ |
| ! addstate(thislist, t->state->out1->out, &t->sub, 0, listid, |
| ! &match); |
| } |
| else |
| { |
| --- 3181,3188 ---- |
| t->sub.end[j] = m->end[j]; |
| } |
| /* t->state->out1 is the corresponding END_INVISIBLE node */ |
| ! addstate_here(thislist, t->state->out1->out, &t->sub, |
| ! listid, &match, &i); |
| } |
| else |
| { |
| |
| *** 3142,3155 **** |
| |
| case NFA_BOL: |
| if (reginput == regline) |
| ! addstate(thislist, t->state->out, &t->sub, 0, listid, |
| ! &match); |
| break; |
| |
| case NFA_EOL: |
| if (c == NUL) |
| ! addstate(thislist, t->state->out, &t->sub, 0, listid, |
| ! &match); |
| break; |
| |
| case NFA_BOW: |
| --- 3193,3206 ---- |
| |
| case NFA_BOL: |
| if (reginput == regline) |
| ! addstate_here(thislist, t->state->out, &t->sub, listid, |
| ! &match, &i); |
| break; |
| |
| case NFA_EOL: |
| if (c == NUL) |
| ! addstate_here(thislist, t->state->out, &t->sub, listid, |
| ! &match, &i); |
| break; |
| |
| case NFA_BOW: |
| |
| *** 3176,3183 **** |
| && vim_iswordc_buf(reginput[-1], reg_buf))) |
| bow = FALSE; |
| if (bow) |
| ! addstate(thislist, t->state->out, &t->sub, 0, listid, |
| ! &match); |
| break; |
| } |
| |
| --- 3227,3234 ---- |
| && vim_iswordc_buf(reginput[-1], reg_buf))) |
| bow = FALSE; |
| if (bow) |
| ! addstate_here(thislist, t->state->out, &t->sub, listid, |
| ! &match, &i); |
| break; |
| } |
| |
| |
| *** 3204,3211 **** |
| || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) |
| eow = FALSE; |
| if (eow) |
| ! addstate(thislist, t->state->out, &t->sub, 0, listid, |
| ! &match); |
| break; |
| } |
| |
| --- 3255,3262 ---- |
| || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) |
| eow = FALSE; |
| if (eow) |
| ! addstate_here(thislist, t->state->out, &t->sub, listid, |
| ! &match, &i); |
| break; |
| } |
| |
| |
| |
| |
| *** 270,275 **** |
| --- 270,276 ---- |
| :call add(tl, ['aa \zsax', ' ax']) " must match before \zs |
| :call add(tl, ['abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match']) |
| :call add(tl, ['\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last']) |
| + :call add(tl, ['\>\zs.', 'aword. ', '.']) |
| |
| :"""" Tests for \@ features |
| :call add(tl, ['abc\@=', 'abc', 'ab']) |
| |
| *** 299,304 **** |
| --- 300,311 ---- |
| :call add(tl, ['\%u0020', 'yes no', ' ']) |
| :call add(tl, ['\%U00000020', 'yes no', ' ']) |
| |
| + :"""" Alternatives, must use first longest match |
| + :call add(tl, ['goo\|go', 'google', 'goo']) |
| + :call add(tl, ['\<goo\|\<go', 'google', 'goo']) |
| + :call add(tl, ['\<goo\|go', 'google', 'goo']) |
| + |
| + |
| :"""" Run the tests |
| |
| :" |
| |
| |
| |
| *** 209,214 **** |
| --- 209,215 ---- |
| OK - aa \zsax |
| OK - abc \zsmatch\ze abc |
| OK - \v(a \zsif .*){2} |
| + OK - \>\zs. |
| OK - abc\@= |
| OK - abc\@=cd |
| OK - abc\@= |
| |
| *** 231,234 **** |
| --- 232,238 ---- |
| OK - \%x20 |
| OK - \%u0020 |
| OK - \%U00000020 |
| + OK - goo\|go |
| + OK - \<goo\|\<go |
| + OK - \<goo\|go |
| 192.168.0.1 |
| |
| |
| |
| *** 29,36 **** |
| |
| typedef struct regengine regengine_T; |
| |
| - typedef struct thread thread_T; |
| - |
| /* |
| * Structure returned by vim_regcomp() to pass on to vim_regexec(). |
| * This is the general structure. For the actual matcher, two specific |
| --- 29,34 ---- |
| |
| |
| |
| *** 730,731 **** |
| --- 730,733 ---- |
| { /* Add new patch number below this line */ |
| + /**/ |
| + 1017, |
| /**/ |
| |
| -- |
| A salesperson says: Translation: |
| "backward compatible" Old technology |
| "Premium" Overpriced |
| "Can't keep it on the shelf" Unavailable |
| "Stands alone" Piece of shit |
| "Proprietary" Incompatible |
| (Scott Adams - The Dilbert principle) |
| |
| /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
| /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
| \\\ an exciting new programming language -- http://www.Zimbu.org /// |
| \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |