To: vim_dev@googlegroups.com Subject: Patch 7.4.036 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.4.036 Problem: NFA engine does not capture group correctly when using \@>. (ZyX) Solution: Copy submatches before doing the recursive match. Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok *** ../vim-7.4.035/src/regexp_nfa.c 2013-09-22 13:57:19.000000000 +0200 --- src/regexp_nfa.c 2013-09-25 16:35:54.000000000 +0200 *************** *** 36,42 **** { NFA_SPLIT = -1024, NFA_MATCH, ! NFA_SKIP_CHAR, /* matches a 0-length char */ NFA_START_COLL, /* [abc] start */ NFA_END_COLL, /* [abc] end */ --- 36,42 ---- { NFA_SPLIT = -1024, NFA_MATCH, ! NFA_EMPTY, /* matches 0-length */ NFA_START_COLL, /* [abc] start */ NFA_END_COLL, /* [abc] end */ *************** *** 2005,2012 **** { /* Ignore result of previous call to nfa_regatom() */ post_ptr = post_start + my_post_start; ! /* NFA_SKIP_CHAR has 0-length and works everywhere */ ! EMIT(NFA_SKIP_CHAR); return OK; } --- 2005,2012 ---- { /* Ignore result of previous call to nfa_regatom() */ post_ptr = post_start + my_post_start; ! /* NFA_EMPTY is 0-length and works everywhere */ ! EMIT(NFA_EMPTY); return OK; } *************** *** 2170,2185 **** old_post_pos = (int)(post_ptr - post_start); if (nfa_regconcat() == FAIL) return FAIL; ! /* if concat is empty, skip a input char. But do emit a node */ if (old_post_pos == (int)(post_ptr - post_start)) ! EMIT(NFA_SKIP_CHAR); EMIT(NFA_CONCAT); ch = peekchr(); } ! /* Even if a branch is empty, emit one node for it */ if (old_post_pos == (int)(post_ptr - post_start)) ! EMIT(NFA_SKIP_CHAR); return OK; } --- 2170,2185 ---- old_post_pos = (int)(post_ptr - post_start); if (nfa_regconcat() == FAIL) return FAIL; ! /* if concat is empty do emit a node */ if (old_post_pos == (int)(post_ptr - post_start)) ! EMIT(NFA_EMPTY); EMIT(NFA_CONCAT); ch = peekchr(); } ! /* if a branch is empty, emit one node for it */ if (old_post_pos == (int)(post_ptr - post_start)) ! EMIT(NFA_EMPTY); return OK; } *************** *** 2423,2429 **** case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break; case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; ! case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break; case NFA_OR: STRCPY(code, "NFA_OR"); break; case NFA_START_COLL: STRCPY(code, "NFA_START_COLL"); break; --- 2423,2429 ---- case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break; case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; ! case NFA_EMPTY: STRCPY(code, "NFA_EMPTY"); break; case NFA_OR: STRCPY(code, "NFA_OR"); break; case NFA_START_COLL: STRCPY(code, "NFA_START_COLL"); break; *************** *** 3067,3073 **** case NFA_ZSTART: case NFA_ZEND: case NFA_OPT_CHARS: ! case NFA_SKIP_CHAR: case NFA_START_PATTERN: case NFA_END_PATTERN: case NFA_COMPOSING: --- 3067,3073 ---- case NFA_ZSTART: case NFA_ZEND: case NFA_OPT_CHARS: ! case NFA_EMPTY: case NFA_START_PATTERN: case NFA_END_PATTERN: case NFA_COMPOSING: *************** *** 3265,3279 **** PUSH(frag(e1.start, e2.out)); break; ! case NFA_SKIP_CHAR: ! /* Symbol of 0-length, Used in a repetition ! * with max/min count of 0 */ if (nfa_calc_size == TRUE) { nstate++; break; } ! s = alloc_state(NFA_SKIP_CHAR, NULL, NULL); if (s == NULL) goto theend; PUSH(frag(s, list1(&s->out))); --- 3265,3278 ---- PUSH(frag(e1.start, e2.out)); break; ! case NFA_EMPTY: ! /* 0-length, used in a repetition with max/min count of 0 */ if (nfa_calc_size == TRUE) { nstate++; break; } ! s = alloc_state(NFA_EMPTY, NULL, NULL); if (s == NULL) goto theend; PUSH(frag(s, list1(&s->out))); *************** *** 4209,4215 **** case NFA_MOPEN: case NFA_ZEND: case NFA_SPLIT: ! case NFA_SKIP_CHAR: /* These nodes are not added themselves but their "out" and/or * "out1" may be added below. */ break; --- 4208,4214 ---- case NFA_MOPEN: case NFA_ZEND: case NFA_SPLIT: ! case NFA_EMPTY: /* These nodes are not added themselves but their "out" and/or * "out1" may be added below. */ break; *************** *** 4337,4343 **** subs = addstate(l, state->out1, subs, pim, off); break; ! case NFA_SKIP_CHAR: case NFA_NOPEN: case NFA_NCLOSE: subs = addstate(l, state->out, subs, pim, off); --- 4336,4342 ---- subs = addstate(l, state->out1, subs, pim, off); break; ! case NFA_EMPTY: case NFA_NOPEN: case NFA_NCLOSE: subs = addstate(l, state->out, subs, pim, off); *************** *** 5604,5612 **** { int in_use = m->norm.in_use; ! /* Copy submatch info for the recursive call, so that ! * \1 can be matched. */ copy_sub_off(&m->norm, &t->subs.norm); /* * First try matching the invisible match, then what --- 5603,5615 ---- { int in_use = m->norm.in_use; ! /* Copy submatch info for the recursive call, opposite ! * of what happens on success below. */ copy_sub_off(&m->norm, &t->subs.norm); + #ifdef FEAT_SYN_HL + if (nfa_has_zsubexpr) + copy_sub_off(&m->synt, &t->subs.synt); + #endif /* * First try matching the invisible match, then what *************** *** 5713,5718 **** --- 5716,5728 ---- #endif break; } + /* Copy submatch info to the recursive call, opposite of what + * happens afterwards. */ + copy_sub_off(&m->norm, &t->subs.norm); + #ifdef FEAT_SYN_HL + if (nfa_has_zsubexpr) + copy_sub_off(&m->synt, &t->subs.synt); + #endif /* First try matching the pattern. */ result = recursive_regmatch(t->state, NULL, prog, *** ../vim-7.4.035/src/testdir/test64.in 2013-09-22 13:57:19.000000000 +0200 --- src/testdir/test64.in 2013-09-25 15:51:12.000000000 +0200 *************** *** 430,435 **** --- 430,436 ---- :call add(tl, [2, '\(a*\)\@>a', 'aaaa']) :call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa']) :call add(tl, [2, '^\(.\{-}b\)\@>.', ' abcbd', ' abc', ' ab']) + :call add(tl, [2, '\(.\{-}\)\(\)\@>$', 'abc', 'abc', 'abc', '']) :" TODO: BT engine does not restore submatch after failure :call add(tl, [1, '\(a*\)\@>a\|a\+', 'aaaa', 'aaaa']) :" *** ../vim-7.4.035/src/testdir/test64.ok 2013-09-22 13:57:19.000000000 +0200 --- src/testdir/test64.ok 2013-09-25 16:39:31.000000000 +0200 *************** *** 992,997 **** --- 992,1000 ---- OK 0 - ^\(.\{-}b\)\@>. OK 1 - ^\(.\{-}b\)\@>. OK 2 - ^\(.\{-}b\)\@>. + OK 0 - \(.\{-}\)\(\)\@>$ + OK 1 - \(.\{-}\)\(\)\@>$ + OK 2 - \(.\{-}\)\(\)\@>$ OK 0 - \(a*\)\@>a\|a\+ OK 2 - \(a*\)\@>a\|a\+ OK 0 - \_[^8-9]\+ *** ../vim-7.4.035/src/version.c 2013-09-22 15:43:34.000000000 +0200 --- src/version.c 2013-09-25 16:40:01.000000000 +0200 *************** *** 740,741 **** --- 740,743 ---- { /* Add new patch number below this line */ + /**/ + 36, /**/ -- There is a fine line between courage and foolishness. Unfortunately, it's not a fence. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///