diff --git a/7.3.1033 b/7.3.1033 new file mode 100644 index 0000000..e7ff7cb --- /dev/null +++ b/7.3.1033 @@ -0,0 +1,1346 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.1033 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.1033 +Problem: "\1" .. "\9" are not supported in the new regexp engine. +Solution: Implement them. Add a few more tests. +Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok, + src/regexp.h + + +*** ../vim-7.3.1032/src/regexp_nfa.c 2013-05-27 20:10:40.000000000 +0200 +--- src/regexp_nfa.c 2013-05-28 21:57:24.000000000 +0200 +*************** +*** 73,78 **** +--- 73,89 ---- + NFA_PREV_ATOM_JUST_BEFORE_NEG, /* Used for \@ */ + ++ NFA_BACKREF1, /* \1 */ ++ NFA_BACKREF2, /* \2 */ ++ NFA_BACKREF3, /* \3 */ ++ NFA_BACKREF4, /* \4 */ ++ NFA_BACKREF5, /* \5 */ ++ NFA_BACKREF6, /* \6 */ ++ NFA_BACKREF7, /* \7 */ ++ NFA_BACKREF8, /* \8 */ ++ NFA_BACKREF9, /* \9 */ ++ NFA_SKIP, /* Skip characters */ ++ + NFA_MOPEN, + NFA_MCLOSE = NFA_MOPEN + NSUBEXP, + +*************** +*** 709,715 **** + p = vim_strchr(classchars, no_Magic(c)); + if (p == NULL) + { +! return FAIL; /* runtime error */ + } + #ifdef FEAT_MBYTE + /* When '.' is followed by a composing char ignore the dot, so that +--- 720,727 ---- + p = vim_strchr(classchars, no_Magic(c)); + if (p == NULL) + { +! EMSGN("INTERNAL: Unknown character class char: %ld", c); +! return FAIL; + } + #ifdef FEAT_MBYTE + /* When '.' is followed by a composing char ignore the dot, so that +*************** +*** 766,785 **** + return FAIL; + + case Magic('~'): /* previous substitute pattern */ +! /* Not supported yet */ + return FAIL; + +! case Magic('1'): +! case Magic('2'): +! case Magic('3'): +! case Magic('4'): +! case Magic('5'): +! case Magic('6'): +! case Magic('7'): +! case Magic('8'): +! case Magic('9'): +! /* not supported yet */ +! return FAIL; + + case Magic('z'): + c = no_Magic(getchr()); +--- 778,795 ---- + return FAIL; + + case Magic('~'): /* previous substitute pattern */ +! /* TODO: Not supported yet */ + return FAIL; + +! case Magic('1'): EMIT(NFA_BACKREF1); break; +! case Magic('2'): EMIT(NFA_BACKREF2); break; +! case Magic('3'): EMIT(NFA_BACKREF3); break; +! case Magic('4'): EMIT(NFA_BACKREF4); break; +! case Magic('5'): EMIT(NFA_BACKREF5); break; +! case Magic('6'): EMIT(NFA_BACKREF6); break; +! case Magic('7'): EMIT(NFA_BACKREF7); break; +! case Magic('8'): EMIT(NFA_BACKREF8); break; +! case Magic('9'): EMIT(NFA_BACKREF9); break; + + case Magic('z'): + c = no_Magic(getchr()); +*************** +*** 802,808 **** + case '8': + case '9': + case '(': +! /* \z1...\z9 and \z( not yet supported */ + return FAIL; + default: + syntax_error = TRUE; +--- 812,818 ---- + case '8': + case '9': + case '(': +! /* TODO: \z1...\z9 and \z( not yet supported */ + return FAIL; + default: + syntax_error = TRUE; +*************** +*** 854,885 **** + * pattern -- regardless of whether or not it makes sense. */ + case '^': + EMIT(NFA_BOF); +! /* Not yet supported */ + return FAIL; + break; + + case '$': + EMIT(NFA_EOF); +! /* Not yet supported */ + return FAIL; + break; + + case '#': +! /* not supported yet */ + return FAIL; + break; + + case 'V': +! /* not supported yet */ + return FAIL; + break; + + case '[': +! /* \%[abc] not supported yet */ + return FAIL; + + default: +! /* not supported yet */ + return FAIL; + } + break; +--- 864,913 ---- + * pattern -- regardless of whether or not it makes sense. */ + case '^': + EMIT(NFA_BOF); +! /* TODO: Not yet supported */ + return FAIL; + break; + + case '$': + EMIT(NFA_EOF); +! /* TODO: Not yet supported */ + return FAIL; + break; + + case '#': +! /* TODO: not supported yet */ + return FAIL; + break; + + case 'V': +! /* TODO: not supported yet */ + return FAIL; + break; + + case '[': +! /* TODO: \%[abc] not supported yet */ +! return FAIL; +! +! case '0': +! case '1': +! case '2': +! case '3': +! case '4': +! case '5': +! case '6': +! case '7': +! case '8': +! case '9': +! case '<': +! case '>': +! case '\'': +! /* TODO: not supported yet */ + return FAIL; + + default: +! syntax_error = TRUE; +! EMSGN(_("E867: (NFA) Unknown operator '\\%%%c'"), +! no_Magic(c)); + return FAIL; + } + break; +*************** +*** 1672,1677 **** +--- 1700,1716 ---- + case NFA_ZSTART: STRCPY(code, "NFA_ZSTART"); break; + case NFA_ZEND: STRCPY(code, "NFA_ZEND"); break; + ++ case NFA_BACKREF1: STRCPY(code, "NFA_BACKREF1"); break; ++ case NFA_BACKREF2: STRCPY(code, "NFA_BACKREF2"); break; ++ case NFA_BACKREF3: STRCPY(code, "NFA_BACKREF3"); break; ++ case NFA_BACKREF4: STRCPY(code, "NFA_BACKREF4"); break; ++ case NFA_BACKREF5: STRCPY(code, "NFA_BACKREF5"); break; ++ case NFA_BACKREF6: STRCPY(code, "NFA_BACKREF6"); break; ++ case NFA_BACKREF7: STRCPY(code, "NFA_BACKREF7"); break; ++ case NFA_BACKREF8: STRCPY(code, "NFA_BACKREF8"); break; ++ case NFA_BACKREF9: STRCPY(code, "NFA_BACKREF9"); break; ++ case NFA_SKIP: STRCPY(code, "NFA_SKIP"); break; ++ + case NFA_PREV_ATOM_NO_WIDTH: + STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break; + case NFA_NOPEN: STRCPY(code, "NFA_MOPEN_INVISIBLE"); break; +*************** +*** 1949,1955 **** + + s->id = istate; + s->lastlist = 0; +- s->visits = 0; + s->negated = FALSE; + + return s; +--- 1988,1993 ---- +*************** +*** 2416,2421 **** +--- 2454,2483 ---- + PUSH(frag(s, list1(&s1->out))); + break; + ++ case NFA_BACKREF1: ++ case NFA_BACKREF2: ++ case NFA_BACKREF3: ++ case NFA_BACKREF4: ++ case NFA_BACKREF5: ++ case NFA_BACKREF6: ++ case NFA_BACKREF7: ++ case NFA_BACKREF8: ++ case NFA_BACKREF9: ++ if (nfa_calc_size == TRUE) ++ { ++ nstate += 2; ++ break; ++ } ++ s = new_state(*p, NULL, NULL); ++ if (s == NULL) ++ goto theend; ++ s1 = new_state(NFA_SKIP, NULL, NULL); ++ if (s1 == NULL) ++ goto theend; ++ patch(list1(&s->out), s1); ++ PUSH(frag(s, list1(&s1->out))); ++ break; ++ + case NFA_ZSTART: + case NFA_ZEND: + default: +*************** +*** 2495,2523 **** + typedef struct + { + nfa_state_T *state; + regsub_T sub; /* submatch info, only party used */ + } nfa_thread_T; + + /* nfa_list_T contains the alternative NFA execution states. */ + typedef struct + { +! nfa_thread_T *t; +! int n; + } nfa_list_T; + + /* Used during execution: whether a match has been found. */ + static int nfa_match; + +! static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid)); +! static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *ip)); + + static void +! addstate(l, state, m, off, lid) + nfa_list_T *l; /* runtime state list */ + nfa_state_T *state; /* state to update */ +! regsub_T *m; /* pointers to subexpressions */ + int off; /* byte offset, when -1 go to next line */ +- int lid; + { + int subidx; + nfa_thread_T *lastthread; +--- 2557,2610 ---- + typedef struct + { + nfa_state_T *state; ++ int count; + regsub_T sub; /* submatch info, only party used */ + } nfa_thread_T; + + /* nfa_list_T contains the alternative NFA execution states. */ + typedef struct + { +! nfa_thread_T *t; /* allocated array of states */ +! int n; /* nr of states in "t" */ +! int id; /* ID of the list */ + } nfa_list_T; + ++ #ifdef ENABLE_LOG ++ static void ++ log_subexpr(sub) ++ regsub_T *sub; ++ { ++ int j; ++ ++ for (j = 0; j < sub->in_use; j++) ++ if (REG_MULTI) ++ fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d", ++ j, ++ sub->multilist[j].start.col, ++ (int)sub->multilist[j].start.lnum, ++ sub->multilist[j].end.col, ++ (int)sub->multilist[j].end.lnum); ++ else ++ fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"", ++ j, ++ (char *)sub->linelist[j].start, ++ (char *)sub->linelist[j].end); ++ fprintf(log_fd, "\n"); ++ } ++ #endif ++ + /* Used during execution: whether a match has been found. */ + static int nfa_match; + +! static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int off)); +! static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int *ip)); + + static void +! addstate(l, state, sub, off) + nfa_list_T *l; /* runtime state list */ + nfa_state_T *state; /* state to update */ +! regsub_T *sub; /* pointers to subexpressions */ + int off; /* byte offset, when -1 go to next line */ + { + int subidx; + nfa_thread_T *lastthread; +*************** +*** 2545,2585 **** + case NFA_MCLOSE + 7: + case NFA_MCLOSE + 8: + case NFA_MCLOSE + 9: +! /* Do not remember these nodes in list "thislist" or "nextlist" */ + break; + + default: +! if (state->lastlist == lid) + { +! if (++state->visits > 2) +! return; + } +! else + { +! /* add the state to the list */ +! state->lastlist = lid; +! lastthread = &l->t[l->n++]; +! lastthread->state = state; +! lastthread->sub.in_use = m->in_use; +! if (m->in_use > 0) +! { +! /* Copy the match start and end positions. */ +! if (REG_MULTI) +! mch_memmove(&lastthread->sub.multilist[0], +! &m->multilist[0], +! sizeof(struct multipos) * m->in_use); +! else +! mch_memmove(&lastthread->sub.linelist[0], +! &m->linelist[0], +! sizeof(struct linepos) * m->in_use); +! } + } + } + + #ifdef ENABLE_LOG + nfa_set_code(state->c); +! fprintf(log_fd, "> Adding state %d to list. Character %s, code %d\n", +! abs(state->id), code, state->c); + #endif + switch (state->c) + { +--- 2632,2689 ---- + case NFA_MCLOSE + 7: + case NFA_MCLOSE + 8: + case NFA_MCLOSE + 9: +! /* These nodes are not added themselves but their "out" and/or +! * "out1" may be added below. */ +! break; +! +! case NFA_MOPEN: +! case NFA_MOPEN + 1: +! case NFA_MOPEN + 2: +! case NFA_MOPEN + 3: +! case NFA_MOPEN + 4: +! case NFA_MOPEN + 5: +! case NFA_MOPEN + 6: +! case NFA_MOPEN + 7: +! case NFA_MOPEN + 8: +! case NFA_MOPEN + 9: +! /* These nodes do not need to be added, but we need to bail out +! * when it was tried to be added to this list before. */ +! if (state->lastlist == l->id) +! return; +! state->lastlist = l->id; + break; + + default: +! if (state->lastlist == l->id) + { +! /* This state is already in the list, don't add it again, +! * unless it is an MOPEN that is used for a backreference. */ +! return; + } +! +! /* add the state to the list */ +! state->lastlist = l->id; +! lastthread = &l->t[l->n++]; +! lastthread->state = state; +! lastthread->sub.in_use = sub->in_use; +! if (sub->in_use > 0) + { +! /* Copy the match start and end positions. */ +! if (REG_MULTI) +! mch_memmove(&lastthread->sub.multilist[0], +! &sub->multilist[0], +! sizeof(struct multipos) * sub->in_use); +! else +! mch_memmove(&lastthread->sub.linelist[0], +! &sub->linelist[0], +! sizeof(struct linepos) * sub->in_use); + } + } + + #ifdef ENABLE_LOG + nfa_set_code(state->c); +! fprintf(log_fd, "> Adding state %d to list. Character %d: %s\n", +! abs(state->id), state->c, code); + #endif + switch (state->c) + { +*************** +*** 2588,2599 **** + break; + + case NFA_SPLIT: +! addstate(l, state->out, m, off, lid); +! addstate(l, state->out1, m, off, lid); +! break; +! +! case NFA_SKIP_CHAR: +! addstate(l, state->out, m, off, lid); + break; + + #if 0 +--- 2692,2699 ---- + break; + + case NFA_SPLIT: +! addstate(l, state->out, sub, off); +! addstate(l, state->out1, sub, off); + break; + + #if 0 +*************** +*** 2613,2621 **** + break; + #endif + + case NFA_NOPEN: + case NFA_NCLOSE: +! addstate(l, state->out, m, off, lid); + break; + + /* If this state is reached, then a recursive call of nfa_regmatch() +--- 2713,2722 ---- + break; + #endif + ++ case NFA_SKIP_CHAR: + case NFA_NOPEN: + case NFA_NCLOSE: +! addstate(l, state->out, sub, off); + break; + + /* If this state is reached, then a recursive call of nfa_regmatch() +*************** +*** 2646,2709 **** + * restore it when it was in use. Otherwise fill any gap. */ + if (REG_MULTI) + { +! if (subidx < m->in_use) + { +! save_lpos = m->multilist[subidx].start; + save_in_use = -1; + } + else + { +! save_in_use = m->in_use; +! for (i = m->in_use; i < subidx; ++i) + { +! m->multilist[i].start.lnum = -1; +! m->multilist[i].end.lnum = -1; + } +! m->in_use = subidx + 1; + } + if (off == -1) + { +! m->multilist[subidx].start.lnum = reglnum + 1; +! m->multilist[subidx].start.col = 0; + } + else + { +! m->multilist[subidx].start.lnum = reglnum; +! m->multilist[subidx].start.col = + (colnr_T)(reginput - regline + off); + } + } + else + { +! if (subidx < m->in_use) + { +! save_ptr = m->linelist[subidx].start; + save_in_use = -1; + } + else + { +! save_in_use = m->in_use; +! for (i = m->in_use; i < subidx; ++i) + { +! m->linelist[i].start = NULL; +! m->linelist[i].end = NULL; + } +! m->in_use = subidx + 1; + } +! m->linelist[subidx].start = reginput + off; + } + +! addstate(l, state->out, m, off, lid); + + if (save_in_use == -1) + { + if (REG_MULTI) +! m->multilist[subidx].start = save_lpos; + else +! m->linelist[subidx].start = save_ptr; + } + else +! m->in_use = save_in_use; + break; + + case NFA_MCLOSE + 0: +--- 2747,2810 ---- + * restore it when it was in use. Otherwise fill any gap. */ + if (REG_MULTI) + { +! if (subidx < sub->in_use) + { +! save_lpos = sub->multilist[subidx].start; + save_in_use = -1; + } + else + { +! save_in_use = sub->in_use; +! for (i = sub->in_use; i < subidx; ++i) + { +! sub->multilist[i].start.lnum = -1; +! sub->multilist[i].end.lnum = -1; + } +! sub->in_use = subidx + 1; + } + if (off == -1) + { +! sub->multilist[subidx].start.lnum = reglnum + 1; +! sub->multilist[subidx].start.col = 0; + } + else + { +! sub->multilist[subidx].start.lnum = reglnum; +! sub->multilist[subidx].start.col = + (colnr_T)(reginput - regline + off); + } + } + else + { +! if (subidx < sub->in_use) + { +! save_ptr = sub->linelist[subidx].start; + save_in_use = -1; + } + else + { +! save_in_use = sub->in_use; +! for (i = sub->in_use; i < subidx; ++i) + { +! sub->linelist[i].start = NULL; +! sub->linelist[i].end = NULL; + } +! sub->in_use = subidx + 1; + } +! sub->linelist[subidx].start = reginput + off; + } + +! addstate(l, state->out, sub, off); + + if (save_in_use == -1) + { + if (REG_MULTI) +! sub->multilist[subidx].start = save_lpos; + else +! sub->linelist[subidx].start = save_ptr; + } + else +! sub->in_use = save_in_use; + break; + + case NFA_MCLOSE + 0: +*************** +*** 2711,2717 **** + { + /* Do not overwrite the position set by \ze. If no \ze + * encountered end will be set in nfa_regtry(). */ +! addstate(l, state->out, m, off, lid); + break; + } + case NFA_MCLOSE + 1: +--- 2812,2818 ---- + { + /* Do not overwrite the position set by \ze. If no \ze + * encountered end will be set in nfa_regtry(). */ +! addstate(l, state->out, sub, off); + break; + } + case NFA_MCLOSE + 1: +*************** +*** 2731,2767 **** + + /* We don't fill in gaps here, there must have been an MOPEN that + * has done that. */ +! save_in_use = m->in_use; +! if (m->in_use <= subidx) +! m->in_use = subidx + 1; + if (REG_MULTI) + { +! save_lpos = m->multilist[subidx].end; + if (off == -1) + { +! m->multilist[subidx].end.lnum = reglnum + 1; +! m->multilist[subidx].end.col = 0; + } + else + { +! m->multilist[subidx].end.lnum = reglnum; +! m->multilist[subidx].end.col = + (colnr_T)(reginput - regline + off); + } + } + else + { +! save_ptr = m->linelist[subidx].end; +! m->linelist[subidx].end = reginput + off; + } + +! addstate(l, state->out, m, off, lid); + + if (REG_MULTI) +! m->multilist[subidx].end = save_lpos; + else +! m->linelist[subidx].end = save_ptr; +! m->in_use = save_in_use; + break; + } + } +--- 2832,2868 ---- + + /* We don't fill in gaps here, there must have been an MOPEN that + * has done that. */ +! save_in_use = sub->in_use; +! if (sub->in_use <= subidx) +! sub->in_use = subidx + 1; + if (REG_MULTI) + { +! save_lpos = sub->multilist[subidx].end; + if (off == -1) + { +! sub->multilist[subidx].end.lnum = reglnum + 1; +! sub->multilist[subidx].end.col = 0; + } + else + { +! sub->multilist[subidx].end.lnum = reglnum; +! sub->multilist[subidx].end.col = + (colnr_T)(reginput - regline + off); + } + } + else + { +! save_ptr = sub->linelist[subidx].end; +! sub->linelist[subidx].end = reginput + off; + } + +! addstate(l, state->out, sub, off); + + if (REG_MULTI) +! sub->multilist[subidx].end = save_lpos; + else +! sub->linelist[subidx].end = save_ptr; +! sub->in_use = save_in_use; + break; + } + } +*************** +*** 2773,2783 **** + * matters for alternatives. + */ + static void +! addstate_here(l, state, m, lid, ip) + nfa_list_T *l; /* runtime state list */ + nfa_state_T *state; /* state to update */ +! regsub_T *m; /* pointers to subexpressions */ +! int lid; + int *ip; + { + int tlen = l->n; +--- 2874,2883 ---- + * matters for alternatives. + */ + static void +! addstate_here(l, state, sub, ip) + nfa_list_T *l; /* runtime state list */ + nfa_state_T *state; /* state to update */ +! regsub_T *sub; /* pointers to subexpressions */ + int *ip; + { + int tlen = l->n; +*************** +*** 2785,2791 **** + int i = *ip; + + /* first add the state(s) at the end, so that we know how many there are */ +! addstate(l, state, m, 0, lid); + + /* when "*ip" was at the end of the list, nothing to do */ + if (i + 1 == tlen) +--- 2885,2891 ---- + int i = *ip; + + /* first add the state(s) at the end, so that we know how many there are */ +! addstate(l, state, sub, 0); + + /* when "*ip" was at the end of the list, nothing to do */ + if (i + 1 == tlen) +*************** +*** 2895,2900 **** +--- 2995,3052 ---- + return FAIL; + } + ++ static int match_backref __ARGS((regsub_T *sub, int subidx, int *bytelen)); ++ ++ /* ++ * Check for a match with subexpression "subidx". ++ * return TRUE if it matches. ++ */ ++ static int ++ match_backref(sub, subidx, bytelen) ++ regsub_T *sub; /* pointers to subexpressions */ ++ int subidx; ++ int *bytelen; /* out: length of match in bytes */ ++ { ++ int len; ++ ++ if (sub->in_use <= subidx) ++ { ++ retempty: ++ /* backref was not set, match an empty string */ ++ *bytelen = 0; ++ return TRUE; ++ } ++ ++ if (REG_MULTI) ++ { ++ if (sub->multilist[subidx].start.lnum < 0 ++ || sub->multilist[subidx].end.lnum < 0) ++ goto retempty; ++ /* TODO: line breaks */ ++ len = sub->multilist[subidx].end.col ++ - sub->multilist[subidx].start.col; ++ if (cstrncmp(regline + sub->multilist[subidx].start.col, ++ reginput, &len) == 0) ++ { ++ *bytelen = len; ++ return TRUE; ++ } ++ } ++ else ++ { ++ if (sub->linelist[subidx].start == NULL ++ || sub->linelist[subidx].end == NULL) ++ goto retempty; ++ len = (int)(sub->linelist[subidx].end - sub->linelist[subidx].start); ++ if (cstrncmp(sub->linelist[subidx].start, reginput, &len) == 0) ++ { ++ *bytelen = len; ++ return TRUE; ++ } ++ } ++ return FALSE; ++ } ++ + /* + * Set all NFA nodes' list ID equal to -1. + */ +*************** +*** 2902,2910 **** + nfa_set_neg_listids(start) + nfa_state_T *start; + { +! if (start == NULL) +! return; +! if (start->lastlist >= 0) + { + start->lastlist = -1; + nfa_set_neg_listids(start->out); +--- 3054,3060 ---- + nfa_set_neg_listids(start) + nfa_state_T *start; + { +! if (start != NULL && start->lastlist >= 0) + { + start->lastlist = -1; + nfa_set_neg_listids(start->out); +*************** +*** 2919,2927 **** + nfa_set_null_listids(start) + nfa_state_T *start; + { +! if (start == NULL) +! return; +! if (start->lastlist == -1) + { + start->lastlist = 0; + nfa_set_null_listids(start->out); +--- 3069,3075 ---- + nfa_set_null_listids(start) + nfa_state_T *start; + { +! if (start != NULL && start->lastlist == -1) + { + start->lastlist = 0; + nfa_set_null_listids(start->out); +*************** +*** 2937,2945 **** + nfa_state_T *start; + int *list; + { +! if (start == NULL) +! return; +! if (start->lastlist != -1) + { + list[abs(start->id)] = start->lastlist; + start->lastlist = -1; +--- 3085,3091 ---- + nfa_state_T *start; + int *list; + { +! if (start != NULL && start->lastlist != -1) + { + list[abs(start->id)] = start->lastlist; + start->lastlist = -1; +*************** +*** 2956,2964 **** + nfa_state_T *start; + int *list; + { +! if (start == NULL) +! return; +! if (start->lastlist == -1) + { + start->lastlist = list[abs(start->id)]; + nfa_restore_listids(start->out, list); +--- 3102,3108 ---- + nfa_state_T *start; + int *list; + { +! if (start != NULL && start->lastlist == -1) + { + start->lastlist = list[abs(start->id)]; + nfa_restore_listids(start->out, list); +*************** +*** 3047,3053 **** + #ifdef ENABLE_LOG + fprintf(log_fd, "(---) STARTSTATE\n"); + #endif +! addstate(thislist, start, m, 0, listid); + + /* There are two cases when the NFA advances: 1. input char matches the + * NFA node and 2. input char does not match the NFA node, but the next +--- 3191,3198 ---- + #ifdef ENABLE_LOG + fprintf(log_fd, "(---) STARTSTATE\n"); + #endif +! thislist->id = listid; +! addstate(thislist, start, m, 0); + + /* There are two cases when the NFA advances: 1. input char matches the + * NFA node and 2. input char does not match the NFA node, but the next +*************** +*** 3060,3066 **** + #define ADD_POS_NEG_STATE(node) \ + ll = listtbl[result ? 1 : 0][node->negated]; \ + if (ll != NULL) \ +! addstate(ll, node->out , &t->sub, clen, listid + 1); + + + /* +--- 3205,3211 ---- + #define ADD_POS_NEG_STATE(node) \ + ll = listtbl[result ? 1 : 0][node->negated]; \ + if (ll != NULL) \ +! addstate(ll, node->out , &t->sub, clen); + + + /* +*************** +*** 3092,3100 **** + /* swap lists */ + thislist = &list[flag]; + nextlist = &list[flag ^= 1]; +! nextlist->n = 0; /* `clear' nextlist */ + listtbl[1][0] = nextlist; + ++listid; + + #ifdef ENABLE_LOG + fprintf(log_fd, "------------------------------------------\n"); +--- 3237,3248 ---- + /* swap lists */ + thislist = &list[flag]; + nextlist = &list[flag ^= 1]; +! nextlist->n = 0; /* clear nextlist */ + listtbl[1][0] = nextlist; + ++listid; ++ thislist->id = listid; ++ nextlist->id = listid + 1; ++ neglist->id = listid + 1; + + #ifdef ENABLE_LOG + fprintf(log_fd, "------------------------------------------\n"); +*************** +*** 3156,3162 **** + if (REG_MULTI) + for (j = 0; j < submatch->in_use; j++) + { +! submatch->multilist[j].start = t->sub.multilist[j].start; + submatch->multilist[j].end = t->sub.multilist[j].end; + } + else +--- 3304,3311 ---- + if (REG_MULTI) + for (j = 0; j < submatch->in_use; j++) + { +! submatch->multilist[j].start = +! t->sub.multilist[j].start; + submatch->multilist[j].end = t->sub.multilist[j].end; + } + else +*************** +*** 3166,3185 **** + submatch->linelist[j].end = t->sub.linelist[j].end; + } + #ifdef ENABLE_LOG +! for (j = 0; j < t->sub.in_use; j++) +! if (REG_MULTI) +! fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d", +! j, +! t->sub.multilist[j].start.col, +! (int)t->sub.multilist[j].start.lnum, +! t->sub.multilist[j].end.col, +! (int)t->sub.multilist[j].end.lnum); +! else +! fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"", +! j, +! (char *)t->sub.linelist[j].start, +! (char *)t->sub.linelist[j].end); +! fprintf(log_fd, "\n"); + #endif + /* Found the left-most longest match, do not look at any other + * states at this position. When the list of states is going +--- 3315,3321 ---- + submatch->linelist[j].end = t->sub.linelist[j].end; + } + #ifdef ENABLE_LOG +! log_subexpr(&t->sub); + #endif + /* Found the left-most longest match, do not look at any other + * states at this position. When the list of states is going +*************** +*** 3198,3205 **** + * nfa_regmatch(). Submatches are stored in *m, and used in + * the parent call. */ + if (start->c == NFA_MOPEN + 0) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &listidx); + else + { + *m = t->sub; +--- 3334,3340 ---- + * nfa_regmatch(). Submatches are stored in *m, and used in + * the parent call. */ + if (start->c == NFA_MOPEN + 0) +! addstate_here(thislist, t->state->out, &t->sub, &listidx); + else + { + *m = t->sub; +*************** +*** 3277,3283 **** + + /* t->state->out1 is the corresponding END_INVISIBLE node */ + addstate_here(thislist, t->state->out1->out, &t->sub, +! listid, &listidx); + } + else + { +--- 3412,3418 ---- + + /* t->state->out1 is the corresponding END_INVISIBLE node */ + addstate_here(thislist, t->state->out1->out, &t->sub, +! &listidx); + } + else + { +*************** +*** 3288,3301 **** + + case NFA_BOL: + if (reginput == regline) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &listidx); + break; + + case NFA_EOL: + if (curc == NUL) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &listidx); + break; + + case NFA_BOW: +--- 3423,3434 ---- + + case NFA_BOL: + if (reginput == regline) +! addstate_here(thislist, t->state->out, &t->sub, &listidx); + break; + + case NFA_EOL: + if (curc == NUL) +! addstate_here(thislist, t->state->out, &t->sub, &listidx); + break; + + case NFA_BOW: +*************** +*** 3322,3329 **** + && vim_iswordc_buf(reginput[-1], reg_buf))) + bow = FALSE; + if (bow) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &listidx); + break; + } + +--- 3455,3461 ---- + && vim_iswordc_buf(reginput[-1], reg_buf))) + bow = FALSE; + if (bow) +! addstate_here(thislist, t->state->out, &t->sub, &listidx); + break; + } + +*************** +*** 3351,3358 **** + && vim_iswordc_buf(curc, reg_buf))) + eow = FALSE; + if (eow) +! addstate_here(thislist, t->state->out, &t->sub, listid, +! &listidx); + break; + } + +--- 3483,3489 ---- + && vim_iswordc_buf(curc, reg_buf))) + eow = FALSE; + if (eow) +! addstate_here(thislist, t->state->out, &t->sub, &listidx); + break; + } + +*************** +*** 3442,3453 **** + go_to_nextline = TRUE; + /* Pass -1 for the offset, which means taking the position + * at the start of the next line. */ +! addstate(nextlist, t->state->out, &t->sub, -1, listid + 1); + } + else if (curc == '\n' && reg_line_lbr) + { + /* match \n as if it is an ordinary character */ +! addstate(nextlist, t->state->out, &t->sub, 1, listid + 1); + } + break; + +--- 3573,3584 ---- + go_to_nextline = TRUE; + /* Pass -1 for the offset, which means taking the position + * at the start of the next line. */ +! addstate(nextlist, t->state->out, &t->sub, -1); + } + else if (curc == '\n' && reg_line_lbr) + { + /* match \n as if it is an ordinary character */ +! addstate(nextlist, t->state->out, &t->sub, 1); + } + break; + +*************** +*** 3475,3489 **** + /* This follows a series of negated nodes, like: + * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */ + if (curc > 0) +! addstate(nextlist, t->state->out, &t->sub, clen, +! listid + 1); + break; + + case NFA_ANY: + /* Any char except '\0', (end of input) does not match. */ + if (curc > 0) +! addstate(nextlist, t->state->out, &t->sub, clen, +! listid + 1); + break; + + /* +--- 3606,3618 ---- + /* This follows a series of negated nodes, like: + * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */ + if (curc > 0) +! addstate(nextlist, t->state->out, &t->sub, clen); + break; + + case NFA_ANY: + /* Any char except '\0', (end of input) does not match. */ + if (curc > 0) +! addstate(nextlist, t->state->out, &t->sub, clen); + break; + + /* +*************** +*** 3620,3637 **** + ADD_POS_NEG_STATE(t->state); + break; + +! case NFA_MOPEN + 0: +! case NFA_MOPEN + 1: +! case NFA_MOPEN + 2: +! case NFA_MOPEN + 3: +! case NFA_MOPEN + 4: +! case NFA_MOPEN + 5: +! case NFA_MOPEN + 6: +! case NFA_MOPEN + 7: +! case NFA_MOPEN + 8: +! case NFA_MOPEN + 9: +! /* handled below */ + break; + + case NFA_SKIP_CHAR: + case NFA_ZSTART: +--- 3749,3822 ---- + ADD_POS_NEG_STATE(t->state); + break; + +! case NFA_BACKREF1: +! case NFA_BACKREF2: +! case NFA_BACKREF3: +! case NFA_BACKREF4: +! case NFA_BACKREF5: +! case NFA_BACKREF6: +! case NFA_BACKREF7: +! case NFA_BACKREF8: +! case NFA_BACKREF9: +! /* \1 .. \9 */ +! { +! int subidx = t->state->c - NFA_BACKREF1 + 1; +! int bytelen; +! +! result = match_backref(&t->sub, subidx, &bytelen); +! if (result) +! { +! if (bytelen == 0) +! { +! /* empty match always works, add NFA_SKIP with zero to +! * be used next */ +! addstate_here(thislist, t->state->out, &t->sub, +! &listidx); +! thislist->t[listidx + 1].count = 0; +! } +! else if (bytelen <= clen) +! { +! /* match current character, jump ahead to out of +! * NFA_SKIP */ +! addstate(nextlist, t->state->out->out, &t->sub, clen); +! #ifdef ENABLE_LOG +! log_subexpr(&nextlist->t[nextlist->n - 1].sub); +! #endif +! } +! else +! { +! /* skip ofer the matched characters, set character +! * count in NFA_SKIP */ +! addstate(nextlist, t->state->out, &t->sub, bytelen); +! nextlist->t[nextlist->n - 1].count = bytelen - clen; +! #ifdef ENABLE_LOG +! log_subexpr(&nextlist->t[nextlist->n - 1].sub); +! #endif +! } +! +! } + break; ++ } ++ case NFA_SKIP: ++ /* charater of previous matching \1 .. \9 */ ++ if (t->count - clen <= 0) ++ { ++ /* end of match, go to what follows */ ++ addstate(nextlist, t->state->out, &t->sub, clen); ++ #ifdef ENABLE_LOG ++ log_subexpr(&nextlist->t[nextlist->n - 1].sub); ++ #endif ++ } ++ else ++ { ++ /* add state again with decremented count */ ++ addstate(nextlist, t->state, &t->sub, 0); ++ nextlist->t[nextlist->n - 1].count = t->count - clen; ++ #ifdef ENABLE_LOG ++ log_subexpr(&nextlist->t[nextlist->n - 1].sub); ++ #endif ++ } ++ break; + + case NFA_SKIP_CHAR: + case NFA_ZSTART: +*************** +*** 3680,3686 **** + #ifdef ENABLE_LOG + fprintf(log_fd, "(---) STARTSTATE\n"); + #endif +! addstate(nextlist, start, m, clen, listid + 1); + } + + #ifdef ENABLE_LOG +--- 3865,3871 ---- + #ifdef ENABLE_LOG + fprintf(log_fd, "(---) STARTSTATE\n"); + #endif +! addstate(nextlist, start, m, clen); + } + + #ifdef ENABLE_LOG +*************** +*** 3884,3890 **** + { + prog->state[i].id = i; + prog->state[i].lastlist = 0; +- prog->state[i].visits = 0; + } + + retval = nfa_regtry(prog->start, col); +--- 4069,4074 ---- +*** ../vim-7.3.1032/src/testdir/test64.in 2013-05-27 20:10:40.000000000 +0200 +--- src/testdir/test64.in 2013-05-28 20:24:11.000000000 +0200 +*************** +*** 331,336 **** +--- 331,340 ---- + :call add(tl, [2, '\