| To: vim_dev@googlegroups.com |
| Subject: Patch 7.3.1139 |
| Fcc: outbox |
| From: Bram Moolenaar <Bram@moolenaar.net> |
| Mime-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| |
| Patch 7.3.1139 |
| Problem: New regexp engine: negated flag is hardly used. |
| Solution: Add separate _NEG states, remove negated flag. |
| Files: src/regexp_nfa.c, src/regexp.h |
| |
| |
| |
| |
| |
| *** 64,72 **** |
| --- 64,75 ---- |
| NFA_NOPEN, /* Start of subexpression marked with \%( */ |
| NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */ |
| NFA_START_INVISIBLE, |
| + NFA_START_INVISIBLE_NEG, |
| NFA_START_INVISIBLE_BEFORE, |
| + NFA_START_INVISIBLE_BEFORE_NEG, |
| NFA_START_PATTERN, |
| NFA_END_INVISIBLE, |
| + NFA_END_INVISIBLE_NEG, |
| NFA_END_PATTERN, |
| NFA_COMPOSING, /* Next nodes in NFA are part of the |
| composing multibyte char */ |
| |
| *** 481,487 **** |
| } |
| |
| default: |
| ! if (p->c > 0 && !p->negated) |
| return p->c; /* yes! */ |
| return 0; |
| } |
| --- 484,490 ---- |
| } |
| |
| default: |
| ! if (p->c > 0) |
| return p->c; /* yes! */ |
| return 0; |
| } |
| |
| *** 1991,2000 **** |
| --- 1994,2008 ---- |
| case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break; |
| case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break; |
| case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; |
| + case NFA_START_INVISIBLE_NEG: |
| + STRCPY(code, "NFA_START_INVISIBLE_NEG"); break; |
| case NFA_START_INVISIBLE_BEFORE: |
| STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break; |
| + case NFA_START_INVISIBLE_BEFORE_NEG: |
| + STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG"); break; |
| case NFA_START_PATTERN: STRCPY(code, "NFA_START_PATTERN"); break; |
| case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; |
| + case NFA_END_INVISIBLE_NEG: STRCPY(code, "NFA_END_INVISIBLE_NEG"); break; |
| case NFA_END_PATTERN: STRCPY(code, "NFA_END_PATTERN"); break; |
| |
| case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; |
| |
| *** 2227,2234 **** |
| fprintf(debugf, " %s", p); |
| |
| nfa_set_code(state->c); |
| ! fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n", |
| ! state->negated ? "NOT " : "", |
| code, |
| state->c, |
| abs(state->id), |
| --- 2235,2241 ---- |
| fprintf(debugf, " %s", p); |
| |
| nfa_set_code(state->c); |
| ! fprintf(debugf, "%s (%d) (id=%d) val=%d\n", |
| code, |
| state->c, |
| abs(state->id), |
| |
| *** 2330,2336 **** |
| s->id = istate; |
| s->lastlist[0] = 0; |
| s->lastlist[1] = 0; |
| - s->negated = FALSE; |
| |
| return s; |
| } |
| --- 2337,2342 ---- |
| |
| *** 2741,2763 **** |
| case NFA_PREV_ATOM_JUST_BEFORE_NEG: |
| case NFA_PREV_ATOM_LIKE_PATTERN: |
| { |
| - int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG |
| - || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); |
| int before = (*p == NFA_PREV_ATOM_JUST_BEFORE |
| || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); |
| int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN); |
| ! int start_state = NFA_START_INVISIBLE; |
| ! int end_state = NFA_END_INVISIBLE; |
| int n = 0; |
| nfa_state_T *zend; |
| nfa_state_T *skip; |
| |
| ! if (before) |
| ! start_state = NFA_START_INVISIBLE_BEFORE; |
| ! else if (pattern) |
| { |
| ! start_state = NFA_START_PATTERN; |
| ! end_state = NFA_END_PATTERN; |
| } |
| |
| if (before) |
| --- 2747,2783 ---- |
| case NFA_PREV_ATOM_JUST_BEFORE_NEG: |
| case NFA_PREV_ATOM_LIKE_PATTERN: |
| { |
| int before = (*p == NFA_PREV_ATOM_JUST_BEFORE |
| || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); |
| int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN); |
| ! int start_state; |
| ! int end_state; |
| int n = 0; |
| nfa_state_T *zend; |
| nfa_state_T *skip; |
| |
| ! switch (*p) |
| { |
| ! case NFA_PREV_ATOM_NO_WIDTH: |
| ! start_state = NFA_START_INVISIBLE; |
| ! end_state = NFA_END_INVISIBLE; |
| ! break; |
| ! case NFA_PREV_ATOM_NO_WIDTH_NEG: |
| ! start_state = NFA_START_INVISIBLE_NEG; |
| ! end_state = NFA_END_INVISIBLE_NEG; |
| ! break; |
| ! case NFA_PREV_ATOM_JUST_BEFORE: |
| ! start_state = NFA_START_INVISIBLE_BEFORE; |
| ! end_state = NFA_END_INVISIBLE; |
| ! break; |
| ! case NFA_PREV_ATOM_JUST_BEFORE_NEG: |
| ! start_state = NFA_START_INVISIBLE_BEFORE_NEG; |
| ! end_state = NFA_END_INVISIBLE_NEG; |
| ! break; |
| ! case NFA_PREV_ATOM_LIKE_PATTERN: |
| ! start_state = NFA_START_PATTERN; |
| ! end_state = NFA_END_PATTERN; |
| ! break; |
| } |
| |
| if (before) |
| |
| *** 2783,2793 **** |
| s = alloc_state(start_state, e.start, s1); |
| if (s == NULL) |
| goto theend; |
| - if (neg) |
| - { |
| - s->negated = TRUE; |
| - s1->negated = TRUE; |
| - } |
| if (before) |
| s->val = n; /* store the count */ |
| if (pattern) |
| --- 2803,2808 ---- |
| |
| *** 3009,3015 **** |
| matchstate = &state_ptr[istate++]; /* the match state */ |
| matchstate->c = NFA_MATCH; |
| matchstate->out = matchstate->out1 = NULL; |
| - matchstate->negated = FALSE; |
| matchstate->id = 0; |
| |
| patch(e.out, matchstate); |
| --- 3024,3029 ---- |
| |
| *** 3772,3778 **** |
| return OK; |
| break; |
| case NFA_CLASS_SPACE: |
| ! if ((c >=9 && c <= 13) || (c == ' ')) |
| return OK; |
| break; |
| case NFA_CLASS_UPPER: |
| --- 3786,3792 ---- |
| return OK; |
| break; |
| case NFA_CLASS_SPACE: |
| ! if ((c >= 9 && c <= 13) || (c == ' ')) |
| return OK; |
| break; |
| case NFA_CLASS_UPPER: |
| |
| *** 3971,3977 **** |
| int result; |
| int need_restore = FALSE; |
| |
| ! if (state->c == NFA_START_INVISIBLE_BEFORE) |
| { |
| /* The recursive match must end at the current position. */ |
| endposp = &endpos; |
| --- 3985,3992 ---- |
| int result; |
| int need_restore = FALSE; |
| |
| ! if (state->c == NFA_START_INVISIBLE_BEFORE |
| ! || state->c == NFA_START_INVISIBLE_BEFORE_NEG) |
| { |
| /* The recursive match must end at the current position. */ |
| endposp = &endpos; |
| |
| *** 4452,4457 **** |
| --- 4467,4473 ---- |
| } |
| |
| case NFA_END_INVISIBLE: |
| + case NFA_END_INVISIBLE_NEG: |
| case NFA_END_PATTERN: |
| /* |
| * This is only encountered after a NFA_START_INVISIBLE or |
| |
| *** 4489,4495 **** |
| break; |
| |
| /* do not set submatches for \@! */ |
| ! if (!t->state->negated) |
| { |
| copy_sub(&m->norm, &t->subs.norm); |
| #ifdef FEAT_SYN_HL |
| --- 4505,4511 ---- |
| break; |
| |
| /* do not set submatches for \@! */ |
| ! if (t->state->c != NFA_END_INVISIBLE_NEG) |
| { |
| copy_sub(&m->norm, &t->subs.norm); |
| #ifdef FEAT_SYN_HL |
| |
| *** 4505,4511 **** |
| --- 4521,4529 ---- |
| break; |
| |
| case NFA_START_INVISIBLE: |
| + case NFA_START_INVISIBLE_NEG: |
| case NFA_START_INVISIBLE_BEFORE: |
| + case NFA_START_INVISIBLE_BEFORE_NEG: |
| { |
| nfa_pim_T *pim; |
| int cout = t->state->out1->out->c; |
| |
| *** 4524,4529 **** |
| --- 4542,4548 ---- |
| || cout == NFA_NCLOSE |
| || t->pim != NULL |
| || (t->state->c != NFA_START_INVISIBLE_BEFORE |
| + && t->state->c != NFA_START_INVISIBLE_BEFORE_NEG |
| && failure_chance(t->state->out1->out, 0) |
| < failure_chance(t->state->out, 0))) |
| { |
| |
| *** 4534,4541 **** |
| result = recursive_regmatch(t->state, prog, |
| submatch, m, &listids); |
| |
| ! /* for \@! it is a match when result is FALSE */ |
| ! if (result != t->state->negated) |
| { |
| /* Copy submatch info from the recursive call */ |
| copy_sub_off(&t->subs.norm, &m->norm); |
| --- 4553,4563 ---- |
| result = recursive_regmatch(t->state, prog, |
| submatch, m, &listids); |
| |
| ! /* for \@! and \@<! it is a match when the result is |
| ! * FALSE */ |
| ! if (result != (t->state->c == NFA_START_INVISIBLE_NEG |
| ! || t->state->c |
| ! == NFA_START_INVISIBLE_BEFORE_NEG)) |
| { |
| /* Copy submatch info from the recursive call */ |
| copy_sub_off(&t->subs.norm, &m->norm); |
| |
| *** 4646,4656 **** |
| break; |
| |
| case NFA_BOW: |
| ! { |
| ! int bow = TRUE; |
| |
| if (curc == NUL) |
| ! bow = FALSE; |
| #ifdef FEAT_MBYTE |
| else if (has_mbyte) |
| { |
| --- 4668,4677 ---- |
| break; |
| |
| case NFA_BOW: |
| ! result = TRUE; |
| |
| if (curc == NUL) |
| ! result = FALSE; |
| #ifdef FEAT_MBYTE |
| else if (has_mbyte) |
| { |
| |
| *** 4659,4685 **** |
| /* Get class of current and previous char (if it exists). */ |
| this_class = mb_get_class_buf(reginput, reg_buf); |
| if (this_class <= 1) |
| ! bow = FALSE; |
| else if (reg_prev_class() == this_class) |
| ! bow = FALSE; |
| } |
| #endif |
| else if (!vim_iswordc_buf(curc, reg_buf) |
| || (reginput > regline |
| && vim_iswordc_buf(reginput[-1], reg_buf))) |
| ! bow = FALSE; |
| ! if (bow) |
| addstate_here(thislist, t->state->out, &t->subs, |
| t->pim, &listidx); |
| break; |
| - } |
| |
| case NFA_EOW: |
| ! { |
| ! int eow = TRUE; |
| ! |
| if (reginput == regline) |
| ! eow = FALSE; |
| #ifdef FEAT_MBYTE |
| else if (has_mbyte) |
| { |
| --- 4680,4703 ---- |
| /* Get class of current and previous char (if it exists). */ |
| this_class = mb_get_class_buf(reginput, reg_buf); |
| if (this_class <= 1) |
| ! result = FALSE; |
| else if (reg_prev_class() == this_class) |
| ! result = FALSE; |
| } |
| #endif |
| else if (!vim_iswordc_buf(curc, reg_buf) |
| || (reginput > regline |
| && vim_iswordc_buf(reginput[-1], reg_buf))) |
| ! result = FALSE; |
| ! if (result) |
| addstate_here(thislist, t->state->out, &t->subs, |
| t->pim, &listidx); |
| break; |
| |
| case NFA_EOW: |
| ! result = TRUE; |
| if (reginput == regline) |
| ! result = FALSE; |
| #ifdef FEAT_MBYTE |
| else if (has_mbyte) |
| { |
| |
| *** 4690,4707 **** |
| prev_class = reg_prev_class(); |
| if (this_class == prev_class |
| || prev_class == 0 || prev_class == 1) |
| ! eow = FALSE; |
| } |
| #endif |
| else if (!vim_iswordc_buf(reginput[-1], reg_buf) |
| || (reginput[0] != NUL |
| && vim_iswordc_buf(curc, reg_buf))) |
| ! eow = FALSE; |
| ! if (eow) |
| addstate_here(thislist, t->state->out, &t->subs, |
| t->pim, &listidx); |
| break; |
| - } |
| |
| case NFA_BOF: |
| if (reglnum == 0 && reginput == regline |
| --- 4708,4724 ---- |
| prev_class = reg_prev_class(); |
| if (this_class == prev_class |
| || prev_class == 0 || prev_class == 1) |
| ! result = FALSE; |
| } |
| #endif |
| else if (!vim_iswordc_buf(reginput[-1], reg_buf) |
| || (reginput[0] != NUL |
| && vim_iswordc_buf(curc, reg_buf))) |
| ! result = FALSE; |
| ! if (result) |
| addstate_here(thislist, t->state->out, &t->subs, |
| t->pim, &listidx); |
| break; |
| |
| case NFA_BOF: |
| if (reglnum == 0 && reginput == regline |
| |
| *** 4740,4746 **** |
| { |
| /* If \Z was present, then ignore composing characters. |
| * When ignoring the base character this always matches. */ |
| - /* TODO: How about negated? */ |
| if (len == 0 && sta->c != curc) |
| result = FAIL; |
| else |
| --- 4757,4762 ---- |
| |
| *** 4813,4838 **** |
| } |
| break; |
| |
| - case NFA_CLASS_ALNUM: |
| - case NFA_CLASS_ALPHA: |
| - case NFA_CLASS_BLANK: |
| - case NFA_CLASS_CNTRL: |
| - case NFA_CLASS_DIGIT: |
| - case NFA_CLASS_GRAPH: |
| - case NFA_CLASS_LOWER: |
| - case NFA_CLASS_PRINT: |
| - case NFA_CLASS_PUNCT: |
| - case NFA_CLASS_SPACE: |
| - case NFA_CLASS_UPPER: |
| - case NFA_CLASS_XDIGIT: |
| - case NFA_CLASS_TAB: |
| - case NFA_CLASS_RETURN: |
| - case NFA_CLASS_BACKSPACE: |
| - case NFA_CLASS_ESCAPE: |
| - result = check_char_class(t->state->c, curc); |
| - ADD_STATE_IF_MATCH(t->state); |
| - break; |
| - |
| case NFA_START_COLL: |
| case NFA_START_NEG_COLL: |
| { |
| --- 4829,4834 ---- |
| |
| *** 5212,5221 **** |
| int c = t->state->c; |
| |
| /* TODO: put this in #ifdef later */ |
| ! if (c < -256) |
| EMSGN("INTERNAL: Negative state char: %ld", c); |
| - if (is_Magic(c)) |
| - c = un_Magic(c); |
| result = (c == curc); |
| |
| if (!result && ireg_ic) |
| --- 5208,5215 ---- |
| int c = t->state->c; |
| |
| /* TODO: put this in #ifdef later */ |
| ! if (c < 0) |
| EMSGN("INTERNAL: Negative state char: %ld", c); |
| result = (c == curc); |
| |
| if (!result && ireg_ic) |
| |
| *** 5252,5259 **** |
| prog, submatch, m, &listids); |
| t->pim->result = result ? NFA_PIM_MATCH |
| : NFA_PIM_NOMATCH; |
| ! /* for \@! it is a match when result is FALSE */ |
| ! if (result != t->pim->state->negated) |
| { |
| /* Copy submatch info from the recursive call */ |
| copy_sub_off(&t->pim->subs.norm, &m->norm); |
| --- 5246,5257 ---- |
| prog, submatch, m, &listids); |
| t->pim->result = result ? NFA_PIM_MATCH |
| : NFA_PIM_NOMATCH; |
| ! /* for \@! and \@<! it is a match when the result is |
| ! * FALSE */ |
| ! if (result != (t->pim->state->c |
| ! == NFA_START_INVISIBLE_NEG |
| ! || t->pim->state->c |
| ! == NFA_START_INVISIBLE_BEFORE_NEG)) |
| { |
| /* Copy submatch info from the recursive call */ |
| copy_sub_off(&t->pim->subs.norm, &m->norm); |
| |
| *** 5274,5281 **** |
| #endif |
| } |
| |
| ! /* for \@! it is a match when result is FALSE */ |
| ! if (result != t->pim->state->negated) |
| { |
| /* Copy submatch info from the recursive call */ |
| copy_sub_off(&t->subs.norm, &t->pim->subs.norm); |
| --- 5272,5281 ---- |
| #endif |
| } |
| |
| ! /* for \@! and \@<! it is a match when result is FALSE */ |
| ! if (result != (t->pim->state->c == NFA_START_INVISIBLE_NEG |
| ! || t->pim->state->c |
| ! == NFA_START_INVISIBLE_BEFORE_NEG)) |
| { |
| /* Copy submatch info from the recursive call */ |
| copy_sub_off(&t->subs.norm, &t->pim->subs.norm); |
| |
| |
| |
| *** 73,79 **** |
| nfa_state_T *out1; |
| int id; |
| int lastlist[2]; /* 0: normal, 1: recursive */ |
| - int negated; |
| int val; |
| }; |
| |
| --- 73,78 ---- |
| |
| |
| |
| *** 730,731 **** |
| --- 730,733 ---- |
| { /* Add new patch number below this line */ |
| + /**/ |
| + 1139, |
| /**/ |
| |
| -- |
| Common sense is what tells you that the world is flat. |
| |
| /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
| /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
| \\\ an exciting new programming language -- http://www.Zimbu.org /// |
| \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |