To: vim_dev@googlegroups.com Subject: Patch 7.3.1137 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1137 Problem: New regexp engine: collections are slow. Solution: Handle all characters in one go. Files: src/regexp_nfa.c *** ../vim-7.3.1136/src/regexp_nfa.c 2013-06-06 18:46:00.000000000 +0200 --- src/regexp_nfa.c 2013-06-07 13:40:58.000000000 +0200 *************** *** 34,48 **** NFA_SPLIT = -1024, NFA_MATCH, NFA_SKIP_CHAR, /* matches a 0-length char */ - NFA_END_NEG_RANGE, /* Used when expanding [^ab] */ ! NFA_CONCAT, NFA_OR, NFA_STAR, /* greedy * */ NFA_STAR_NONGREEDY, /* non-greedy * */ NFA_QUEST, /* greedy \? */ NFA_QUEST_NONGREEDY, /* non-greedy \? */ - NFA_NOT, /* used for [^ab] negated char ranges */ NFA_BOL, /* ^ Begin line */ NFA_EOL, /* $ End line */ --- 34,56 ---- NFA_SPLIT = -1024, NFA_MATCH, NFA_SKIP_CHAR, /* matches a 0-length char */ ! NFA_START_COLL, /* [abc] start */ ! NFA_END_COLL, /* [abc] end */ ! NFA_START_NEG_COLL, /* [^abc] start */ ! NFA_END_NEG_COLL, /* [^abc] end (only used in postfix) */ ! NFA_RANGE, /* range of the two previous items (only ! * used in postfix) */ ! NFA_RANGE_MIN, /* low end of a range */ ! NFA_RANGE_MAX, /* high end of a range */ ! ! NFA_CONCAT, /* concatenate two previous items (only ! * used in postfix) */ NFA_OR, NFA_STAR, /* greedy * */ NFA_STAR_NONGREEDY, /* non-greedy * */ NFA_QUEST, /* greedy \? */ NFA_QUEST_NONGREEDY, /* non-greedy \? */ NFA_BOL, /* ^ Begin line */ NFA_EOL, /* $ End line */ *************** *** 260,266 **** static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth)); static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth)); static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); ! static int nfa_emit_equi_class __ARGS((int c, int neg)); static int nfa_regatom __ARGS((void)); static int nfa_regpiece __ARGS((void)); static int nfa_regconcat __ARGS((void)); --- 268,274 ---- static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth)); static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth)); static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); ! static int nfa_emit_equi_class __ARGS((int c)); static int nfa_regatom __ARGS((void)); static int nfa_regpiece __ARGS((void)); static int nfa_regconcat __ARGS((void)); *************** *** 664,684 **** * NOTE! When changing this function, also update reg_equi_class() */ static int ! nfa_emit_equi_class(c, neg) int c; - int neg; { ! int first = TRUE; ! int glue = neg == TRUE ? NFA_CONCAT : NFA_OR; ! #define EMIT2(c) \ ! EMIT(c); \ ! if (neg == TRUE) { \ ! EMIT(NFA_NOT); \ ! } \ ! if (first == FALSE) \ ! EMIT(glue); \ ! else \ ! first = FALSE; \ #ifdef FEAT_MBYTE if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 --- 672,681 ---- * NOTE! When changing this function, also update reg_equi_class() */ static int ! nfa_emit_equi_class(c) int c; { ! #define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT); #ifdef FEAT_MBYTE if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 *************** *** 687,770 **** { switch (c) { ! case 'A': case '\300': case '\301': case '\302': ! case '\303': case '\304': case '\305': ! EMIT2('A'); EMIT2('\300'); EMIT2('\301'); ! EMIT2('\302'); EMIT2('\303'); EMIT2('\304'); ! EMIT2('\305'); return OK; ! case 'C': case '\307': ! EMIT2('C'); EMIT2('\307'); return OK; ! case 'E': case '\310': case '\311': case '\312': case '\313': ! EMIT2('E'); EMIT2('\310'); EMIT2('\311'); ! EMIT2('\312'); EMIT2('\313'); return OK; ! case 'I': case '\314': case '\315': case '\316': case '\317': ! EMIT2('I'); EMIT2('\314'); EMIT2('\315'); ! EMIT2('\316'); EMIT2('\317'); return OK; ! case 'N': case '\321': ! EMIT2('N'); EMIT2('\321'); return OK; ! case 'O': case '\322': case '\323': case '\324': case '\325': ! case '\326': ! EMIT2('O'); EMIT2('\322'); EMIT2('\323'); ! EMIT2('\324'); EMIT2('\325'); EMIT2('\326'); return OK; ! case 'U': case '\331': case '\332': case '\333': case '\334': ! EMIT2('U'); EMIT2('\331'); EMIT2('\332'); ! EMIT2('\333'); EMIT2('\334'); return OK; ! case 'Y': case '\335': ! EMIT2('Y'); EMIT2('\335'); return OK; ! case 'a': case '\340': case '\341': case '\342': ! case '\343': case '\344': case '\345': ! EMIT2('a'); EMIT2('\340'); EMIT2('\341'); ! EMIT2('\342'); EMIT2('\343'); EMIT2('\344'); ! EMIT2('\345'); return OK; ! case 'c': case '\347': ! EMIT2('c'); EMIT2('\347'); return OK; ! case 'e': case '\350': case '\351': case '\352': case '\353': ! EMIT2('e'); EMIT2('\350'); EMIT2('\351'); ! EMIT2('\352'); EMIT2('\353'); return OK; ! case 'i': case '\354': case '\355': case '\356': case '\357': ! EMIT2('i'); EMIT2('\354'); EMIT2('\355'); ! EMIT2('\356'); EMIT2('\357'); return OK; ! case 'n': case '\361': ! EMIT2('n'); EMIT2('\361'); return OK; ! case 'o': case '\362': case '\363': case '\364': case '\365': ! case '\366': ! EMIT2('o'); EMIT2('\362'); EMIT2('\363'); ! EMIT2('\364'); EMIT2('\365'); EMIT2('\366'); return OK; ! case 'u': case '\371': case '\372': case '\373': case '\374': ! EMIT2('u'); EMIT2('\371'); EMIT2('\372'); ! EMIT2('\373'); EMIT2('\374'); return OK; ! case 'y': case '\375': case '\377': ! EMIT2('y'); EMIT2('\375'); EMIT2('\377'); return OK; default: --- 684,767 ---- { switch (c) { ! case 'A': case 0300: case 0301: case 0302: ! case 0303: case 0304: case 0305: ! EMIT2('A'); EMIT2(0300); EMIT2(0301); ! EMIT2(0302); EMIT2(0303); EMIT2(0304); ! EMIT2(0305); return OK; ! case 'C': case 0307: ! EMIT2('C'); EMIT2(0307); return OK; ! case 'E': case 0310: case 0311: case 0312: case 0313: ! EMIT2('E'); EMIT2(0310); EMIT2(0311); ! EMIT2(0312); EMIT2(0313); return OK; ! case 'I': case 0314: case 0315: case 0316: case 0317: ! EMIT2('I'); EMIT2(0314); EMIT2(0315); ! EMIT2(0316); EMIT2(0317); return OK; ! case 'N': case 0321: ! EMIT2('N'); EMIT2(0321); return OK; ! case 'O': case 0322: case 0323: case 0324: case 0325: ! case 0326: ! EMIT2('O'); EMIT2(0322); EMIT2(0323); ! EMIT2(0324); EMIT2(0325); EMIT2(0326); return OK; ! case 'U': case 0331: case 0332: case 0333: case 0334: ! EMIT2('U'); EMIT2(0331); EMIT2(0332); ! EMIT2(0333); EMIT2(0334); return OK; ! case 'Y': case 0335: ! EMIT2('Y'); EMIT2(0335); return OK; ! case 'a': case 0340: case 0341: case 0342: ! case 0343: case 0344: case 0345: ! EMIT2('a'); EMIT2(0340); EMIT2(0341); ! EMIT2(0342); EMIT2(0343); EMIT2(0344); ! EMIT2(0345); return OK; ! case 'c': case 0347: ! EMIT2('c'); EMIT2(0347); return OK; ! case 'e': case 0350: case 0351: case 0352: case 0353: ! EMIT2('e'); EMIT2(0350); EMIT2(0351); ! EMIT2(0352); EMIT2(0353); return OK; ! case 'i': case 0354: case 0355: case 0356: case 0357: ! EMIT2('i'); EMIT2(0354); EMIT2(0355); ! EMIT2(0356); EMIT2(0357); return OK; ! case 'n': case 0361: ! EMIT2('n'); EMIT2(0361); return OK; ! case 'o': case 0362: case 0363: case 0364: case 0365: ! case 0366: ! EMIT2('o'); EMIT2(0362); EMIT2(0363); ! EMIT2(0364); EMIT2(0365); EMIT2(0366); return OK; ! case 'u': case 0371: case 0372: case 0373: case 0374: ! EMIT2('u'); EMIT2(0371); EMIT2(0372); ! EMIT2(0373); EMIT2(0374); return OK; ! case 'y': case 0375: case 0377: ! EMIT2('y'); EMIT2(0375); EMIT2(0377); return OK; default: *************** *** 811,824 **** char_u *old_regparse = regparse; #endif int extra = 0; - int first; int emit_range; int negated; int result; int startc = -1; int endc = -1; int oldstartc = -1; - int glue; /* ID that will "glue" nodes together */ c = getchr(); switch (c) --- 808,819 ---- *************** *** 927,934 **** case Magic('n'): if (reg_string) ! /* In a string "\n" matches a newline character. */ ! EMIT(NL); else { /* In buffer text "\n" matches the end of a line. */ --- 922,929 ---- case Magic('n'): if (reg_string) ! /* In a string "\n" matches a newline character. */ ! EMIT(NL); else { /* In buffer text "\n" matches the end of a line. */ *************** *** 1160,1191 **** case Magic('['): collection: /* ! * Glue is emitted between several atoms from the []. ! * It is either NFA_OR, or NFA_CONCAT. ! * ! * [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation) ! * [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT ! * NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix ! * notation) ! * */ - - /* Emit negation atoms, if needed. - * The CONCAT below merges the NOT with the previous node. */ - #define TRY_NEG() \ - if (negated == TRUE) \ - { \ - EMIT(NFA_NOT); \ - } - - /* Emit glue between important nodes : CONCAT or OR. */ - #define EMIT_GLUE() \ - if (first == FALSE) \ - EMIT(glue); \ - else \ - first = FALSE; - p = regparse; endp = skip_anyof(p); if (*endp == ']') --- 1155,1169 ---- case Magic('['): collection: /* ! * [abc] uses NFA_START_COLL - NFA_END_COLL ! * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL ! * Each character is produced as a regular state, using ! * NFA_CONCAT to bind them together. ! * Besides normal characters there can be: ! * - character classes NFA_CLASS_* ! * - ranges, two characters followed by NFA_RANGE. */ p = regparse; endp = skip_anyof(p); if (*endp == ']') *************** *** 1216,1236 **** * version that turns [abc] into 'a' OR 'b' OR 'c' */ startc = endc = oldstartc = -1; - first = TRUE; /* Emitting first atom in this sequence? */ negated = FALSE; - glue = NFA_OR; if (*regparse == '^') /* negated range */ { negated = TRUE; - glue = NFA_CONCAT; mb_ptr_adv(regparse); } if (*regparse == '-') { startc = '-'; EMIT(startc); ! TRY_NEG(); ! EMIT_GLUE(); mb_ptr_adv(regparse); } /* Emit the OR branches for each character in the [] */ --- 1194,1213 ---- * version that turns [abc] into 'a' OR 'b' OR 'c' */ startc = endc = oldstartc = -1; negated = FALSE; if (*regparse == '^') /* negated range */ { negated = TRUE; mb_ptr_adv(regparse); + EMIT(NFA_START_NEG_COLL); } + else + EMIT(NFA_START_COLL); if (*regparse == '-') { startc = '-'; EMIT(startc); ! EMIT(NFA_CONCAT); mb_ptr_adv(regparse); } /* Emit the OR branches for each character in the [] */ *************** *** 1306,1325 **** EMIT(NFA_CLASS_ESCAPE); break; } ! TRY_NEG(); ! EMIT_GLUE(); continue; } /* Try equivalence class [=a=] and the like */ if (equiclass != 0) { ! result = nfa_emit_equi_class(equiclass, negated); if (result == FAIL) { /* should never happen */ EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!")); } - EMIT_GLUE(); continue; } /* Try collating class like [. .] */ --- 1283,1300 ---- EMIT(NFA_CLASS_ESCAPE); break; } ! EMIT(NFA_CONCAT); continue; } /* Try equivalence class [=a=] and the like */ if (equiclass != 0) { ! result = nfa_emit_equi_class(equiclass); if (result == FAIL) { /* should never happen */ EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!")); } continue; } /* Try collating class like [. .] */ *************** *** 1391,1409 **** startc = oldstartc; if (startc > endc) EMSG_RET_FAIL(_(e_invrange)); #ifdef FEAT_MBYTE ! if (has_mbyte && ((*mb_char2len)(startc) > 1 || (*mb_char2len)(endc) > 1)) { ! if (endc > startc + 256) ! EMSG_RET_FAIL(_(e_invrange)); ! /* Emit the range. "startc" was already emitted, so ! * skip it. */ for (c = startc + 1; c <= endc; c++) { EMIT(c); ! TRY_NEG(); ! EMIT_GLUE(); } } else --- 1366,1397 ---- startc = oldstartc; if (startc > endc) EMSG_RET_FAIL(_(e_invrange)); + + if (endc > startc + 2) + { + /* Emit a range instead of the sequence of + * individual characters. */ + if (startc == 0) + /* \x00 is translated to \x0a, start at \x01. */ + EMIT(1); + else + --post_ptr; /* remove NFA_CONCAT */ + EMIT(endc); + EMIT(NFA_RANGE); + EMIT(NFA_CONCAT); + } + else #ifdef FEAT_MBYTE ! if (has_mbyte && ((*mb_char2len)(startc) > 1 || (*mb_char2len)(endc) > 1)) { ! /* Emit the characters in the range. ! * "startc" was already emitted, so skip it. ! * */ for (c = startc + 1; c <= endc; c++) { EMIT(c); ! EMIT(NFA_CONCAT); } } else *************** *** 1425,1432 **** #endif { EMIT(c); ! TRY_NEG(); ! EMIT_GLUE(); } } emit_range = FALSE; --- 1413,1419 ---- #endif { EMIT(c); ! EMIT(NFA_CONCAT); } } emit_range = FALSE; *************** *** 1434,1456 **** } else { ! /* ! * This char (startc) is not part of a range. Just * emit it. - * * Normally, simply emit startc. But if we get char * code=0 from a collating char, then replace it with * 0x0a. - * * This is needed to completely mimic the behaviour of ! * the backtracking engine. ! */ ! if (got_coll_char == TRUE && startc == 0) ! EMIT(0x0a); else ! EMIT(startc); ! TRY_NEG(); ! EMIT_GLUE(); } mb_ptr_adv(regparse); --- 1421,1449 ---- } else { ! /* This char (startc) is not part of a range. Just * emit it. * Normally, simply emit startc. But if we get char * code=0 from a collating char, then replace it with * 0x0a. * This is needed to completely mimic the behaviour of ! * the backtracking engine. */ ! if (startc == NFA_NEWL) ! { ! /* Line break can't be matched as part of the ! * collection, add an OR below. But not for negated ! * range. */ ! if (!negated) ! extra = ADD_NL; ! } else ! { ! if (got_coll_char == TRUE && startc == 0) ! EMIT(0x0a); ! else ! EMIT(startc); ! EMIT(NFA_CONCAT); ! } } mb_ptr_adv(regparse); *************** *** 1460,1479 **** if (*regparse == '-') /* if last, '-' is just a char */ { EMIT('-'); ! TRY_NEG(); ! EMIT_GLUE(); } mb_ptr_adv(regparse); /* skip the trailing ] */ regparse = endp; mb_ptr_adv(regparse); if (negated == TRUE) ! { ! /* Mark end of negated char range */ ! EMIT(NFA_END_NEG_RANGE); ! EMIT(NFA_CONCAT); ! } /* \_[] also matches \n but it's not negated */ if (extra == ADD_NL) --- 1453,1471 ---- if (*regparse == '-') /* if last, '-' is just a char */ { EMIT('-'); ! EMIT(NFA_CONCAT); } mb_ptr_adv(regparse); /* skip the trailing ] */ regparse = endp; mb_ptr_adv(regparse); + + /* Mark end of the collection. */ if (negated == TRUE) ! EMIT(NFA_END_NEG_COLL); ! else ! EMIT(NFA_END_COLL); /* \_[] also matches \n but it's not negated */ if (extra == ADD_NL) *************** *** 1532,1540 **** } } - #undef TRY_NEG - #undef EMIT_GLUE - return OK; } --- 1524,1529 ---- *************** *** 2091,2100 **** case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break; case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; - case NFA_NOT: STRCPY(code, "NFA_NOT "); break; case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break; case NFA_OR: STRCPY(code, "NFA_OR"); break; ! case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break; case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break; case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break; case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break; --- 2080,2096 ---- case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break; case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break; case NFA_OR: STRCPY(code, "NFA_OR"); break; ! ! case NFA_START_COLL: STRCPY(code, "NFA_START_COLL"); break; ! case NFA_END_COLL: STRCPY(code, "NFA_END_COLL"); break; ! case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break; ! case NFA_END_NEG_COLL: STRCPY(code, "NFA_END_NEG_COLL"); break; ! case NFA_RANGE: STRCPY(code, "NFA_RANGE"); break; ! case NFA_RANGE_MIN: STRCPY(code, "NFA_RANGE_MIN"); break; ! case NFA_RANGE_MAX: STRCPY(code, "NFA_RANGE_MAX"); break; ! case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break; case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break; case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break; *************** *** 2231,2238 **** fprintf(debugf, " %s", p); nfa_set_code(state->c); ! fprintf(debugf, "%s%s (%d) (id=%d)\n", ! state->negated ? "NOT " : "", code, state->c, abs(state->id)); if (state->id < 0) return; --- 2227,2238 ---- fprintf(debugf, " %s", p); nfa_set_code(state->c); ! fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n", ! state->negated ? "NOT " : "", ! code, ! state->c, ! abs(state->id), ! state->val); if (state->id < 0) return; *************** *** 2325,2330 **** --- 2325,2331 ---- s->c = c; s->out = out; s->out1 = out1; + s->val = 0; s->id = istate; s->lastlist[0] = 0; *************** *** 2565,2577 **** switch (*p) { case NFA_CONCAT: ! /* Catenation. ! * Pay attention: this operator does not exist ! * in the r.e. itself (it is implicit, really). ! * It is added when r.e. is translated to postfix ! * form in re2post(). ! * ! * No new state added here. */ if (nfa_calc_size == TRUE) { /* nstate += 0; */ --- 2566,2575 ---- switch (*p) { case NFA_CONCAT: ! /* Concatenation. ! * Pay attention: this operator does not exist in the r.e. itself ! * (it is implicit, really). It is added when r.e. is translated ! * to postfix form in re2post(). */ if (nfa_calc_size == TRUE) { /* nstate += 0; */ *************** *** 2583,2604 **** PUSH(frag(e1.start, e2.out)); break; - case NFA_NOT: - /* Negation of a character */ - if (nfa_calc_size == TRUE) - { - /* nstate += 0; */ - break; - } - e1 = POP(); - e1.start->negated = TRUE; - #ifdef FEAT_MBYTE - if (e1.start->c == NFA_COMPOSING) - e1.start->out1->negated = TRUE; - #endif - PUSH(e1); - break; - case NFA_OR: /* Alternation */ if (nfa_calc_size == TRUE) --- 2581,2586 ---- *************** *** 2672,2677 **** --- 2654,2696 ---- PUSH(frag(s, append(e.out, list1(&s->out)))); break; + case NFA_END_COLL: + case NFA_END_NEG_COLL: + /* On the stack is the sequence starting with NFA_START_COLL or + * NFA_START_NEG_COLL and all possible characters. Patch it to + * add the output to the start. */ + if (nfa_calc_size == TRUE) + { + nstate++; + break; + } + e = POP(); + s = alloc_state(NFA_END_COLL, NULL, NULL); + if (s == NULL) + goto theend; + patch(e.out, s); + e.start->out1 = s; + PUSH(frag(e.start, list1(&s->out))); + break; + + case NFA_RANGE: + /* Before this are two characters, the low and high end of a + * range. Turn them into two states with MIN and MAX. */ + if (nfa_calc_size == TRUE) + { + /* nstate += 0; */ + break; + } + e2 = POP(); + e1 = POP(); + e2.start->val = e2.start->c; + e2.start->c = NFA_RANGE_MAX; + e1.start->val = e1.start->c; + e1.start->c = NFA_RANGE_MIN; + patch(e1.out, e2.start); + PUSH(frag(e1.start, e2.out)); + break; + case NFA_SKIP_CHAR: /* Symbol of 0-length, Used in a repetition * with max/min count of 0 */ *************** *** 2990,2995 **** --- 3009,3016 ---- matchstate = &state_ptr[istate++]; /* the match state */ matchstate->c = NFA_MATCH; matchstate->out = matchstate->out1 = NULL; + matchstate->negated = FALSE; + matchstate->id = 0; patch(e.out, matchstate); ret = e.start; *************** *** 3308,3314 **** switch (state->c) { case NFA_SPLIT: - case NFA_NOT: case NFA_NOPEN: case NFA_SKIP_CHAR: case NFA_NCLOSE: --- 3329,3334 ---- *************** *** 3782,3788 **** default: /* should not be here :P */ ! EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class ")); } return FAIL; } --- 3802,3809 ---- default: /* should not be here :P */ ! EMSGN("E877: (NFA regexp) Invalid character class: %ld", class); ! return FAIL; } return FAIL; } *************** *** 4320,4327 **** addstate(thislist, start, m, 0); /* There are two cases when the NFA advances: 1. input char matches the ! * NFA node and 2. input char does not match the NFA node, but the next ! * node is NFA_NOT. The following macro calls addstate() according to * these rules. It is used A LOT, so use the "listtbl" table for speed */ listtbl[0][0] = NULL; listtbl[0][1] = neglist; --- 4341,4348 ---- addstate(thislist, start, m, 0); /* There are two cases when the NFA advances: 1. input char matches the ! * NFA node and 2. input char does not match the NFA node and the state ! * has the negated flag. The following macro calls addstate() according to * these rules. It is used A LOT, so use the "listtbl" table for speed */ listtbl[0][0] = NULL; listtbl[0][1] = neglist; *************** *** 4845,4860 **** ADD_POS_NEG_STATE(t->state); break; ! case NFA_END_NEG_RANGE: ! /* This follows a series of negated nodes, like: ! * NOT CHAR(x), NOT CHAR(y), etc. */ ! if (curc > 0) { ll = nextlist; ! add_state = t->state->out; add_off = clen; } break; case NFA_ANY: /* Any char except '\0', (end of input) does not match. */ --- 4866,4944 ---- ADD_POS_NEG_STATE(t->state); break; ! case NFA_START_COLL: ! case NFA_START_NEG_COLL: ! { ! /* What follows is a list of characters, until NFA_END_COLL. ! * One of them must match or none of them must match. */ ! nfa_state_T *state; ! int result_if_matched; ! int c1, c2; ! ! /* Never match EOL. If it's part of the collection it is added ! * as a separate state with an OR. */ ! if (curc == NUL) ! break; ! ! state = t->state->out; ! result_if_matched = (t->state->c == NFA_START_COLL); ! for (;;) { + if (state->c == NFA_END_COLL) + { + result = !result_if_matched; + break; + } + if (state->c == NFA_RANGE_MIN) + { + c1 = state->val; + state = state->out; /* advance to NFA_RANGE_MAX */ + c2 = state->val; + #ifdef ENABLE_LOG + fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n", + curc, c1, c2); + #endif + if (curc >= c1 && curc <= c2) + { + result = result_if_matched; + break; + } + if (ireg_ic) + { + int curc_low = MB_TOLOWER(curc); + int done = FALSE; + + for ( ; c1 <= c2; ++c1) + if (MB_TOLOWER(c1) == curc_low) + { + result = result_if_matched; + done = TRUE; + break; + } + if (done) + break; + } + } + else if (state->c < 0 ? check_char_class(state->c, curc) + : (curc == state->c + || (ireg_ic && MB_TOLOWER(curc) + == MB_TOLOWER(state->c)))) + { + result = result_if_matched; + break; + } + state = state->out; + } + if (result) + { + /* next state is in out of the NFA_END_COLL, out1 of + * START points to the END state */ ll = nextlist; ! add_state = t->state->out1->out; add_off = clen; } break; + } case NFA_ANY: /* Any char except '\0', (end of input) does not match. */ *** ../vim-7.3.1136/src/version.c 2013-06-06 21:31:02.000000000 +0200 --- src/version.c 2013-06-07 13:21:57.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1137, /**/ -- From "know your smileys": :.-( Crying /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///