diff --git a/7.3.1137 b/7.3.1137 new file mode 100644 index 0000000..fb26417 --- /dev/null +++ b/7.3.1137 @@ -0,0 +1,966 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.1137 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.1137 +Problem: New regexp engine: collections are slow. +Solution: Handle all characters in one go. +Files: src/regexp_nfa.c + + +*** ../vim-7.3.1136/src/regexp_nfa.c 2013-06-06 18:46:00.000000000 +0200 +--- src/regexp_nfa.c 2013-06-07 13:40:58.000000000 +0200 +*************** +*** 34,48 **** + NFA_SPLIT = -1024, + NFA_MATCH, + NFA_SKIP_CHAR, /* matches a 0-length char */ +- NFA_END_NEG_RANGE, /* Used when expanding [^ab] */ + +! NFA_CONCAT, + NFA_OR, + NFA_STAR, /* greedy * */ + NFA_STAR_NONGREEDY, /* non-greedy * */ + NFA_QUEST, /* greedy \? */ + NFA_QUEST_NONGREEDY, /* non-greedy \? */ +- NFA_NOT, /* used for [^ab] negated char ranges */ + + NFA_BOL, /* ^ Begin line */ + NFA_EOL, /* $ End line */ +--- 34,56 ---- + NFA_SPLIT = -1024, + NFA_MATCH, + NFA_SKIP_CHAR, /* matches a 0-length char */ + +! NFA_START_COLL, /* [abc] start */ +! NFA_END_COLL, /* [abc] end */ +! NFA_START_NEG_COLL, /* [^abc] start */ +! NFA_END_NEG_COLL, /* [^abc] end (only used in postfix) */ +! NFA_RANGE, /* range of the two previous items (only +! * used in postfix) */ +! NFA_RANGE_MIN, /* low end of a range */ +! NFA_RANGE_MAX, /* high end of a range */ +! +! NFA_CONCAT, /* concatenate two previous items (only +! * used in postfix) */ + NFA_OR, + NFA_STAR, /* greedy * */ + NFA_STAR_NONGREEDY, /* non-greedy * */ + NFA_QUEST, /* greedy \? */ + NFA_QUEST_NONGREEDY, /* non-greedy \? */ + + NFA_BOL, /* ^ Begin line */ + NFA_EOL, /* $ End line */ +*************** +*** 260,266 **** + static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth)); + static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth)); + static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); +! static int nfa_emit_equi_class __ARGS((int c, int neg)); + static int nfa_regatom __ARGS((void)); + static int nfa_regpiece __ARGS((void)); + static int nfa_regconcat __ARGS((void)); +--- 268,274 ---- + static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth)); + static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth)); + static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); +! static int nfa_emit_equi_class __ARGS((int c)); + static int nfa_regatom __ARGS((void)); + static int nfa_regpiece __ARGS((void)); + static int nfa_regconcat __ARGS((void)); +*************** +*** 664,684 **** + * NOTE! When changing this function, also update reg_equi_class() + */ + static int +! nfa_emit_equi_class(c, neg) + int c; +- int neg; + { +! int first = TRUE; +! int glue = neg == TRUE ? NFA_CONCAT : NFA_OR; +! #define EMIT2(c) \ +! EMIT(c); \ +! if (neg == TRUE) { \ +! EMIT(NFA_NOT); \ +! } \ +! if (first == FALSE) \ +! EMIT(glue); \ +! else \ +! first = FALSE; \ + + #ifdef FEAT_MBYTE + if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 +--- 672,681 ---- + * NOTE! When changing this function, also update reg_equi_class() + */ + static int +! nfa_emit_equi_class(c) + int c; + { +! #define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT); + + #ifdef FEAT_MBYTE + if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 +*************** +*** 687,770 **** + { + switch (c) + { +! case 'A': case '\300': case '\301': case '\302': +! case '\303': case '\304': case '\305': +! EMIT2('A'); EMIT2('\300'); EMIT2('\301'); +! EMIT2('\302'); EMIT2('\303'); EMIT2('\304'); +! EMIT2('\305'); + return OK; + +! case 'C': case '\307': +! EMIT2('C'); EMIT2('\307'); + return OK; + +! case 'E': case '\310': case '\311': case '\312': case '\313': +! EMIT2('E'); EMIT2('\310'); EMIT2('\311'); +! EMIT2('\312'); EMIT2('\313'); + return OK; + +! case 'I': case '\314': case '\315': case '\316': case '\317': +! EMIT2('I'); EMIT2('\314'); EMIT2('\315'); +! EMIT2('\316'); EMIT2('\317'); + return OK; + +! case 'N': case '\321': +! EMIT2('N'); EMIT2('\321'); + return OK; + +! case 'O': case '\322': case '\323': case '\324': case '\325': +! case '\326': +! EMIT2('O'); EMIT2('\322'); EMIT2('\323'); +! EMIT2('\324'); EMIT2('\325'); EMIT2('\326'); + return OK; + +! case 'U': case '\331': case '\332': case '\333': case '\334': +! EMIT2('U'); EMIT2('\331'); EMIT2('\332'); +! EMIT2('\333'); EMIT2('\334'); + return OK; + +! case 'Y': case '\335': +! EMIT2('Y'); EMIT2('\335'); + return OK; + +! case 'a': case '\340': case '\341': case '\342': +! case '\343': case '\344': case '\345': +! EMIT2('a'); EMIT2('\340'); EMIT2('\341'); +! EMIT2('\342'); EMIT2('\343'); EMIT2('\344'); +! EMIT2('\345'); + return OK; + +! case 'c': case '\347': +! EMIT2('c'); EMIT2('\347'); + return OK; + +! case 'e': case '\350': case '\351': case '\352': case '\353': +! EMIT2('e'); EMIT2('\350'); EMIT2('\351'); +! EMIT2('\352'); EMIT2('\353'); + return OK; + +! case 'i': case '\354': case '\355': case '\356': case '\357': +! EMIT2('i'); EMIT2('\354'); EMIT2('\355'); +! EMIT2('\356'); EMIT2('\357'); + return OK; + +! case 'n': case '\361': +! EMIT2('n'); EMIT2('\361'); + return OK; + +! case 'o': case '\362': case '\363': case '\364': case '\365': +! case '\366': +! EMIT2('o'); EMIT2('\362'); EMIT2('\363'); +! EMIT2('\364'); EMIT2('\365'); EMIT2('\366'); + return OK; + +! case 'u': case '\371': case '\372': case '\373': case '\374': +! EMIT2('u'); EMIT2('\371'); EMIT2('\372'); +! EMIT2('\373'); EMIT2('\374'); + return OK; + +! case 'y': case '\375': case '\377': +! EMIT2('y'); EMIT2('\375'); EMIT2('\377'); + return OK; + + default: +--- 684,767 ---- + { + switch (c) + { +! case 'A': case 0300: case 0301: case 0302: +! case 0303: case 0304: case 0305: +! EMIT2('A'); EMIT2(0300); EMIT2(0301); +! EMIT2(0302); EMIT2(0303); EMIT2(0304); +! EMIT2(0305); + return OK; + +! case 'C': case 0307: +! EMIT2('C'); EMIT2(0307); + return OK; + +! case 'E': case 0310: case 0311: case 0312: case 0313: +! EMIT2('E'); EMIT2(0310); EMIT2(0311); +! EMIT2(0312); EMIT2(0313); + return OK; + +! case 'I': case 0314: case 0315: case 0316: case 0317: +! EMIT2('I'); EMIT2(0314); EMIT2(0315); +! EMIT2(0316); EMIT2(0317); + return OK; + +! case 'N': case 0321: +! EMIT2('N'); EMIT2(0321); + return OK; + +! case 'O': case 0322: case 0323: case 0324: case 0325: +! case 0326: +! EMIT2('O'); EMIT2(0322); EMIT2(0323); +! EMIT2(0324); EMIT2(0325); EMIT2(0326); + return OK; + +! case 'U': case 0331: case 0332: case 0333: case 0334: +! EMIT2('U'); EMIT2(0331); EMIT2(0332); +! EMIT2(0333); EMIT2(0334); + return OK; + +! case 'Y': case 0335: +! EMIT2('Y'); EMIT2(0335); + return OK; + +! case 'a': case 0340: case 0341: case 0342: +! case 0343: case 0344: case 0345: +! EMIT2('a'); EMIT2(0340); EMIT2(0341); +! EMIT2(0342); EMIT2(0343); EMIT2(0344); +! EMIT2(0345); + return OK; + +! case 'c': case 0347: +! EMIT2('c'); EMIT2(0347); + return OK; + +! case 'e': case 0350: case 0351: case 0352: case 0353: +! EMIT2('e'); EMIT2(0350); EMIT2(0351); +! EMIT2(0352); EMIT2(0353); + return OK; + +! case 'i': case 0354: case 0355: case 0356: case 0357: +! EMIT2('i'); EMIT2(0354); EMIT2(0355); +! EMIT2(0356); EMIT2(0357); + return OK; + +! case 'n': case 0361: +! EMIT2('n'); EMIT2(0361); + return OK; + +! case 'o': case 0362: case 0363: case 0364: case 0365: +! case 0366: +! EMIT2('o'); EMIT2(0362); EMIT2(0363); +! EMIT2(0364); EMIT2(0365); EMIT2(0366); + return OK; + +! case 'u': case 0371: case 0372: case 0373: case 0374: +! EMIT2('u'); EMIT2(0371); EMIT2(0372); +! EMIT2(0373); EMIT2(0374); + return OK; + +! case 'y': case 0375: case 0377: +! EMIT2('y'); EMIT2(0375); EMIT2(0377); + return OK; + + default: +*************** +*** 811,824 **** + char_u *old_regparse = regparse; + #endif + int extra = 0; +- int first; + int emit_range; + int negated; + int result; + int startc = -1; + int endc = -1; + int oldstartc = -1; +- int glue; /* ID that will "glue" nodes together */ + + c = getchr(); + switch (c) +--- 808,819 ---- +*************** +*** 927,934 **** + + case Magic('n'): + if (reg_string) +! /* In a string "\n" matches a newline character. */ +! EMIT(NL); + else + { + /* In buffer text "\n" matches the end of a line. */ +--- 922,929 ---- + + case Magic('n'): + if (reg_string) +! /* In a string "\n" matches a newline character. */ +! EMIT(NL); + else + { + /* In buffer text "\n" matches the end of a line. */ +*************** +*** 1160,1191 **** + case Magic('['): + collection: + /* +! * Glue is emitted between several atoms from the []. +! * It is either NFA_OR, or NFA_CONCAT. +! * +! * [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation) +! * [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT +! * NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix +! * notation) +! * + */ + +- +- /* Emit negation atoms, if needed. +- * The CONCAT below merges the NOT with the previous node. */ +- #define TRY_NEG() \ +- if (negated == TRUE) \ +- { \ +- EMIT(NFA_NOT); \ +- } +- +- /* Emit glue between important nodes : CONCAT or OR. */ +- #define EMIT_GLUE() \ +- if (first == FALSE) \ +- EMIT(glue); \ +- else \ +- first = FALSE; +- + p = regparse; + endp = skip_anyof(p); + if (*endp == ']') +--- 1155,1169 ---- + case Magic('['): + collection: + /* +! * [abc] uses NFA_START_COLL - NFA_END_COLL +! * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL +! * Each character is produced as a regular state, using +! * NFA_CONCAT to bind them together. +! * Besides normal characters there can be: +! * - character classes NFA_CLASS_* +! * - ranges, two characters followed by NFA_RANGE. + */ + + p = regparse; + endp = skip_anyof(p); + if (*endp == ']') +*************** +*** 1216,1236 **** + * version that turns [abc] into 'a' OR 'b' OR 'c' + */ + startc = endc = oldstartc = -1; +- first = TRUE; /* Emitting first atom in this sequence? */ + negated = FALSE; +- glue = NFA_OR; + if (*regparse == '^') /* negated range */ + { + negated = TRUE; +- glue = NFA_CONCAT; + mb_ptr_adv(regparse); + } + if (*regparse == '-') + { + startc = '-'; + EMIT(startc); +! TRY_NEG(); +! EMIT_GLUE(); + mb_ptr_adv(regparse); + } + /* Emit the OR branches for each character in the [] */ +--- 1194,1213 ---- + * version that turns [abc] into 'a' OR 'b' OR 'c' + */ + startc = endc = oldstartc = -1; + negated = FALSE; + if (*regparse == '^') /* negated range */ + { + negated = TRUE; + mb_ptr_adv(regparse); ++ EMIT(NFA_START_NEG_COLL); + } ++ else ++ EMIT(NFA_START_COLL); + if (*regparse == '-') + { + startc = '-'; + EMIT(startc); +! EMIT(NFA_CONCAT); + mb_ptr_adv(regparse); + } + /* Emit the OR branches for each character in the [] */ +*************** +*** 1306,1325 **** + EMIT(NFA_CLASS_ESCAPE); + break; + } +! TRY_NEG(); +! EMIT_GLUE(); + continue; + } + /* Try equivalence class [=a=] and the like */ + if (equiclass != 0) + { +! result = nfa_emit_equi_class(equiclass, negated); + if (result == FAIL) + { + /* should never happen */ + EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!")); + } +- EMIT_GLUE(); + continue; + } + /* Try collating class like [. .] */ +--- 1283,1300 ---- + EMIT(NFA_CLASS_ESCAPE); + break; + } +! EMIT(NFA_CONCAT); + continue; + } + /* Try equivalence class [=a=] and the like */ + if (equiclass != 0) + { +! result = nfa_emit_equi_class(equiclass); + if (result == FAIL) + { + /* should never happen */ + EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!")); + } + continue; + } + /* Try collating class like [. .] */ +*************** +*** 1391,1409 **** + startc = oldstartc; + if (startc > endc) + EMSG_RET_FAIL(_(e_invrange)); + #ifdef FEAT_MBYTE +! if (has_mbyte && ((*mb_char2len)(startc) > 1 + || (*mb_char2len)(endc) > 1)) + { +! if (endc > startc + 256) +! EMSG_RET_FAIL(_(e_invrange)); +! /* Emit the range. "startc" was already emitted, so +! * skip it. */ + for (c = startc + 1; c <= endc; c++) + { + EMIT(c); +! TRY_NEG(); +! EMIT_GLUE(); + } + } + else +--- 1366,1397 ---- + startc = oldstartc; + if (startc > endc) + EMSG_RET_FAIL(_(e_invrange)); ++ ++ if (endc > startc + 2) ++ { ++ /* Emit a range instead of the sequence of ++ * individual characters. */ ++ if (startc == 0) ++ /* \x00 is translated to \x0a, start at \x01. */ ++ EMIT(1); ++ else ++ --post_ptr; /* remove NFA_CONCAT */ ++ EMIT(endc); ++ EMIT(NFA_RANGE); ++ EMIT(NFA_CONCAT); ++ } ++ else + #ifdef FEAT_MBYTE +! if (has_mbyte && ((*mb_char2len)(startc) > 1 + || (*mb_char2len)(endc) > 1)) + { +! /* Emit the characters in the range. +! * "startc" was already emitted, so skip it. +! * */ + for (c = startc + 1; c <= endc; c++) + { + EMIT(c); +! EMIT(NFA_CONCAT); + } + } + else +*************** +*** 1425,1432 **** + #endif + { + EMIT(c); +! TRY_NEG(); +! EMIT_GLUE(); + } + } + emit_range = FALSE; +--- 1413,1419 ---- + #endif + { + EMIT(c); +! EMIT(NFA_CONCAT); + } + } + emit_range = FALSE; +*************** +*** 1434,1456 **** + } + else + { +! /* +! * This char (startc) is not part of a range. Just + * emit it. +- * + * Normally, simply emit startc. But if we get char + * code=0 from a collating char, then replace it with + * 0x0a. +- * + * This is needed to completely mimic the behaviour of +! * the backtracking engine. +! */ +! if (got_coll_char == TRUE && startc == 0) +! EMIT(0x0a); + else +! EMIT(startc); +! TRY_NEG(); +! EMIT_GLUE(); + } + + mb_ptr_adv(regparse); +--- 1421,1449 ---- + } + else + { +! /* This char (startc) is not part of a range. Just + * emit it. + * Normally, simply emit startc. But if we get char + * code=0 from a collating char, then replace it with + * 0x0a. + * This is needed to completely mimic the behaviour of +! * the backtracking engine. */ +! if (startc == NFA_NEWL) +! { +! /* Line break can't be matched as part of the +! * collection, add an OR below. But not for negated +! * range. */ +! if (!negated) +! extra = ADD_NL; +! } + else +! { +! if (got_coll_char == TRUE && startc == 0) +! EMIT(0x0a); +! else +! EMIT(startc); +! EMIT(NFA_CONCAT); +! } + } + + mb_ptr_adv(regparse); +*************** +*** 1460,1479 **** + if (*regparse == '-') /* if last, '-' is just a char */ + { + EMIT('-'); +! TRY_NEG(); +! EMIT_GLUE(); + } + mb_ptr_adv(regparse); + + /* skip the trailing ] */ + regparse = endp; + mb_ptr_adv(regparse); + if (negated == TRUE) +! { +! /* Mark end of negated char range */ +! EMIT(NFA_END_NEG_RANGE); +! EMIT(NFA_CONCAT); +! } + + /* \_[] also matches \n but it's not negated */ + if (extra == ADD_NL) +--- 1453,1471 ---- + if (*regparse == '-') /* if last, '-' is just a char */ + { + EMIT('-'); +! EMIT(NFA_CONCAT); + } + mb_ptr_adv(regparse); + + /* skip the trailing ] */ + regparse = endp; + mb_ptr_adv(regparse); ++ ++ /* Mark end of the collection. */ + if (negated == TRUE) +! EMIT(NFA_END_NEG_COLL); +! else +! EMIT(NFA_END_COLL); + + /* \_[] also matches \n but it's not negated */ + if (extra == ADD_NL) +*************** +*** 1532,1540 **** + } + } + +- #undef TRY_NEG +- #undef EMIT_GLUE +- + return OK; + } + +--- 1524,1529 ---- +*************** +*** 2091,2100 **** + case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; + case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break; + case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; +- case NFA_NOT: STRCPY(code, "NFA_NOT "); break; + case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break; + case NFA_OR: STRCPY(code, "NFA_OR"); break; +! case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break; + case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break; + case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break; + case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break; +--- 2080,2096 ---- + case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; + case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break; + case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; + case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break; + case NFA_OR: STRCPY(code, "NFA_OR"); break; +! +! case NFA_START_COLL: STRCPY(code, "NFA_START_COLL"); break; +! case NFA_END_COLL: STRCPY(code, "NFA_END_COLL"); break; +! case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break; +! case NFA_END_NEG_COLL: STRCPY(code, "NFA_END_NEG_COLL"); break; +! case NFA_RANGE: STRCPY(code, "NFA_RANGE"); break; +! case NFA_RANGE_MIN: STRCPY(code, "NFA_RANGE_MIN"); break; +! case NFA_RANGE_MAX: STRCPY(code, "NFA_RANGE_MAX"); break; +! + case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break; + case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break; + case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break; +*************** +*** 2231,2238 **** + fprintf(debugf, " %s", p); + + nfa_set_code(state->c); +! fprintf(debugf, "%s%s (%d) (id=%d)\n", +! state->negated ? "NOT " : "", code, state->c, abs(state->id)); + if (state->id < 0) + return; + +--- 2227,2238 ---- + fprintf(debugf, " %s", p); + + nfa_set_code(state->c); +! fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n", +! state->negated ? "NOT " : "", +! code, +! state->c, +! abs(state->id), +! state->val); + if (state->id < 0) + return; + +*************** +*** 2325,2330 **** +--- 2325,2331 ---- + s->c = c; + s->out = out; + s->out1 = out1; ++ s->val = 0; + + s->id = istate; + s->lastlist[0] = 0; +*************** +*** 2565,2577 **** + switch (*p) + { + case NFA_CONCAT: +! /* Catenation. +! * Pay attention: this operator does not exist +! * in the r.e. itself (it is implicit, really). +! * It is added when r.e. is translated to postfix +! * form in re2post(). +! * +! * No new state added here. */ + if (nfa_calc_size == TRUE) + { + /* nstate += 0; */ +--- 2566,2575 ---- + switch (*p) + { + case NFA_CONCAT: +! /* Concatenation. +! * Pay attention: this operator does not exist in the r.e. itself +! * (it is implicit, really). It is added when r.e. is translated +! * to postfix form in re2post(). */ + if (nfa_calc_size == TRUE) + { + /* nstate += 0; */ +*************** +*** 2583,2604 **** + PUSH(frag(e1.start, e2.out)); + break; + +- case NFA_NOT: +- /* Negation of a character */ +- if (nfa_calc_size == TRUE) +- { +- /* nstate += 0; */ +- break; +- } +- e1 = POP(); +- e1.start->negated = TRUE; +- #ifdef FEAT_MBYTE +- if (e1.start->c == NFA_COMPOSING) +- e1.start->out1->negated = TRUE; +- #endif +- PUSH(e1); +- break; +- + case NFA_OR: + /* Alternation */ + if (nfa_calc_size == TRUE) +--- 2581,2586 ---- +*************** +*** 2672,2677 **** +--- 2654,2696 ---- + PUSH(frag(s, append(e.out, list1(&s->out)))); + break; + ++ case NFA_END_COLL: ++ case NFA_END_NEG_COLL: ++ /* On the stack is the sequence starting with NFA_START_COLL or ++ * NFA_START_NEG_COLL and all possible characters. Patch it to ++ * add the output to the start. */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate++; ++ break; ++ } ++ e = POP(); ++ s = alloc_state(NFA_END_COLL, NULL, NULL); ++ if (s == NULL) ++ goto theend; ++ patch(e.out, s); ++ e.start->out1 = s; ++ PUSH(frag(e.start, list1(&s->out))); ++ break; ++ ++ case NFA_RANGE: ++ /* Before this are two characters, the low and high end of a ++ * range. Turn them into two states with MIN and MAX. */ ++ if (nfa_calc_size == TRUE) ++ { ++ /* nstate += 0; */ ++ break; ++ } ++ e2 = POP(); ++ e1 = POP(); ++ e2.start->val = e2.start->c; ++ e2.start->c = NFA_RANGE_MAX; ++ e1.start->val = e1.start->c; ++ e1.start->c = NFA_RANGE_MIN; ++ patch(e1.out, e2.start); ++ PUSH(frag(e1.start, e2.out)); ++ break; ++ + case NFA_SKIP_CHAR: + /* Symbol of 0-length, Used in a repetition + * with max/min count of 0 */ +*************** +*** 2990,2995 **** +--- 3009,3016 ---- + matchstate = &state_ptr[istate++]; /* the match state */ + matchstate->c = NFA_MATCH; + matchstate->out = matchstate->out1 = NULL; ++ matchstate->negated = FALSE; ++ matchstate->id = 0; + + patch(e.out, matchstate); + ret = e.start; +*************** +*** 3308,3314 **** + switch (state->c) + { + case NFA_SPLIT: +- case NFA_NOT: + case NFA_NOPEN: + case NFA_SKIP_CHAR: + case NFA_NCLOSE: +--- 3329,3334 ---- +*************** +*** 3782,3788 **** + + default: + /* should not be here :P */ +! EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class ")); + } + return FAIL; + } +--- 3802,3809 ---- + + default: + /* should not be here :P */ +! EMSGN("E877: (NFA regexp) Invalid character class: %ld", class); +! return FAIL; + } + return FAIL; + } +*************** +*** 4320,4327 **** + addstate(thislist, start, m, 0); + + /* There are two cases when the NFA advances: 1. input char matches the +! * NFA node and 2. input char does not match the NFA node, but the next +! * node is NFA_NOT. The following macro calls addstate() according to + * these rules. It is used A LOT, so use the "listtbl" table for speed */ + listtbl[0][0] = NULL; + listtbl[0][1] = neglist; +--- 4341,4348 ---- + addstate(thislist, start, m, 0); + + /* There are two cases when the NFA advances: 1. input char matches the +! * NFA node and 2. input char does not match the NFA node and the state +! * has the negated flag. The following macro calls addstate() according to + * these rules. It is used A LOT, so use the "listtbl" table for speed */ + listtbl[0][0] = NULL; + listtbl[0][1] = neglist; +*************** +*** 4845,4860 **** + ADD_POS_NEG_STATE(t->state); + break; + +! case NFA_END_NEG_RANGE: +! /* This follows a series of negated nodes, like: +! * NOT CHAR(x), NOT CHAR(y), etc. */ +! if (curc > 0) + { + ll = nextlist; +! add_state = t->state->out; + add_off = clen; + } + break; + + case NFA_ANY: + /* Any char except '\0', (end of input) does not match. */ +--- 4866,4944 ---- + ADD_POS_NEG_STATE(t->state); + break; + +! case NFA_START_COLL: +! case NFA_START_NEG_COLL: +! { +! /* What follows is a list of characters, until NFA_END_COLL. +! * One of them must match or none of them must match. */ +! nfa_state_T *state; +! int result_if_matched; +! int c1, c2; +! +! /* Never match EOL. If it's part of the collection it is added +! * as a separate state with an OR. */ +! if (curc == NUL) +! break; +! +! state = t->state->out; +! result_if_matched = (t->state->c == NFA_START_COLL); +! for (;;) + { ++ if (state->c == NFA_END_COLL) ++ { ++ result = !result_if_matched; ++ break; ++ } ++ if (state->c == NFA_RANGE_MIN) ++ { ++ c1 = state->val; ++ state = state->out; /* advance to NFA_RANGE_MAX */ ++ c2 = state->val; ++ #ifdef ENABLE_LOG ++ fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n", ++ curc, c1, c2); ++ #endif ++ if (curc >= c1 && curc <= c2) ++ { ++ result = result_if_matched; ++ break; ++ } ++ if (ireg_ic) ++ { ++ int curc_low = MB_TOLOWER(curc); ++ int done = FALSE; ++ ++ for ( ; c1 <= c2; ++c1) ++ if (MB_TOLOWER(c1) == curc_low) ++ { ++ result = result_if_matched; ++ done = TRUE; ++ break; ++ } ++ if (done) ++ break; ++ } ++ } ++ else if (state->c < 0 ? check_char_class(state->c, curc) ++ : (curc == state->c ++ || (ireg_ic && MB_TOLOWER(curc) ++ == MB_TOLOWER(state->c)))) ++ { ++ result = result_if_matched; ++ break; ++ } ++ state = state->out; ++ } ++ if (result) ++ { ++ /* next state is in out of the NFA_END_COLL, out1 of ++ * START points to the END state */ + ll = nextlist; +! add_state = t->state->out1->out; + add_off = clen; + } + break; ++ } + + case NFA_ANY: + /* Any char except '\0', (end of input) does not match. */ +*** ../vim-7.3.1136/src/version.c 2013-06-06 21:31:02.000000000 +0200 +--- src/version.c 2013-06-07 13:21:57.000000000 +0200 +*************** +*** 730,731 **** +--- 730,733 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 1137, + /**/ + +-- +From "know your smileys": + :.-( Crying + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ an exciting new programming language -- http://www.Zimbu.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///