diff --git a/7.3.970 b/7.3.970 new file mode 100644 index 0000000..5b74a5e --- /dev/null +++ b/7.3.970 @@ -0,0 +1,6775 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.970 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.970 +Problem: Syntax highlighting can be slow. +Solution: Include the NFA regexp engine. Add the 'regexpengine' option to + select which one is used. (various authors, including Ken Takata, + Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young) +Files: src/Make_cyg.mak, src/Make_ming.mak, src/Make_mvc.mak, + src/Makefile, src/regexp.c, src/regexp.h, src/regexp_nfa.c, + src/structs.h, src/testdir/Makefile, src/testdir/test64.in, + src/testdir/test64.ok, Filelist, runtime/doc/pattern.txt, + runtime/doc/option.txt, src/option.c, src/option.h, + src/testdir/test95.in, src/testdir/test95.ok, + src/testdir/Make_amiga.mak, src/testdir/Make_dos.mak, + src/testdir/Make_ming.mak, src/testdir/Make_os2.mak, + src/testdir/Make_vms.mms, src/testdir/Makefile + + +*** ../vim-7.3.969/src/Make_cyg.mak 2013-05-06 04:21:35.000000000 +0200 +--- src/Make_cyg.mak 2013-05-17 18:54:19.000000000 +0200 +*************** +*** 672,677 **** +--- 672,680 ---- + $(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_DEP) + $(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o + ++ $(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL) ++ $(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o ++ + $(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP) + $(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o + +*** ../vim-7.3.969/src/Make_ming.mak 2013-03-07 15:16:16.000000000 +0100 +--- src/Make_ming.mak 2013-05-17 18:54:19.000000000 +0200 +*************** +*** 765,770 **** +--- 765,773 ---- + $(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_INCL) $(NBDEBUG_SRC) + $(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o + ++ $(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL) ++ $(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o ++ + $(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP) + $(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o + +*** ../vim-7.3.969/src/Make_mvc.mak 2013-05-07 05:11:12.000000000 +0200 +--- src/Make_mvc.mak 2013-05-19 16:38:29.000000000 +0200 +*************** +*** 1166,1172 **** + + $(OUTDIR)/quickfix.obj: $(OUTDIR) quickfix.c $(INCL) + +! $(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c $(INCL) + + $(OUTDIR)/screen.obj: $(OUTDIR) screen.c $(INCL) + +--- 1173,1179 ---- + + $(OUTDIR)/quickfix.obj: $(OUTDIR) quickfix.c $(INCL) + +! $(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c regexp_nfa.c $(INCL) + + $(OUTDIR)/screen.obj: $(OUTDIR) screen.c $(INCL) + +*** ../vim-7.3.969/src/Makefile 2013-05-06 04:21:35.000000000 +0200 +--- src/Makefile 2013-05-18 23:42:30.000000000 +0200 +*************** +*** 454,460 **** + + # MULTIBYTE - To edit multi-byte characters. + # Uncomment this when you want to edit a multibyte language. +! # It's automatically enabled with big features or IME support. + # Note: Compile on a machine where setlocale() actually works, otherwise the + # configure tests may fail. + #CONF_OPT_MULTIBYTE = --enable-multibyte +--- 454,460 ---- + + # MULTIBYTE - To edit multi-byte characters. + # Uncomment this when you want to edit a multibyte language. +! # It's automatically enabled with normal features, GTK or IME support. + # Note: Compile on a machine where setlocale() actually works, otherwise the + # configure tests may fail. + #CONF_OPT_MULTIBYTE = --enable-multibyte +*************** +*** 2664,2670 **** + objects/quickfix.o: quickfix.c + $(CCC) -o $@ quickfix.c + +! objects/regexp.o: regexp.c + $(CCC) -o $@ regexp.c + + objects/screen.o: screen.c +--- 2664,2670 ---- + objects/quickfix.o: quickfix.c + $(CCC) -o $@ quickfix.c + +! objects/regexp.o: regexp.c regexp_nfa.c + $(CCC) -o $@ regexp.c + + objects/screen.o: screen.c +*************** +*** 2938,2947 **** + auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \ + regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \ + globals.h farsi.h arabic.h +! objects/regexp.o: regexp.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \ +! ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \ +! gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \ +! arabic.h + objects/screen.o: screen.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \ + ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \ + gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \ +--- 2938,2947 ---- + auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \ + regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \ + globals.h farsi.h arabic.h +! objects/regexp.o: regexp.c regexp_nfa.c vim.h auto/config.h feature.h os_unix.h \ +! auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \ +! regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \ +! globals.h farsi.h arabic.h + objects/screen.o: screen.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \ + ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \ + gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \ +*** ../vim-7.3.969/src/regexp.c 2013-03-19 17:42:10.000000000 +0100 +--- src/regexp.c 2013-05-18 19:47:06.000000000 +0200 +*************** +*** 38,46 **** + * Named character class support added by Walter Briscoe (1998 Jul 01) + */ + + #include "vim.h" + +! #undef DEBUG + + /* + * The "internal use only" fields in regexp.h are present to pass info from +--- 38,57 ---- + * Named character class support added by Walter Briscoe (1998 Jul 01) + */ + ++ /* Uncomment the first if you do not want to see debugging logs or files ++ * related to regular expressions, even when compiling with -DDEBUG. ++ * Uncomment the second to get the regexp debugging. */ ++ /* #undef DEBUG */ ++ /* #define DEBUG */ ++ + #include "vim.h" + +! #ifdef DEBUG +! /* show/save debugging data when BT engine is used */ +! # define BT_REGEXP_DUMP +! /* save the debugging data to a file instead of displaying it */ +! # define BT_REGEXP_LOG +! #endif + + /* + * The "internal use only" fields in regexp.h are present to pass info from +*************** +*** 326,334 **** + /* Used for an error (down from) vim_regcomp(): give the error message, set + * rc_did_emsg and return NULL */ + #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL) +- #define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) + #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL) +! #define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) + + #define MAX_LIMIT (32767L << 16L) + +--- 337,346 ---- + /* Used for an error (down from) vim_regcomp(): give the error message, set + * rc_did_emsg and return NULL */ + #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL) + #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL) +! #define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) +! #define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL) +! #define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) + + #define MAX_LIMIT (32767L << 16L) + +*************** +*** 336,346 **** + static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n)); + static char_u *cstrchr __ARGS((char_u *, int)); + + #ifdef DEBUG +- static void regdump __ARGS((char_u *, regprog_T *)); + static char_u *regprop __ARGS((char_u *)); + #endif + + #define NOT_MULTI 0 + #define MULTI_ONE 1 + #define MULTI_MULT 2 +--- 348,365 ---- + static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n)); + static char_u *cstrchr __ARGS((char_u *, int)); + ++ #ifdef BT_REGEXP_DUMP ++ static void regdump __ARGS((char_u *, bt_regprog_T *)); ++ #endif + #ifdef DEBUG + static char_u *regprop __ARGS((char_u *)); + #endif + ++ static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); ++ static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%("); ++ static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); ++ static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); ++ + #define NOT_MULTI 0 + #define MULTI_ONE 1 + #define MULTI_MULT 2 +*************** +*** 630,636 **** + }; + #endif + +! static int curchr; + + /* arguments for reg() */ + #define REG_NOPAREN 0 /* toplevel reg() */ +--- 649,661 ---- + }; + #endif + +! static int curchr; /* currently parsed character */ +! /* Previous character. Note: prevchr is sometimes -1 when we are not at the +! * start, eg in /[ ^I]^ the pattern was never found even if it existed, +! * because ^ was taken to be magic -- webb */ +! static int prevchr; +! static int prevprevchr; /* previous-previous character */ +! static int nextchr; /* used for ungetchr() */ + + /* arguments for reg() */ + #define REG_NOPAREN 0 /* toplevel reg() */ +*************** +*** 680,685 **** +--- 705,713 ---- + static void regtail __ARGS((char_u *, char_u *)); + static void regoptail __ARGS((char_u *, char_u *)); + ++ static regengine_T bt_regengine; ++ static regengine_T nfa_regengine; ++ + /* + * Return TRUE if compiled regular expression "prog" can match a line break. + */ +*************** +*** 762,767 **** +--- 790,796 ---- + /* + * Produce the bytes for equivalence class "c". + * Currently only handles latin1, latin9 and utf-8. ++ * NOTE: When changing this function, also change nfa_emit_equi_class() + */ + static void + reg_equi_class(c) +*************** +*** 1239,1246 **** + return p; + } + + /* +! * vim_regcomp() - compile a regular expression into internal code + * Returns the program in allocated space. Returns NULL for an error. + * + * We can't allocate space until we know how big the compiled form will be, +--- 1268,1278 ---- + return p; + } + ++ static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags)); ++ + /* +! * bt_regcomp() - compile a regular expression into internal code for the +! * traditional back track matcher. + * Returns the program in allocated space. Returns NULL for an error. + * + * We can't allocate space until we know how big the compiled form will be, +*************** +*** 1259,1270 **** + * of the structure of the compiled regexp. + * "re_flags": RE_MAGIC and/or RE_STRING. + */ +! regprog_T * +! vim_regcomp(expr, re_flags) + char_u *expr; + int re_flags; + { +! regprog_T *r; + char_u *scan; + char_u *longest; + int len; +--- 1291,1302 ---- + * of the structure of the compiled regexp. + * "re_flags": RE_MAGIC and/or RE_STRING. + */ +! static regprog_T * +! bt_regcomp(expr, re_flags) + char_u *expr; + int re_flags; + { +! bt_regprog_T *r; + char_u *scan; + char_u *longest; + int len; +*************** +*** 1291,1297 **** + #endif + + /* Allocate space. */ +! r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE); + if (r == NULL) + return NULL; + +--- 1323,1329 ---- + #endif + + /* Allocate space. */ +! r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE); + if (r == NULL) + return NULL; + +*************** +*** 1386,1395 **** + r->regmlen = len; + } + } +! #ifdef DEBUG + regdump(expr, r); + #endif +! return r; + } + + /* +--- 1418,1428 ---- + r->regmlen = len; + } + } +! #ifdef BT_REGEXP_DUMP + regdump(expr, r); + #endif +! r->engine = &bt_regengine; +! return (regprog_T *)r; + } + + /* +*************** +*** 1436,1442 **** + #endif + + /* +! * reg - regular expression, i.e. main body or parenthesized thing + * + * Caller must absorb opening parenthesis. + * +--- 1469,1475 ---- + #endif + + /* +! * Parse regular expression, i.e. main body or parenthesized thing. + * + * Caller must absorb opening parenthesis. + * +*************** +*** 1473,1479 **** + { + /* Make a MOPEN node. */ + if (regnpar >= NSUBEXP) +! EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); + parno = regnpar; + ++regnpar; + ret = regnode(MOPEN + parno); +--- 1506,1512 ---- + { + /* Make a MOPEN node. */ + if (regnpar >= NSUBEXP) +! EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); + parno = regnpar; + ++regnpar; + ret = regnode(MOPEN + parno); +*************** +*** 1534,1547 **** + else + #endif + if (paren == REG_NPAREN) +! EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL); + else +! EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL); + } + else if (paren == REG_NOPAREN && peekchr() != NUL) + { + if (curchr == Magic(')')) +! EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL); + else + EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ + /* NOTREACHED */ +--- 1567,1580 ---- + else + #endif + if (paren == REG_NPAREN) +! EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); + else +! EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL); + } + else if (paren == REG_NOPAREN && peekchr() != NUL) + { + if (curchr == Magic(')')) +! EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); + else + EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ + /* NOTREACHED */ +*************** +*** 1556,1562 **** + } + + /* +! * Handle one alternative of an | operator. + * Implements the & operator. + */ + static char_u * +--- 1589,1595 ---- + } + + /* +! * Parse one alternative of an | operator. + * Implements the & operator. + */ + static char_u * +*************** +*** 1599,1605 **** + } + + /* +! * Handle one alternative of an | or & operator. + * Implements the concatenation operator. + */ + static char_u * +--- 1632,1638 ---- + } + + /* +! * Parse one alternative of an | or & operator. + * Implements the concatenation operator. + */ + static char_u * +*************** +*** 1679,1685 **** + } + + /* +! * regpiece - something followed by possible [*+=] + * + * Note that the branching code sequences used for = and the general cases + * of * and + are somewhat optimized: they use the same NOTHING node as +--- 1712,1718 ---- + } + + /* +! * Parse something followed by possible [*+=]. + * + * Note that the branching code sequences used for = and the general cases + * of * and + are somewhat optimized: they use the same NOTHING node as +*************** +*** 1759,1765 **** + } + } + if (lop == END) +! EMSG_M_RET_NULL(_("E59: invalid character after %s@"), + reg_magic == MAGIC_ALL); + /* Look behind must match with behind_pos. */ + if (lop == BEHIND || lop == NOBEHIND) +--- 1792,1798 ---- + } + } + if (lop == END) +! EMSG2_RET_NULL(_("E59: invalid character after %s@"), + reg_magic == MAGIC_ALL); + /* Look behind must match with behind_pos. */ + if (lop == BEHIND || lop == NOBEHIND) +*************** +*** 1793,1799 **** + else + { + if (num_complex_braces >= 10) +! EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"), + reg_magic == MAGIC_ALL); + reginsert(BRACE_COMPLEX + num_complex_braces, ret); + regoptail(ret, regnode(BACK)); +--- 1826,1832 ---- + else + { + if (num_complex_braces >= 10) +! EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"), + reg_magic == MAGIC_ALL); + reginsert(BRACE_COMPLEX + num_complex_braces, ret); + regoptail(ret, regnode(BACK)); +*************** +*** 1820,1827 **** + return ret; + } + + /* +! * regatom - the lowest level + * + * Optimization: gobbles an entire sequence of ordinary characters so that + * it can turn them into a single node, which is smaller to store and +--- 1853,1872 ---- + return ret; + } + ++ /* When making changes to classchars also change nfa_classcodes. */ ++ static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; ++ static int classcodes[] = { ++ ANY, IDENT, SIDENT, KWORD, SKWORD, ++ FNAME, SFNAME, PRINT, SPRINT, ++ WHITE, NWHITE, DIGIT, NDIGIT, ++ HEX, NHEX, OCTAL, NOCTAL, ++ WORD, NWORD, HEAD, NHEAD, ++ ALPHA, NALPHA, LOWER, NLOWER, ++ UPPER, NUPPER ++ }; ++ + /* +! * Parse the lowest level. + * + * Optimization: gobbles an entire sequence of ordinary characters so that + * it can turn them into a single node, which is smaller to store and +*************** +*** 1836,1850 **** + int cpo_lit; /* 'cpoptions' contains 'l' flag */ + int cpo_bsl; /* 'cpoptions' contains '\' flag */ + int c; +- static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; +- static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD, +- FNAME, SFNAME, PRINT, SPRINT, +- WHITE, NWHITE, DIGIT, NDIGIT, +- HEX, NHEX, OCTAL, NOCTAL, +- WORD, NWORD, HEAD, NHEAD, +- ALPHA, NALPHA, LOWER, NLOWER, +- UPPER, NUPPER +- }; + char_u *p; + int extra = 0; + +--- 1881,1886 ---- +*************** +*** 2140,2146 **** + while ((c = getchr()) != ']') + { + if (c == NUL) +! EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["), + reg_magic == MAGIC_ALL); + br = regnode(BRANCH); + if (ret == NULL) +--- 2176,2182 ---- + while ((c = getchr()) != ']') + { + if (c == NUL) +! EMSG2_RET_NULL(_("E69: Missing ] after %s%%["), + reg_magic == MAGIC_ALL); + br = regnode(BRANCH); + if (ret == NULL) +*************** +*** 2156,2162 **** + return NULL; + } + if (ret == NULL) +! EMSG_M_RET_NULL(_("E70: Empty %s%%[]"), + reg_magic == MAGIC_ALL); + lastbranch = regnode(BRANCH); + br = regnode(NOTHING); +--- 2192,2198 ---- + return NULL; + } + if (ret == NULL) +! EMSG2_RET_NULL(_("E70: Empty %s%%[]"), + reg_magic == MAGIC_ALL); + lastbranch = regnode(BRANCH); + br = regnode(NOTHING); +*************** +*** 2200,2206 **** + } + + if (i < 0) +! EMSG_M_RET_NULL( + _("E678: Invalid character after %s%%[dxouU]"), + reg_magic == MAGIC_ALL); + #ifdef FEAT_MBYTE +--- 2236,2242 ---- + } + + if (i < 0) +! EMSG2_RET_NULL( + _("E678: Invalid character after %s%%[dxouU]"), + reg_magic == MAGIC_ALL); + #ifdef FEAT_MBYTE +*************** +*** 2272,2278 **** + } + } + +! EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"), + reg_magic == MAGIC_ALL); + } + } +--- 2308,2314 ---- + } + } + +! EMSG2_RET_NULL(_("E71: Invalid character after %s%%"), + reg_magic == MAGIC_ALL); + } + } +*************** +*** 2567,2574 **** + break; + } + else if (reg_strict) +! EMSG_M_RET_NULL(_("E769: Missing ] after %s["), +! reg_magic > MAGIC_OFF); + } + /* FALLTHROUGH */ + +--- 2603,2609 ---- + break; + } + else if (reg_strict) +! EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); + } + /* FALLTHROUGH */ + +*************** +*** 2659,2665 **** + #endif + + /* +! * emit a node + * Return pointer to generated code. + */ + static char_u * +--- 2694,2700 ---- + #endif + + /* +! * Emit a node. + * Return pointer to generated code. + */ + static char_u * +*************** +*** 2711,2717 **** + #endif + + /* +! * reginsert - insert an operator in front of already-emitted operand + * + * Means relocating the operand. + */ +--- 2746,2752 ---- + #endif + + /* +! * Insert an operator in front of already-emitted operand + * + * Means relocating the operand. + */ +*************** +*** 2742,2748 **** + } + + /* +! * reginsert_limits - insert an operator in front of already-emitted operand. + * The operator has the given limit values as operands. Also set next pointer. + * + * Means relocating the operand. +--- 2777,2783 ---- + } + + /* +! * Insert an operator in front of already-emitted operand. + * The operator has the given limit values as operands. Also set next pointer. + * + * Means relocating the operand. +*************** +*** 2794,2800 **** + } + + /* +! * regtail - set the next-pointer at the end of a node chain + */ + static void + regtail(p, val) +--- 2829,2835 ---- + } + + /* +! * Set the next-pointer at the end of a node chain. + */ + static void + regtail(p, val) +*************** +*** 2835,2841 **** + } + + /* +! * regoptail - regtail on item after a BRANCH; nop if none + */ + static void + regoptail(p, val) +--- 2870,2876 ---- + } + + /* +! * Like regtail, on item after a BRANCH; nop if none. + */ + static void + regoptail(p, val) +*************** +*** 2851,2872 **** + } + + /* +! * getchr() - get the next character from the pattern. We know about +! * magic and such, so therefore we need a lexical analyzer. + */ + +- /* static int curchr; */ +- static int prevprevchr; +- static int prevchr; +- static int nextchr; /* used for ungetchr() */ +- /* +- * Note: prevchr is sometimes -1 when we are not at the start, +- * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was +- * taken to be magic -- webb +- */ + static int at_start; /* True when on the first character */ + static int prev_at_start; /* True when on the second character */ + + static void + initchr(str) + char_u *str; +--- 2886,2900 ---- + } + + /* +! * Functions for getting characters from the regexp input. + */ + + static int at_start; /* True when on the first character */ + static int prev_at_start; /* True when on the second character */ + ++ /* ++ * Start parsing at "str". ++ */ + static void + initchr(str) + char_u *str; +*************** +*** 2878,2883 **** +--- 2906,2914 ---- + prev_at_start = FALSE; + } + ++ /* ++ * Get the next character without advancing. ++ */ + static int + peekchr() + { +*************** +*** 3086,3091 **** +--- 3117,3126 ---- + prevprevchr = prpr; + } + ++ /* ++ * Get the next character from the pattern. We know about magic and such, so ++ * therefore we need a lexical analyzer. ++ */ + static int + getchr() + { +*************** +*** 3340,3347 **** + } regbehind_T; + + static char_u *reg_getline __ARGS((linenr_T lnum)); +! static long vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); +! static long regtry __ARGS((regprog_T *prog, colnr_T col)); + static void cleanup_subexpr __ARGS((void)); + #ifdef FEAT_SYN_HL + static void cleanup_zsubexpr __ARGS((void)); +--- 3375,3382 ---- + } regbehind_T; + + static char_u *reg_getline __ARGS((linenr_T lnum)); +! static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); +! static long regtry __ARGS((bt_regprog_T *prog, colnr_T col)); + static void cleanup_subexpr __ARGS((void)); + #ifdef FEAT_SYN_HL + static void cleanup_zsubexpr __ARGS((void)); +*************** +*** 3398,3404 **** + /* + * Sometimes need to save a copy of a line. Since alloc()/free() is very + * slow, we keep one allocated piece of memory and only re-allocate it when +! * it's too small. It's freed in vim_regexec_both() when finished. + */ + static char_u *reg_tofree = NULL; + static unsigned reg_tofreelen; +--- 3433,3439 ---- + /* + * Sometimes need to save a copy of a line. Since alloc()/free() is very + * slow, we keep one allocated piece of memory and only re-allocate it when +! * it's too small. It's freed in bt_regexec_both() when finished. + */ + static char_u *reg_tofree = NULL; + static unsigned reg_tofreelen; +*************** +*** 3556,3561 **** +--- 3591,3598 ---- + /* TRUE if using multi-line regexp. */ + #define REG_MULTI (reg_match == NULL) + ++ static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); ++ + /* + * Match a regexp against a string. + * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). +*************** +*** 3563,3570 **** + * + * Return TRUE if there is a match, FALSE if not. + */ +! int +! vim_regexec(rmp, line, col) + regmatch_T *rmp; + char_u *line; /* string to match against */ + colnr_T col; /* column to start looking for match */ +--- 3600,3607 ---- + * + * Return TRUE if there is a match, FALSE if not. + */ +! static int +! bt_regexec(rmp, line, col) + regmatch_T *rmp; + char_u *line; /* string to match against */ + colnr_T col; /* column to start looking for match */ +*************** +*** 3580,3595 **** + ireg_icombine = FALSE; + #endif + ireg_maxcol = 0; +! return (vim_regexec_both(line, col, NULL) != 0); + } + + #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ + || defined(FIND_REPLACE_DIALOG) || defined(PROTO) + /* + * Like vim_regexec(), but consider a "\n" in "line" to be a line break. + */ +! int +! vim_regexec_nl(rmp, line, col) + regmatch_T *rmp; + char_u *line; /* string to match against */ + colnr_T col; /* column to start looking for match */ +--- 3617,3635 ---- + ireg_icombine = FALSE; + #endif + ireg_maxcol = 0; +! return (bt_regexec_both(line, col, NULL) != 0); + } + + #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ + || defined(FIND_REPLACE_DIALOG) || defined(PROTO) ++ ++ static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); ++ + /* + * Like vim_regexec(), but consider a "\n" in "line" to be a line break. + */ +! static int +! bt_regexec_nl(rmp, line, col) + regmatch_T *rmp; + char_u *line; /* string to match against */ + colnr_T col; /* column to start looking for match */ +*************** +*** 3605,3614 **** + ireg_icombine = FALSE; + #endif + ireg_maxcol = 0; +! return (vim_regexec_both(line, col, NULL) != 0); + } + #endif + + /* + * Match a regexp against multiple lines. + * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). +--- 3645,3656 ---- + ireg_icombine = FALSE; + #endif + ireg_maxcol = 0; +! return (bt_regexec_both(line, col, NULL) != 0); + } + #endif + ++ static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); ++ + /* + * Match a regexp against multiple lines. + * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). +*************** +*** 3617,3624 **** + * Return zero if there is no match. Return number of lines contained in the + * match otherwise. + */ +! long +! vim_regexec_multi(rmp, win, buf, lnum, col, tm) + regmmatch_T *rmp; + win_T *win; /* window in which to search or NULL */ + buf_T *buf; /* buffer in which to search */ +--- 3659,3666 ---- + * Return zero if there is no match. Return number of lines contained in the + * match otherwise. + */ +! static long +! bt_regexec_multi(rmp, win, buf, lnum, col, tm) + regmmatch_T *rmp; + win_T *win; /* window in which to search or NULL */ + buf_T *buf; /* buffer in which to search */ +*************** +*** 3641,3647 **** + #endif + ireg_maxcol = rmp->rmm_maxcol; + +! r = vim_regexec_both(NULL, col, tm); + + return r; + } +--- 3683,3689 ---- + #endif + ireg_maxcol = rmp->rmm_maxcol; + +! r = bt_regexec_both(NULL, col, tm); + + return r; + } +*************** +*** 3651,3662 **** + * lines ("line" is NULL, use reg_getline()). + */ + static long +! vim_regexec_both(line, col, tm) + char_u *line; + colnr_T col; /* column to start looking for match */ + proftime_T *tm UNUSED; /* timeout limit or NULL */ + { +! regprog_T *prog; + char_u *s; + long retval = 0L; + +--- 3693,3704 ---- + * lines ("line" is NULL, use reg_getline()). + */ + static long +! bt_regexec_both(line, col, tm) + char_u *line; + colnr_T col; /* column to start looking for match */ + proftime_T *tm UNUSED; /* timeout limit or NULL */ + { +! bt_regprog_T *prog; + char_u *s; + long retval = 0L; + +*************** +*** 3682,3695 **** + + if (REG_MULTI) + { +! prog = reg_mmatch->regprog; + line = reg_getline((linenr_T)0); + reg_startpos = reg_mmatch->startpos; + reg_endpos = reg_mmatch->endpos; + } + else + { +! prog = reg_match->regprog; + reg_startp = reg_match->startp; + reg_endp = reg_match->endp; + } +--- 3724,3737 ---- + + if (REG_MULTI) + { +! prog = (bt_regprog_T *)reg_mmatch->regprog; + line = reg_getline((linenr_T)0); + reg_startpos = reg_mmatch->startpos; + reg_endpos = reg_mmatch->endpos; + } + else + { +! prog = (bt_regprog_T *)reg_match->regprog; + reg_startp = reg_match->startp; + reg_endp = reg_match->endp; + } +*************** +*** 3931,3937 **** + */ + static long + regtry(prog, col) +! regprog_T *prog; + colnr_T col; + { + reginput = regline + col; +--- 3973,3979 ---- + */ + static long + regtry(prog, col) +! bt_regprog_T *prog; + colnr_T col; + { + reginput = regline + col; +*************** +*** 4063,4069 **** + #define RA_NOMATCH 5 /* didn't match */ + + /* Make "regstack" and "backpos" empty. They are allocated and freed in +! * vim_regexec_both() to reduce malloc()/free() calls. */ + regstack.ga_len = 0; + backpos.ga_len = 0; + +--- 4105,4111 ---- + #define RA_NOMATCH 5 /* didn't match */ + + /* Make "regstack" and "backpos" empty. They are allocated and freed in +! * bt_regexec_both() to reduce malloc()/free() calls. */ + regstack.ga_len = 0; + backpos.ga_len = 0; + +*************** +*** 4072,4085 **** + */ + for (;;) + { +! /* Some patterns my cause a long time to match, even though they are not + * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */ + fast_breakcheck(); + + #ifdef DEBUG + if (scan != NULL && regnarrate) + { +! mch_errmsg(regprop(scan)); + mch_errmsg("(\n"); + } + #endif +--- 4114,4127 ---- + */ + for (;;) + { +! /* Some patterns may cause a long time to match, even though they are not + * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */ + fast_breakcheck(); + + #ifdef DEBUG + if (scan != NULL && regnarrate) + { +! mch_errmsg((char *)regprop(scan)); + mch_errmsg("(\n"); + } + #endif +*************** +*** 4100,4106 **** + #ifdef DEBUG + if (regnarrate) + { +! mch_errmsg(regprop(scan)); + mch_errmsg("...\n"); + # ifdef FEAT_SYN_HL + if (re_extmatch_in != NULL) +--- 4142,4148 ---- + #ifdef DEBUG + if (regnarrate) + { +! mch_errmsg((char *)regprop(scan)); + mch_errmsg("...\n"); + # ifdef FEAT_SYN_HL + if (re_extmatch_in != NULL) +*************** +*** 4112,4118 **** + { + mch_errmsg(" \""); + if (re_extmatch_in->matches[i] != NULL) +! mch_errmsg(re_extmatch_in->matches[i]); + mch_errmsg("\"\n"); + } + } +--- 4154,4160 ---- + { + mch_errmsg(" \""); + if (re_extmatch_in->matches[i] != NULL) +! mch_errmsg((char *)re_extmatch_in->matches[i]); + mch_errmsg("\"\n"); + } + } +*************** +*** 6091,6099 **** + static int + prog_magic_wrong() + { +! if (UCHARAT(REG_MULTI +! ? reg_mmatch->regprog->program +! : reg_match->regprog->program) != REGMAGIC) + { + EMSG(_(e_re_corr)); + return TRUE; +--- 6133,6146 ---- + static int + prog_magic_wrong() + { +! regprog_T *prog; +! +! prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog; +! if (prog->engine == &nfa_regengine) +! /* For NFA matcher we don't check the magic */ +! return FALSE; +! +! if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC) + { + EMSG(_(e_re_corr)); + return TRUE; +*************** +*** 6318,6324 **** + } + + +! #ifdef DEBUG + + /* + * regdump - dump a regexp onto stdout in vaguely comprehensible form +--- 6365,6371 ---- + } + + +! #ifdef BT_REGEXP_DUMP + + /* + * regdump - dump a regexp onto stdout in vaguely comprehensible form +*************** +*** 6326,6339 **** + static void + regdump(pattern, r) + char_u *pattern; +! regprog_T *r; + { + char_u *s; + int op = EXACTLY; /* Arbitrary non-END op. */ + char_u *next; + char_u *end = NULL; + +! printf("\r\nregcomp(%s):\r\n", pattern); + + s = r->program + 1; + /* +--- 6373,6394 ---- + static void + regdump(pattern, r) + char_u *pattern; +! bt_regprog_T *r; + { + char_u *s; + int op = EXACTLY; /* Arbitrary non-END op. */ + char_u *next; + char_u *end = NULL; ++ FILE *f; + +! #ifdef BT_REGEXP_LOG +! f = fopen("bt_regexp_log.log", "a"); +! #else +! f = stdout; +! #endif +! if (f == NULL) +! return; +! fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern); + + s = r->program + 1; + /* +*************** +*** 6343,6360 **** + while (op != END || s <= end) + { + op = OP(s); +! printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */ + next = regnext(s); + if (next == NULL) /* Next ptr. */ +! printf("(0)"); + else +! printf("(%d)", (int)((s - r->program) + (next - s))); + if (end < next) + end = next; + if (op == BRACE_LIMITS) + { + /* Two short ints */ +! printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s)); + s += 8; + } + s += 3; +--- 6398,6415 ---- + while (op != END || s <= end) + { + op = OP(s); +! fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */ + next = regnext(s); + if (next == NULL) /* Next ptr. */ +! fprintf(f, "(0)"); + else +! fprintf(f, "(%d)", (int)((s - r->program) + (next - s))); + if (end < next) + end = next; + if (op == BRACE_LIMITS) + { + /* Two short ints */ +! fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s)); + s += 8; + } + s += 3; +*************** +*** 6363,6387 **** + || op == EXACTLY) + { + /* Literal string, where present. */ + while (*s != NUL) +! printf("%c", *s++); + s++; + } +! printf("\r\n"); + } + + /* Header fields of interest. */ + if (r->regstart != NUL) +! printf("start `%s' 0x%x; ", r->regstart < 256 + ? (char *)transchar(r->regstart) + : "multibyte", r->regstart); + if (r->reganch) +! printf("anchored; "); + if (r->regmust != NULL) +! printf("must have \"%s\"", r->regmust); +! printf("\r\n"); + } + + /* + * regprop - printable representation of opcode + */ +--- 6418,6450 ---- + || op == EXACTLY) + { + /* Literal string, where present. */ ++ fprintf(f, "\nxxxxxxxxx\n"); + while (*s != NUL) +! fprintf(f, "%c", *s++); +! fprintf(f, "\nxxxxxxxxx\n"); + s++; + } +! fprintf(f, "\r\n"); + } + + /* Header fields of interest. */ + if (r->regstart != NUL) +! fprintf(f, "start `%s' 0x%x; ", r->regstart < 256 + ? (char *)transchar(r->regstart) + : "multibyte", r->regstart); + if (r->reganch) +! fprintf(f, "anchored; "); + if (r->regmust != NULL) +! fprintf(f, "must have \"%s\"", r->regmust); +! fprintf(f, "\r\n"); +! +! #ifdef BT_REGEXP_LOG +! fclose(f); +! #endif + } ++ #endif /* BT_REGEXP_DUMP */ + ++ #ifdef DEBUG + /* + * regprop - printable representation of opcode + */ +*************** +*** 6389,6400 **** + regprop(op) + char_u *op; + { +! char_u *p; +! static char_u buf[50]; + +! (void) strcpy(buf, ":"); + +! switch (OP(op)) + { + case BOL: + p = "BOL"; +--- 6452,6463 ---- + regprop(op) + char_u *op; + { +! char *p; +! static char buf[50]; + +! STRCPY(buf, ":"); + +! switch ((int) OP(op)) + { + case BOL: + p = "BOL"; +*************** +*** 6761,6770 **** + break; + } + if (p != NULL) +! (void) strcat(buf, p); +! return buf; + } +! #endif + + #ifdef FEAT_MBYTE + static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3)); +--- 6824,6833 ---- + break; + } + if (p != NULL) +! STRCAT(buf, p); +! return (char_u *)buf; + } +! #endif /* DEBUG */ + + #ifdef FEAT_MBYTE + static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3)); +*************** +*** 7667,7669 **** +--- 7730,7916 ---- + return retval; + } + #endif ++ ++ static regengine_T bt_regengine = ++ { ++ bt_regcomp, ++ bt_regexec, ++ #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ ++ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) ++ bt_regexec_nl, ++ #endif ++ bt_regexec_multi ++ #ifdef DEBUG ++ ,(char_u *)"" ++ #endif ++ }; ++ ++ ++ #include "regexp_nfa.c" ++ ++ static regengine_T nfa_regengine = ++ { ++ nfa_regcomp, ++ nfa_regexec, ++ #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ ++ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) ++ nfa_regexec_nl, ++ #endif ++ nfa_regexec_multi ++ #ifdef DEBUG ++ ,(char_u *)"" ++ #endif ++ }; ++ ++ /* Which regexp engine to use? Needed for vim_regcomp(). ++ * Must match with 'regexpengine'. */ ++ static int regexp_engine = 0; ++ #define AUTOMATIC_ENGINE 0 ++ #define BACKTRACKING_ENGINE 1 ++ #define NFA_ENGINE 2 ++ #ifdef DEBUG ++ static char_u regname[][30] = { ++ "AUTOMATIC Regexp Engine", ++ "BACKTACKING Regexp Engine", ++ "NFA Regexp Engine" ++ }; ++ #endif ++ ++ /* ++ * Compile a regular expression into internal code. ++ * Returns the program in allocated memory. Returns NULL for an error. ++ */ ++ regprog_T * ++ vim_regcomp(expr_arg, re_flags) ++ char_u *expr_arg; ++ int re_flags; ++ { ++ regprog_T *prog = NULL; ++ char_u *expr = expr_arg; ++ ++ syntax_error = FALSE; ++ regexp_engine = p_re; ++ ++ /* Check for prefix "\%#=", that sets the regexp engine */ ++ if (STRNCMP(expr, "\\%#=", 4) == 0) ++ { ++ int newengine = expr[4] - '0'; ++ ++ if (newengine == AUTOMATIC_ENGINE ++ || newengine == BACKTRACKING_ENGINE ++ || newengine == NFA_ENGINE) ++ { ++ regexp_engine = expr[4] - '0'; ++ expr += 5; ++ #ifdef DEBUG ++ EMSG3("New regexp mode selected (%d): %s", regexp_engine, ++ regname[newengine]); ++ #endif ++ } ++ else ++ { ++ EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used ")); ++ regexp_engine = AUTOMATIC_ENGINE; ++ } ++ } ++ #ifdef DEBUG ++ bt_regengine.expr = expr; ++ nfa_regengine.expr = expr; ++ #endif ++ ++ /* ++ * First try the NFA engine, unless backtracking was requested. ++ */ ++ if (regexp_engine != BACKTRACKING_ENGINE) ++ prog = nfa_regengine.regcomp(expr, re_flags); ++ else ++ prog = bt_regengine.regcomp(expr, re_flags); ++ ++ if (prog == NULL) /* error compiling regexp with initial engine */ ++ { ++ #ifdef DEBUG ++ if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */ ++ { ++ FILE *f; ++ f = fopen("debug.log", "a"); ++ if (f) ++ { ++ if (!syntax_error) ++ fprintf(f, "NFA engine could not handle \"%s\"\n", expr); ++ else ++ fprintf(f, "Syntax error in \"%s\"\n", expr); ++ fclose(f); ++ } ++ else ++ EMSG("(NFA) Could not open \"debug.log\" to write !!!"); ++ /* ++ if (syntax_error) ++ EMSG("NFA Regexp: Syntax Error !"); ++ */ ++ } ++ #endif ++ /* ++ * If NFA engine failed, then revert to the backtracking engine. ++ * Except when there was a syntax error, which was properly handled by ++ * NFA engine. ++ */ ++ if (regexp_engine == AUTOMATIC_ENGINE) ++ if (!syntax_error) ++ prog = bt_regengine.regcomp(expr, re_flags); ++ ++ } /* endif prog==NULL */ ++ ++ ++ return prog; ++ } ++ ++ /* ++ * Match a regexp against a string. ++ * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). ++ * Uses curbuf for line count and 'iskeyword'. ++ * ++ * Return TRUE if there is a match, FALSE if not. ++ */ ++ int ++ vim_regexec(rmp, line, col) ++ regmatch_T *rmp; ++ char_u *line; /* string to match against */ ++ colnr_T col; /* column to start looking for match */ ++ { ++ return rmp->regprog->engine->regexec(rmp, line, col); ++ } ++ ++ #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ ++ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) ++ /* ++ * Like vim_regexec(), but consider a "\n" in "line" to be a line break. ++ */ ++ int ++ vim_regexec_nl(rmp, line, col) ++ regmatch_T *rmp; ++ char_u *line; ++ colnr_T col; ++ { ++ return rmp->regprog->engine->regexec_nl(rmp, line, col); ++ } ++ #endif ++ ++ /* ++ * Match a regexp against multiple lines. ++ * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). ++ * Uses curbuf for line count and 'iskeyword'. ++ * ++ * Return zero if there is no match. Return number of lines contained in the ++ * match otherwise. ++ */ ++ long ++ vim_regexec_multi(rmp, win, buf, lnum, col, tm) ++ regmmatch_T *rmp; ++ win_T *win; /* window in which to search or NULL */ ++ buf_T *buf; /* buffer in which to search */ ++ linenr_T lnum; /* nr of line to start looking for match */ ++ colnr_T col; /* column to start looking for match */ ++ proftime_T *tm; /* timeout limit or NULL */ ++ { ++ return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm); ++ } +*** ../vim-7.3.969/src/regexp.h 2010-08-15 21:57:27.000000000 +0200 +--- src/regexp.h 2013-05-18 17:24:42.000000000 +0200 +*************** +*** 22,41 **** + #define NSUBEXP 10 + + /* + * Structure returned by vim_regcomp() to pass on to vim_regexec(). + * These fields are only to be used in regexp.c! +! * See regep.c for an explanation. + */ + typedef struct + { + int regstart; + char_u reganch; + char_u *regmust; + int regmlen; +- unsigned regflags; + char_u reghasz; +! char_u program[1]; /* actually longer.. */ +! } regprog_T; + + /* + * Structure to be used for single-line matching. +--- 22,97 ---- + #define NSUBEXP 10 + + /* ++ * In the NFA engine: how many braces are allowed. ++ * TODO(RE): Use dynamic memory allocation instead of static, like here ++ */ ++ #define NFA_MAX_BRACES 20 ++ ++ typedef struct regengine regengine_T; ++ ++ typedef struct thread thread_T; ++ ++ /* + * Structure returned by vim_regcomp() to pass on to vim_regexec(). ++ * This is the general structure. For the actual matcher, two specific ++ * structures are used. See code below. ++ */ ++ typedef struct regprog ++ { ++ regengine_T *engine; ++ unsigned regflags; ++ } regprog_T; ++ ++ /* ++ * Structure used by the back track matcher. + * These fields are only to be used in regexp.c! +! * See regexp.c for an explanation. + */ + typedef struct + { ++ /* These two members implement regprog_T */ ++ regengine_T *engine; ++ unsigned regflags; ++ + int regstart; + char_u reganch; + char_u *regmust; + int regmlen; + char_u reghasz; +! char_u program[1]; /* actually longer.. */ +! } bt_regprog_T; +! +! /* +! * Structure representing a NFA state. +! * A NFA state may have no outgoing edge, when it is a NFA_MATCH state. +! */ +! typedef struct nfa_state nfa_state_T; +! struct nfa_state +! { +! int c; +! nfa_state_T *out; +! nfa_state_T *out1; +! int id; +! int lastlist; +! int visits; +! thread_T *lastthread; +! int negated; +! }; +! +! /* +! * Structure used by the NFA matcher. +! */ +! typedef struct +! { +! /* These two members implement regprog_T */ +! regengine_T *engine; +! unsigned regflags; +! +! regprog_T regprog; +! nfa_state_T *start; +! int nstate; +! nfa_state_T state[0]; /* actually longer.. */ +! } nfa_regprog_T; + + /* + * Structure to be used for single-line matching. +*************** +*** 78,81 **** +--- 134,151 ---- + char_u *matches[NSUBEXP]; + } reg_extmatch_T; + ++ struct regengine ++ { ++ regprog_T *(*regcomp)(char_u*, int); ++ int (*regexec)(regmatch_T*, char_u*, colnr_T); ++ #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ ++ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) ++ int (*regexec_nl)(regmatch_T*, char_u*, colnr_T); ++ #endif ++ long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*); ++ #ifdef DEBUG ++ char_u *expr; ++ #endif ++ }; ++ + #endif /* _REGEXP_H */ +*** ../vim-7.3.969/src/regexp_nfa.c 2013-05-19 19:10:10.000000000 +0200 +--- src/regexp_nfa.c 2013-05-19 16:08:09.000000000 +0200 +*************** +*** 0 **** +--- 1,3819 ---- ++ /* vi:set ts=8 sts=4 sw=4: ++ * ++ * NFA regular expression implementation. ++ * ++ * This file is included in "regexp.c". ++ */ ++ ++ #ifdef DEBUG ++ /* Comment this out to disable log files. They can get pretty big */ ++ # define ENABLE_LOG ++ # define LOG_NAME "log_nfarun.log" ++ #endif ++ ++ /* Upper limit allowed for {m,n} repetitions handled by NFA */ ++ #define NFA_BRACES_MAXLIMIT 10 ++ /* For allocating space for the postfix representation */ ++ #define NFA_POSTFIX_MULTIPLIER (NFA_BRACES_MAXLIMIT + 2)*2 ++ /* Size of stack, used when converting the postfix regexp into NFA */ ++ #define NFA_STACK_SIZE 1024 ++ ++ enum ++ { ++ NFA_SPLIT = -1024, ++ NFA_MATCH, ++ NFA_SKIP_CHAR, /* matches a 0-length char */ ++ NFA_END_NEG_RANGE, /* Used when expanding [^ab] */ ++ ++ NFA_CONCAT, ++ NFA_OR, ++ NFA_STAR, ++ NFA_PLUS, ++ NFA_QUEST, ++ NFA_QUEST_NONGREEDY, /* Non-greedy version of \? */ ++ NFA_NOT, /* used for [^ab] negated char ranges */ ++ ++ NFA_BOL, /* ^ Begin line */ ++ NFA_EOL, /* $ End line */ ++ NFA_BOW, /* \< Begin word */ ++ NFA_EOW, /* \> End word */ ++ NFA_BOF, /* \%^ Begin file */ ++ NFA_EOF, /* \%$ End file */ ++ NFA_NEWL, ++ NFA_ZSTART, /* Used for \zs */ ++ NFA_ZEND, /* Used for \ze */ ++ NFA_NOPEN, /* Start of subexpression marked with \%( */ ++ NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */ ++ NFA_START_INVISIBLE, ++ NFA_END_INVISIBLE, ++ NFA_MULTIBYTE, /* Next nodes in NFA are part of the same ++ multibyte char */ ++ NFA_END_MULTIBYTE, /* End of multibyte char in the NFA */ ++ NFA_COMPOSING, /* Next nodes in NFA are part of the ++ composing multibyte char */ ++ NFA_END_COMPOSING, /* End of a composing char in the NFA */ ++ ++ /* The following are used only in the postfix form, not in the NFA */ ++ NFA_PREV_ATOM_NO_WIDTH, /* Used for \@= */ ++ NFA_PREV_ATOM_NO_WIDTH_NEG, /* Used for \@! */ ++ NFA_PREV_ATOM_JUST_BEFORE, /* Used for \@<= */ ++ NFA_PREV_ATOM_JUST_BEFORE_NEG, /* Used for \@ */ ++ ++ NFA_MOPEN, ++ NFA_MCLOSE = NFA_MOPEN + NSUBEXP, ++ ++ /* NFA_FIRST_NL */ ++ NFA_ANY = NFA_MCLOSE + NSUBEXP, /* Match any one character. */ ++ NFA_ANYOF, /* Match any character in this string. */ ++ NFA_ANYBUT, /* Match any character not in this string. */ ++ NFA_IDENT, /* Match identifier char */ ++ NFA_SIDENT, /* Match identifier char but no digit */ ++ NFA_KWORD, /* Match keyword char */ ++ NFA_SKWORD, /* Match word char but no digit */ ++ NFA_FNAME, /* Match file name char */ ++ NFA_SFNAME, /* Match file name char but no digit */ ++ NFA_PRINT, /* Match printable char */ ++ NFA_SPRINT, /* Match printable char but no digit */ ++ NFA_WHITE, /* Match whitespace char */ ++ NFA_NWHITE, /* Match non-whitespace char */ ++ NFA_DIGIT, /* Match digit char */ ++ NFA_NDIGIT, /* Match non-digit char */ ++ NFA_HEX, /* Match hex char */ ++ NFA_NHEX, /* Match non-hex char */ ++ NFA_OCTAL, /* Match octal char */ ++ NFA_NOCTAL, /* Match non-octal char */ ++ NFA_WORD, /* Match word char */ ++ NFA_NWORD, /* Match non-word char */ ++ NFA_HEAD, /* Match head char */ ++ NFA_NHEAD, /* Match non-head char */ ++ NFA_ALPHA, /* Match alpha char */ ++ NFA_NALPHA, /* Match non-alpha char */ ++ NFA_LOWER, /* Match lowercase char */ ++ NFA_NLOWER, /* Match non-lowercase char */ ++ NFA_UPPER, /* Match uppercase char */ ++ NFA_NUPPER, /* Match non-uppercase char */ ++ NFA_FIRST_NL = NFA_ANY + ADD_NL, ++ NFA_LAST_NL = NFA_NUPPER + ADD_NL, ++ ++ /* Character classes [:alnum:] etc */ ++ NFA_CLASS_ALNUM, ++ NFA_CLASS_ALPHA, ++ NFA_CLASS_BLANK, ++ NFA_CLASS_CNTRL, ++ NFA_CLASS_DIGIT, ++ NFA_CLASS_GRAPH, ++ NFA_CLASS_LOWER, ++ NFA_CLASS_PRINT, ++ NFA_CLASS_PUNCT, ++ NFA_CLASS_SPACE, ++ NFA_CLASS_UPPER, ++ NFA_CLASS_XDIGIT, ++ NFA_CLASS_TAB, ++ NFA_CLASS_RETURN, ++ NFA_CLASS_BACKSPACE, ++ NFA_CLASS_ESCAPE ++ }; ++ ++ /* Keep in sync with classchars. */ ++ static int nfa_classcodes[] = { ++ NFA_ANY, NFA_IDENT, NFA_SIDENT, NFA_KWORD,NFA_SKWORD, ++ NFA_FNAME, NFA_SFNAME, NFA_PRINT, NFA_SPRINT, ++ NFA_WHITE, NFA_NWHITE, NFA_DIGIT, NFA_NDIGIT, ++ NFA_HEX, NFA_NHEX, NFA_OCTAL, NFA_NOCTAL, ++ NFA_WORD, NFA_NWORD, NFA_HEAD, NFA_NHEAD, ++ NFA_ALPHA, NFA_NALPHA, NFA_LOWER, NFA_NLOWER, ++ NFA_UPPER, NFA_NUPPER ++ }; ++ ++ static char_u e_misplaced[] = N_("E866: (NFA regexp) Misplaced %c"); ++ ++ /* ++ * NFA errors can be of 3 types: ++ * *** NFA runtime errors, when something unknown goes wrong. The NFA fails ++ * silently and revert the to backtracking engine. ++ * syntax_error = FALSE; ++ * *** Regexp syntax errors, when the input regexp is not syntactically correct. ++ * The NFA engine displays an error message, and nothing else happens. ++ * syntax_error = TRUE ++ * *** Unsupported features, when the input regexp uses an operator that is not ++ * implemented in the NFA. The NFA engine fails silently, and reverts to the ++ * old backtracking engine. ++ * syntax_error = FALSE ++ * "The NFA fails" means that "compiling the regexp with the NFA fails": ++ * nfa_regcomp() returns FAIL. ++ */ ++ static int syntax_error = FALSE; ++ ++ /* NFA regexp \ze operator encountered. */ ++ static int nfa_has_zend = FALSE; ++ ++ static int *post_start; /* holds the postfix form of r.e. */ ++ static int *post_end; ++ static int *post_ptr; ++ ++ static int nstate; /* Number of states in the NFA. */ ++ static int istate; /* Index in the state vector, used in new_state() */ ++ static int nstate_max; /* Upper bound of estimated number of states. */ ++ ++ ++ static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags)); ++ static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); ++ static int nfa_emit_equi_class __ARGS((int c, int neg)); ++ static void nfa_inc __ARGS((char_u **p)); ++ static void nfa_dec __ARGS((char_u **p)); ++ static int nfa_regatom __ARGS((void)); ++ static int nfa_regpiece __ARGS((void)); ++ static int nfa_regconcat __ARGS((void)); ++ static int nfa_regbranch __ARGS((void)); ++ static int nfa_reg __ARGS((int paren)); ++ #ifdef DEBUG ++ static void nfa_set_code __ARGS((int c)); ++ static void nfa_postfix_dump __ARGS((char_u *expr, int retval)); ++ static void nfa_print_state __ARGS((FILE *debugf, nfa_state_T *state, int ident)); ++ static void nfa_dump __ARGS((nfa_regprog_T *prog)); ++ #endif ++ static int *re2post __ARGS((void)); ++ static nfa_state_T *new_state __ARGS((int c, nfa_state_T *out, nfa_state_T *out1)); ++ static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size)); ++ static int check_char_class __ARGS((int class, int c)); ++ static void st_error __ARGS((int *postfix, int *end, int *p)); ++ static void nfa_save_listids __ARGS((nfa_state_T *start, int *list)); ++ static void nfa_restore_listids __ARGS((nfa_state_T *start, int *list)); ++ static void nfa_set_null_listids __ARGS((nfa_state_T *start)); ++ static void nfa_set_neg_listids __ARGS((nfa_state_T *start)); ++ static long nfa_regtry __ARGS((nfa_state_T *start, colnr_T col)); ++ static long nfa_regexec_both __ARGS((char_u *line, colnr_T col)); ++ static regprog_T *nfa_regcomp __ARGS((char_u *expr, int re_flags)); ++ static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); ++ static long nfa_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); ++ ++ /* helper functions used when doing re2post() ... regatom() parsing */ ++ #define EMIT(c) do { \ ++ if (post_ptr >= post_end) \ ++ return FAIL; \ ++ *post_ptr++ = c; \ ++ } while (0) ++ ++ #define EMIT_MBYTE(c) \ ++ len = (*mb_char2bytes)(c, buf); \ ++ EMIT(buf[0]); \ ++ for (i = 1; i < len; i++) \ ++ { \ ++ EMIT(buf[i]); \ ++ EMIT(NFA_CONCAT); \ ++ } \ ++ EMIT(NFA_MULTIBYTE); ++ ++ #define EMIT_COMPOSING_UTF(input) \ ++ len = utfc_ptr2len(input); \ ++ EMIT(input[0]); \ ++ for (i = 1; i < len; i++) \ ++ { \ ++ EMIT(input[i]); \ ++ EMIT(NFA_CONCAT); \ ++ } \ ++ EMIT(NFA_COMPOSING); ++ ++ /* ++ * Initialize internal variables before NFA compilation. ++ * Return OK on success, FAIL otherwise. ++ */ ++ static int ++ nfa_regcomp_start(expr, re_flags) ++ char_u *expr; ++ int re_flags; /* see vim_regcomp() */ ++ { ++ int postfix_size; ++ ++ nstate = 0; ++ istate = 0; ++ /* A reasonable estimation for size */ ++ nstate_max = (STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER; ++ ++ /* Size for postfix representation of expr */ ++ postfix_size = sizeof(*post_start) * nstate_max; ++ post_start = (int *)lalloc(postfix_size, TRUE); ++ if (post_start == NULL) ++ return FAIL; ++ vim_memset(post_start, 0, postfix_size); ++ post_ptr = post_start; ++ post_end = post_start + postfix_size; ++ nfa_has_zend = FALSE; ++ ++ regcomp_start(expr, re_flags); ++ ++ return OK; ++ } ++ ++ /* ++ * Search between "start" and "end" and try to recognize a ++ * character class in expanded form. For example [0-9]. ++ * On success, return the id the character class to be emitted. ++ * On failure, return 0 (=FAIL) ++ * Start points to the first char of the range, while end should point ++ * to the closing brace. ++ */ ++ static int ++ nfa_recognize_char_class(start, end, extra_newl) ++ char_u *start; ++ char_u *end; ++ int extra_newl; ++ { ++ int i; ++ /* Each of these variables takes up a char in "config[]", ++ * in the order they are here. */ ++ int not = FALSE, af = FALSE, AF = FALSE, az = FALSE, AZ = FALSE, ++ o7 = FALSE, o9 = FALSE, underscore = FALSE, newl = FALSE; ++ char_u *p; ++ #define NCONFIGS 16 ++ int classid[NCONFIGS] = { ++ NFA_DIGIT, NFA_NDIGIT, NFA_HEX, NFA_NHEX, ++ NFA_OCTAL, NFA_NOCTAL, NFA_WORD, NFA_NWORD, ++ NFA_HEAD, NFA_NHEAD, NFA_ALPHA, NFA_NALPHA, ++ NFA_LOWER, NFA_NLOWER, NFA_UPPER, NFA_NUPPER ++ }; ++ char_u myconfig[9]; ++ char_u config[NCONFIGS][9] = { ++ "000000100", /* digit */ ++ "100000100", /* non digit */ ++ "011000100", /* hex-digit */ ++ "111000100", /* non hex-digit */ ++ "000001000", /* octal-digit */ ++ "100001000", /* [^0-7] */ ++ "000110110", /* [0-9A-Za-z_] */ ++ "100110110", /* [^0-9A-Za-z_] */ ++ "000110010", /* head of word */ ++ "100110010", /* not head of word */ ++ "000110000", /* alphabetic char a-z */ ++ "100110000", /* non alphabetic char */ ++ "000100000", /* lowercase letter */ ++ "100100000", /* non lowercase */ ++ "000010000", /* uppercase */ ++ "100010000" /* non uppercase */ ++ }; ++ ++ if (extra_newl == TRUE) ++ newl = TRUE; ++ ++ if (*end != ']') ++ return FAIL; ++ p = start; ++ if (*p == '^') ++ { ++ not = TRUE; ++ p ++; ++ } ++ ++ while (p < end) ++ { ++ if (p + 2 < end && *(p + 1) == '-') ++ { ++ switch (*p) ++ { ++ case '0': ++ if (*(p + 2) == '9') ++ { ++ o9 = TRUE; ++ break; ++ } ++ else ++ if (*(p + 2) == '7') ++ { ++ o7 = TRUE; ++ break; ++ } ++ case 'a': ++ if (*(p + 2) == 'z') ++ { ++ az = TRUE; ++ break; ++ } ++ else ++ if (*(p + 2) == 'f') ++ { ++ af = TRUE; ++ break; ++ } ++ case 'A': ++ if (*(p + 2) == 'Z') ++ { ++ AZ = TRUE; ++ break; ++ } ++ else ++ if (*(p + 2) == 'F') ++ { ++ AF = TRUE; ++ break; ++ } ++ /* FALLTHROUGH */ ++ default: ++ return FAIL; ++ } ++ p += 3; ++ } ++ else if (p + 1 < end && *p == '\\' && *(p + 1) == 'n') ++ { ++ newl = TRUE; ++ p += 2; ++ } ++ else if (*p == '_') ++ { ++ underscore = TRUE; ++ p ++; ++ } ++ else if (*p == '\n') ++ { ++ newl = TRUE; ++ p ++; ++ } ++ else ++ return FAIL; ++ } /* while (p < end) */ ++ ++ if (p != end) ++ return FAIL; ++ ++ /* build the config that represents the ranges we gathered */ ++ STRCPY(myconfig, "000000000"); ++ if (not == TRUE) ++ myconfig[0] = '1'; ++ if (af == TRUE) ++ myconfig[1] = '1'; ++ if (AF == TRUE) ++ myconfig[2] = '1'; ++ if (az == TRUE) ++ myconfig[3] = '1'; ++ if (AZ == TRUE) ++ myconfig[4] = '1'; ++ if (o7 == TRUE) ++ myconfig[5] = '1'; ++ if (o9 == TRUE) ++ myconfig[6] = '1'; ++ if (underscore == TRUE) ++ myconfig[7] = '1'; ++ if (newl == TRUE) ++ { ++ myconfig[8] = '1'; ++ extra_newl = ADD_NL; ++ } ++ /* try to recognize character classes */ ++ for (i = 0; i < NCONFIGS; i++) ++ if (STRNCMP(myconfig, config[i],8) == 0) ++ return classid[i] + extra_newl; ++ ++ /* fallthrough => no success so far */ ++ return FAIL; ++ ++ #undef NCONFIGS ++ } ++ ++ /* ++ * Produce the bytes for equivalence class "c". ++ * Currently only handles latin1, latin9 and utf-8. ++ * Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is ++ * equivalent to 'a OR b OR c' ++ * ++ * NOTE! When changing this function, also update reg_equi_class() ++ */ ++ static int ++ nfa_emit_equi_class(c, neg) ++ int c; ++ int neg; ++ { ++ int first = TRUE; ++ int glue = neg == TRUE ? NFA_CONCAT : NFA_OR; ++ #define EMIT2(c) \ ++ EMIT(c); \ ++ if (neg == TRUE) { \ ++ EMIT(NFA_NOT); \ ++ } \ ++ if (first == FALSE) \ ++ EMIT(glue); \ ++ else \ ++ first = FALSE; \ ++ ++ #ifdef FEAT_MBYTE ++ if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 ++ || STRCMP(p_enc, "iso-8859-15") == 0) ++ #endif ++ { ++ switch (c) ++ { ++ case 'A': case '\300': case '\301': case '\302': ++ case '\303': case '\304': case '\305': ++ EMIT2('A'); EMIT2('\300'); EMIT2('\301'); ++ EMIT2('\302'); EMIT2('\303'); EMIT2('\304'); ++ EMIT2('\305'); ++ return OK; ++ ++ case 'C': case '\307': ++ EMIT2('C'); EMIT2('\307'); ++ return OK; ++ ++ case 'E': case '\310': case '\311': case '\312': case '\313': ++ EMIT2('E'); EMIT2('\310'); EMIT2('\311'); ++ EMIT2('\312'); EMIT2('\313'); ++ return OK; ++ ++ case 'I': case '\314': case '\315': case '\316': case '\317': ++ EMIT2('I'); EMIT2('\314'); EMIT2('\315'); ++ EMIT2('\316'); EMIT2('\317'); ++ return OK; ++ ++ case 'N': case '\321': ++ EMIT2('N'); EMIT2('\321'); ++ return OK; ++ ++ case 'O': case '\322': case '\323': case '\324': case '\325': ++ case '\326': ++ EMIT2('O'); EMIT2('\322'); EMIT2('\323'); ++ EMIT2('\324'); EMIT2('\325'); EMIT2('\326'); ++ return OK; ++ ++ case 'U': case '\331': case '\332': case '\333': case '\334': ++ EMIT2('U'); EMIT2('\331'); EMIT2('\332'); ++ EMIT2('\333'); EMIT2('\334'); ++ return OK; ++ ++ case 'Y': case '\335': ++ EMIT2('Y'); EMIT2('\335'); ++ return OK; ++ ++ case 'a': case '\340': case '\341': case '\342': ++ case '\343': case '\344': case '\345': ++ EMIT2('a'); EMIT2('\340'); EMIT2('\341'); ++ EMIT2('\342'); EMIT2('\343'); EMIT2('\344'); ++ EMIT2('\345'); ++ return OK; ++ ++ case 'c': case '\347': ++ EMIT2('c'); EMIT2('\347'); ++ return OK; ++ ++ case 'e': case '\350': case '\351': case '\352': case '\353': ++ EMIT2('e'); EMIT2('\350'); EMIT2('\351'); ++ EMIT2('\352'); EMIT2('\353'); ++ return OK; ++ ++ case 'i': case '\354': case '\355': case '\356': case '\357': ++ EMIT2('i'); EMIT2('\354'); EMIT2('\355'); ++ EMIT2('\356'); EMIT2('\357'); ++ return OK; ++ ++ case 'n': case '\361': ++ EMIT2('n'); EMIT2('\361'); ++ return OK; ++ ++ case 'o': case '\362': case '\363': case '\364': case '\365': ++ case '\366': ++ EMIT2('o'); EMIT2('\362'); EMIT2('\363'); ++ EMIT2('\364'); EMIT2('\365'); EMIT2('\366'); ++ return OK; ++ ++ case 'u': case '\371': case '\372': case '\373': case '\374': ++ EMIT2('u'); EMIT2('\371'); EMIT2('\372'); ++ EMIT2('\373'); EMIT2('\374'); ++ return OK; ++ ++ case 'y': case '\375': case '\377': ++ EMIT2('y'); EMIT2('\375'); EMIT2('\377'); ++ return OK; ++ ++ default: ++ return FAIL; ++ } ++ } ++ ++ EMIT(c); ++ return OK; ++ #undef EMIT2 ++ } ++ ++ /* ++ * Code to parse regular expression. ++ * ++ * We try to reuse parsing functions in regexp.c to ++ * minimize surprise and keep the syntax consistent. ++ */ ++ ++ /* ++ * Increments the pointer "p" by one (multi-byte) character. ++ */ ++ static void ++ nfa_inc(p) ++ char_u **p; ++ { ++ #ifdef FEAT_MBYTE ++ if (has_mbyte) ++ mb_ptr2char_adv(p); ++ else ++ #endif ++ *p = *p + 1; ++ } ++ ++ /* ++ * Decrements the pointer "p" by one (multi-byte) character. ++ */ ++ static void ++ nfa_dec(p) ++ char_u **p; ++ { ++ #ifdef FEAT_MBYTE ++ char_u *p2, *oldp; ++ ++ if (has_mbyte) ++ { ++ oldp = *p; ++ /* Try to find the multibyte char that advances to the current ++ * position. */ ++ do ++ { ++ *p = *p - 1; ++ p2 = *p; ++ mb_ptr2char_adv(&p2); ++ } while (p2 != oldp); ++ } ++ #else ++ *p = *p - 1; ++ #endif ++ } ++ ++ /* ++ * Parse the lowest level. ++ * ++ * An atom can be one of a long list of items. Many atoms match one character ++ * in the text. It is often an ordinary character or a character class. ++ * Braces can be used to make a pattern into an atom. The "\z(\)" construct ++ * is only for syntax highlighting. ++ * ++ * atom ::= ordinary-atom ++ * or \( pattern \) ++ * or \%( pattern \) ++ * or \z( pattern \) ++ */ ++ static int ++ nfa_regatom() ++ { ++ int c; ++ int charclass; ++ int equiclass; ++ int collclass; ++ int got_coll_char; ++ char_u *p; ++ char_u *endp; ++ #ifdef FEAT_MBYTE ++ char_u *old_regparse = regparse; ++ int clen; ++ int len; ++ static char_u buf[30]; ++ int i; ++ #endif ++ int extra = 0; ++ int first; ++ int emit_range; ++ int negated; ++ int result; ++ int startc = -1; ++ int endc = -1; ++ int oldstartc = -1; ++ int cpo_lit; /* 'cpoptions' contains 'l' flag */ ++ int cpo_bsl; /* 'cpoptions' contains '\' flag */ ++ int glue; /* ID that will "glue" nodes together */ ++ ++ cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL; ++ cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL; ++ ++ c = getchr(); ++ ++ #ifdef FEAT_MBYTE ++ /* clen has the length of the current char, without composing chars */ ++ clen = (*mb_char2len)(c); ++ if (has_mbyte && clen > 1) ++ goto nfa_do_multibyte; ++ #endif ++ switch (c) ++ { ++ case Magic('^'): ++ EMIT(NFA_BOL); ++ break; ++ ++ case Magic('$'): ++ EMIT(NFA_EOL); ++ #if defined(FEAT_SYN_HL) || defined(PROTO) ++ had_eol = TRUE; ++ #endif ++ break; ++ ++ case Magic('<'): ++ EMIT(NFA_BOW); ++ break; ++ ++ case Magic('>'): ++ EMIT(NFA_EOW); ++ break; ++ ++ case Magic('_'): ++ c = no_Magic(getchr()); ++ if (c == '^') /* "\_^" is start-of-line */ ++ { ++ EMIT(NFA_BOL); ++ break; ++ } ++ if (c == '$') /* "\_$" is end-of-line */ ++ { ++ EMIT(NFA_EOL); ++ #if defined(FEAT_SYN_HL) || defined(PROTO) ++ had_eol = TRUE; ++ #endif ++ break; ++ } ++ ++ extra = ADD_NL; ++ ++ /* "\_[" is collection plus newline */ ++ if (c == '[') ++ /* TODO: make this work ++ * goto collection; */ ++ return FAIL; ++ ++ /* "\_x" is character class plus newline */ ++ /*FALLTHROUGH*/ ++ ++ /* ++ * Character classes. ++ */ ++ case Magic('.'): ++ case Magic('i'): ++ case Magic('I'): ++ case Magic('k'): ++ case Magic('K'): ++ case Magic('f'): ++ case Magic('F'): ++ case Magic('p'): ++ case Magic('P'): ++ case Magic('s'): ++ case Magic('S'): ++ case Magic('d'): ++ case Magic('D'): ++ case Magic('x'): ++ case Magic('X'): ++ case Magic('o'): ++ case Magic('O'): ++ case Magic('w'): ++ case Magic('W'): ++ case Magic('h'): ++ case Magic('H'): ++ case Magic('a'): ++ case Magic('A'): ++ case Magic('l'): ++ case Magic('L'): ++ case Magic('u'): ++ case Magic('U'): ++ p = vim_strchr(classchars, no_Magic(c)); ++ if (p == NULL) ++ { ++ return FAIL; /* runtime error */ ++ } ++ #ifdef FEAT_MBYTE ++ /* When '.' is followed by a composing char ignore the dot, so that ++ * the composing char is matched here. */ ++ if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) ++ { ++ c = getchr(); ++ goto nfa_do_multibyte; ++ } ++ #endif ++ EMIT(nfa_classcodes[p - classchars]); ++ if (extra == ADD_NL) ++ { ++ EMIT(NFA_NEWL); ++ EMIT(NFA_OR); ++ regflags |= RF_HASNL; ++ } ++ break; ++ ++ case Magic('n'): ++ if (reg_string) ++ /* In a string "\n" matches a newline character. */ ++ EMIT(NL); ++ else ++ { ++ /* In buffer text "\n" matches the end of a line. */ ++ EMIT(NFA_NEWL); ++ regflags |= RF_HASNL; ++ } ++ break; ++ ++ case Magic('('): ++ if (nfa_reg(REG_PAREN) == FAIL) ++ return FAIL; /* cascaded error */ ++ break; ++ ++ case NUL: ++ syntax_error = TRUE; ++ EMSG_RET_FAIL(_("E865: (NFA) Regexp end encountered prematurely")); ++ ++ case Magic('|'): ++ case Magic('&'): ++ case Magic(')'): ++ syntax_error = TRUE; ++ EMSG2(_(e_misplaced), no_Magic(c)); ++ return FAIL; ++ ++ case Magic('='): ++ case Magic('?'): ++ case Magic('+'): ++ case Magic('@'): ++ case Magic('*'): ++ case Magic('{'): ++ /* these should follow an atom, not form an atom */ ++ syntax_error = TRUE; ++ EMSG2(_(e_misplaced), no_Magic(c)); ++ return FAIL; ++ ++ case Magic('~'): /* previous substitute pattern */ ++ /* Not supported yet */ ++ return FAIL; ++ ++ case Magic('1'): ++ case Magic('2'): ++ case Magic('3'): ++ case Magic('4'): ++ case Magic('5'): ++ case Magic('6'): ++ case Magic('7'): ++ case Magic('8'): ++ case Magic('9'): ++ /* not supported yet */ ++ return FAIL; ++ ++ case Magic('z'): ++ c = no_Magic(getchr()); ++ switch (c) ++ { ++ case 's': ++ EMIT(NFA_ZSTART); ++ break; ++ case 'e': ++ EMIT(NFA_ZEND); ++ nfa_has_zend = TRUE; ++ /* TODO: Currently \ze does not work properly. */ ++ return FAIL; ++ /* break; */ ++ case '1': ++ case '2': ++ case '3': ++ case '4': ++ case '5': ++ case '6': ++ case '7': ++ case '8': ++ case '9': ++ case '(': ++ /* \z1...\z9 and \z( not yet supported */ ++ return FAIL; ++ default: ++ syntax_error = TRUE; ++ EMSG2(_("E867: (NFA) Unknown operator '\\z%c'"), ++ no_Magic(c)); ++ return FAIL; ++ } ++ break; ++ ++ case Magic('%'): ++ c = no_Magic(getchr()); ++ switch (c) ++ { ++ /* () without a back reference */ ++ case '(': ++ if (nfa_reg(REG_NPAREN) == FAIL) ++ return FAIL; ++ EMIT(NFA_NOPEN); ++ break; ++ ++ case 'd': /* %d123 decimal */ ++ case 'o': /* %o123 octal */ ++ case 'x': /* %xab hex 2 */ ++ case 'u': /* %uabcd hex 4 */ ++ case 'U': /* %U1234abcd hex 8 */ ++ /* Not yet supported */ ++ return FAIL; ++ ++ c = coll_get_char(); ++ #ifdef FEAT_MBYTE ++ if ((*mb_char2len)(c) > 1) ++ { ++ EMIT_MBYTE(c); ++ } ++ else ++ #endif ++ EMIT(c); ++ break; ++ ++ /* Catch \%^ and \%$ regardless of where they appear in the ++ * pattern -- regardless of whether or not it makes sense. */ ++ case '^': ++ EMIT(NFA_BOF); ++ /* Not yet supported */ ++ return FAIL; ++ break; ++ ++ case '$': ++ EMIT(NFA_EOF); ++ /* Not yet supported */ ++ return FAIL; ++ break; ++ ++ case '#': ++ /* not supported yet */ ++ return FAIL; ++ break; ++ ++ case 'V': ++ /* not supported yet */ ++ return FAIL; ++ break; ++ ++ case '[': ++ /* \%[abc] not supported yet */ ++ return FAIL; ++ ++ default: ++ /* not supported yet */ ++ return FAIL; ++ } ++ break; ++ ++ /* collection: */ ++ case Magic('['): ++ /* ++ * Glue is emitted between several atoms from the []. ++ * It is either NFA_OR, or NFA_CONCAT. ++ * ++ * [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation) ++ * [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT ++ * NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix ++ * notation) ++ * ++ */ ++ ++ ++ /* Emit negation atoms, if needed. ++ * The CONCAT below merges the NOT with the previous node. */ ++ #define TRY_NEG() \ ++ if (negated == TRUE) \ ++ { \ ++ EMIT(NFA_NOT); \ ++ } ++ ++ /* Emit glue between important nodes : CONCAT or OR. */ ++ #define EMIT_GLUE() \ ++ if (first == FALSE) \ ++ EMIT(glue); \ ++ else \ ++ first = FALSE; ++ ++ p = regparse; ++ endp = skip_anyof(p); ++ if (*endp == ']') ++ { ++ /* ++ * Try to reverse engineer character classes. For example, ++ * recognize that [0-9] stands for \d and [A-Za-z_] with \h, ++ * and perform the necessary substitutions in the NFA. ++ */ ++ result = nfa_recognize_char_class(regparse, endp, ++ extra == ADD_NL); ++ if (result != FAIL) ++ { ++ if (result >= NFA_DIGIT && result <= NFA_NUPPER) ++ EMIT(result); ++ else /* must be char class + newline */ ++ { ++ EMIT(result - ADD_NL); ++ EMIT(NFA_NEWL); ++ EMIT(NFA_OR); ++ } ++ regparse = endp; ++ nfa_inc(®parse); ++ return OK; ++ } ++ /* ++ * Failed to recognize a character class. Use the simple ++ * version that turns [abc] into 'a' OR 'b' OR 'c' ++ */ ++ startc = endc = oldstartc = -1; ++ first = TRUE; /* Emitting first atom in this sequence? */ ++ negated = FALSE; ++ glue = NFA_OR; ++ if (*regparse == '^') /* negated range */ ++ { ++ negated = TRUE; ++ glue = NFA_CONCAT; ++ nfa_inc(®parse); ++ } ++ if (*regparse == '-') ++ { ++ startc = '-'; ++ EMIT(startc); ++ TRY_NEG(); ++ EMIT_GLUE(); ++ nfa_inc(®parse); ++ } ++ /* Emit the OR branches for each character in the [] */ ++ emit_range = FALSE; ++ while (regparse < endp) ++ { ++ oldstartc = startc; ++ startc = -1; ++ got_coll_char = FALSE; ++ if (*regparse == '[') ++ { ++ /* Check for [: :], [= =], [. .] */ ++ equiclass = collclass = 0; ++ charclass = get_char_class(®parse); ++ if (charclass == CLASS_NONE) ++ { ++ equiclass = get_equi_class(®parse); ++ if (equiclass == 0) ++ collclass = get_coll_element(®parse); ++ } ++ ++ /* Character class like [:alpha:] */ ++ if (charclass != CLASS_NONE) ++ { ++ switch (charclass) ++ { ++ case CLASS_ALNUM: ++ EMIT(NFA_CLASS_ALNUM); ++ break; ++ case CLASS_ALPHA: ++ EMIT(NFA_CLASS_ALPHA); ++ break; ++ case CLASS_BLANK: ++ EMIT(NFA_CLASS_BLANK); ++ break; ++ case CLASS_CNTRL: ++ EMIT(NFA_CLASS_CNTRL); ++ break; ++ case CLASS_DIGIT: ++ EMIT(NFA_CLASS_DIGIT); ++ break; ++ case CLASS_GRAPH: ++ EMIT(NFA_CLASS_GRAPH); ++ break; ++ case CLASS_LOWER: ++ EMIT(NFA_CLASS_LOWER); ++ break; ++ case CLASS_PRINT: ++ EMIT(NFA_CLASS_PRINT); ++ break; ++ case CLASS_PUNCT: ++ EMIT(NFA_CLASS_PUNCT); ++ break; ++ case CLASS_SPACE: ++ EMIT(NFA_CLASS_SPACE); ++ break; ++ case CLASS_UPPER: ++ EMIT(NFA_CLASS_UPPER); ++ break; ++ case CLASS_XDIGIT: ++ EMIT(NFA_CLASS_XDIGIT); ++ break; ++ case CLASS_TAB: ++ EMIT(NFA_CLASS_TAB); ++ break; ++ case CLASS_RETURN: ++ EMIT(NFA_CLASS_RETURN); ++ break; ++ case CLASS_BACKSPACE: ++ EMIT(NFA_CLASS_BACKSPACE); ++ break; ++ case CLASS_ESCAPE: ++ EMIT(NFA_CLASS_ESCAPE); ++ break; ++ } ++ TRY_NEG(); ++ EMIT_GLUE(); ++ continue; ++ } ++ /* Try equivalence class [=a=] and the like */ ++ if (equiclass != 0) ++ { ++ result = nfa_emit_equi_class(equiclass, negated); ++ if (result == FAIL) ++ { ++ /* should never happen */ ++ EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!")); ++ } ++ EMIT_GLUE(); ++ continue; ++ } ++ /* Try collating class like [. .] */ ++ if (collclass != 0) ++ { ++ startc = collclass; /* allow [.a.]-x as a range */ ++ /* Will emit the proper atom at the end of the ++ * while loop. */ ++ } ++ } ++ /* Try a range like 'a-x' or '\t-z' */ ++ if (*regparse == '-') ++ { ++ emit_range = TRUE; ++ startc = oldstartc; ++ nfa_inc(®parse); ++ continue; /* reading the end of the range */ ++ } ++ ++ /* Now handle simple and escaped characters. ++ * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim ++ * accepts "\t", "\e", etc., but only when the 'l' flag in ++ * 'cpoptions' is not included. ++ * Posix doesn't recognize backslash at all. ++ */ ++ if (*regparse == '\\' ++ && !cpo_bsl ++ && regparse + 1 <= endp ++ && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL ++ || (!cpo_lit ++ && vim_strchr(REGEXP_ABBR, regparse[1]) ++ != NULL) ++ ) ++ ) ++ { ++ nfa_inc(®parse); ++ ++ if (*regparse == 'n' || *regparse == 'n') ++ startc = reg_string ? NL : NFA_NEWL; ++ else ++ if (*regparse == 'd' ++ || *regparse == 'o' ++ || *regparse == 'x' ++ || *regparse == 'u' ++ || *regparse == 'U' ++ ) ++ { ++ /* TODO(RE) This needs more testing */ ++ startc = coll_get_char(); ++ got_coll_char = TRUE; ++ nfa_dec(®parse); ++ } ++ else ++ { ++ /* \r,\t,\e,\b */ ++ startc = backslash_trans(*regparse); ++ } ++ } ++ ++ /* Normal printable char */ ++ if (startc == -1) ++ #ifdef FEAT_MBYTE ++ startc = (*mb_ptr2char)(regparse); ++ #else ++ startc = *regparse; ++ #endif ++ ++ /* Previous char was '-', so this char is end of range. */ ++ if (emit_range) ++ { ++ endc = startc; startc = oldstartc; ++ if (startc > endc) ++ EMSG_RET_FAIL(_(e_invrange)); ++ #ifdef FEAT_MBYTE ++ if (has_mbyte && ((*mb_char2len)(startc) > 1 ++ || (*mb_char2len)(endc) > 1)) ++ { ++ if (endc > startc + 256) ++ EMSG_RET_FAIL(_(e_invrange)); ++ /* Emit the range. "startc" was already emitted, so ++ * skip it. */ ++ for (c = startc + 1; c <= endc; c++) ++ { ++ if ((*mb_char2len)(c) > 1) ++ { ++ EMIT_MBYTE(c); ++ } ++ else ++ EMIT(c); ++ TRY_NEG(); ++ EMIT_GLUE(); ++ } ++ emit_range = FALSE; ++ } ++ else ++ #endif ++ { ++ #ifdef EBCDIC ++ int alpha_only = FALSE; ++ ++ /* for alphabetical range skip the gaps ++ * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */ ++ if (isalpha(startc) && isalpha(endc)) ++ alpha_only = TRUE; ++ #endif ++ /* Emit the range. "startc" was already emitted, so ++ * skip it. */ ++ for (c = startc + 1; c <= endc; c++) ++ #ifdef EBCDIC ++ if (!alpha_only || isalpha(startc)) ++ #endif ++ { ++ EMIT(c); ++ TRY_NEG(); ++ EMIT_GLUE(); ++ } ++ emit_range = FALSE; ++ } ++ } ++ else ++ { ++ /* ++ * This char (startc) is not part of a range. Just ++ * emit it. ++ * ++ * Normally, simply emit startc. But if we get char ++ * code=0 from a collating char, then replace it with ++ * 0x0a. ++ * ++ * This is needed to completely mimic the behaviour of ++ * the backtracking engine. ++ */ ++ if (got_coll_char == TRUE && startc == 0) ++ EMIT(0x0a); ++ else ++ #ifdef FEAT_MBYTE ++ if ((*mb_char2len)(startc) > 1) ++ { ++ EMIT_MBYTE(startc); ++ } ++ else ++ #endif ++ EMIT(startc); ++ TRY_NEG(); ++ EMIT_GLUE(); ++ } ++ ++ nfa_inc(®parse); ++ } /* while (p < endp) */ ++ ++ nfa_dec(®parse); ++ if (*regparse == '-') /* if last, '-' is just a char */ ++ { ++ EMIT('-'); ++ TRY_NEG(); ++ EMIT_GLUE(); ++ } ++ nfa_inc(®parse); ++ ++ if (extra == ADD_NL) /* \_[] also matches \n */ ++ { ++ EMIT(reg_string ? NL : NFA_NEWL); ++ TRY_NEG(); ++ EMIT_GLUE(); ++ } ++ ++ /* skip the trailing ] */ ++ regparse = endp; ++ nfa_inc(®parse); ++ if (negated == TRUE) ++ { ++ /* Mark end of negated char range */ ++ EMIT(NFA_END_NEG_RANGE); ++ EMIT(NFA_CONCAT); ++ } ++ return OK; ++ } /* if exists closing ] */ ++ else if (reg_strict) ++ { ++ syntax_error = TRUE; ++ EMSG_RET_FAIL(_(e_missingbracket)); ++ } ++ ++ /* FALLTHROUGH */ ++ default: ++ { ++ #ifdef FEAT_MBYTE ++ int plen; ++ ++ nfa_do_multibyte: ++ /* length of current char, with composing chars, ++ * from pointer */ ++ plen = (*mb_ptr2len)(old_regparse); ++ if (enc_utf8 && clen != plen) ++ { ++ /* A composing character is always handled as a ++ * separate atom, surrounded by NFA_COMPOSING and ++ * NFA_END_COMPOSING. Note that right now we are ++ * building the postfix form, not the NFA itself; ++ * a composing char could be: a, b, c, NFA_COMPOSING ++ * where 'a', 'b', 'c' are chars with codes > 256. ++ */ ++ EMIT_COMPOSING_UTF(old_regparse); ++ regparse = old_regparse + plen; ++ } ++ else ++ /* A multi-byte character is always handled as a ++ * separate atom, surrounded by NFA_MULTIBYTE and ++ * NFA_END_MULTIBYTE */ ++ if (plen > 1) ++ { ++ EMIT_MBYTE(c); ++ } ++ else ++ #endif ++ { ++ c = no_Magic(c); ++ EMIT(c); ++ } ++ return OK; ++ } ++ } ++ ++ #undef TRY_NEG ++ #undef EMIT_GLUE ++ ++ return OK; ++ } ++ ++ /* ++ * Parse something followed by possible [*+=]. ++ * ++ * A piece is an atom, possibly followed by a multi, an indication of how many ++ * times the atom can be matched. Example: "a*" matches any sequence of "a" ++ * characters: "", "a", "aa", etc. ++ * ++ * piece ::= atom ++ * or atom multi ++ */ ++ static int ++ nfa_regpiece() ++ { ++ int i; ++ int op; ++ int ret; ++ long minval, maxval; ++ int greedy = TRUE; /* Braces are prefixed with '-' ? */ ++ char_u *old_regparse, *new_regparse; ++ int c2; ++ int *old_post_ptr, *my_post_start; ++ int old_regnpar; ++ int quest; ++ ++ /* Save the current position in the regexp, so that we can use it if ++ * {m,n} is next. */ ++ old_regparse = regparse; ++ /* Save current number of open parenthesis, so we can use it if ++ * {m,n} is next */ ++ old_regnpar = regnpar; ++ /* store current pos in the postfix form, for \{m,n} involving 0s */ ++ my_post_start = post_ptr; ++ ++ ret = nfa_regatom(); ++ if (ret == FAIL) ++ return FAIL; /* cascaded error */ ++ ++ op = peekchr(); ++ if (re_multi_type(op) == NOT_MULTI) ++ return OK; ++ ++ skipchr(); ++ switch (op) ++ { ++ case Magic('*'): ++ EMIT(NFA_STAR); ++ break; ++ ++ case Magic('+'): ++ /* ++ * Trick: Normally, (a*)\+ would match the whole input "aaa". The ++ * first and only submatch would be "aaa". But the backtracking ++ * engine interprets the plus as "try matching one more time", and ++ * a* matches a second time at the end of the input, the empty ++ * string. ++ * The submatch will the empty string. ++ * ++ * In order to be consistent with the old engine, we disable ++ * NFA_PLUS, and replace + with * ++ */ ++ /* EMIT(NFA_PLUS); */ ++ regnpar = old_regnpar; ++ regparse = old_regparse; ++ curchr = -1; ++ if (nfa_regatom() == FAIL) ++ return FAIL; ++ EMIT(NFA_STAR); ++ EMIT(NFA_CONCAT); ++ skipchr(); /* skip the \+ */ ++ break; ++ ++ case Magic('@'): ++ op = no_Magic(getchr()); ++ switch(op) ++ { ++ case '=': ++ EMIT(NFA_PREV_ATOM_NO_WIDTH); ++ break; ++ case '!': ++ case '<': ++ case '>': ++ /* Not supported yet */ ++ return FAIL; ++ default: ++ syntax_error = TRUE; ++ EMSG2(_("E869: (NFA) Unknown operator '\\@%c'"), op); ++ return FAIL; ++ } ++ break; ++ ++ case Magic('?'): ++ case Magic('='): ++ EMIT(NFA_QUEST); ++ break; ++ ++ case Magic('{'): ++ /* a{2,5} will expand to 'aaa?a?a?' ++ * a{-1,3} will expand to 'aa??a??', where ?? is the nongreedy ++ * version of '?' ++ * \v(ab){2,3} will expand to '(ab)(ab)(ab)?', where all the ++ * parenthesis have the same id ++ */ ++ ++ greedy = TRUE; ++ c2 = peekchr(); ++ if (c2 == '-' || c2 == Magic('-')) ++ { ++ skipchr(); ++ greedy = FALSE; ++ } ++ if (!read_limits(&minval, &maxval)) ++ { ++ syntax_error = TRUE; ++ EMSG_RET_FAIL(_("E870: (NFA regexp) Error reading repetition limits")); ++ } ++ /* {0,inf}, {0,} and {} are equivalent to ++ * * */ ++ if (minval == 0 && maxval == MAX_LIMIT && greedy) ++ { ++ EMIT(NFA_STAR); ++ break; ++ } ++ ++ if (maxval > NFA_BRACES_MAXLIMIT) ++ { ++ /* This would yield a huge automaton and use too much memory. ++ * Revert to old engine */ ++ return FAIL; ++ } ++ ++ /* Special case: x{0} or x{-0} */ ++ if (maxval == 0) ++ { ++ /* Ignore result of previous call to nfa_regatom() */ ++ post_ptr = my_post_start; ++ /* NFA_SKIP_CHAR has 0-length and works everywhere */ ++ EMIT(NFA_SKIP_CHAR); ++ return OK; ++ } ++ ++ /* Ignore previous call to nfa_regatom() */ ++ post_ptr = my_post_start; ++ /* Save pos after the repeated atom and the \{} */ ++ new_regparse = regparse; ++ ++ new_regparse = regparse; ++ quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY); ++ for (i = 0; i < maxval; i++) ++ { ++ /* Goto beginning of the repeated atom */ ++ regparse = old_regparse; ++ curchr = -1; ++ /* Restore count of parenthesis */ ++ regnpar = old_regnpar; ++ old_post_ptr = post_ptr; ++ if (nfa_regatom() == FAIL) ++ return FAIL; ++ /* after "minval" times, atoms are optional */ ++ if (i + 1 > minval) ++ EMIT(quest); ++ if (old_post_ptr != my_post_start) ++ EMIT(NFA_CONCAT); ++ } ++ ++ /* Go to just after the repeated atom and the \{} */ ++ regparse = new_regparse; ++ curchr = -1; ++ ++ break; ++ ++ ++ default: ++ break; ++ } /* end switch */ ++ ++ if (re_multi_type(peekchr()) != NOT_MULTI) ++ { ++ /* Can't have a multi follow a multi. */ ++ syntax_error = TRUE; ++ EMSG_RET_FAIL(_("E871: (NFA regexp) Can't have a multi follow a multi !")); ++ } ++ ++ return OK; ++ } ++ ++ /* ++ * Parse one or more pieces, concatenated. It matches a match for the ++ * first piece, followed by a match for the second piece, etc. Example: ++ * "f[0-9]b", first matches "f", then a digit and then "b". ++ * ++ * concat ::= piece ++ * or piece piece ++ * or piece piece piece ++ * etc. ++ */ ++ static int ++ nfa_regconcat() ++ { ++ int cont = TRUE; ++ int first = TRUE; ++ ++ while (cont) ++ { ++ switch (peekchr()) ++ { ++ case NUL: ++ case Magic('|'): ++ case Magic('&'): ++ case Magic(')'): ++ cont = FALSE; ++ break; ++ ++ case Magic('Z'): ++ #ifdef FEAT_MBYTE ++ regflags |= RF_ICOMBINE; ++ #endif ++ skipchr_keepstart(); ++ break; ++ case Magic('c'): ++ regflags |= RF_ICASE; ++ skipchr_keepstart(); ++ break; ++ case Magic('C'): ++ regflags |= RF_NOICASE; ++ skipchr_keepstart(); ++ break; ++ case Magic('v'): ++ reg_magic = MAGIC_ALL; ++ skipchr_keepstart(); ++ curchr = -1; ++ break; ++ case Magic('m'): ++ reg_magic = MAGIC_ON; ++ skipchr_keepstart(); ++ curchr = -1; ++ break; ++ case Magic('M'): ++ reg_magic = MAGIC_OFF; ++ skipchr_keepstart(); ++ curchr = -1; ++ break; ++ case Magic('V'): ++ reg_magic = MAGIC_NONE; ++ skipchr_keepstart(); ++ curchr = -1; ++ break; ++ ++ default: ++ if (nfa_regpiece() == FAIL) ++ return FAIL; ++ if (first == FALSE) ++ EMIT(NFA_CONCAT); ++ else ++ first = FALSE; ++ break; ++ } ++ } ++ ++ return OK; ++ } ++ ++ /* ++ * Parse a branch, one or more concats, separated by "\&". It matches the ++ * last concat, but only if all the preceding concats also match at the same ++ * position. Examples: ++ * "foobeep\&..." matches "foo" in "foobeep". ++ * ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob" ++ * ++ * branch ::= concat ++ * or concat \& concat ++ * or concat \& concat \& concat ++ * etc. ++ */ ++ static int ++ nfa_regbranch() ++ { ++ int ch; ++ int *old_post_ptr; ++ ++ old_post_ptr = post_ptr; ++ ++ /* First branch, possibly the only one */ ++ if (nfa_regconcat() == FAIL) ++ return FAIL; ++ ++ ch = peekchr(); ++ /* Try next concats */ ++ while (ch == Magic('&')) ++ { ++ skipchr(); ++ EMIT(NFA_NOPEN); ++ EMIT(NFA_PREV_ATOM_NO_WIDTH); ++ old_post_ptr = post_ptr; ++ if (nfa_regconcat() == FAIL) ++ return FAIL; ++ /* if concat is empty, skip a input char. But do emit a node */ ++ if (old_post_ptr == post_ptr) ++ EMIT(NFA_SKIP_CHAR); ++ EMIT(NFA_CONCAT); ++ ch = peekchr(); ++ } ++ ++ /* Even if a branch is empty, emit one node for it */ ++ if (old_post_ptr == post_ptr) ++ EMIT(NFA_SKIP_CHAR); ++ ++ return OK; ++ } ++ ++ /* ++ * Parse a pattern, one or more branches, separated by "\|". It matches ++ * anything that matches one of the branches. Example: "foo\|beep" matches ++ * "foo" and matches "beep". If more than one branch matches, the first one ++ * is used. ++ * ++ * pattern ::= branch ++ * or branch \| branch ++ * or branch \| branch \| branch ++ * etc. ++ */ ++ static int ++ nfa_reg(paren) ++ int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */ ++ { ++ int parno = 0; ++ ++ #ifdef FEAT_SYN_HL ++ #endif ++ if (paren == REG_PAREN) ++ { ++ if (regnpar >= NSUBEXP) /* Too many `(' */ ++ { ++ syntax_error = TRUE; ++ EMSG_RET_FAIL(_("E872: (NFA regexp) Too many '('")); ++ } ++ parno = regnpar++; ++ } ++ ++ if (nfa_regbranch() == FAIL) ++ return FAIL; /* cascaded error */ ++ ++ while (peekchr() == Magic('|')) ++ { ++ skipchr(); ++ if (nfa_regbranch() == FAIL) ++ return FAIL; /* cascaded error */ ++ EMIT(NFA_OR); ++ } ++ ++ /* Check for proper termination. */ ++ if (paren != REG_NOPAREN && getchr() != Magic(')')) ++ { ++ syntax_error = TRUE; ++ if (paren == REG_NPAREN) ++ EMSG2_RET_FAIL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); ++ else ++ EMSG2_RET_FAIL(_(e_unmatchedp), reg_magic == MAGIC_ALL); ++ } ++ else if (paren == REG_NOPAREN && peekchr() != NUL) ++ { ++ syntax_error = TRUE; ++ if (peekchr() == Magic(')')) ++ EMSG2_RET_FAIL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); ++ else ++ EMSG_RET_FAIL(_("E873: (NFA regexp) proper termination error")); ++ } ++ /* ++ * Here we set the flag allowing back references to this set of ++ * parentheses. ++ */ ++ if (paren == REG_PAREN) ++ { ++ had_endbrace[parno] = TRUE; /* have seen the close paren */ ++ EMIT(NFA_MOPEN + parno); ++ } ++ ++ return OK; ++ } ++ ++ typedef struct ++ { ++ char_u *start[NSUBEXP]; ++ char_u *end[NSUBEXP]; ++ lpos_T startpos[NSUBEXP]; ++ lpos_T endpos[NSUBEXP]; ++ } regsub_T; ++ ++ static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m)); ++ ++ #ifdef DEBUG ++ static char_u code[50]; ++ ++ static void ++ nfa_set_code(c) ++ int c; ++ { ++ int addnl = FALSE; ++ ++ if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL) ++ { ++ addnl = TRUE; ++ c -= ADD_NL; ++ } ++ ++ STRCPY(code, ""); ++ switch (c) ++ { ++ case NFA_MATCH: STRCPY(code, "NFA_MATCH "); break; ++ case NFA_SPLIT: STRCPY(code, "NFA_SPLIT "); break; ++ case NFA_CONCAT: STRCPY(code, "NFA_CONCAT "); break; ++ case NFA_NEWL: STRCPY(code, "NFA_NEWL "); break; ++ case NFA_ZSTART: STRCPY(code, "NFA_ZSTART"); break; ++ case NFA_ZEND: STRCPY(code, "NFA_ZEND"); break; ++ ++ case NFA_PREV_ATOM_NO_WIDTH: ++ STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break; ++ case NFA_NOPEN: STRCPY(code, "NFA_MOPEN_INVISIBLE"); break; ++ case NFA_NCLOSE: STRCPY(code, "NFA_MCLOSE_INVISIBLE"); break; ++ case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; ++ case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; ++ ++ case NFA_MULTIBYTE: STRCPY(code, "NFA_MULTIBYTE"); break; ++ case NFA_END_MULTIBYTE: STRCPY(code, "NFA_END_MULTIBYTE"); break; ++ ++ case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; ++ case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; ++ ++ case NFA_MOPEN + 0: ++ case NFA_MOPEN + 1: ++ case NFA_MOPEN + 2: ++ case NFA_MOPEN + 3: ++ case NFA_MOPEN + 4: ++ case NFA_MOPEN + 5: ++ case NFA_MOPEN + 6: ++ case NFA_MOPEN + 7: ++ case NFA_MOPEN + 8: ++ case NFA_MOPEN + 9: ++ STRCPY(code, "NFA_MOPEN(x)"); ++ code[10] = c - NFA_MOPEN + '0'; ++ break; ++ case NFA_MCLOSE + 0: ++ case NFA_MCLOSE + 1: ++ case NFA_MCLOSE + 2: ++ case NFA_MCLOSE + 3: ++ case NFA_MCLOSE + 4: ++ case NFA_MCLOSE + 5: ++ case NFA_MCLOSE + 6: ++ case NFA_MCLOSE + 7: ++ case NFA_MCLOSE + 8: ++ case NFA_MCLOSE + 9: ++ STRCPY(code, "NFA_MCLOSE(x)"); ++ code[11] = c - NFA_MCLOSE + '0'; ++ break; ++ case NFA_EOL: STRCPY(code, "NFA_EOL "); break; ++ case NFA_BOL: STRCPY(code, "NFA_BOL "); break; ++ case NFA_EOW: STRCPY(code, "NFA_EOW "); break; ++ case NFA_BOW: STRCPY(code, "NFA_BOW "); break; ++ case NFA_STAR: STRCPY(code, "NFA_STAR "); break; ++ case NFA_PLUS: STRCPY(code, "NFA_PLUS "); break; ++ case NFA_NOT: STRCPY(code, "NFA_NOT "); break; ++ case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break; ++ case NFA_OR: STRCPY(code, "NFA_OR"); break; ++ case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break; ++ case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; ++ case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break; ++ case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break; ++ case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break; ++ case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break; ++ case NFA_CLASS_CNTRL: STRCPY(code, "NFA_CLASS_CNTRL"); break; ++ case NFA_CLASS_DIGIT: STRCPY(code, "NFA_CLASS_DIGIT"); break; ++ case NFA_CLASS_GRAPH: STRCPY(code, "NFA_CLASS_GRAPH"); break; ++ case NFA_CLASS_LOWER: STRCPY(code, "NFA_CLASS_LOWER"); break; ++ case NFA_CLASS_PRINT: STRCPY(code, "NFA_CLASS_PRINT"); break; ++ case NFA_CLASS_PUNCT: STRCPY(code, "NFA_CLASS_PUNCT"); break; ++ case NFA_CLASS_SPACE: STRCPY(code, "NFA_CLASS_SPACE"); break; ++ case NFA_CLASS_UPPER: STRCPY(code, "NFA_CLASS_UPPER"); break; ++ case NFA_CLASS_XDIGIT: STRCPY(code, "NFA_CLASS_XDIGIT"); break; ++ case NFA_CLASS_TAB: STRCPY(code, "NFA_CLASS_TAB"); break; ++ case NFA_CLASS_RETURN: STRCPY(code, "NFA_CLASS_RETURN"); break; ++ case NFA_CLASS_BACKSPACE: STRCPY(code, "NFA_CLASS_BACKSPACE"); break; ++ case NFA_CLASS_ESCAPE: STRCPY(code, "NFA_CLASS_ESCAPE"); break; ++ ++ case NFA_ANY: STRCPY(code, "NFA_ANY"); break; ++ case NFA_IDENT: STRCPY(code, "NFA_IDENT"); break; ++ case NFA_SIDENT:STRCPY(code, "NFA_SIDENT"); break; ++ case NFA_KWORD: STRCPY(code, "NFA_KWORD"); break; ++ case NFA_SKWORD:STRCPY(code, "NFA_SKWORD"); break; ++ case NFA_FNAME: STRCPY(code, "NFA_FNAME"); break; ++ case NFA_SFNAME:STRCPY(code, "NFA_SFNAME"); break; ++ case NFA_PRINT: STRCPY(code, "NFA_PRINT"); break; ++ case NFA_SPRINT:STRCPY(code, "NFA_SPRINT"); break; ++ case NFA_WHITE: STRCPY(code, "NFA_WHITE"); break; ++ case NFA_NWHITE:STRCPY(code, "NFA_NWHITE"); break; ++ case NFA_DIGIT: STRCPY(code, "NFA_DIGIT"); break; ++ case NFA_NDIGIT:STRCPY(code, "NFA_NDIGIT"); break; ++ case NFA_HEX: STRCPY(code, "NFA_HEX"); break; ++ case NFA_NHEX: STRCPY(code, "NFA_NHEX"); break; ++ case NFA_OCTAL: STRCPY(code, "NFA_OCTAL"); break; ++ case NFA_NOCTAL:STRCPY(code, "NFA_NOCTAL"); break; ++ case NFA_WORD: STRCPY(code, "NFA_WORD"); break; ++ case NFA_NWORD: STRCPY(code, "NFA_NWORD"); break; ++ case NFA_HEAD: STRCPY(code, "NFA_HEAD"); break; ++ case NFA_NHEAD: STRCPY(code, "NFA_NHEAD"); break; ++ case NFA_ALPHA: STRCPY(code, "NFA_ALPHA"); break; ++ case NFA_NALPHA:STRCPY(code, "NFA_NALPHA"); break; ++ case NFA_LOWER: STRCPY(code, "NFA_LOWER"); break; ++ case NFA_NLOWER:STRCPY(code, "NFA_NLOWER"); break; ++ case NFA_UPPER: STRCPY(code, "NFA_UPPER"); break; ++ case NFA_NUPPER:STRCPY(code, "NFA_NUPPER"); break; ++ ++ default: ++ STRCPY(code, "CHAR(x)"); ++ code[5] = c; ++ } ++ ++ if (addnl == TRUE) ++ STRCAT(code, " + NEWLINE "); ++ ++ } ++ ++ #ifdef ENABLE_LOG ++ static FILE *log_fd; ++ ++ /* ++ * Print the postfix notation of the current regexp. ++ */ ++ static void ++ nfa_postfix_dump(expr, retval) ++ char_u *expr; ++ int retval; ++ { ++ int *p; ++ FILE *f; ++ ++ f = fopen("LOG.log", "a"); ++ if (f != NULL) ++ { ++ fprintf(f, "\n-------------------------\n"); ++ if (retval == FAIL) ++ fprintf(f, ">>> NFA engine failed ... \n"); ++ else if (retval == OK) ++ fprintf(f, ">>> NFA engine succeeded !\n"); ++ fprintf(f, "Regexp: \"%s\"\nPostfix notation (char): \"", expr); ++ for (p=post_start; *p; p++) ++ { ++ nfa_set_code(*p); ++ fprintf(f, "%s, ", code); ++ } ++ fprintf(f, "\"\nPostfix notation (int): "); ++ for (p=post_start; *p; p++) ++ fprintf(f, "%d ", *p); ++ fprintf(f, "\n\n"); ++ fclose(f); ++ } ++ } ++ ++ /* ++ * Print the NFA starting with a root node "state". ++ */ ++ static void ++ nfa_print_state(debugf, state, ident) ++ FILE *debugf; ++ nfa_state_T *state; ++ int ident; ++ { ++ int i; ++ ++ if (state == NULL) ++ return; ++ ++ fprintf(debugf, "(%2d)", abs(state->id)); ++ for (i = 0; i < ident; i++) ++ fprintf(debugf, "%c", ' '); ++ ++ nfa_set_code(state->c); ++ fprintf(debugf, "%s %s (%d) (id=%d)\n", ++ state->negated ? "NOT" : "", code, state->c, abs(state->id)); ++ if (state->id < 0) ++ return; ++ ++ state->id = abs(state->id) * -1; ++ nfa_print_state(debugf, state->out, ident + 4); ++ nfa_print_state(debugf, state->out1, ident + 4); ++ } ++ ++ /* ++ * Print the NFA state machine. ++ */ ++ static void ++ nfa_dump(prog) ++ nfa_regprog_T *prog; ++ { ++ FILE *debugf = fopen("LOG.log", "a"); ++ ++ if (debugf != NULL) ++ { ++ nfa_print_state(debugf, prog->start, 0); ++ fclose(debugf); ++ } ++ } ++ #endif /* ENABLE_LOG */ ++ #endif /* DEBUG */ ++ ++ /* ++ * Parse r.e. @expr and convert it into postfix form. ++ * Return the postfix string on success, NULL otherwise. ++ */ ++ static int * ++ re2post() ++ { ++ if (nfa_reg(REG_NOPAREN) == FAIL) ++ return NULL; ++ EMIT(NFA_MOPEN); ++ return post_start; ++ } ++ ++ /* NB. Some of the code below is inspired by Russ's. */ ++ ++ /* ++ * Represents an NFA state plus zero or one or two arrows exiting. ++ * if c == MATCH, no arrows out; matching state. ++ * If c == SPLIT, unlabeled arrows to out and out1 (if != NULL). ++ * If c < 256, labeled arrow with character c to out. ++ */ ++ ++ static nfa_state_T *state_ptr; /* points to nfa_prog->state */ ++ ++ /* ++ * Allocate and initialize nfa_state_T. ++ */ ++ static nfa_state_T * ++ new_state(c, out, out1) ++ int c; ++ nfa_state_T *out; ++ nfa_state_T *out1; ++ { ++ nfa_state_T *s; ++ ++ if (istate >= nstate) ++ return NULL; ++ ++ s = &state_ptr[istate++]; ++ ++ s->c = c; ++ s->out = out; ++ s->out1 = out1; ++ ++ s->id = istate; ++ s->lastlist = 0; ++ s->lastthread = NULL; ++ s->visits = 0; ++ s->negated = FALSE; ++ ++ return s; ++ } ++ ++ /* ++ * A partially built NFA without the matching state filled in. ++ * Frag_T.start points at the start state. ++ * Frag_T.out is a list of places that need to be set to the ++ * next state for this fragment. ++ */ ++ typedef union Ptrlist Ptrlist; ++ struct Frag ++ { ++ nfa_state_T *start; ++ Ptrlist *out; ++ }; ++ typedef struct Frag Frag_T; ++ ++ static Frag_T frag __ARGS((nfa_state_T *start, Ptrlist *out)); ++ static Ptrlist *list1 __ARGS((nfa_state_T **outp)); ++ static void patch __ARGS((Ptrlist *l, nfa_state_T *s)); ++ static Ptrlist *append __ARGS((Ptrlist *l1, Ptrlist *l2)); ++ static void st_push __ARGS((Frag_T s, Frag_T **p, Frag_T *stack_end)); ++ static Frag_T st_pop __ARGS((Frag_T **p, Frag_T *stack)); ++ ++ /* ++ * Initialize Frag_T struct. ++ */ ++ static Frag_T ++ frag(start, out) ++ nfa_state_T *start; ++ Ptrlist *out; ++ { ++ Frag_T n = { start, out }; ++ return n; ++ } ++ ++ /* ++ * Since the out pointers in the list are always ++ * uninitialized, we use the pointers themselves ++ * as storage for the Ptrlists. ++ */ ++ union Ptrlist ++ { ++ Ptrlist *next; ++ nfa_state_T *s; ++ }; ++ ++ /* ++ * Create singleton list containing just outp. ++ */ ++ static Ptrlist * ++ list1(outp) ++ nfa_state_T **outp; ++ { ++ Ptrlist *l; ++ ++ l = (Ptrlist *)outp; ++ l->next = NULL; ++ return l; ++ } ++ ++ /* ++ * Patch the list of states at out to point to start. ++ */ ++ static void ++ patch(l, s) ++ Ptrlist *l; ++ nfa_state_T *s; ++ { ++ Ptrlist *next; ++ ++ for (; l; l = next) ++ { ++ next = l->next; ++ l->s = s; ++ } ++ } ++ ++ ++ /* ++ * Join the two lists l1 and l2, returning the combination. ++ */ ++ static Ptrlist * ++ append(l1, l2) ++ Ptrlist *l1; ++ Ptrlist *l2; ++ { ++ Ptrlist *oldl1; ++ ++ oldl1 = l1; ++ while (l1->next) ++ l1 = l1->next; ++ l1->next = l2; ++ return oldl1; ++ } ++ ++ /* ++ * Stack used for transforming postfix form into NFA. ++ */ ++ static Frag_T empty; ++ ++ static void ++ st_error(postfix, end, p) ++ int *postfix; ++ int *end; ++ int *p; ++ { ++ FILE *df; ++ int *p2; ++ ++ df = fopen("stack.err", "a"); ++ if (df) ++ { ++ fprintf(df, "Error popping the stack!\n"); ++ #ifdef DEBUG ++ fprintf(df, "Current regexp is \"%s\"\n", nfa_regengine.expr); ++ #endif ++ fprintf(df, "Postfix form is: "); ++ #ifdef DEBUG ++ for (p2 = postfix; p2 < end; p2++) ++ { ++ nfa_set_code(*p2); ++ fprintf(df, "%s, ", code); ++ } ++ nfa_set_code(*p); ++ fprintf(df, "\nCurrent position is: "); ++ for (p2 = postfix; p2 <= p; p2 ++) ++ { ++ nfa_set_code(*p2); ++ fprintf(df, "%s, ", code); ++ } ++ #else ++ for (p2 = postfix; p2 < end; p2++) ++ { ++ fprintf(df, "%d, ", *p2); ++ } ++ fprintf(df, "\nCurrent position is: "); ++ for (p2 = postfix; p2 <= p; p2 ++) ++ { ++ fprintf(df, "%d, ", *p2); ++ } ++ #endif ++ fprintf(df, "\n--------------------------\n"); ++ fclose(df); ++ } ++ EMSG(_("E874: (NFA) Could not pop the stack !")); ++ } ++ ++ /* ++ * Push an item onto the stack. ++ */ ++ static void ++ st_push(s, p, stack_end) ++ Frag_T s; ++ Frag_T **p; ++ Frag_T *stack_end; ++ { ++ Frag_T *stackp = *p; ++ ++ if (stackp >= stack_end) ++ return; ++ *stackp = s; ++ *p = *p + 1; ++ } ++ ++ /* ++ * Pop an item from the stack. ++ */ ++ static Frag_T ++ st_pop(p, stack) ++ Frag_T **p; ++ Frag_T *stack; ++ { ++ Frag_T *stackp; ++ ++ *p = *p - 1; ++ stackp = *p; ++ if (stackp < stack) ++ return empty; ++ return **p; ++ } ++ ++ /* ++ * Convert a postfix form into its equivalent NFA. ++ * Return the NFA start state on success, NULL otherwise. ++ */ ++ static nfa_state_T * ++ post2nfa(postfix, end, nfa_calc_size) ++ int *postfix; ++ int *end; ++ int nfa_calc_size; ++ { ++ int *p; ++ int mopen; ++ int mclose; ++ Frag_T *stack = NULL; ++ Frag_T *stackp = NULL; ++ Frag_T *stack_end = NULL; ++ Frag_T e1; ++ Frag_T e2; ++ Frag_T e; ++ nfa_state_T *s; ++ nfa_state_T *s1; ++ nfa_state_T *matchstate; ++ ++ if (postfix == NULL) ++ return NULL; ++ ++ #define PUSH(s) st_push ((s), &stackp, stack_end) ++ #define POP() st_pop(&stackp, stack); \ ++ if (stackp < stack) \ ++ { \ ++ st_error(postfix, end, p); \ ++ return NULL; \ ++ } ++ ++ if (nfa_calc_size == FALSE) ++ { ++ /* Allocate space for the stack. Max states on the stack : nstate */ ++ stack = (Frag_T *) lalloc((nstate + 1)*sizeof(Frag_T), TRUE); ++ stackp = stack; ++ stack_end = stack + NFA_STACK_SIZE; ++ } ++ ++ for (p = postfix; p < end; ++p) ++ { ++ switch (*p) ++ { ++ case NFA_CONCAT: ++ /* Catenation. ++ * Pay attention: this operator does not exist ++ * in the r.e. itself (it is implicit, really). ++ * It is added when r.e. is translated to postfix ++ * form in re2post(). ++ * ++ * No new state added here. */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate += 0; ++ break; ++ } ++ e2 = POP(); ++ e1 = POP(); ++ patch(e1.out, e2.start); ++ PUSH(frag(e1.start, e2.out)); ++ break; ++ ++ case NFA_NOT: ++ /* Negation of a character */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate += 0; ++ break; ++ } ++ e1 = POP(); ++ e1.start->negated = TRUE; ++ if (e1.start->c == NFA_MULTIBYTE || e1.start->c == NFA_COMPOSING) ++ e1.start->out1->negated = TRUE; ++ PUSH(e1); ++ break; ++ ++ case NFA_OR: ++ /* Alternation */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate ++; ++ break; ++ } ++ e2 = POP(); ++ e1 = POP(); ++ s = new_state(NFA_SPLIT, e1.start, e2.start); ++ if (s == NULL) ++ return NULL; ++ PUSH(frag(s, append(e1.out, e2.out))); ++ break; ++ ++ case NFA_STAR: ++ /* Zero or more */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate ++; ++ break; ++ } ++ e = POP(); ++ s = new_state(NFA_SPLIT, e.start, NULL); ++ if (s == NULL) ++ return NULL; ++ patch(e.out, s); ++ PUSH(frag(s, list1(&s->out1))); ++ break; ++ ++ case NFA_QUEST: ++ /* one or zero atoms=> greedy match */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate ++; ++ break; ++ } ++ e = POP(); ++ s = new_state(NFA_SPLIT, e.start, NULL); ++ if (s == NULL) ++ return NULL; ++ PUSH(frag(s, append(e.out, list1(&s->out1)))); ++ break; ++ ++ case NFA_QUEST_NONGREEDY: ++ /* zero or one atoms => non-greedy match */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate ++; ++ break; ++ } ++ e = POP(); ++ s = new_state(NFA_SPLIT, NULL, e.start); ++ if (s == NULL) ++ return NULL; ++ PUSH(frag(s, append(e.out, list1(&s->out)))); ++ break; ++ ++ case NFA_PLUS: ++ /* One or more */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate ++; ++ break; ++ } ++ e = POP(); ++ s = new_state(NFA_SPLIT, e.start, NULL); ++ if (s == NULL) ++ return NULL; ++ patch(e.out, s); ++ PUSH(frag(e.start, list1(&s->out1))); ++ break; ++ ++ case NFA_SKIP_CHAR: ++ /* Symbol of 0-length, Used in a repetition ++ * with max/min count of 0 */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate ++; ++ break; ++ } ++ s = new_state(NFA_SKIP_CHAR, NULL, NULL); ++ if (s == NULL) ++ return NULL; ++ PUSH(frag(s, list1(&s->out))); ++ break; ++ ++ case NFA_PREV_ATOM_NO_WIDTH: ++ /* The \@= operator: match the preceding atom with 0 width. ++ * Surrounds the preceding atom with START_INVISIBLE and ++ * END_INVISIBLE, similarly to MOPEN. ++ */ ++ /* TODO: Maybe this drops the speed? */ ++ return NULL; ++ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate += 2; ++ break; ++ } ++ e = POP(); ++ s1 = new_state(NFA_END_INVISIBLE, NULL, NULL); ++ if (s1 == NULL) ++ return NULL; ++ patch(e.out, s1); ++ ++ s = new_state(NFA_START_INVISIBLE, e.start, s1); ++ if (s == NULL) ++ return NULL; ++ PUSH(frag(s, list1(&s1->out))); ++ break; ++ ++ case NFA_MOPEN + 0: /* Submatch */ ++ case NFA_MOPEN + 1: ++ case NFA_MOPEN + 2: ++ case NFA_MOPEN + 3: ++ case NFA_MOPEN + 4: ++ case NFA_MOPEN + 5: ++ case NFA_MOPEN + 6: ++ case NFA_MOPEN + 7: ++ case NFA_MOPEN + 8: ++ case NFA_MOPEN + 9: ++ case NFA_NOPEN: /* \%( "Invisible Submatch" */ ++ case NFA_MULTIBYTE: /* mbyte char */ ++ case NFA_COMPOSING: /* composing char */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate += 2; ++ break; ++ } ++ ++ mopen = *p; ++ switch (*p) ++ { ++ case NFA_NOPEN: ++ mclose = NFA_NCLOSE; ++ break; ++ case NFA_MULTIBYTE: ++ mclose = NFA_END_MULTIBYTE; ++ break; ++ case NFA_COMPOSING: ++ mclose = NFA_END_COMPOSING; ++ break; ++ default: ++ /* NFA_MOPEN(0) ... NFA_MOPEN(9) */ ++ mclose = *p + NSUBEXP; ++ break; ++ } ++ ++ /* Allow "NFA_MOPEN" as a valid postfix representation for ++ * the empty regexp "". In this case, the NFA will be ++ * NFA_MOPEN -> NFA_MCLOSE. Note that this also allows ++ * empty groups of parenthesis, and empty mbyte chars */ ++ if (stackp == stack) ++ { ++ s = new_state(mopen, NULL, NULL); ++ if (s == NULL) ++ return NULL; ++ s1 = new_state(mclose, NULL, NULL); ++ if (s1 == NULL) ++ return NULL; ++ patch(list1(&s->out), s1); ++ PUSH(frag(s, list1(&s1->out))); ++ break; ++ } ++ ++ /* At least one node was emitted before NFA_MOPEN, so ++ * at least one node will be between NFA_MOPEN and NFA_MCLOSE */ ++ e = POP(); ++ s = new_state(mopen, e.start, NULL); /* `(' */ ++ if (s == NULL) ++ return NULL; ++ ++ s1 = new_state(mclose, NULL, NULL); /* `)' */ ++ if (s1 == NULL) ++ return NULL; ++ patch(e.out, s1); ++ ++ if (mopen == NFA_MULTIBYTE || mopen == NFA_COMPOSING) ++ /* MULTIBYTE->out1 = END_MULTIBYTE ++ * COMPOSING->out1 = END_COMPOSING */ ++ patch(list1(&s->out1), s1); ++ ++ PUSH(frag(s, list1(&s1->out))); ++ break; ++ ++ case NFA_ZSTART: ++ case NFA_ZEND: ++ default: ++ /* Operands */ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate ++; ++ break; ++ } ++ s = new_state(*p, NULL, NULL); ++ if (s == NULL) ++ return NULL; ++ PUSH(frag(s, list1(&s->out))); ++ break; ++ ++ } /* switch(*p) */ ++ ++ } /* for(p = postfix; *p; ++p) */ ++ ++ if (nfa_calc_size == TRUE) ++ { ++ nstate ++; ++ return NULL; /* Return value when counting size is ignored anyway */ ++ } ++ ++ e = POP(); ++ if (stackp != stack) ++ EMSG_RET_NULL(_("E875: (NFA regexp) (While converting from postfix to NFA), too many states left on stack")); ++ ++ if (istate >= nstate) ++ EMSG_RET_NULL(_("E876: (NFA regexp) Not enough space to store the whole NFA ")); ++ ++ vim_free(stack); ++ ++ matchstate = &state_ptr[istate++]; /* the match state */ ++ matchstate->c = NFA_MATCH; ++ matchstate->out = matchstate->out1 = NULL; ++ ++ patch(e.out, matchstate); ++ return e.start; ++ ++ #undef POP1 ++ #undef PUSH1 ++ #undef POP2 ++ #undef PUSH2 ++ #undef POP ++ #undef PUSH ++ } ++ ++ /**************************************************************** ++ * NFA execution code. ++ ****************************************************************/ ++ ++ /* thread_T contains runtime information of a NFA state */ ++ struct thread ++ { ++ nfa_state_T *state; ++ regsub_T sub; /* submatch info */ ++ }; ++ ++ typedef struct ++ { ++ thread_T *t; ++ int n; ++ } List; ++ ++ static void addstate __ARGS((List *l, nfa_state_T *state, regsub_T *m, int off, int lid, int *match)); ++ ++ static void ++ addstate(l, state, m, off, lid, match) ++ List *l; /* runtime state list */ ++ nfa_state_T *state; /* state to update */ ++ regsub_T *m; /* pointers to subexpressions */ ++ int off; ++ int lid; ++ int *match; /* found match? */ ++ { ++ regsub_T save; ++ int subidx = 0; ++ ++ if (l == NULL || state == NULL) ++ return; ++ ++ switch (state->c) ++ { ++ case NFA_SPLIT: ++ case NFA_NOT: ++ case NFA_NOPEN: ++ case NFA_NCLOSE: ++ case NFA_MCLOSE: ++ case NFA_MCLOSE + 1: ++ case NFA_MCLOSE + 2: ++ case NFA_MCLOSE + 3: ++ case NFA_MCLOSE + 4: ++ case NFA_MCLOSE + 5: ++ case NFA_MCLOSE + 6: ++ case NFA_MCLOSE + 7: ++ case NFA_MCLOSE + 8: ++ case NFA_MCLOSE + 9: ++ /* Do not remember these nodes in list "thislist" or "nextlist" */ ++ break; ++ ++ default: ++ if (state->lastlist == lid) ++ { ++ if (++state->visits > 2) ++ return; ++ } ++ else ++ { ++ /* add the state to the list */ ++ state->lastlist = lid; ++ state->lastthread = &l->t[l->n++]; ++ state->lastthread->state = state; ++ state->lastthread->sub = *m; ++ } ++ } ++ ++ #ifdef ENABLE_LOG ++ nfa_set_code(state->c); ++ fprintf(log_fd, "> Adding state %d to list. Character %s, code %d\n", ++ abs(state->id), code, state->c); ++ #endif ++ switch (state->c) ++ { ++ case NFA_MATCH: ++ *match = TRUE; ++ break; ++ ++ case NFA_SPLIT: ++ addstate(l, state->out, m, off, lid, match); ++ addstate(l, state->out1, m, off, lid, match); ++ break; ++ ++ case NFA_SKIP_CHAR: ++ addstate(l, state->out, m, off, lid, match); ++ break; ++ ++ #if 0 ++ case NFA_END_NEG_RANGE: ++ /* Nothing to handle here. nfa_regmatch() will take care of it */ ++ break; ++ ++ case NFA_NOT: ++ EMSG(_("E999: (NFA regexp internal error) Should not process NOT node !")); ++ #ifdef ENABLE_LOG ++ fprintf(f, "\n\n>>> E999: Added state NFA_NOT to a list ... Something went wrong ! Why wasn't it processed already? \n\n"); ++ #endif ++ break; ++ ++ case NFA_COMPOSING: ++ /* nfa_regmatch() will match all the bytes of this composing char. */ ++ break; ++ ++ case NFA_MULTIBYTE: ++ /* nfa_regmatch() will match all the bytes of this multibyte char. */ ++ break; ++ #endif ++ ++ case NFA_END_MULTIBYTE: ++ /* Successfully matched this mbyte char */ ++ addstate(l, state->out, m, off, lid, match); ++ break; ++ ++ case NFA_NOPEN: ++ case NFA_NCLOSE: ++ addstate(l, state->out, m, off, lid, match); ++ break; ++ ++ /* If this state is reached, then a recursive call of nfa_regmatch() ++ * succeeded. the next call saves the found submatches in the ++ * first state after the "invisible" branch. */ ++ #if 0 ++ case NFA_END_INVISIBLE: ++ break; ++ #endif ++ ++ case NFA_MOPEN + 0: ++ case NFA_MOPEN + 1: ++ case NFA_MOPEN + 2: ++ case NFA_MOPEN + 3: ++ case NFA_MOPEN + 4: ++ case NFA_MOPEN + 5: ++ case NFA_MOPEN + 6: ++ case NFA_MOPEN + 7: ++ case NFA_MOPEN + 8: ++ case NFA_MOPEN + 9: ++ case NFA_ZSTART: ++ subidx = state->c - NFA_MOPEN; ++ if (state->c == NFA_ZSTART) ++ subidx = 0; ++ ++ if (REG_MULTI) ++ { ++ save.startpos[subidx] = m->startpos[subidx]; ++ save.endpos[subidx] = m->endpos[subidx]; ++ m->startpos[subidx].lnum = reglnum; ++ m->startpos[subidx].col = reginput - regline + off; ++ } ++ else ++ { ++ save.start[subidx] = m->start[subidx]; ++ save.end[subidx] = m->end[subidx]; ++ m->start[subidx] = reginput + off; ++ } ++ ++ addstate(l, state->out, m, off, lid, match); ++ ++ if (REG_MULTI) ++ { ++ m->startpos[subidx] = save.startpos[subidx]; ++ m->endpos[subidx] = save.endpos[subidx]; ++ } ++ else ++ { ++ m->start[subidx] = save.start[subidx]; ++ m->end[subidx] = save.end[subidx]; ++ } ++ break; ++ ++ case NFA_MCLOSE + 0: ++ if (nfa_has_zend == TRUE) ++ { ++ addstate(l, state->out, m, off, lid, match); ++ break; ++ } ++ case NFA_MCLOSE + 1: ++ case NFA_MCLOSE + 2: ++ case NFA_MCLOSE + 3: ++ case NFA_MCLOSE + 4: ++ case NFA_MCLOSE + 5: ++ case NFA_MCLOSE + 6: ++ case NFA_MCLOSE + 7: ++ case NFA_MCLOSE + 8: ++ case NFA_MCLOSE + 9: ++ case NFA_ZEND: ++ subidx = state->c - NFA_MCLOSE; ++ if (state->c == NFA_ZEND) ++ subidx = 0; ++ ++ if (REG_MULTI) ++ { ++ save.startpos[subidx] = m->startpos[subidx]; ++ save.endpos[subidx] = m->endpos[subidx]; ++ m->endpos[subidx].lnum = reglnum; ++ m->endpos[subidx].col = reginput - regline + off; ++ } ++ else ++ { ++ save.start[subidx] = m->start[subidx]; ++ save.end[subidx] = m->end[subidx]; ++ m->end[subidx] = reginput + off; ++ } ++ ++ addstate(l, state->out, m, off, lid, match); ++ ++ if (REG_MULTI) ++ { ++ m->startpos[subidx] = save.startpos[subidx]; ++ m->endpos[subidx] = save.endpos[subidx]; ++ } ++ else ++ { ++ m->start[subidx] = save.start[subidx]; ++ m->end[subidx] = save.end[subidx]; ++ } ++ break; ++ } ++ } ++ ++ /* ++ * Check character class "class" against current character c. ++ */ ++ static int ++ check_char_class(class, c) ++ int class; ++ int c; ++ { ++ switch (class) ++ { ++ case NFA_CLASS_ALNUM: ++ if (isalnum(c)) ++ return OK; ++ break; ++ case NFA_CLASS_ALPHA: ++ if (isalpha(c)) ++ return OK; ++ break; ++ case NFA_CLASS_BLANK: ++ if (c == ' ' || c == '\t') ++ return OK; ++ break; ++ case NFA_CLASS_CNTRL: ++ if (iscntrl(c)) ++ return OK; ++ break; ++ case NFA_CLASS_DIGIT: ++ if (VIM_ISDIGIT(c)) ++ return OK; ++ break; ++ case NFA_CLASS_GRAPH: ++ if (isgraph(c)) ++ return OK; ++ break; ++ case NFA_CLASS_LOWER: ++ if (MB_ISLOWER(c)) ++ return OK; ++ break; ++ case NFA_CLASS_PRINT: ++ if (vim_isprintc(c)) ++ return OK; ++ break; ++ case NFA_CLASS_PUNCT: ++ if (ispunct(c)) ++ return OK; ++ break; ++ case NFA_CLASS_SPACE: ++ if ((c >=9 && c <= 13) || (c == ' ')) ++ return OK; ++ break; ++ case NFA_CLASS_UPPER: ++ if (MB_ISUPPER(c)) ++ return OK; ++ break; ++ case NFA_CLASS_XDIGIT: ++ if (vim_isxdigit(c)) ++ return OK; ++ break; ++ case NFA_CLASS_TAB: ++ if (c == '\t') ++ return OK; ++ break; ++ case NFA_CLASS_RETURN: ++ if (c == '\r') ++ return OK; ++ break; ++ case NFA_CLASS_BACKSPACE: ++ if (c == '\b') ++ return OK; ++ break; ++ case NFA_CLASS_ESCAPE: ++ if (c == '\033') ++ return OK; ++ break; ++ ++ default: ++ /* should not be here :P */ ++ EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class ")); ++ } ++ return FAIL; ++ } ++ ++ /* ++ * Set all NFA nodes' list ID equal to -1. ++ */ ++ static void ++ nfa_set_neg_listids(start) ++ nfa_state_T *start; ++ { ++ if (start == NULL) ++ return; ++ if (start->lastlist >= 0) ++ { ++ start->lastlist = -1; ++ nfa_set_neg_listids(start->out); ++ nfa_set_neg_listids(start->out1); ++ } ++ } ++ ++ /* ++ * Set all NFA nodes' list ID equal to 0. ++ */ ++ static void ++ nfa_set_null_listids(start) ++ nfa_state_T *start; ++ { ++ if (start == NULL) ++ return; ++ if (start->lastlist == -1) ++ { ++ start->lastlist = 0; ++ nfa_set_null_listids(start->out); ++ nfa_set_null_listids(start->out1); ++ } ++ } ++ ++ /* ++ * Save list IDs for all NFA states in "list". ++ */ ++ static void ++ nfa_save_listids(start, list) ++ nfa_state_T *start; ++ int *list; ++ { ++ if (start == NULL) ++ return; ++ if (start->lastlist != -1) ++ { ++ list[abs(start->id)] = start->lastlist; ++ start->lastlist = -1; ++ nfa_save_listids(start->out, list); ++ nfa_save_listids(start->out1, list); ++ } ++ } ++ ++ /* ++ * Restore list IDs from "list" to all NFA states. ++ */ ++ static void ++ nfa_restore_listids(start, list) ++ nfa_state_T *start; ++ int *list; ++ { ++ if (start == NULL) ++ return; ++ if (start->lastlist == -1) ++ { ++ start->lastlist = list[abs(start->id)]; ++ nfa_restore_listids(start->out, list); ++ nfa_restore_listids(start->out1, list); ++ } ++ } ++ ++ /* ++ * Main matching routine. ++ * ++ * Run NFA to determine whether it matches reginput. ++ * ++ * Return TRUE if there is a match, FALSE otherwise. ++ * Note: Caller must ensure that: start != NULL. ++ */ ++ static int ++ nfa_regmatch(start, submatch, m) ++ nfa_state_T *start; ++ regsub_T *submatch; ++ regsub_T *m; ++ { ++ int c = -1; ++ int n; ++ int i = 0; ++ int result; ++ int size = 0; ++ int match = FALSE; ++ int flag = 0; ++ int old_reglnum = -1; ++ int reginput_updated = FALSE; ++ thread_T *t; ++ char_u *cc; ++ char_u *old_reginput = NULL; ++ char_u *old_regline = NULL; ++ nfa_state_T *sta; ++ nfa_state_T *end; ++ List list[3]; ++ List *listtbl[2][2]; ++ List *ll; ++ int listid = 1; ++ int endnode = 0; ++ List *thislist; ++ List *nextlist; ++ List *neglist; ++ int *listids = NULL; ++ int j = 0; ++ int len = 0; ++ #ifdef DEBUG ++ FILE *debug = fopen("list.log", "a"); ++ ++ if (debug == NULL) ++ { ++ EMSG(_("(NFA) COULD NOT OPEN list.log !")); ++ return FALSE; ++ } ++ #endif ++ ++ /* Allocate memory for the lists of nodes */ ++ size = (nstate + 1) * sizeof(thread_T); ++ list[0].t = (thread_T *)lalloc(size, TRUE); ++ list[1].t = (thread_T *)lalloc(size, TRUE); ++ list[2].t = (thread_T *)lalloc(size, TRUE); ++ if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) ++ goto theend; ++ vim_memset(list[0].t, 0, size); ++ vim_memset(list[1].t, 0, size); ++ vim_memset(list[2].t, 0, size); ++ ++ #ifdef ENABLE_LOG ++ log_fd = fopen(LOG_NAME, "a"); ++ if (log_fd != NULL) ++ { ++ fprintf(log_fd, "**********************************\n"); ++ nfa_set_code(start->c); ++ fprintf(log_fd, " RUNNING nfa_regmatch() starting with state %d, code %s\n", ++ abs(start->id), code); ++ fprintf(log_fd, "**********************************\n"); ++ } ++ else ++ { ++ EMSG(_("Could not open temporary log file for writing, displaying on stderr ... ")); ++ log_fd = stderr; ++ } ++ #endif ++ ++ thislist = &list[0]; ++ thislist->n = 0; ++ nextlist = &list[1]; ++ nextlist->n = 0; ++ neglist = &list[2]; ++ neglist->n = 0; ++ #ifdef ENABLE_LOG ++ fprintf(log_fd, "(---) STARTSTATE\n"); ++ #endif ++ addstate(thislist, start, m, 0, listid, &match); ++ ++ /* There are two cases when the NFA advances: 1. input char matches the ++ * NFA node and 2. input char does not match the NFA node, but the next ++ * node is NFA_NOT. The following macro calls addstate() according to ++ * these rules. It is used A LOT, so use the "listtbl" table for speed */ ++ listtbl[0][0] = NULL; ++ listtbl[0][1] = neglist; ++ listtbl[1][0] = nextlist; ++ listtbl[1][1] = NULL; ++ #define ADD_POS_NEG_STATE(node) \ ++ ll = listtbl[result ? 1 : 0][node->negated]; \ ++ if (ll != NULL) \ ++ addstate(ll, node->out , &t->sub, n, listid + 1, &match); ++ ++ ++ /* ++ * Run for each character. ++ */ ++ do { ++ again: ++ #ifdef FEAT_MBYTE ++ if (has_mbyte) ++ { ++ c = (*mb_ptr2char)(reginput); ++ n = (*mb_ptr2len)(reginput); ++ } ++ else ++ #endif ++ { ++ c = *reginput; ++ n = 1; ++ } ++ if (c == NUL) ++ n = 0; ++ cc = (char_u *)&c; ++ ++ /* swap lists */ ++ thislist = &list[flag]; ++ nextlist = &list[flag ^= 1]; ++ nextlist->n = 0; /* `clear' nextlist */ ++ listtbl[1][0] = nextlist; ++ ++listid; ++ ++ #ifdef ENABLE_LOG ++ fprintf(log_fd, "------------------------------------------\n"); ++ fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); ++ fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", c, (int)c); ++ fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n); ++ for (i = 0; i< thislist->n; i++) ++ fprintf(log_fd, "%d ", abs(thislist->t[i].state->id)); ++ fprintf(log_fd, "\n"); ++ #endif ++ ++ #ifdef DEBUG ++ fprintf(debug, "\n-------------------\n"); ++ #endif ++ ++ /* compute nextlist */ ++ for (i = 0; i < thislist->n || neglist->n > 0; ++i) ++ { ++ if (neglist->n > 0) ++ { ++ t = &neglist->t[0]; ++ neglist->n --; ++ i--; ++ } ++ else ++ t = &thislist->t[i]; ++ ++ #ifdef DEBUG ++ nfa_set_code(t->state->c); ++ fprintf(debug, "%s, ", code); ++ #endif ++ #ifdef ENABLE_LOG ++ nfa_set_code(t->state->c); ++ fprintf(log_fd, "(%d) %s, code %d ... \n", abs(t->state->id), ++ code, (int)t->state->c); ++ #endif ++ ++ /* ++ * Handle the possible codes of the current state. ++ * The most important is NFA_MATCH. ++ */ ++ switch (t->state->c) ++ { ++ case NFA_MATCH: ++ match = TRUE; ++ *submatch = t->sub; ++ #ifdef ENABLE_LOG ++ for (j = 0; j < 4; j++) ++ if (REG_MULTI) ++ fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d", ++ j, ++ t->sub.startpos[j].col, ++ (int)t->sub.startpos[j].lnum, ++ t->sub.endpos[j].col, ++ (int)t->sub.endpos[j].lnum); ++ else ++ fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"", ++ j, ++ (char *)t->sub.start[j], ++ (char *)t->sub.end[j]); ++ fprintf(log_fd, "\n"); ++ #endif ++ goto nextchar; /* found the left-most longest match */ ++ ++ case NFA_END_INVISIBLE: ++ /* This is only encountered after a NFA_START_INVISIBLE node. ++ * They surround a zero-width group, used with "\@=" and "\&". ++ * If we got here, it means that the current "invisible" group ++ * finished successfully, so return control to the parent ++ * nfa_regmatch(). Submatches are stored in *m, and used in ++ * the parent call. */ ++ if (start->c == NFA_MOPEN + 0) ++ addstate(thislist, t->state->out, &t->sub, 0, listid, ++ &match); ++ else ++ { ++ *m = t->sub; ++ match = TRUE; ++ } ++ break; ++ ++ case NFA_START_INVISIBLE: ++ /* Save global variables, and call nfa_regmatch() to check if ++ * the current concat matches at this position. The concat ++ * ends with the node NFA_END_INVISIBLE */ ++ old_reginput = reginput; ++ old_regline = regline; ++ old_reglnum = reglnum; ++ if (listids == NULL) ++ { ++ listids = (int *) lalloc(sizeof(int) * nstate, TRUE); ++ if (listids == NULL) ++ { ++ EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!")); ++ return 0; ++ } ++ } ++ #ifdef ENABLE_LOG ++ if (log_fd != stderr) ++ fclose(log_fd); ++ log_fd = NULL; ++ #endif ++ /* Have to clear the listid field of the NFA nodes, so that ++ * nfa_regmatch() and addstate() can run properly after ++ * recursion. */ ++ nfa_save_listids(start, listids); ++ nfa_set_null_listids(start); ++ result = nfa_regmatch(t->state->out, submatch, m); ++ nfa_set_neg_listids(start); ++ nfa_restore_listids(start, listids); ++ ++ #ifdef ENABLE_LOG ++ log_fd = fopen(LOG_NAME, "a"); ++ if (log_fd != NULL) ++ { ++ fprintf(log_fd, "****************************\n"); ++ fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n"); ++ fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE"); ++ fprintf(log_fd, "****************************\n"); ++ } ++ else ++ { ++ EMSG(_("Could not open temporary log file for writing, displaying on stderr ... ")); ++ log_fd = stderr; ++ } ++ #endif ++ if (result == TRUE) ++ { ++ /* Restore position in input text */ ++ reginput = old_reginput; ++ regline = old_regline; ++ reglnum = old_reglnum; ++ /* Copy submatch info from the recursive call */ ++ if (REG_MULTI) ++ for (j = 1; j < NSUBEXP; j++) ++ { ++ t->sub.startpos[j] = m->startpos[j]; ++ t->sub.endpos[j] = m->endpos[j]; ++ } ++ else ++ for (j = 1; j < NSUBEXP; j++) ++ { ++ t->sub.start[j] = m->start[j]; ++ t->sub.end[j] = m->end[j]; ++ } ++ /* t->state->out1 is the corresponding END_INVISIBLE node */ ++ addstate(thislist, t->state->out1->out, &t->sub, 0, listid, ++ &match); ++ } ++ else ++ { ++ /* continue with next input char */ ++ reginput = old_reginput; ++ } ++ break; ++ ++ case NFA_BOL: ++ if (reginput == regline) ++ addstate(thislist, t->state->out, &t->sub, 0, listid, ++ &match); ++ break; ++ ++ case NFA_EOL: ++ if (c == NUL) ++ addstate(thislist, t->state->out, &t->sub, 0, listid, ++ &match); ++ break; ++ ++ case NFA_BOW: ++ { ++ int bow = TRUE; ++ ++ if (c == NUL) ++ bow = FALSE; ++ #ifdef FEAT_MBYTE ++ else if (has_mbyte) ++ { ++ int this_class; ++ ++ /* Get class of current and previous char (if it exists). */ ++ this_class = mb_get_class(reginput); ++ if (this_class <= 1) ++ bow = FALSE; ++ else if (reg_prev_class() == this_class) ++ bow = FALSE; ++ } ++ #endif ++ else if (!vim_iswordc(c) ++ || (reginput > regline && vim_iswordc(reginput[-1]))) ++ bow = FALSE; ++ if (bow) ++ addstate(thislist, t->state->out, &t->sub, 0, listid, ++ &match); ++ break; ++ } ++ ++ case NFA_EOW: ++ { ++ int eow = TRUE; ++ ++ if (reginput == regline) ++ eow = FALSE; ++ #ifdef FEAT_MBYTE ++ else if (has_mbyte) ++ { ++ int this_class, prev_class; ++ ++ /* Get class of current and previous char (if it exists). */ ++ this_class = mb_get_class(reginput); ++ prev_class = reg_prev_class(); ++ if (this_class == prev_class ++ || prev_class == 0 || prev_class == 1) ++ eow = FALSE; ++ } ++ #endif ++ else if (!vim_iswordc(reginput[-1]) ++ || (reginput[0] != NUL && vim_iswordc(c))) ++ eow = FALSE; ++ if (eow) ++ addstate(thislist, t->state->out, &t->sub, 0, listid, ++ &match); ++ break; ++ } ++ ++ case NFA_MULTIBYTE: ++ case NFA_COMPOSING: ++ switch (t->state->c) ++ { ++ case NFA_MULTIBYTE: endnode = NFA_END_MULTIBYTE; break; ++ case NFA_COMPOSING: endnode = NFA_END_COMPOSING; break; ++ default: endnode = 0; ++ } ++ ++ result = OK; ++ sta = t->state->out; ++ len = 1; ++ while (sta->c != endnode && len <= n) ++ { ++ if (reginput[len-1] != sta->c) ++ { ++ result = OK - 1; ++ break; ++ } ++ len++; ++ sta = sta->out; ++ } ++ ++ /* if input char length doesn't match regexp char length */ ++ if (len -1 < n || sta->c != endnode) ++ result = OK - 1; ++ end = t->state->out1; /* NFA_END_MULTIBYTE or ++ NFA_END_COMPOSING */ ++ /* If \Z was present, then ignore composing characters */ ++ if (regflags & RF_ICOMBINE) ++ result = 1 ^ sta->negated; ++ ADD_POS_NEG_STATE(end); ++ break; ++ ++ case NFA_NEWL: ++ if (!reg_line_lbr && REG_MULTI ++ && c == NUL && reglnum <= reg_maxline) ++ { ++ if (reginput_updated == FALSE) ++ { ++ reg_nextline(); ++ reginput_updated = TRUE; ++ } ++ addstate(nextlist, t->state->out, &t->sub, n, listid + 1, ++ &match); ++ } ++ break; ++ ++ case NFA_CLASS_ALNUM: ++ case NFA_CLASS_ALPHA: ++ case NFA_CLASS_BLANK: ++ case NFA_CLASS_CNTRL: ++ case NFA_CLASS_DIGIT: ++ case NFA_CLASS_GRAPH: ++ case NFA_CLASS_LOWER: ++ case NFA_CLASS_PRINT: ++ case NFA_CLASS_PUNCT: ++ case NFA_CLASS_SPACE: ++ case NFA_CLASS_UPPER: ++ case NFA_CLASS_XDIGIT: ++ case NFA_CLASS_TAB: ++ case NFA_CLASS_RETURN: ++ case NFA_CLASS_BACKSPACE: ++ case NFA_CLASS_ESCAPE: ++ result = check_char_class(t->state->c, c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_END_NEG_RANGE: ++ /* This follows a series of negated nodes, like: ++ * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */ ++ if (c > 0) ++ addstate(nextlist, t->state->out, &t->sub, n, listid + 1, ++ &match); ++ break; ++ ++ case NFA_ANY: ++ /* Any printable char, not just any char. '\0' (end of input) ++ * must not match */ ++ if (c > 0) ++ addstate(nextlist, t->state->out, &t->sub, n, listid + 1, ++ &match); ++ break; ++ ++ /* ++ * Character classes like \a for alpha, \d for digit etc. ++ */ ++ case NFA_IDENT: /* \i */ ++ result = vim_isIDc(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_SIDENT: /* \I */ ++ result = !VIM_ISDIGIT(c) && vim_isIDc(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_KWORD: /* \k */ ++ result = vim_iswordp(cc); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_SKWORD: /* \K */ ++ result = !VIM_ISDIGIT(c) && vim_iswordp(cc); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_FNAME: /* \f */ ++ result = vim_isfilec(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_SFNAME: /* \F */ ++ result = !VIM_ISDIGIT(c) && vim_isfilec(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_PRINT: /* \p */ ++ result = ptr2cells(cc) == 1; ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_SPRINT: /* \P */ ++ result = !VIM_ISDIGIT(c) && ptr2cells(cc) == 1; ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_WHITE: /* \s */ ++ result = vim_iswhite(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_NWHITE: /* \S */ ++ result = c != NUL && !vim_iswhite(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_DIGIT: /* \d */ ++ result = ri_digit(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_NDIGIT: /* \D */ ++ result = c != NUL && !ri_digit(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_HEX: /* \x */ ++ result = ri_hex(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_NHEX: /* \X */ ++ result = c != NUL && !ri_hex(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_OCTAL: /* \o */ ++ result = ri_octal(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_NOCTAL: /* \O */ ++ result = c != NUL && !ri_octal(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_WORD: /* \w */ ++ result = ri_word(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_NWORD: /* \W */ ++ result = c != NUL && !ri_word(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_HEAD: /* \h */ ++ result = ri_head(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_NHEAD: /* \H */ ++ result = c != NUL && !ri_head(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_ALPHA: /* \a */ ++ result = ri_alpha(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_NALPHA: /* \A */ ++ result = c != NUL && !ri_alpha(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_LOWER: /* \l */ ++ result = ri_lower(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_NLOWER: /* \L */ ++ result = c != NUL && !ri_lower(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_UPPER: /* \u */ ++ result = ri_upper(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ case NFA_NUPPER: /* \U */ ++ result = c != NUL && !ri_upper(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ ++ default: /* regular character */ ++ result = (no_Magic(t->state->c) == c); ++ if (!result) ++ result = ireg_ic == TRUE ++ && MB_TOLOWER(t->state->c) == MB_TOLOWER(c); ++ ADD_POS_NEG_STATE(t->state); ++ break; ++ } ++ ++ } /* for (thislist = thislist; thislist->state; thislist++) */ ++ ++ /* The first found match is the leftmost one, but there may be a ++ * longer one. Keep running the NFA, but don't start from the ++ * beginning. Also, do not add the start state in recursive calls of ++ * nfa_regmatch(), because recursive calls should only start in the ++ * first position. */ ++ if (match == FALSE && start->c == NFA_MOPEN + 0) ++ { ++ #ifdef ENABLE_LOG ++ fprintf(log_fd, "(---) STARTSTATE\n"); ++ #endif ++ addstate(nextlist, start, m, n, listid + 1, &match); ++ } ++ ++ if (reginput_updated) ++ { ++ reginput_updated = FALSE; ++ goto again; ++ } ++ ++ #ifdef ENABLE_LOG ++ fprintf(log_fd, ">>> Thislist had %d states available: ", thislist->n); ++ for (i = 0; i< thislist->n; i++) ++ fprintf(log_fd, "%d ", abs(thislist->t[i].state->id)); ++ fprintf(log_fd, "\n"); ++ #endif ++ ++ nextchar: ++ reginput += n; ++ } while (c || reginput_updated); ++ ++ #ifdef ENABLE_LOG ++ if (log_fd != stderr) ++ fclose(log_fd); ++ log_fd = NULL; ++ #endif ++ ++ theend: ++ /* Free memory */ ++ vim_free(list[0].t); ++ vim_free(list[1].t); ++ vim_free(list[2].t); ++ list[0].t = list[1].t = list[2].t = NULL; ++ if (listids != NULL) ++ vim_free(listids); ++ #undef ADD_POS_NEG_STATE ++ #ifdef DEBUG ++ fclose(debug); ++ #endif ++ ++ return match; ++ } ++ ++ /* ++ * Try match of "prog" with at regline["col"]. ++ * Returns 0 for failure, number of lines contained in the match otherwise. ++ */ ++ static long ++ nfa_regtry(start, col) ++ nfa_state_T *start; ++ colnr_T col; ++ { ++ int i; ++ regsub_T sub, m; ++ #ifdef ENABLE_LOG ++ FILE *f; ++ #endif ++ ++ reginput = regline + col; ++ need_clear_subexpr = TRUE; ++ ++ #ifdef ENABLE_LOG ++ f = fopen(LOG_NAME, "a"); ++ if (f != NULL) ++ { ++ fprintf(f, "\n\n\n\n\n\n\t\t=======================================================\n"); ++ fprintf(f, " =======================================================\n"); ++ #ifdef DEBUG ++ fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); ++ #endif ++ fprintf(f, "\tInput text is \"%s\" \n", reginput); ++ fprintf(f, " =======================================================\n\n\n\n\n\n\n"); ++ nfa_print_state(f, start, 0); ++ fprintf(f, "\n\n"); ++ fclose(f); ++ } ++ else ++ EMSG(_("Could not open temporary log file for writing ")); ++ #endif ++ ++ if (REG_MULTI) ++ { ++ /* Use 0xff to set lnum to -1 */ ++ vim_memset(sub.startpos, 0xff, sizeof(lpos_T) * NSUBEXP); ++ vim_memset(sub.endpos, 0xff, sizeof(lpos_T) * NSUBEXP); ++ vim_memset(m.startpos, 0xff, sizeof(lpos_T) * NSUBEXP); ++ vim_memset(m.endpos, 0xff, sizeof(lpos_T) * NSUBEXP); ++ } ++ else ++ { ++ vim_memset(sub.start, 0, sizeof(char_u *) * NSUBEXP); ++ vim_memset(sub.end, 0, sizeof(char_u *) * NSUBEXP); ++ vim_memset(m.start, 0, sizeof(char_u *) * NSUBEXP); ++ vim_memset(m.end, 0, sizeof(char_u *) * NSUBEXP); ++ } ++ ++ if (nfa_regmatch(start, &sub, &m) == FALSE) ++ return 0; ++ ++ cleanup_subexpr(); ++ if (REG_MULTI) ++ { ++ for (i = 0; i < NSUBEXP; i++) ++ { ++ reg_startpos[i] = sub.startpos[i]; ++ reg_endpos[i] = sub.endpos[i]; ++ } ++ ++ if (reg_startpos[0].lnum < 0) ++ { ++ reg_startpos[0].lnum = 0; ++ reg_startpos[0].col = col; ++ } ++ if (reg_endpos[0].lnum < 0) ++ { ++ reg_endpos[0].lnum = reglnum; ++ reg_endpos[0].col = (int)(reginput - regline); ++ } ++ else ++ /* Use line number of "\ze". */ ++ reglnum = reg_endpos[0].lnum; ++ } ++ else ++ { ++ for (i = 0; i < NSUBEXP; i++) ++ { ++ reg_startp[i] = sub.start[i]; ++ reg_endp[i] = sub.end[i]; ++ } ++ ++ if (reg_startp[0] == NULL) ++ reg_startp[0] = regline + col; ++ if (reg_endp[0] == NULL) ++ reg_endp[0] = reginput; ++ } ++ ++ return 1 + reglnum; ++ } ++ ++ /* ++ * Match a regexp against a string ("line" points to the string) or multiple ++ * lines ("line" is NULL, use reg_getline()). ++ * ++ * Returns 0 for failure, number of lines contained in the match otherwise. ++ */ ++ static long ++ nfa_regexec_both(line, col) ++ char_u *line; ++ colnr_T col; /* column to start looking for match */ ++ { ++ nfa_regprog_T *prog; ++ long retval = 0L; ++ int i; ++ ++ if (REG_MULTI) ++ { ++ prog = (nfa_regprog_T *)reg_mmatch->regprog; ++ line = reg_getline((linenr_T)0); /* relative to the cursor */ ++ reg_startpos = reg_mmatch->startpos; ++ reg_endpos = reg_mmatch->endpos; ++ } ++ else ++ { ++ prog = (nfa_regprog_T *)reg_match->regprog; ++ reg_startp = reg_match->startp; ++ reg_endp = reg_match->endp; ++ } ++ ++ /* Be paranoid... */ ++ if (prog == NULL || line == NULL) ++ { ++ EMSG(_(e_null)); ++ goto theend; ++ } ++ ++ /* If the start column is past the maximum column: no need to try. */ ++ if (ireg_maxcol > 0 && col >= ireg_maxcol) ++ goto theend; ++ ++ /* If pattern contains "\c" or "\C": overrule value of ireg_ic */ ++ if (prog->regflags & RF_ICASE) ++ ireg_ic = TRUE; ++ else if (prog->regflags & RF_NOICASE) ++ ireg_ic = FALSE; ++ ++ #ifdef FEAT_MBYTE ++ /* If pattern contains "\Z" overrule value of ireg_icombine */ ++ if (prog->regflags & RF_ICOMBINE) ++ ireg_icombine = TRUE; ++ #endif ++ ++ regline = line; ++ reglnum = 0; /* relative to line */ ++ ++ nstate = prog->nstate; ++ ++ for (i = 0; i < nstate; ++i) ++ { ++ prog->state[i].id = i; ++ prog->state[i].lastlist = 0; ++ prog->state[i].visits = 0; ++ prog->state[i].lastthread = NULL; ++ } ++ ++ retval = nfa_regtry(prog->start, col); ++ ++ theend: ++ return retval; ++ } ++ ++ /* ++ * Compile a regular expression into internal code for the NFA matcher. ++ * Returns the program in allocated space. Returns NULL for an error. ++ */ ++ static regprog_T * ++ nfa_regcomp(expr, re_flags) ++ char_u *expr; ++ int re_flags; ++ { ++ nfa_regprog_T *prog; ++ int prog_size; ++ int *postfix; ++ ++ if (expr == NULL) ++ return NULL; ++ ++ #ifdef DEBUG ++ nfa_regengine.expr = expr; ++ #endif ++ ++ init_class_tab(); ++ ++ if (nfa_regcomp_start(expr, re_flags) == FAIL) ++ return NULL; ++ ++ /* Space for compiled regexp */ ++ prog_size = sizeof(nfa_regprog_T) + sizeof(nfa_state_T) * nstate_max; ++ prog = (nfa_regprog_T *)lalloc(prog_size, TRUE); ++ if (prog == NULL) ++ goto fail; ++ vim_memset(prog, 0, prog_size); ++ ++ /* Build postfix form of the regexp. Needed to build the NFA ++ * (and count its size) */ ++ postfix = re2post(); ++ if (postfix == NULL) ++ goto fail; /* Cascaded (syntax?) error */ ++ ++ /* ++ * In order to build the NFA, we parse the input regexp twice: ++ * 1. first pass to count size (so we can allocate space) ++ * 2. second to emit code ++ */ ++ #ifdef ENABLE_LOG ++ { ++ FILE *f = fopen(LOG_NAME, "a"); ++ ++ if (f != NULL) ++ { ++ fprintf(f, "\n*****************************\n\n\n\n\tCompiling regexp \"%s\" ... hold on !\n", expr); ++ fclose(f); ++ } ++ } ++ #endif ++ ++ /* ++ * PASS 1 ++ * Count number of NFA states in "nstate". Do not build the NFA. ++ */ ++ post2nfa(postfix, post_ptr, TRUE); ++ state_ptr = prog->state; ++ ++ /* ++ * PASS 2 ++ * Build the NFA ++ */ ++ prog->start = post2nfa(postfix, post_ptr, FALSE); ++ if (prog->start == NULL) ++ goto fail; ++ ++ prog->regflags = regflags; ++ prog->engine = &nfa_regengine; ++ prog->nstate = nstate; ++ #ifdef ENABLE_LOG ++ nfa_postfix_dump(expr, OK); ++ nfa_dump(prog); ++ #endif ++ ++ out: ++ vim_free(post_start); ++ post_start = post_ptr = post_end = NULL; ++ state_ptr = NULL; ++ return (regprog_T *)prog; ++ ++ fail: ++ vim_free(prog); ++ prog = NULL; ++ #ifdef ENABLE_LOG ++ nfa_postfix_dump(expr, FAIL); ++ #endif ++ #ifdef DEBUG ++ nfa_regengine.expr = NULL; ++ #endif ++ goto out; ++ } ++ ++ ++ /* ++ * Match a regexp against a string. ++ * "rmp->regprog" is a compiled regexp as returned by nfa_regcomp(). ++ * Uses curbuf for line count and 'iskeyword'. ++ * ++ * Return TRUE if there is a match, FALSE if not. ++ */ ++ static int ++ nfa_regexec(rmp, line, col) ++ regmatch_T *rmp; ++ char_u *line; /* string to match against */ ++ colnr_T col; /* column to start looking for match */ ++ { ++ reg_match = rmp; ++ reg_mmatch = NULL; ++ reg_maxline = 0; ++ reg_line_lbr = FALSE; ++ reg_buf = curbuf; ++ reg_win = NULL; ++ ireg_ic = rmp->rm_ic; ++ #ifdef FEAT_MBYTE ++ ireg_icombine = FALSE; ++ #endif ++ ireg_maxcol = 0; ++ return (nfa_regexec_both(line, col) != 0); ++ } ++ ++ #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ ++ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) ++ ++ static int nfa_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); ++ ++ /* ++ * Like nfa_regexec(), but consider a "\n" in "line" to be a line break. ++ */ ++ static int ++ nfa_regexec_nl(rmp, line, col) ++ regmatch_T *rmp; ++ char_u *line; /* string to match against */ ++ colnr_T col; /* column to start looking for match */ ++ { ++ reg_match = rmp; ++ reg_mmatch = NULL; ++ reg_maxline = 0; ++ reg_line_lbr = TRUE; ++ reg_buf = curbuf; ++ reg_win = NULL; ++ ireg_ic = rmp->rm_ic; ++ #ifdef FEAT_MBYTE ++ ireg_icombine = FALSE; ++ #endif ++ ireg_maxcol = 0; ++ return (nfa_regexec_both(line, col) != 0); ++ } ++ #endif ++ ++ ++ /* ++ * Match a regexp against multiple lines. ++ * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). ++ * Uses curbuf for line count and 'iskeyword'. ++ * ++ * Return zero if there is no match. Return number of lines contained in the ++ * match otherwise. ++ * ++ * Note: the body is the same as bt_regexec() except for nfa_regexec_both() ++ * ++ * ! Also NOTE : match may actually be in another line. e.g.: ++ * when r.e. is \nc, cursor is at 'a' and the text buffer looks like ++ * ++ * +-------------------------+ ++ * |a | ++ * |b | ++ * |c | ++ * | | ++ * +-------------------------+ ++ * ++ * then nfa_regexec_multi() returns 3. while the original ++ * vim_regexec_multi() returns 0 and a second call at line 2 will return 2. ++ * ++ * FIXME if this behavior is not compatible. ++ */ ++ static long ++ nfa_regexec_multi(rmp, win, buf, lnum, col, tm) ++ regmmatch_T *rmp; ++ win_T *win; /* window in which to search or NULL */ ++ buf_T *buf; /* buffer in which to search */ ++ linenr_T lnum; /* nr of line to start looking for match */ ++ colnr_T col; /* column to start looking for match */ ++ proftime_T *tm UNUSED; /* timeout limit or NULL */ ++ { ++ long r; ++ buf_T *save_curbuf = curbuf; ++ ++ reg_match = NULL; ++ reg_mmatch = rmp; ++ reg_buf = buf; ++ reg_win = win; ++ reg_firstlnum = lnum; ++ reg_maxline = reg_buf->b_ml.ml_line_count - lnum; ++ reg_line_lbr = FALSE; ++ ireg_ic = rmp->rmm_ic; ++ #ifdef FEAT_MBYTE ++ ireg_icombine = FALSE; ++ #endif ++ ireg_maxcol = rmp->rmm_maxcol; ++ ++ /* Need to switch to buffer "buf" to make vim_iswordc() work. */ ++ curbuf = buf; ++ r = nfa_regexec_both(NULL, col); ++ curbuf = save_curbuf; ++ ++ return r; ++ } ++ ++ #ifdef DEBUG ++ # undef ENABLE_LOG ++ #endif +*** ../vim-7.3.969/src/structs.h 2013-05-15 15:12:25.000000000 +0200 +--- src/structs.h 2013-05-17 18:54:19.000000000 +0200 +*************** +*** 63,77 **** + + #define GA_EMPTY {0, 0, 0, 0, NULL} + +- /* +- * This is here because regexp.h needs pos_T and below regprog_T is used. +- */ +- #include "regexp.h" +- + typedef struct window_S win_T; + typedef struct wininfo_S wininfo_T; + typedef struct frame_S frame_T; + typedef int scid_T; /* script ID */ + + /* + * This is here because gui.h needs the pos_T and win_T, and win_T needs gui.h +--- 63,78 ---- + + #define GA_EMPTY {0, 0, 0, 0, NULL} + + typedef struct window_S win_T; + typedef struct wininfo_S wininfo_T; + typedef struct frame_S frame_T; + typedef int scid_T; /* script ID */ ++ typedef struct file_buffer buf_T; /* forward declaration */ ++ ++ /* ++ * This is here because regexp.h needs pos_T and below regprog_T is used. ++ */ ++ #include "regexp.h" + + /* + * This is here because gui.h needs the pos_T and win_T, and win_T needs gui.h +*************** +*** 526,533 **** + # endif + } cmdmod_T; + +- typedef struct file_buffer buf_T; /* forward declaration */ +- + #define MF_SEED_LEN 8 + + struct memfile +--- 527,532 ---- +*** ../vim-7.3.969/src/testdir/Makefile 2013-04-24 12:56:13.000000000 +0200 +--- src/testdir/Makefile 2013-05-18 14:22:50.000000000 +0200 +*************** +*** 29,35 **** + test79.out test80.out test81.out test82.out test83.out \ + test84.out test85.out test86.out test87.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out + + SCRIPTS_GUI = test16.out + +--- 29,35 ---- + test79.out test80.out test81.out test82.out test83.out \ + test84.out test85.out test86.out test87.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out test95.out + + SCRIPTS_GUI = test16.out + +*************** +*** 85,97 **** + fi" + + # Check if the test.out file matches test.ok. +! @/bin/sh -c "if test -f test.out; then\ + if diff test.out $*.ok; \ + then mv -f test.out $*.out; \ + else echo $* FAILED >>test.log; mv -f test.out $*.failed; \ + fi \ + else echo $* NO OUTPUT >>test.log; \ + fi" + -rm -rf X* test.ok viminfo + + test49.out: test49.vim +--- 85,100 ---- + fi" + + # Check if the test.out file matches test.ok. +! @/bin/sh -c "if test -f test.out; then \ + if diff test.out $*.ok; \ + then mv -f test.out $*.out; \ + else echo $* FAILED >>test.log; mv -f test.out $*.failed; \ + fi \ + else echo $* NO OUTPUT >>test.log; \ + fi" ++ @/bin/sh -c "if test -f valgrind; then\ ++ mv -f valgrind valgrind.$*; \ ++ fi" + -rm -rf X* test.ok viminfo + + test49.out: test49.vim +*** ../vim-7.3.969/src/testdir/test64.in 2010-08-15 21:57:29.000000000 +0200 +--- src/testdir/test64.in 2013-05-19 16:05:36.000000000 +0200 +*************** +*** 1,4 **** +! Test for regexp patterns. + + A pattern that gives the expected result produces OK, so that we know it was + actually tried. +--- 1,5 ---- +! Test for regexp patterns without multi-byte support. +! See test95 for multi-byte tests. + + A pattern that gives the expected result produces OK, so that we know it was + actually tried. +*************** +*** 14,19 **** +--- 15,25 ---- + :" etc. + :" When there is no match use only the first two items. + :let tl = [] ++ ++ :"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" ++ :"""" Previously written tests """""""""""""""""""""""""""""""" ++ :"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" ++ + :call add(tl, ['ab', 'aab', 'ab']) + :call add(tl, ['b', 'abcdef', 'b']) + :call add(tl, ['bc*', 'abccccdef', 'bcccc']) +*************** +*** 132,137 **** +--- 138,301 ---- + :" + :call add(tl, ['\v(a*)+', 'aaaa', 'aaaa', '']) + :call add(tl, ['x', 'abcdef']) ++ ++ :"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" ++ :""""" Simple tests """"""""""""""""""""""""""""""""""""""""""" ++ :"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" ++ ++ :" Search single groups ++ :call add(tl, ['ab', 'aab', 'ab']) ++ :call add(tl, ['ab', 'baced']) ++ :call add(tl, ['ab', ' ab ', 'ab']) ++ ++ :" Search multi-modifiers ++ :call add(tl, ['x*', 'xcd', 'x']) ++ :call add(tl, ['x*', 'xxxxxxxxxxxxxxxxsofijiojgf', 'xxxxxxxxxxxxxxxx']) ++ :call add(tl, ['x*', 'abcdoij', '']) " empty match is good ++ :call add(tl, ['x\+', 'abcdoin']) " no match here ++ :call add(tl, ['x\+', 'abcdeoijdfxxiuhfij', 'xx']) ++ :call add(tl, ['x\+', 'xxxxx', 'xxxxx']) ++ :call add(tl, ['x\+', 'abc x siufhiush xxxxxxxxx', 'x']) ++ :call add(tl, ['x\=', 'x sdfoij', 'x']) ++ :call add(tl, ['x\=', 'abc sfoij', '']) " empty match is good ++ :call add(tl, ['x\=', 'xxxxxxxxx c', 'x']) ++ :call add(tl, ['x\?', 'x sdfoij', 'x']) ++ :call add(tl, ['x\?', 'abc sfoij', '']) " empty match is good ++ :call add(tl, ['x\?', 'xxxxxxxxxx c', 'x']) ++ ++ :call add(tl, ['a\{0,0}', 'abcdfdoij', '']) ++ :call add(tl, ['a\{0,1}', 'asiubid axxxaaa', 'a']) " same thing as 'a?' ++ :call add(tl, ['a\{1,0}', 'asiubid axxxaaa', 'a']) " same thing as 'a\{0,1}' ++ :call add(tl, ['a\{3,6}', 'aa siofuh']) ++ :call add(tl, ['a\{3,6}', 'aaaaa asfoij afaa', 'aaaaa']) ++ :call add(tl, ['a\{3,6}', 'aaaaaaaa', 'aaaaaa']) ++ :call add(tl, ['a\{0}', 'asoiuj', '']) ++ :call add(tl, ['a\{2}', 'aaaa', 'aa']) ++ :call add(tl, ['a\{2}', 'iuash fiusahfliusah fiushfilushfi uhsaifuh askfj nasfvius afg aaaa sfiuhuhiushf', 'aa']) ++ :call add(tl, ['a\{2}', 'abcdefghijklmnopqrestuvwxyz1234567890']) ++ :call add(tl, ['a\{0,}', 'oij sdigfusnf', '']) " same thing as 'a*' ++ :call add(tl, ['a\{0,}', 'aaaaa aa', 'aaaaa']) ++ :call add(tl, ['a\{2,}', 'sdfiougjdsafg']) ++ :call add(tl, ['a\{2,}', 'aaaaasfoij ', 'aaaaa']) ++ :call add(tl, ['a\{,0}', 'oidfguih iuhi hiu aaaa', '']) ++ :call add(tl, ['a\{,5}', 'abcd', 'a']) ++ :call add(tl, ['a\{,5}', 'aaaaaaaaaa', 'aaaaa']) ++ :call add(tl, ['a\{}', 'bbbcddiuhfcd', '']) " same thing as 'a*' ++ :call add(tl, ['a\{}', 'aaaaioudfh coisf jda', 'aaaa']) ++ ++ :call add(tl, ['a\{-0,0}', 'abcdfdoij', '']) ++ :call add(tl, ['a\{-0,1}', 'asiubid axxxaaa', '']) " anti-greedy version of 'a?' ++ :call add(tl, ['a\{-3,6}', 'aa siofuh']) ++ :call add(tl, ['a\{-3,6}', 'aaaaa asfoij afaa', 'aaa']) ++ :call add(tl, ['a\{-3,6}', 'aaaaaaaa', 'aaa']) ++ :call add(tl, ['a\{-0}', 'asoiuj', '']) ++ :call add(tl, ['a\{-2}', 'aaaa', 'aa']) ++ :call add(tl, ['a\{-2}', 'abcdefghijklmnopqrestuvwxyz1234567890']) ++ :call add(tl, ['a\{-0,}', 'oij sdigfusnf', '']) ++ :call add(tl, ['a\{-0,}', 'aaaaa aa', '']) ++ :call add(tl, ['a\{-2,}', 'sdfiougjdsafg']) ++ :call add(tl, ['a\{-2,}', 'aaaaasfoij ', 'aa']) ++ :call add(tl, ['a\{-,0}', 'oidfguih iuhi hiu aaaa', '']) ++ :call add(tl, ['a\{-,5}', 'abcd', '']) ++ :call add(tl, ['a\{-,5}', 'aaaaaaaaaa', '']) ++ :call add(tl, ['a\{-}', 'bbbcddiuhfcd', '']) " anti-greedy version of 'a*' ++ :call add(tl, ['a\{-}', 'aaaaioudfh coisf jda', '']) ++ ++ :" Test groups of characters and submatches ++ :call add(tl, ['\(abc\)*', 'abcabcabc', 'abcabcabc', 'abc']) ++ :call add(tl, ['\(ab\)\+', 'abababaaaaa', 'ababab', 'ab']) ++ :call add(tl, ['\(abaaaaa\)*cd', 'cd', 'cd', '']) ++ :call add(tl, ['\(test1\)\? \(test2\)\?', 'test1 test3', 'test1 ', 'test1', '']) ++ :call add(tl, ['\(test1\)\= \(test2\) \(test4443\)\=', ' test2 test4443 yupiiiiiiiiiii', ' test2 test4443', '', 'test2', 'test4443']) ++ :call add(tl, ['\(\(sub1\) hello \(sub 2\)\)', 'asterix sub1 hello sub 2 obelix', 'sub1 hello sub 2', 'sub1 hello sub 2', 'sub1', 'sub 2']) ++ :call add(tl, ['\(\(\(yyxxzz\)\)\)', 'abcdddsfiusfyyzzxxyyxxzz', 'yyxxzz', 'yyxxzz', 'yyxxzz', 'yyxxzz']) ++ :call add(tl, ['\v((ab)+|c+)+', 'abcccaba', 'abcccab', 'ab', 'ab']) ++ :call add(tl, ['\v((ab)|c*)+', 'abcccaba', 'abcccab', '', 'ab']) ++ :call add(tl, ['\v(a(c*)+b)+', 'acbababaaa', 'acbabab', 'ab', '']) ++ :call add(tl, ['\v(a|b*)+', 'aaaa', 'aaaa', '']) ++ ++ :" Test greedy-ness and lazy-ness ++ :call add(tl, ['a\{-2,7}','aaaaaaaaaaaaa', 'aa']) ++ :call add(tl, ['a\{2,7}','aaaaaaaaaaaaaaaaaaaa', 'aaaaaaa']) ++ :call add(tl, ['\vx(.{-,8})yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz','ayxa','xayzxayz']) ++ :call add(tl, ['\vx(.*)yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz', 'ayxayzxayzxa','']) ++ :call add(tl, ['\v(a{1,2}){-2,3}','aaaaaaa','aaaa','aa']) ++ :call add(tl, ['\v(a{-1,3})+','aa','aa','a']) ++ ++ :" Test Character classes ++ :call add(tl, ['\d\+e\d\d','test 10e23 fd','10e23']) ++ ++ :" Test collections and character range [] ++ :call add(tl, ['\v[a]', 'abcd', 'a']) ++ :call add(tl, ['a[bcd]', 'abcd', 'ab']) ++ :call add(tl, ['a[b-d]', 'acbd', 'ac']) ++ :call add(tl, ['[a-d][e-f][x-x]d', 'cexdxx', 'cexd']) ++ :call add(tl, ['\v[[:alpha:]]+', 'abcdefghijklmnopqrstuvwxyz6','abcdefghijklmnopqrstuvwxyz']) ++ :call add(tl, ['[[:alpha:]\+]', '6x8','x']) ++ :call add(tl, ['[^abc]\+','abcabcabc']) ++ :call add(tl, ['[^abc]','defghiasijvoinasoiunbvb','d']) ++ :call add(tl, ['[^abc]\+','ddddddda','ddddddd']) ++ :call add(tl, ['[^a-d]\+','aaaAAAZIHFNCddd','AAAZIHFNC']) ++ :call add(tl, ['[a-f]*','iiiiiiii','']) ++ :call add(tl, ['[a-f]*','abcdefgh','abcdef']) ++ :call add(tl, ['[^a-f]\+','abcdefgh','gh']) ++ :call add(tl, ['[a-c]\{-3,6}','abcabc','abc']) ++ :call add(tl, ['[^[:alpha:]]\+','abcccadfoij7787ysf287yrnccdu','7787']) ++ :call add(tl, ['[-a]', '-', '-']) ++ :call add(tl, ['[a-]', '-', '-']) ++ :call add(tl, ['[-./[:alnum:]_~]\+', 'log13.file', 'log13.file']) " filename regexp ++ :call add(tl, ['[\]\^\-\\]\+', '\^\\\-\---^', '\^\\\-\---^']) " special chars ++ :call add(tl, ['[[.a.]]\+', 'aa', 'aa']) " collation elem ++ :call add(tl, ['abc[0-9]*ddd', 'siuhabc ii']) " middle of regexp ++ :call add(tl, ['abc[0-9]*ddd', 'adf abc44482ddd oijs', 'abc44482ddd']) ++ :call add(tl, ['\_[0-9]\+', 'asfi9888u', '9888']) ++ :call add(tl, ['[0-9\n]\+', 'asfi9888u', '9888']) ++ ++ ++ :"""" Test recognition of some character classes ++ :call add(tl, ['[0-9]', '8', '8']) ++ :call add(tl, ['[^0-9]', '8']) ++ :call add(tl, ['[0-9a-fA-F]*', '0a7', '0a7']) ++ :call add(tl, ['[^0-9A-Fa-f]\+', '0a7']) ++ :call add(tl, ['[a-z_A-Z0-9]\+', 'aso_sfoij', 'aso_sfoij']) ++ :call add(tl, ['[a-z]', 'a', 'a']) ++ :call add(tl, ['[a-zA-Z]', 'a', 'a']) ++ :call add(tl, ['[A-Z]', 'a']) ++ :call add(tl, ['\C[^A-Z]\+', 'ABCOIJDEOIFNSD jsfoij sa', ' jsfoij sa']) ++ ++ :"""" Tests for \z features ++ :call add(tl, ['xx \ze test', 'xx ']) " must match after \ze ++ :call add(tl, ['abc\zeend', 'oij abcend', 'abc']) ++ :call add(tl, ['abc\zsdd', 'ddabcddxyzt', 'dd']) ++ :call add(tl, ['aa \zsax', ' ax']) " must match before \zs ++ :call add(tl, ['abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match']) ++ :call add(tl, ['\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last']) ++ ++ :"""" Tests for \@ features ++ :call add(tl, ['abc\@=', 'abc', 'ab']) ++ :call add(tl, ['abc\@=cd', 'abcd', 'abcd']) ++ :call add(tl, ['abc\@=', 'ababc', 'ab']) ++ :call add(tl, ['abcd\@=e', 'abcd']) " will never match, no matter the input text ++ :call add(tl, ['abcd\@=e', 'any text in here ... ']) " will never match ++ :call add(tl, ['\v(abc)@=..', 'xabcd', 'ab', 'abc']) ++ :call add(tl, ['\(.*John\)\@=.*Bob', 'here is John, and here is B']) " no match ++ :call add(tl, ['\(John.*\)\@=.*Bob', 'John is Bobs friend', 'John is Bob', 'John is Bobs friend']) ++ :call add(tl, ['.*John\&.*Bob', 'here is John, and here is B']) " no match ++ :call add(tl, ['.*John\&.*Bob', 'John is Bobs friend', 'John is Bob']) ++ :call add(tl, ['\v(test1)@=.*yep', 'this is a test1, yep it is', 'test1, yep', 'test1']) ++ ++ :"""" Combining different tests and features ++ :call add(tl, ['[[:alpha:]]\{-2,6}', '787abcdiuhsasiuhb4', 'ab']) ++ :call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd']) ++ :call add(tl, ['', 'abcd', '']) ++ :call add(tl, ['\v(())', 'any possible text', '']) ++ :call add(tl, ['\v%(ab(xyz)c)', ' abxyzc ', 'abxyzc', 'xyz']) ++ :call add(tl, ['\v(test|)empty', 'tesempty', 'empty', '']) ++ :call add(tl, ['\v(a|aa)(a|aa)', 'aaa', 'aa', 'a', 'a']) ++ ++ ++ :"""" Run the tests ++ + :" + :for t in tl + : let l = matchlist(t[1], t[0]) +*************** +*** 143,149 **** + : elseif len(t) > 2 && l[0] != t[2] + : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected: \"' . t[2] . '\"' + : else +! : $put ='OK' + : endif + : if len(l) > 0 + :" check all the nine submatches +--- 307,313 ---- + : elseif len(t) > 2 && l[0] != t[2] + : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected: \"' . t[2] . '\"' + : else +! : $put ='OK - ' . t[0] + : endif + : if len(l) > 0 + :" check all the nine submatches +*************** +*** 161,167 **** + : endif + :endfor + :unlet t tl e l +! :/^Results/,$wq! test.out + ENDTEST + + Results of test64: +--- 325,341 ---- + : endif + :endfor + :unlet t tl e l +! +! :" Check that \_[0-9] matching EOL does not break a following \> +! :" This only works on a buffer line, not with expression evaluation +! /^Find this +! /\<\(\(25\_[0-5]\|2\_[0-4]\_[0-9]\|\_[01]\?\_[0-9]\_[0-9]\?\)\.\)\{3\}\(25\_[0-5]\|2\_[0-4]\_[0-9]\|\_[01]\?\_[0-9]\_[0-9]\?\)\> +! y$Gop:" +! +! :/\%#=1^Results/,$wq! test.out + ENDTEST + ++ Find this: ++ localnet/192.168.0.1 ++ + Results of test64: +*** ../vim-7.3.969/src/testdir/test64.ok 2010-08-15 21:57:29.000000000 +0200 +--- src/testdir/test64.ok 2013-05-19 16:05:16.000000000 +0200 +*************** +*** 1,102 **** + Results of test64: +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +! OK +--- 1,230 ---- + Results of test64: +! OK - ab +! OK - b +! OK - bc* +! OK - bc\{-} +! OK - bc\{-}\(d\) +! OK - bc* +! OK - c* +! OK - bc* +! OK - c* +! OK - bc\+ +! OK - bc\+ +! OK - a\|ab +! OK - c\? +! OK - bc\? +! OK - bc\? +! OK - \va{1} +! OK - \va{2} +! OK - \va{2} +! OK - \va{2} +! OK - \va{2} +! OK - \va{2} +! OK - \va{2} +! OK - \vb{1} +! OK - \vba{2} +! OK - \vba{3} +! OK - \v(ab){1} +! OK - \v(ab){1} +! OK - \v(ab){1} +! OK - \v(ab){0,2} +! OK - \v(ab){0,2} +! OK - \v(ab){1,2} +! OK - \v(ab){1,2} +! OK - \v(ab){2,4} +! OK - \v(ab){2,4} +! OK - \v(ab){2} +! OK - \v(ab){2} +! OK - \v(ab){2} +! OK - \v(ab){2} +! OK - \v((ab){2}){2} +! OK - \v((ab){2}){2} +! OK - \v(a{1}){1} +! OK - \v(a{2}){1} +! OK - \v(a{2}){1} +! OK - \v(a{2}){1} +! OK - \v(a{1}){2} +! OK - \v(a{1}){2} +! OK - \v(a{2})+ +! OK - \v(a{2})+ +! OK - \v(a{2}){1} +! OK - \v(a{1}){2} +! OK - \v(a{1}){1} +! OK - \v(a{2}){2} +! OK - \v(a{2}){2} +! OK - \v(a+){2} +! OK - \v(a{3}){2} +! OK - \v(a{1,2}){2} +! OK - \v(a{1,3}){2} +! OK - \v(a{1,3}){2} +! OK - \v(a{1,3}){3} +! OK - \v(a{1,2}){2} +! OK - \v(a+)+ +! OK - \v(a+)+ +! OK - \v(a+){1,2} +! OK - \v(a+)(a+) +! OK - \v(a{3})+ +! OK - \v(a|b|c)+ +! OK - \v(a|b|c){2} +! OK - \v(abc){2} +! OK - \v(abc){2} +! OK - a* +! OK - \v(a*)+ +! OK - \v((ab)+)+ +! OK - \v(((ab)+)+)+ +! OK - \v(((ab)+)+)+ +! OK - \v(a{0,2})+ +! OK - \v(a*)+ +! OK - \v((a*)+)+ +! OK - \v((ab)*)+ +! OK - \va{1,3} +! OK - \va{2,3} +! OK - \v((ab)+|c*)+ +! OK - \v(a{2})|(b{3}) +! OK - \va{2}|b{2} +! OK - \v(a)+|(c)+ +! OK - \vab{2,3}c +! OK - \vab{2,3}c +! OK - \vab{2,3}cd{2,3}e +! OK - \va(bc){2}d +! OK - \va*a{2} +! OK - \va*a{2} +! OK - \va*a{2} +! OK - \va*a{2} +! OK - \va*b*|a*c* +! OK - \va{1}b{1}|a{1}b{1} +! OK - \v(a) +! OK - \v(a)(b) +! OK - \v(ab)(b)(c) +! OK - \v((a)(b)) +! OK - \v(a)|(b) +! OK - \v(a*)+ +! OK - x +! OK - ab +! OK - ab +! OK - ab +! OK - x* +! OK - x* +! OK - x* +! OK - x\+ +! OK - x\+ +! OK - x\+ +! OK - x\+ +! OK - x\= +! OK - x\= +! OK - x\= +! OK - x\? +! OK - x\? +! OK - x\? +! OK - a\{0,0} +! OK - a\{0,1} +! OK - a\{1,0} +! OK - a\{3,6} +! OK - a\{3,6} +! OK - a\{3,6} +! OK - a\{0} +! OK - a\{2} +! OK - a\{2} +! OK - a\{2} +! OK - a\{0,} +! OK - a\{0,} +! OK - a\{2,} +! OK - a\{2,} +! OK - a\{,0} +! OK - a\{,5} +! OK - a\{,5} +! OK - a\{} +! OK - a\{} +! OK - a\{-0,0} +! OK - a\{-0,1} +! OK - a\{-3,6} +! OK - a\{-3,6} +! OK - a\{-3,6} +! OK - a\{-0} +! OK - a\{-2} +! OK - a\{-2} +! OK - a\{-0,} +! OK - a\{-0,} +! OK - a\{-2,} +! OK - a\{-2,} +! OK - a\{-,0} +! OK - a\{-,5} +! OK - a\{-,5} +! OK - a\{-} +! OK - a\{-} +! OK - \(abc\)* +! OK - \(ab\)\+ +! OK - \(abaaaaa\)*cd +! OK - \(test1\)\? \(test2\)\? +! OK - \(test1\)\= \(test2\) \(test4443\)\= +! OK - \(\(sub1\) hello \(sub 2\)\) +! OK - \(\(\(yyxxzz\)\)\) +! OK - \v((ab)+|c+)+ +! OK - \v((ab)|c*)+ +! OK - \v(a(c*)+b)+ +! OK - \v(a|b*)+ +! OK - a\{-2,7} +! OK - a\{2,7} +! OK - \vx(.{-,8})yz(.*) +! OK - \vx(.*)yz(.*) +! OK - \v(a{1,2}){-2,3} +! OK - \v(a{-1,3})+ +! OK - \d\+e\d\d +! OK - \v[a] +! OK - a[bcd] +! OK - a[b-d] +! OK - [a-d][e-f][x-x]d +! OK - \v[[:alpha:]]+ +! OK - [[:alpha:]\+] +! OK - [^abc]\+ +! OK - [^abc] +! OK - [^abc]\+ +! OK - [^a-d]\+ +! OK - [a-f]* +! OK - [a-f]* +! OK - [^a-f]\+ +! OK - [a-c]\{-3,6} +! OK - [^[:alpha:]]\+ +! OK - [-a] +! OK - [a-] +! OK - [-./[:alnum:]_~]\+ +! OK - [\]\^\-\\]\+ +! OK - [[.a.]]\+ +! OK - abc[0-9]*ddd +! OK - abc[0-9]*ddd +! OK - \_[0-9]\+ +! OK - [0-9\n]\+ +! OK - [0-9] +! OK - [^0-9] +! OK - [0-9a-fA-F]* +! OK - [^0-9A-Fa-f]\+ +! OK - [a-z_A-Z0-9]\+ +! OK - [a-z] +! OK - [a-zA-Z] +! OK - [A-Z] +! OK - \C[^A-Z]\+ +! OK - xx \ze test +! OK - abc\zeend +! OK - abc\zsdd +! OK - aa \zsax +! OK - abc \zsmatch\ze abc +! OK - \v(a \zsif .*){2} +! OK - abc\@= +! OK - abc\@=cd +! OK - abc\@= +! OK - abcd\@=e +! OK - abcd\@=e +! OK - \v(abc)@=.. +! OK - \(.*John\)\@=.*Bob +! OK - \(John.*\)\@=.*Bob +! OK - .*John\&.*Bob +! OK - .*John\&.*Bob +! OK - \v(test1)@=.*yep +! OK - [[:alpha:]]\{-2,6} +! OK - [^[=a=]]\+ +! OK - +! OK - \v(()) +! OK - \v%(ab(xyz)c) +! OK - \v(test|)empty +! OK - \v(a|aa)(a|aa) +! 192.168.0.1 +*** ../vim-7.3.969/Filelist 2013-03-07 13:32:03.000000000 +0100 +--- Filelist 2013-05-17 19:25:39.000000000 +0200 +*************** +*** 57,62 **** +--- 57,63 ---- + src/popupmnu.c \ + src/quickfix.c \ + src/regexp.c \ ++ src/regexp_nfa.c \ + src/regexp.h \ + src/screen.c \ + src/search.c \ +*** ../vim-7.3.969/runtime/doc/pattern.txt 2011-07-20 17:58:14.000000000 +0200 +--- runtime/doc/pattern.txt 2013-05-17 22:57:02.000000000 +0200 +*************** +*** 21,27 **** + 10. Highlighting matches |match-highlight| + + ============================================================================== +! 1. Search commands *search-commands* *E486* + + */* + /{pattern}[/] Search forward for the [count]'th occurrence of +--- 21,27 ---- + 10. Highlighting matches |match-highlight| + + ============================================================================== +! 1. Search commands *search-commands* + + */* + /{pattern}[/] Search forward for the [count]'th occurrence of +*************** +*** 150,155 **** +--- 150,160 ---- + All matches for the last used search pattern will be highlighted if you set + the 'hlsearch' option. This can be suspended with the |:nohlsearch| command. + ++ When no match is found you get the error: *E486* Pattern not found ++ Note that for the |:global| command this behaves like a normal message, for Vi ++ compatibility. For the |:s| command the "e" flag can be used to avoid the ++ error message |:s_flags|. ++ + *search-offset* *{offset}* + These commands search for the specified pattern. With "/" and "?" an + additional offset may be given. There are two types of offsets: line offsets +*************** +*** 214,220 **** + the search, possibly in another direction or with another count. Note that + two patterns are remembered: One for 'normal' search commands and one for the + substitute command ":s". Each time an empty pattern is given, the previously +! used pattern is used. + + The 'magic' option sticks with the last used pattern. If you change 'magic', + this will not change how the last used pattern will be interpreted. +--- 219,226 ---- + the search, possibly in another direction or with another count. Note that + two patterns are remembered: One for 'normal' search commands and one for the + substitute command ":s". Each time an empty pattern is given, the previously +! used pattern is used. However, if there is no previous search command, a +! previous substitute pattern is used, if possible. + + The 'magic' option sticks with the last used pattern. If you change 'magic', + this will not change how the last used pattern will be interpreted. +*************** +*** 344,349 **** +--- 350,376 ---- + or \z( pattern \) |/\z(| + + ++ */\%#=* *two-engines* ++ Vim includes two regexp engines: ++ 1. An old, backtracking engine that supports everything. ++ 2. A new, NFA engine that works much faster on some patterns, but does not ++ support everything. ++ ++ Vim will automatically select the right engine for you. However, if you run ++ into a problem or want to specifically select one engine or the other, you can ++ prepend one of the following to the pattern: ++ ++ \%#=0 Force automatic selection. Only has an effect when ++ 'regexpengine' has been set to a non-zero value. ++ \%#=1 Force using the old engine. ++ \%#=2 Force using the NFA engine. ++ ++ You can also use the 'regexpengine' option to change the default. ++ ++ *E864* *E868* *E874* *E875* *E876* *E877* *E878* ++ If selecting the NFA engine and it runs into something that is not implemented ++ the pattern will not match. This is only useful when debugging Vim. ++ + ============================================================================== + 3. Magic */magic* + +*************** +*** 390,398 **** + + ============================================================================== + 4. Overview of pattern items *pattern-overview* + + Overview of multi items. */multi* *E61* *E62* +! More explanation and examples below, follow the links. *E64* + + multi ~ + 'magic' 'nomagic' matches of the preceding atom ~ +--- 417,426 ---- + + ============================================================================== + 4. Overview of pattern items *pattern-overview* ++ *E865* *E866* *E867* *E869* + + Overview of multi items. */multi* *E61* *E62* +! More explanation and examples below, follow the links. *E64* *E871* + + multi ~ + 'magic' 'nomagic' matches of the preceding atom ~ +*************** +*** 498,513 **** + x x a character with no special meaning matches itself + + |/[]| [] \[] any character specified inside the [] +! |/\%[]| \%[] \%[] a sequence of optionally matched atoms + + |/\c| \c \c ignore case, do not use the 'ignorecase' option + |/\C| \C \C match case, do not use the 'ignorecase' option + |/\m| \m \m 'magic' on for the following chars in the pattern + |/\M| \M \M 'magic' off for the following chars in the pattern + |/\v| \v \v the following chars in the pattern are "very magic" + |/\V| \V \V the following chars in the pattern are "very nomagic" +! |/\Z| \Z \Z ignore differences in Unicode "combining characters". +! Useful when searching voweled Hebrew or Arabic text. + + |/\%d| \%d \%d match specified decimal character (eg \%d123) + |/\%x| \%x \%x match specified hex character (eg \%x2a) +--- 526,543 ---- + x x a character with no special meaning matches itself + + |/[]| [] \[] any character specified inside the [] +! |/\%[]| \%[] \%[] a sequence of optionally matched atoms + + |/\c| \c \c ignore case, do not use the 'ignorecase' option + |/\C| \C \C match case, do not use the 'ignorecase' option ++ |/\Z| \Z \Z ignore differences in Unicode "combining characters". ++ Useful when searching voweled Hebrew or Arabic text. ++ + |/\m| \m \m 'magic' on for the following chars in the pattern + |/\M| \M \M 'magic' off for the following chars in the pattern + |/\v| \v \v the following chars in the pattern are "very magic" + |/\V| \V \V the following chars in the pattern are "very nomagic" +! |/\%#=| \%#=1 \%#=1 select regexp engine |/zero-width| + + |/\%d| \%d \%d match specified decimal character (eg \%d123) + |/\%x| \%x \%x match specified hex character (eg \%x2a) +*************** +*** 575,581 **** + \? Just like \=. Cannot be used when searching backwards with the "?" + command. {not in Vi} + +! */\{* *E58* *E60* *E554* + \{n,m} Matches n to m of the preceding atom, as many as possible + \{n} Matches n of the preceding atom + \{n,} Matches at least n of the preceding atom, as many as possible +--- 605,611 ---- + \? Just like \=. Cannot be used when searching backwards with the "?" + command. {not in Vi} + +! */\{* *E58* *E60* *E554* *E870* + \{n,m} Matches n to m of the preceding atom, as many as possible + \{n} Matches n of the preceding atom + \{n,} Matches at least n of the preceding atom, as many as possible +*************** +*** 631,647 **** + */\@!* + \@! Matches with zero width if the preceding atom does NOT match at the + current position. |/zero-width| {not in Vi} +! Like '(?!pattern)" in Perl. + Example matches ~ + foo\(bar\)\@! any "foo" not followed by "bar" +! a.\{-}p\@! "a", "ap", "app", etc. not followed by a "p" + if \(\(then\)\@!.\)*$ "if " not followed by "then" + + Using "\@!" is tricky, because there are many places where a pattern + does not match. "a.*p\@!" will match from an "a" to the end of the + line, because ".*" can match all characters in the line and the "p" + doesn't match at the end of the line. "a.\{-}p\@!" will match any +! "a", "ap", "aap", etc. that isn't followed by a "p", because the "." + can match a "p" and "p\@!" doesn't match after that. + + You can't use "\@!" to look for a non-match before the matching +--- 661,678 ---- + */\@!* + \@! Matches with zero width if the preceding atom does NOT match at the + current position. |/zero-width| {not in Vi} +! Like "(?!pattern)" in Perl. + Example matches ~ + foo\(bar\)\@! any "foo" not followed by "bar" +! a.\{-}p\@! "a", "ap", "app", "appp", etc. not immediately +! followed by a "p" + if \(\(then\)\@!.\)*$ "if " not followed by "then" + + Using "\@!" is tricky, because there are many places where a pattern + does not match. "a.*p\@!" will match from an "a" to the end of the + line, because ".*" can match all characters in the line and the "p" + doesn't match at the end of the line. "a.\{-}p\@!" will match any +! "a", "ap", "app", etc. that isn't followed by a "p", because the "." + can match a "p" and "p\@!" doesn't match after that. + + You can't use "\@!" to look for a non-match before the matching +*************** +*** 650,659 **** + "foobar" you could use "\(foo\)\@!...bar", but that doesn't match a + bar at the start of a line. Use "\(foo\)\@ ++ /^\%(.*bar\)\@!.*\zsfoo ++ < This pattern first checks that there is not a single position in the ++ line where "bar" matches. If ".*bar" matches somewhere the \@! will ++ reject the pattern. When there is no match any "foo" will be found. ++ The "\zs" is to have the match start just before "foo". ++ + */\@<=* + \@<= Matches with zero width if the preceding atom matches just before what + follows. |/zero-width| {not in Vi} +! Like "(?<=pattern)" in Perl, but Vim allows non-fixed-width patterns. + Example matches ~ + \(an\_s\+\)\@<=file "file" after "an" and white space or an + end-of-line +*************** +*** 677,683 **** + before what follows. Thus this matches if there is no position in the + current or previous line where the atom matches such that it ends just + before what follows. |/zero-width| {not in Vi} +! Like '(? +*** ../vim-7.3.969/src/option.c 2013-05-11 13:45:00.000000000 +0200 +--- src/option.c 2013-05-17 22:23:14.000000000 +0200 +*************** +*** 2077,2082 **** +--- 2077,2085 ---- + (char_u *)NULL, PV_NONE, + #endif + {(char_u *)2000L, (char_u *)0L} SCRIPTID_INIT}, ++ {"regexpengine", "re", P_NUM|P_VI_DEF, ++ (char_u *)&p_re, PV_NONE, ++ {(char_u *)0L, (char_u *)0L} SCRIPTID_INIT}, + {"relativenumber", "rnu", P_BOOL|P_VI_DEF|P_RWIN, + (char_u *)VAR_WIN, PV_RNU, + {(char_u *)FALSE, (char_u *)0L} SCRIPTID_INIT}, +*************** +*** 8604,8609 **** +--- 8607,8617 ---- + errmsg = e_positive; + p_hi = 0; + } ++ if (p_re < 0 || p_re > 2) ++ { ++ errmsg = e_invarg; ++ p_re = 0; ++ } + if (p_report < 0) + { + errmsg = e_positive; +*** ../vim-7.3.969/src/option.h 2013-03-19 16:46:59.000000000 +0100 +--- src/option.h 2013-05-17 22:23:39.000000000 +0200 +*************** +*** 653,658 **** +--- 653,659 ---- + EXTERN long p_rdt; /* 'redrawtime' */ + #endif + EXTERN int p_remap; /* 'remap' */ ++ EXTERN long p_re; /* 'regexpengine' */ + EXTERN long p_report; /* 'report' */ + #if defined(FEAT_WINDOWS) && defined(FEAT_QUICKFIX) + EXTERN long p_pvh; /* 'previewheight' */ +*** ../vim-7.3.969/src/testdir/test95.in 2013-05-19 19:10:10.000000000 +0200 +--- src/testdir/test95.in 2013-05-18 15:11:43.000000000 +0200 +*************** +*** 0 **** +--- 1,63 ---- ++ Test for regexp patterns with multi-byte support. ++ See test64 for the non-multi-byte tests. ++ ++ A pattern that gives the expected result produces OK, so that we know it was ++ actually tried. ++ ++ STARTTEST ++ :so small.vim ++ :so mbyte.vim ++ :" tl is a List of Lists with: ++ :" regexp pattern ++ :" text to test the pattern on ++ :" expected match (optional) ++ :" expected submatch 1 (optional) ++ :" expected submatch 2 (optional) ++ :" etc. ++ :" When there is no match use only the first two items. ++ :let tl = [] ++ ++ :"""" Multi-byte character tests. These will fail unless vim is compiled ++ :"""" with Multibyte (FEAT_MBYTE) or BIG/HUGE features. ++ :call add(tl, ['[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna']) ++ :call add(tl, ['[[=a=]]\+', 'ddaãâbcd', 'aãâ']) " equivalence classes ++ :call add(tl, ['[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos']) ++ :call add(tl, [' [^ ]\+', 'start มabcdม ', ' มabcdม']) ++ :call add(tl, ['[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna']) ++ ++ :"""" Run the tests ++ ++ :" ++ :for t in tl ++ : let l = matchlist(t[1], t[0]) ++ :" check the match itself ++ : if len(l) == 0 && len(t) > 2 ++ : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", did not match, expected: \"' . t[2] . '\"' ++ : elseif len(l) > 0 && len(t) == 2 ++ : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected no match' ++ : elseif len(t) > 2 && l[0] != t[2] ++ : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected: \"' . t[2] . '\"' ++ : else ++ : $put ='OK - ' . t[0] ++ : endif ++ : if len(l) > 0 ++ :" check all the nine submatches ++ : for i in range(1, 9) ++ : if len(t) <= i + 2 ++ : let e = '' ++ : else ++ : let e = t[i + 2] ++ : endif ++ : if l[i] != e ++ : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"' ++ : endif ++ : endfor ++ : unlet i ++ : endif ++ :endfor ++ :unlet t tl e l ++ ++ :/\%#=1^Results/,$wq! test.out ++ ENDTEST ++ ++ Results of test95: +*** ../vim-7.3.969/src/testdir/test95.ok 2013-05-19 19:10:10.000000000 +0200 +--- src/testdir/test95.ok 2013-05-18 14:30:49.000000000 +0200 +*************** +*** 0 **** +--- 1,6 ---- ++ Results of test95: ++ OK - [[:alpha:][=a=]]\+ ++ OK - [[=a=]]\+ ++ OK - [^ม ]\+ ++ OK - [^ ]\+ ++ OK - [ม[:alpha:][=a=]]\+ +*** ../vim-7.3.969/src/testdir/Make_amiga.mak 2013-04-12 13:44:49.000000000 +0200 +--- src/testdir/Make_amiga.mak 2013-05-18 14:21:35.000000000 +0200 +*************** +*** 33,39 **** + test76.out test77.out test78.out test79.out test80.out \ + test81.out test82.out test83.out test84.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out + + .SUFFIXES: .in .out + +--- 33,39 ---- + test76.out test77.out test78.out test79.out test80.out \ + test81.out test82.out test83.out test84.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out test95.out + + .SUFFIXES: .in .out + +*************** +*** 144,146 **** +--- 144,147 ---- + test92.out: test92.in + test93.out: test93.in + test94.out: test94.in ++ test95.out: test95.in +*** ../vim-7.3.969/src/testdir/Make_dos.mak 2013-04-12 13:44:49.000000000 +0200 +--- src/testdir/Make_dos.mak 2013-05-18 14:21:47.000000000 +0200 +*************** +*** 32,38 **** + test79.out test80.out test81.out test82.out test83.out \ + test84.out test85.out test86.out test87.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out + + SCRIPTS32 = test50.out test70.out + +--- 32,38 ---- + test79.out test80.out test81.out test82.out test83.out \ + test84.out test85.out test86.out test87.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out test95.out + + SCRIPTS32 = test50.out test70.out + +*** ../vim-7.3.969/src/testdir/Make_ming.mak 2013-04-12 13:44:49.000000000 +0200 +--- src/testdir/Make_ming.mak 2013-05-18 14:21:56.000000000 +0200 +*************** +*** 52,58 **** + test79.out test80.out test81.out test82.out test83.out \ + test84.out test85.out test86.out test87.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out + + SCRIPTS32 = test50.out test70.out + +--- 52,58 ---- + test79.out test80.out test81.out test82.out test83.out \ + test84.out test85.out test86.out test87.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out test95.out + + SCRIPTS32 = test50.out test70.out + +*** ../vim-7.3.969/src/testdir/Make_os2.mak 2013-04-12 13:44:49.000000000 +0200 +--- src/testdir/Make_os2.mak 2013-05-18 14:22:00.000000000 +0200 +*************** +*** 33,39 **** + test76.out test77.out test78.out test79.out test80.out \ + test81.out test82.out test83.out test84.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out + + .SUFFIXES: .in .out + +--- 33,39 ---- + test76.out test77.out test78.out test79.out test80.out \ + test81.out test82.out test83.out test84.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out test95.out + + .SUFFIXES: .in .out + +*** ../vim-7.3.969/src/testdir/Make_vms.mms 2013-04-12 13:44:49.000000000 +0200 +--- src/testdir/Make_vms.mms 2013-05-18 14:22:11.000000000 +0200 +*************** +*** 4,10 **** + # Authors: Zoltan Arpadffy, + # Sandor Kopanyi, + # +! # Last change: 2013 Apr 12 + # + # This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64. + # Edit the lines in the Configuration section below to select. +--- 4,10 ---- + # Authors: Zoltan Arpadffy, + # Sandor Kopanyi, + # +! # Last change: 2013 May 18 + # + # This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64. + # Edit the lines in the Configuration section below to select. +*************** +*** 77,83 **** + test71.out test72.out test74.out test75.out test76.out \ + test77.out test78.out test79.out test80.out test81.out \ + test82.out test83.out test84.out test88.out test89.out \ +! test90.out test91.out test92.out test93.out test94.out + + # Known problems: + # Test 30: a problem around mac format - unknown reason +--- 77,84 ---- + test71.out test72.out test74.out test75.out test76.out \ + test77.out test78.out test79.out test80.out test81.out \ + test82.out test83.out test84.out test88.out test89.out \ +! test90.out test91.out test92.out test93.out test94.out \ +! test95.out + + # Known problems: + # Test 30: a problem around mac format - unknown reason +*** ../vim-7.3.969/src/testdir/Makefile 2013-04-24 12:56:13.000000000 +0200 +--- src/testdir/Makefile 2013-05-18 14:22:50.000000000 +0200 +*************** +*** 29,35 **** + test79.out test80.out test81.out test82.out test83.out \ + test84.out test85.out test86.out test87.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out + + SCRIPTS_GUI = test16.out + +--- 29,35 ---- + test79.out test80.out test81.out test82.out test83.out \ + test84.out test85.out test86.out test87.out test88.out \ + test89.out test90.out test91.out test92.out test93.out \ +! test94.out test95.out + + SCRIPTS_GUI = test16.out + +*************** +*** 85,97 **** + fi" + + # Check if the test.out file matches test.ok. +! @/bin/sh -c "if test -f test.out; then\ + if diff test.out $*.ok; \ + then mv -f test.out $*.out; \ + else echo $* FAILED >>test.log; mv -f test.out $*.failed; \ + fi \ + else echo $* NO OUTPUT >>test.log; \ + fi" + -rm -rf X* test.ok viminfo + + test49.out: test49.vim +--- 85,100 ---- + fi" + + # Check if the test.out file matches test.ok. +! @/bin/sh -c "if test -f test.out; then \ + if diff test.out $*.ok; \ + then mv -f test.out $*.out; \ + else echo $* FAILED >>test.log; mv -f test.out $*.failed; \ + fi \ + else echo $* NO OUTPUT >>test.log; \ + fi" ++ @/bin/sh -c "if test -f valgrind; then\ ++ mv -f valgrind valgrind.$*; \ ++ fi" + -rm -rf X* test.ok viminfo + + test49.out: test49.vim +*** ../vim-7.3.969/src/version.c 2013-05-18 20:55:31.000000000 +0200 +--- src/version.c 2013-05-19 19:10:25.000000000 +0200 +*************** +*** 730,731 **** +--- 730,733 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 970, + /**/ + +-- +BRIDGEKEEPER: What is the air-speed velocity of an unladen swallow? +ARTHUR: What do you mean? An African or European swallow? +BRIDGEKEEPER: Er ... I don't know that ... Aaaaarrrrrrggghhh! + BRIDGEKEEPER is cast into the gorge. + "Monty Python and the Holy Grail" PYTHON (MONTY) PICTURES LTD + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ an exciting new programming language -- http://www.Zimbu.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///