diff --git a/7.3.1029 b/7.3.1029 new file mode 100644 index 0000000..dcc59d2 --- /dev/null +++ b/7.3.1029 @@ -0,0 +1,404 @@ +To: vim_dev@googlegroups.com +Subject: Patch 7.3.1029 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.3.1029 +Problem: New regexp performance: Unused position state being copied. +Solution: Keep track of which positions are actually valid. +Files: src/regexp_nfa.c + + +*** ../vim-7.3.1028/src/regexp_nfa.c 2013-05-26 21:47:22.000000000 +0200 +--- src/regexp_nfa.c 2013-05-26 22:45:26.000000000 +0200 +*************** +*** 1649,1670 **** + return OK; + } + +- typedef union +- { +- struct multipos +- { +- lpos_T start; +- lpos_T end; +- } multilist[NSUBEXP]; +- struct linepos +- { +- char_u *start; +- char_u *end; +- } linelist[NSUBEXP]; +- } regsub_T; +- +- static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m)); +- + #ifdef DEBUG + static char_u code[50]; + +--- 1649,1654 ---- +*************** +*** 2489,2494 **** +--- 2473,2498 ---- + * NFA execution code. + ****************************************************************/ + ++ typedef struct ++ { ++ int in_use; /* number of subexpr with useful info */ ++ ++ /* When REG_MULTI is TRUE multilist is used, otherwise linelist. */ ++ union ++ { ++ struct multipos ++ { ++ lpos_T start; ++ lpos_T end; ++ } multilist[NSUBEXP]; ++ struct linepos ++ { ++ char_u *start; ++ char_u *end; ++ } linelist[NSUBEXP]; ++ }; ++ } regsub_T; ++ + /* nfa_thread_T contains execution information of a NFA state */ + typedef struct + { +*************** +*** 2507,2513 **** + static int nfa_match; + + static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid)); +- + static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *ip)); + + static void +--- 2511,2516 ---- +*************** +*** 2521,2527 **** +--- 2524,2532 ---- + int subidx; + nfa_thread_T *lastthread; + lpos_T save_lpos; ++ int save_in_use; + char_u *save_ptr; ++ int i; + + if (l == NULL || state == NULL) + return; +*************** +*** 2557,2572 **** + state->lastlist = lid; + lastthread = &l->t[l->n++]; + lastthread->state = state; +! +! /* Copy the match start and end positions. */ +! if (REG_MULTI) +! mch_memmove(&lastthread->sub.multilist[0], +! &m->multilist[0], +! sizeof(struct multipos) * nfa_nsubexpr); +! else +! mch_memmove(&lastthread->sub.linelist[0], +! &m->linelist[0], +! sizeof(struct linepos) * nfa_nsubexpr); + } + } + +--- 2562,2580 ---- + state->lastlist = lid; + lastthread = &l->t[l->n++]; + lastthread->state = state; +! lastthread->sub.in_use = m->in_use; +! if (m->in_use > 0) +! { +! /* Copy the match start and end positions. */ +! if (REG_MULTI) +! mch_memmove(&lastthread->sub.multilist[0], +! &m->multilist[0], +! sizeof(struct multipos) * m->in_use); +! else +! mch_memmove(&lastthread->sub.linelist[0], +! &m->linelist[0], +! sizeof(struct linepos) * m->in_use); +! } + } + } + +*************** +*** 2636,2644 **** + else + subidx = state->c - NFA_MOPEN; + + if (REG_MULTI) + { +! save_lpos = m->multilist[subidx].start; + if (off == -1) + { + m->multilist[subidx].start.lnum = reglnum + 1; +--- 2644,2668 ---- + else + subidx = state->c - NFA_MOPEN; + ++ /* Set the position (with "off") in the subexpression. Save and ++ * restore it when it was in use. Otherwise fill any gap. */ + if (REG_MULTI) + { +! if (subidx < m->in_use) +! { +! save_lpos = m->multilist[subidx].start; +! save_in_use = -1; +! } +! else +! { +! save_in_use = m->in_use; +! for (i = m->in_use; i < subidx; ++i) +! { +! m->multilist[i].start.lnum = -1; +! m->multilist[i].end.lnum = -1; +! } +! m->in_use = subidx + 1; +! } + if (off == -1) + { + m->multilist[subidx].start.lnum = reglnum + 1; +*************** +*** 2653,2668 **** + } + else + { +! save_ptr = m->linelist[subidx].start; + m->linelist[subidx].start = reginput + off; + } + + addstate(l, state->out, m, off, lid); + +! if (REG_MULTI) +! m->multilist[subidx].start = save_lpos; + else +! m->linelist[subidx].start = save_ptr; + break; + + case NFA_MCLOSE + 0: +--- 2677,2711 ---- + } + else + { +! if (subidx < m->in_use) +! { +! save_ptr = m->linelist[subidx].start; +! save_in_use = -1; +! } +! else +! { +! save_in_use = m->in_use; +! for (i = m->in_use; i < subidx; ++i) +! { +! m->linelist[i].start = NULL; +! m->linelist[i].end = NULL; +! } +! m->in_use = subidx + 1; +! } + m->linelist[subidx].start = reginput + off; + } + + addstate(l, state->out, m, off, lid); + +! if (save_in_use == -1) +! { +! if (REG_MULTI) +! m->multilist[subidx].start = save_lpos; +! else +! m->linelist[subidx].start = save_ptr; +! } + else +! m->in_use = save_in_use; + break; + + case NFA_MCLOSE + 0: +*************** +*** 2686,2691 **** +--- 2729,2739 ---- + else + subidx = state->c - NFA_MCLOSE; + ++ /* We don't fill in gaps here, there must have been an MOPEN that ++ * has done that. */ ++ save_in_use = m->in_use; ++ if (m->in_use <= subidx) ++ m->in_use = subidx + 1; + if (REG_MULTI) + { + save_lpos = m->multilist[subidx].end; +*************** +*** 2713,2718 **** +--- 2761,2767 ---- + m->multilist[subidx].end = save_lpos; + else + m->linelist[subidx].end = save_ptr; ++ m->in_use = save_in_use; + break; + } + } +*************** +*** 2917,2922 **** +--- 2966,2973 ---- + } + } + ++ static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m)); ++ + /* + * Main matching routine. + * +*************** +*** 2960,2966 **** + #endif + nfa_match = FALSE; + +! /* Allocate memory for the lists of nodes */ + size = (nstate + 1) * sizeof(nfa_thread_T); + list[0].t = (nfa_thread_T *)lalloc(size, TRUE); + list[1].t = (nfa_thread_T *)lalloc(size, TRUE); +--- 3011,3017 ---- + #endif + nfa_match = FALSE; + +! /* Allocate memory for the lists of nodes. */ + size = (nstate + 1) * sizeof(nfa_thread_T); + list[0].t = (nfa_thread_T *)lalloc(size, TRUE); + list[1].t = (nfa_thread_T *)lalloc(size, TRUE); +*************** +*** 3099,3105 **** + { + case NFA_MATCH: + nfa_match = TRUE; +! *submatch = t->sub; + #ifdef ENABLE_LOG + for (j = 0; j < 4; j++) + if (REG_MULTI) +--- 3150,3168 ---- + { + case NFA_MATCH: + nfa_match = TRUE; +! submatch->in_use = t->sub.in_use; +! if (REG_MULTI) +! for (j = 0; j < submatch->in_use; j++) +! { +! submatch->multilist[j].start = t->sub.multilist[j].start; +! submatch->multilist[j].end = t->sub.multilist[j].end; +! } +! else +! for (j = 0; j < submatch->in_use; j++) +! { +! submatch->linelist[j].start = t->sub.linelist[j].start; +! submatch->linelist[j].end = t->sub.linelist[j].end; +! } + #ifdef ENABLE_LOG + for (j = 0; j < 4; j++) + if (REG_MULTI) +*************** +*** 3194,3210 **** + reglnum = old_reglnum; + /* Copy submatch info from the recursive call */ + if (REG_MULTI) +! for (j = 1; j < nfa_nsubexpr; j++) + { + t->sub.multilist[j].start = m->multilist[j].start; + t->sub.multilist[j].end = m->multilist[j].end; + } + else +! for (j = 1; j < nfa_nsubexpr; j++) + { + t->sub.linelist[j].start = m->linelist[j].start; + t->sub.linelist[j].end = m->linelist[j].end; + } + /* t->state->out1 is the corresponding END_INVISIBLE node */ + addstate_here(thislist, t->state->out1->out, &t->sub, + listid, &listidx); +--- 3257,3275 ---- + reglnum = old_reglnum; + /* Copy submatch info from the recursive call */ + if (REG_MULTI) +! for (j = 1; j < m->in_use; j++) + { + t->sub.multilist[j].start = m->multilist[j].start; + t->sub.multilist[j].end = m->multilist[j].end; + } + else +! for (j = 1; j < m->in_use; j++) + { + t->sub.linelist[j].start = m->linelist[j].start; + t->sub.linelist[j].end = m->linelist[j].end; + } ++ t->sub.in_use = m->in_use; ++ + /* t->state->out1 is the corresponding END_INVISIBLE node */ + addstate_here(thislist, t->state->out1->out, &t->sub, + listid, &listidx); +*************** +*** 3703,3708 **** +--- 3768,3775 ---- + vim_memset(sub.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr); + vim_memset(m.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr); + } ++ sub.in_use = 0; ++ m.in_use = 0; + + if (nfa_regmatch(start, &sub, &m) == FALSE) + return 0; +*************** +*** 3710,3716 **** + cleanup_subexpr(); + if (REG_MULTI) + { +! for (i = 0; i < nfa_nsubexpr; i++) + { + reg_startpos[i] = sub.multilist[i].start; + reg_endpos[i] = sub.multilist[i].end; +--- 3777,3783 ---- + cleanup_subexpr(); + if (REG_MULTI) + { +! for (i = 0; i < sub.in_use; i++) + { + reg_startpos[i] = sub.multilist[i].start; + reg_endpos[i] = sub.multilist[i].end; +*************** +*** 3732,3738 **** + } + else + { +! for (i = 0; i < nfa_nsubexpr; i++) + { + reg_startp[i] = sub.linelist[i].start; + reg_endp[i] = sub.linelist[i].end; +--- 3799,3805 ---- + } + else + { +! for (i = 0; i < sub.in_use; i++) + { + reg_startp[i] = sub.linelist[i].start; + reg_endp[i] = sub.linelist[i].end; +*** ../vim-7.3.1028/src/version.c 2013-05-26 21:47:22.000000000 +0200 +--- src/version.c 2013-05-26 22:53:55.000000000 +0200 +*************** +*** 730,731 **** +--- 730,733 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 1029, + /**/ + +-- +I used to be indecisive, now I'm not sure. + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ an exciting new programming language -- http://www.Zimbu.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///