| To: vim_dev@googlegroups.com |
| Subject: Patch 7.3.1029 |
| Fcc: outbox |
| From: Bram Moolenaar <Bram@moolenaar.net> |
| Mime-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| |
| Patch 7.3.1029 |
| Problem: New regexp performance: Unused position state being copied. |
| Solution: Keep track of which positions are actually valid. |
| Files: src/regexp_nfa.c |
| |
| |
| |
| |
| |
| *** 1649,1670 **** |
| return OK; |
| } |
| |
| - typedef union |
| - { |
| - struct multipos |
| - { |
| - lpos_T start; |
| - lpos_T end; |
| - } multilist[NSUBEXP]; |
| - struct linepos |
| - { |
| - char_u *start; |
| - char_u *end; |
| - } linelist[NSUBEXP]; |
| - } regsub_T; |
| - |
| - static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m)); |
| - |
| #ifdef DEBUG |
| static char_u code[50]; |
| |
| --- 1649,1654 ---- |
| |
| *** 2489,2494 **** |
| --- 2473,2498 ---- |
| * NFA execution code. |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| *** 2507,2513 **** |
| static int nfa_match; |
| |
| static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid)); |
| - |
| static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *ip)); |
| |
| static void |
| --- 2511,2516 ---- |
| |
| *** 2521,2527 **** |
| --- 2524,2532 ---- |
| int subidx; |
| nfa_thread_T *lastthread; |
| lpos_T save_lpos; |
| + int save_in_use; |
| char_u *save_ptr; |
| + int i; |
| |
| if (l == NULL || state == NULL) |
| return; |
| |
| *** 2557,2572 **** |
| state->lastlist = lid; |
| lastthread = &l->t[l->n++]; |
| lastthread->state = state; |
| ! |
| ! /* Copy the match start and end positions. */ |
| ! if (REG_MULTI) |
| ! mch_memmove(&lastthread->sub.multilist[0], |
| ! &m->multilist[0], |
| ! sizeof(struct multipos) * nfa_nsubexpr); |
| ! else |
| ! mch_memmove(&lastthread->sub.linelist[0], |
| ! &m->linelist[0], |
| ! sizeof(struct linepos) * nfa_nsubexpr); |
| } |
| } |
| |
| --- 2562,2580 ---- |
| state->lastlist = lid; |
| lastthread = &l->t[l->n++]; |
| lastthread->state = state; |
| ! lastthread->sub.in_use = m->in_use; |
| ! if (m->in_use > 0) |
| ! { |
| ! /* Copy the match start and end positions. */ |
| ! if (REG_MULTI) |
| ! mch_memmove(&lastthread->sub.multilist[0], |
| ! &m->multilist[0], |
| ! sizeof(struct multipos) * m->in_use); |
| ! else |
| ! mch_memmove(&lastthread->sub.linelist[0], |
| ! &m->linelist[0], |
| ! sizeof(struct linepos) * m->in_use); |
| ! } |
| } |
| } |
| |
| |
| *** 2636,2644 **** |
| else |
| subidx = state->c - NFA_MOPEN; |
| |
| if (REG_MULTI) |
| { |
| ! save_lpos = m->multilist[subidx].start; |
| if (off == -1) |
| { |
| m->multilist[subidx].start.lnum = reglnum + 1; |
| --- 2644,2668 ---- |
| else |
| subidx = state->c - NFA_MOPEN; |
| |
| + /* Set the position (with "off") in the subexpression. Save and |
| + * restore it when it was in use. Otherwise fill any gap. */ |
| if (REG_MULTI) |
| { |
| ! if (subidx < m->in_use) |
| ! { |
| ! save_lpos = m->multilist[subidx].start; |
| ! save_in_use = -1; |
| ! } |
| ! else |
| ! { |
| ! save_in_use = m->in_use; |
| ! for (i = m->in_use; i < subidx; ++i) |
| ! { |
| ! m->multilist[i].start.lnum = -1; |
| ! m->multilist[i].end.lnum = -1; |
| ! } |
| ! m->in_use = subidx + 1; |
| ! } |
| if (off == -1) |
| { |
| m->multilist[subidx].start.lnum = reglnum + 1; |
| |
| *** 2653,2668 **** |
| } |
| else |
| { |
| ! save_ptr = m->linelist[subidx].start; |
| m->linelist[subidx].start = reginput + off; |
| } |
| |
| addstate(l, state->out, m, off, lid); |
| |
| ! if (REG_MULTI) |
| ! m->multilist[subidx].start = save_lpos; |
| else |
| ! m->linelist[subidx].start = save_ptr; |
| break; |
| |
| case NFA_MCLOSE + 0: |
| --- 2677,2711 ---- |
| } |
| else |
| { |
| ! if (subidx < m->in_use) |
| ! { |
| ! save_ptr = m->linelist[subidx].start; |
| ! save_in_use = -1; |
| ! } |
| ! else |
| ! { |
| ! save_in_use = m->in_use; |
| ! for (i = m->in_use; i < subidx; ++i) |
| ! { |
| ! m->linelist[i].start = NULL; |
| ! m->linelist[i].end = NULL; |
| ! } |
| ! m->in_use = subidx + 1; |
| ! } |
| m->linelist[subidx].start = reginput + off; |
| } |
| |
| addstate(l, state->out, m, off, lid); |
| |
| ! if (save_in_use == -1) |
| ! { |
| ! if (REG_MULTI) |
| ! m->multilist[subidx].start = save_lpos; |
| ! else |
| ! m->linelist[subidx].start = save_ptr; |
| ! } |
| else |
| ! m->in_use = save_in_use; |
| break; |
| |
| case NFA_MCLOSE + 0: |
| |
| *** 2686,2691 **** |
| --- 2729,2739 ---- |
| else |
| subidx = state->c - NFA_MCLOSE; |
| |
| + /* We don't fill in gaps here, there must have been an MOPEN that |
| + * has done that. */ |
| + save_in_use = m->in_use; |
| + if (m->in_use <= subidx) |
| + m->in_use = subidx + 1; |
| if (REG_MULTI) |
| { |
| save_lpos = m->multilist[subidx].end; |
| |
| *** 2713,2718 **** |
| --- 2761,2767 ---- |
| m->multilist[subidx].end = save_lpos; |
| else |
| m->linelist[subidx].end = save_ptr; |
| + m->in_use = save_in_use; |
| break; |
| } |
| } |
| |
| *** 2917,2922 **** |
| --- 2966,2973 ---- |
| } |
| } |
| |
| + static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m)); |
| + |
| /* |
| * Main matching routine. |
| * |
| |
| *** 2960,2966 **** |
| #endif |
| nfa_match = FALSE; |
| |
| ! /* Allocate memory for the lists of nodes */ |
| size = (nstate + 1) * sizeof(nfa_thread_T); |
| list[0].t = (nfa_thread_T *)lalloc(size, TRUE); |
| list[1].t = (nfa_thread_T *)lalloc(size, TRUE); |
| --- 3011,3017 ---- |
| #endif |
| nfa_match = FALSE; |
| |
| ! /* Allocate memory for the lists of nodes. */ |
| size = (nstate + 1) * sizeof(nfa_thread_T); |
| list[0].t = (nfa_thread_T *)lalloc(size, TRUE); |
| list[1].t = (nfa_thread_T *)lalloc(size, TRUE); |
| |
| *** 3099,3105 **** |
| { |
| case NFA_MATCH: |
| nfa_match = TRUE; |
| ! *submatch = t->sub; |
| #ifdef ENABLE_LOG |
| for (j = 0; j < 4; j++) |
| if (REG_MULTI) |
| --- 3150,3168 ---- |
| { |
| case NFA_MATCH: |
| nfa_match = TRUE; |
| ! submatch->in_use = t->sub.in_use; |
| ! if (REG_MULTI) |
| ! for (j = 0; j < submatch->in_use; j++) |
| ! { |
| ! submatch->multilist[j].start = t->sub.multilist[j].start; |
| ! submatch->multilist[j].end = t->sub.multilist[j].end; |
| ! } |
| ! else |
| ! for (j = 0; j < submatch->in_use; j++) |
| ! { |
| ! submatch->linelist[j].start = t->sub.linelist[j].start; |
| ! submatch->linelist[j].end = t->sub.linelist[j].end; |
| ! } |
| #ifdef ENABLE_LOG |
| for (j = 0; j < 4; j++) |
| if (REG_MULTI) |
| |
| *** 3194,3210 **** |
| reglnum = old_reglnum; |
| /* Copy submatch info from the recursive call */ |
| if (REG_MULTI) |
| ! for (j = 1; j < nfa_nsubexpr; j++) |
| { |
| t->sub.multilist[j].start = m->multilist[j].start; |
| t->sub.multilist[j].end = m->multilist[j].end; |
| } |
| else |
| ! for (j = 1; j < nfa_nsubexpr; j++) |
| { |
| t->sub.linelist[j].start = m->linelist[j].start; |
| t->sub.linelist[j].end = m->linelist[j].end; |
| } |
| /* t->state->out1 is the corresponding END_INVISIBLE node */ |
| addstate_here(thislist, t->state->out1->out, &t->sub, |
| listid, &listidx); |
| --- 3257,3275 ---- |
| reglnum = old_reglnum; |
| /* Copy submatch info from the recursive call */ |
| if (REG_MULTI) |
| ! for (j = 1; j < m->in_use; j++) |
| { |
| t->sub.multilist[j].start = m->multilist[j].start; |
| t->sub.multilist[j].end = m->multilist[j].end; |
| } |
| else |
| ! for (j = 1; j < m->in_use; j++) |
| { |
| t->sub.linelist[j].start = m->linelist[j].start; |
| t->sub.linelist[j].end = m->linelist[j].end; |
| } |
| + t->sub.in_use = m->in_use; |
| + |
| /* t->state->out1 is the corresponding END_INVISIBLE node */ |
| addstate_here(thislist, t->state->out1->out, &t->sub, |
| listid, &listidx); |
| |
| *** 3703,3708 **** |
| --- 3768,3775 ---- |
| vim_memset(sub.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr); |
| vim_memset(m.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr); |
| } |
| + sub.in_use = 0; |
| + m.in_use = 0; |
| |
| if (nfa_regmatch(start, &sub, &m) == FALSE) |
| return 0; |
| |
| *** 3710,3716 **** |
| cleanup_subexpr(); |
| if (REG_MULTI) |
| { |
| ! for (i = 0; i < nfa_nsubexpr; i++) |
| { |
| reg_startpos[i] = sub.multilist[i].start; |
| reg_endpos[i] = sub.multilist[i].end; |
| --- 3777,3783 ---- |
| cleanup_subexpr(); |
| if (REG_MULTI) |
| { |
| ! for (i = 0; i < sub.in_use; i++) |
| { |
| reg_startpos[i] = sub.multilist[i].start; |
| reg_endpos[i] = sub.multilist[i].end; |
| |
| *** 3732,3738 **** |
| } |
| else |
| { |
| ! for (i = 0; i < nfa_nsubexpr; i++) |
| { |
| reg_startp[i] = sub.linelist[i].start; |
| reg_endp[i] = sub.linelist[i].end; |
| --- 3799,3805 ---- |
| } |
| else |
| { |
| ! for (i = 0; i < sub.in_use; i++) |
| { |
| reg_startp[i] = sub.linelist[i].start; |
| reg_endp[i] = sub.linelist[i].end; |
| |
| |
| |
| *** 730,731 **** |
| --- 730,733 ---- |
| { /* Add new patch number below this line */ |
| + /**/ |
| + 1029, |
| /**/ |
| |
| -- |
| I used to be indecisive, now I'm not sure. |
| |
| /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
| /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
| \\\ an exciting new programming language -- http://www.Zimbu.org /// |
| \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |