To: vim_dev@googlegroups.com Subject: Patch 7.3.1106 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1106 Problem: New regexp engine: saving and restoring lastlist in the states takes a lot of time. Solution: Use a second lastlist value for the first recursive call. Files: src/regexp.h, src/regexp_nfa.c *** ../vim-7.3.1105/src/regexp.h 2013-06-02 15:55:52.000000000 +0200 --- src/regexp.h 2013-06-03 11:29:26.000000000 +0200 *************** *** 72,78 **** nfa_state_T *out; nfa_state_T *out1; int id; ! int lastlist; int negated; int val; }; --- 72,78 ---- nfa_state_T *out; nfa_state_T *out1; int id; ! int lastlist[2]; /* 0: normal, 1: recursive */ int negated; int val; }; *** ../vim-7.3.1105/src/regexp_nfa.c 2013-06-02 22:37:39.000000000 +0200 --- src/regexp_nfa.c 2013-06-03 12:15:17.000000000 +0200 *************** *** 255,260 **** --- 255,269 ---- /* If not NULL match must end at this position */ static save_se_T *nfa_endp = NULL; + /* listid is global, so that it increases on recursive calls to + * nfa_regmatch(), which means we don't have to clear the lastlist field of + * all the states. */ + static int nfa_listid; + static int nfa_alt_listid; + + /* 0 for first call to nfa_regmatch(), 1 for recursive call. */ + static int nfa_ll_index = 0; + static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags)); static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); static int nfa_emit_equi_class __ARGS((int c, int neg)); *************** *** 2169,2175 **** s->out1 = out1; s->id = istate; ! s->lastlist = 0; s->negated = FALSE; return s; --- 2178,2185 ---- s->out1 = out1; s->id = istate; ! s->lastlist[0] = 0; ! s->lastlist[1] = 0; s->negated = FALSE; return s; *************** *** 3113,3121 **** #endif /* These nodes do not need to be added, but we need to bail out * when it was tried to be added to this list before. */ ! if (state->lastlist == l->id) goto skip_add; ! state->lastlist = l->id; break; case NFA_BOL: --- 3123,3131 ---- #endif /* These nodes do not need to be added, but we need to bail out * when it was tried to be added to this list before. */ ! if (state->lastlist[nfa_ll_index] == l->id) goto skip_add; ! state->lastlist[nfa_ll_index] = l->id; break; case NFA_BOL: *************** *** 3131,3137 **** /* FALLTHROUGH */ default: ! if (state->lastlist == l->id) { /* This state is already in the list, don't add it again, * unless it is an MOPEN that is used for a backreference. */ --- 3141,3147 ---- /* FALLTHROUGH */ default: ! if (state->lastlist[nfa_ll_index] == l->id) { /* This state is already in the list, don't add it again, * unless it is an MOPEN that is used for a backreference. */ *************** *** 3173,3179 **** } /* add the state to the list */ ! state->lastlist = l->id; thread = &l->t[l->n++]; thread->state = state; copy_sub(&thread->subs.norm, &subs->norm); --- 3183,3189 ---- } /* add the state to the list */ ! state->lastlist[nfa_ll_index] = l->id; thread = &l->t[l->n++]; thread->state = state; copy_sub(&thread->subs.norm, &subs->norm); *************** *** 3616,3621 **** --- 3626,3632 ---- /* * Save list IDs for all NFA states of "prog" into "list". * Also reset the IDs to zero. + * Only used for the recursive value lastlist[1]. */ static void nfa_save_listids(prog, list) *************** *** 3629,3636 **** p = &prog->state[0]; for (i = prog->nstate; --i >= 0; ) { ! list[i] = p->lastlist; ! p->lastlist = 0; ++p; } } --- 3640,3647 ---- p = &prog->state[0]; for (i = prog->nstate; --i >= 0; ) { ! list[i] = p->lastlist[1]; ! p->lastlist[1] = 0; ++p; } } *************** *** 3649,3655 **** p = &prog->state[0]; for (i = prog->nstate; --i >= 0; ) { ! p->lastlist = list[i]; ++p; } } --- 3660,3666 ---- p = &prog->state[0]; for (i = prog->nstate; --i >= 0; ) { ! p->lastlist[1] = list[i]; ++p; } } *************** *** 3683,3692 **** --- 3694,3705 ---- char_u *save_regline = regline; int save_reglnum = reglnum; int save_nfa_match = nfa_match; + int save_nfa_listid = nfa_listid; save_se_T *save_nfa_endp = nfa_endp; save_se_T endpos; save_se_T *endposp = NULL; int result; + int need_restore = FALSE; if (state->c == NFA_START_INVISIBLE_BEFORE) { *************** *** 3745,3774 **** } } - /* Call nfa_regmatch() to check if the current concat matches - * at this position. The concat ends with the node - * NFA_END_INVISIBLE */ - if (*listids == NULL) - { - *listids = (int *)lalloc(sizeof(int) * nstate, TRUE); - if (*listids == NULL) - { - EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!")); - return 0; - } - } #ifdef ENABLE_LOG if (log_fd != stderr) fclose(log_fd); log_fd = NULL; #endif ! /* Have to clear the listid field of the NFA nodes, so that ! * nfa_regmatch() and addstate() can run properly after ! * recursion. */ ! nfa_save_listids(prog, *listids); nfa_endp = endposp; result = nfa_regmatch(prog, state->out, submatch, m); ! nfa_restore_listids(prog, *listids); /* restore position in input text */ reginput = save_reginput; --- 3758,3809 ---- } } #ifdef ENABLE_LOG if (log_fd != stderr) fclose(log_fd); log_fd = NULL; #endif ! /* Have to clear the lastlist field of the NFA nodes, so that ! * nfa_regmatch() and addstate() can run properly after recursion. */ ! if (nfa_ll_index == 1) ! { ! /* Already calling nfa_regmatch() recursively. Save the lastlist[1] ! * values and clear them. */ ! if (*listids == NULL) ! { ! *listids = (int *)lalloc(sizeof(int) * nstate, TRUE); ! if (*listids == NULL) ! { ! EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!")); ! return 0; ! } ! } ! nfa_save_listids(prog, *listids); ! need_restore = TRUE; ! /* any value of nfa_listid will do */ ! } ! else ! { ! /* First recursive nfa_regmatch() call, switch to the second lastlist ! * entry. Make sure nfa_listid is different from a previous recursive ! * call, because some states may still have this ID. */ ! ++nfa_ll_index; ! if (nfa_listid <= nfa_alt_listid) ! nfa_listid = nfa_alt_listid; ! } ! ! /* Call nfa_regmatch() to check if the current concat matches at this ! * position. The concat ends with the node NFA_END_INVISIBLE */ nfa_endp = endposp; result = nfa_regmatch(prog, state->out, submatch, m); ! ! if (need_restore) ! nfa_restore_listids(prog, *listids); ! else ! { ! --nfa_ll_index; ! nfa_alt_listid = nfa_listid; ! } /* restore position in input text */ reginput = save_reginput; *************** *** 3776,3781 **** --- 3811,3817 ---- reglnum = save_reglnum; nfa_match = save_nfa_match; nfa_endp = save_nfa_endp; + nfa_listid = save_nfa_listid; #ifdef ENABLE_LOG log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); *************** *** 3821,3827 **** nfa_list_T list[3]; nfa_list_T *listtbl[2][2]; nfa_list_T *ll; - int listid = 1; int listidx; nfa_list_T *thislist; nfa_list_T *nextlist; --- 3857,3862 ---- *************** *** 3875,3881 **** #ifdef ENABLE_LOG fprintf(log_fd, "(---) STARTSTATE\n"); #endif ! thislist->id = listid; addstate(thislist, start, m, 0); /* There are two cases when the NFA advances: 1. input char matches the --- 3910,3916 ---- #ifdef ENABLE_LOG fprintf(log_fd, "(---) STARTSTATE\n"); #endif ! thislist->id = nfa_listid + 1; addstate(thislist, start, m, 0); /* There are two cases when the NFA advances: 1. input char matches the *************** *** 3923,3932 **** nextlist = &list[flag ^= 1]; nextlist->n = 0; /* clear nextlist */ listtbl[1][0] = nextlist; ! ++listid; ! thislist->id = listid; ! nextlist->id = listid + 1; ! neglist->id = listid + 1; #ifdef ENABLE_LOG fprintf(log_fd, "------------------------------------------\n"); --- 3958,3967 ---- nextlist = &list[flag ^= 1]; nextlist->n = 0; /* clear nextlist */ listtbl[1][0] = nextlist; ! ++nfa_listid; ! thislist->id = nfa_listid; ! nextlist->id = nfa_listid + 1; ! neglist->id = nfa_listid + 1; #ifdef ENABLE_LOG fprintf(log_fd, "------------------------------------------\n"); *************** *** 4843,4848 **** --- 4878,4885 ---- nfa_has_zend = prog->has_zend; nfa_has_backref = prog->has_backref; nfa_nsubexpr = prog->nsubexp; + nfa_listid = 1; + nfa_alt_listid = 2; #ifdef DEBUG nfa_regengine.expr = prog->pattern; #endif *************** *** 4851,4857 **** for (i = 0; i < nstate; ++i) { prog->state[i].id = i; ! prog->state[i].lastlist = 0; } retval = nfa_regtry(prog, col); --- 4888,4895 ---- for (i = 0; i < nstate; ++i) { prog->state[i].id = i; ! prog->state[i].lastlist[0] = 0; ! prog->state[i].lastlist[1] = 0; } retval = nfa_regtry(prog, col); *** ../vim-7.3.1105/src/version.c 2013-06-02 22:37:39.000000000 +0200 --- src/version.c 2013-06-03 12:13:15.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1106, /**/ -- hundred-and-one symptoms of being an internet addict: 59. Your wife says communication is important in a marriage...so you buy another computer and install a second phone line so the two of you can chat. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///