| To: vim_dev@googlegroups.com |
| Subject: Patch 7.3.1106 |
| Fcc: outbox |
| From: Bram Moolenaar <Bram@moolenaar.net> |
| Mime-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| |
| Patch 7.3.1106 |
| Problem: New regexp engine: saving and restoring lastlist in the states |
| takes a lot of time. |
| Solution: Use a second lastlist value for the first recursive call. |
| Files: src/regexp.h, src/regexp_nfa.c |
| |
| |
| |
| |
| |
| *** 72,78 **** |
| nfa_state_T *out; |
| nfa_state_T *out1; |
| int id; |
| ! int lastlist; |
| int negated; |
| int val; |
| }; |
| --- 72,78 ---- |
| nfa_state_T *out; |
| nfa_state_T *out1; |
| int id; |
| ! int lastlist[2]; /* 0: normal, 1: recursive */ |
| int negated; |
| int val; |
| }; |
| |
| |
| |
| *** 255,260 **** |
| --- 255,269 ---- |
| /* If not NULL match must end at this position */ |
| static save_se_T *nfa_endp = NULL; |
| |
| + /* listid is global, so that it increases on recursive calls to |
| + * nfa_regmatch(), which means we don't have to clear the lastlist field of |
| + * all the states. */ |
| + static int nfa_listid; |
| + static int nfa_alt_listid; |
| + |
| + /* 0 for first call to nfa_regmatch(), 1 for recursive call. */ |
| + static int nfa_ll_index = 0; |
| + |
| static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags)); |
| static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); |
| static int nfa_emit_equi_class __ARGS((int c, int neg)); |
| |
| *** 2169,2175 **** |
| s->out1 = out1; |
| |
| s->id = istate; |
| ! s->lastlist = 0; |
| s->negated = FALSE; |
| |
| return s; |
| --- 2178,2185 ---- |
| s->out1 = out1; |
| |
| s->id = istate; |
| ! s->lastlist[0] = 0; |
| ! s->lastlist[1] = 0; |
| s->negated = FALSE; |
| |
| return s; |
| |
| *** 3113,3121 **** |
| #endif |
| /* These nodes do not need to be added, but we need to bail out |
| * when it was tried to be added to this list before. */ |
| ! if (state->lastlist == l->id) |
| goto skip_add; |
| ! state->lastlist = l->id; |
| break; |
| |
| case NFA_BOL: |
| --- 3123,3131 ---- |
| #endif |
| /* These nodes do not need to be added, but we need to bail out |
| * when it was tried to be added to this list before. */ |
| ! if (state->lastlist[nfa_ll_index] == l->id) |
| goto skip_add; |
| ! state->lastlist[nfa_ll_index] = l->id; |
| break; |
| |
| case NFA_BOL: |
| |
| *** 3131,3137 **** |
| /* FALLTHROUGH */ |
| |
| default: |
| ! if (state->lastlist == l->id) |
| { |
| /* This state is already in the list, don't add it again, |
| * unless it is an MOPEN that is used for a backreference. */ |
| --- 3141,3147 ---- |
| /* FALLTHROUGH */ |
| |
| default: |
| ! if (state->lastlist[nfa_ll_index] == l->id) |
| { |
| /* This state is already in the list, don't add it again, |
| * unless it is an MOPEN that is used for a backreference. */ |
| |
| *** 3173,3179 **** |
| } |
| |
| /* add the state to the list */ |
| ! state->lastlist = l->id; |
| thread = &l->t[l->n++]; |
| thread->state = state; |
| copy_sub(&thread->subs.norm, &subs->norm); |
| --- 3183,3189 ---- |
| } |
| |
| /* add the state to the list */ |
| ! state->lastlist[nfa_ll_index] = l->id; |
| thread = &l->t[l->n++]; |
| thread->state = state; |
| copy_sub(&thread->subs.norm, &subs->norm); |
| |
| *** 3616,3621 **** |
| --- 3626,3632 ---- |
| /* |
| * Save list IDs for all NFA states of "prog" into "list". |
| * Also reset the IDs to zero. |
| + * Only used for the recursive value lastlist[1]. |
| */ |
| static void |
| nfa_save_listids(prog, list) |
| |
| *** 3629,3636 **** |
| p = &prog->state[0]; |
| for (i = prog->nstate; --i >= 0; ) |
| { |
| ! list[i] = p->lastlist; |
| ! p->lastlist = 0; |
| ++p; |
| } |
| } |
| --- 3640,3647 ---- |
| p = &prog->state[0]; |
| for (i = prog->nstate; --i >= 0; ) |
| { |
| ! list[i] = p->lastlist[1]; |
| ! p->lastlist[1] = 0; |
| ++p; |
| } |
| } |
| |
| *** 3649,3655 **** |
| p = &prog->state[0]; |
| for (i = prog->nstate; --i >= 0; ) |
| { |
| ! p->lastlist = list[i]; |
| ++p; |
| } |
| } |
| --- 3660,3666 ---- |
| p = &prog->state[0]; |
| for (i = prog->nstate; --i >= 0; ) |
| { |
| ! p->lastlist[1] = list[i]; |
| ++p; |
| } |
| } |
| |
| *** 3683,3692 **** |
| --- 3694,3705 ---- |
| char_u *save_regline = regline; |
| int save_reglnum = reglnum; |
| int save_nfa_match = nfa_match; |
| + int save_nfa_listid = nfa_listid; |
| save_se_T *save_nfa_endp = nfa_endp; |
| save_se_T endpos; |
| save_se_T *endposp = NULL; |
| int result; |
| + int need_restore = FALSE; |
| |
| if (state->c == NFA_START_INVISIBLE_BEFORE) |
| { |
| |
| *** 3745,3774 **** |
| } |
| } |
| |
| - /* Call nfa_regmatch() to check if the current concat matches |
| - * at this position. The concat ends with the node |
| - * NFA_END_INVISIBLE */ |
| - if (*listids == NULL) |
| - { |
| - *listids = (int *)lalloc(sizeof(int) * nstate, TRUE); |
| - if (*listids == NULL) |
| - { |
| - EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!")); |
| - return 0; |
| - } |
| - } |
| #ifdef ENABLE_LOG |
| if (log_fd != stderr) |
| fclose(log_fd); |
| log_fd = NULL; |
| #endif |
| ! /* Have to clear the listid field of the NFA nodes, so that |
| ! * nfa_regmatch() and addstate() can run properly after |
| ! * recursion. */ |
| ! nfa_save_listids(prog, *listids); |
| nfa_endp = endposp; |
| result = nfa_regmatch(prog, state->out, submatch, m); |
| ! nfa_restore_listids(prog, *listids); |
| |
| /* restore position in input text */ |
| reginput = save_reginput; |
| --- 3758,3809 ---- |
| } |
| } |
| |
| #ifdef ENABLE_LOG |
| if (log_fd != stderr) |
| fclose(log_fd); |
| log_fd = NULL; |
| #endif |
| ! /* Have to clear the lastlist field of the NFA nodes, so that |
| ! * nfa_regmatch() and addstate() can run properly after recursion. */ |
| ! if (nfa_ll_index == 1) |
| ! { |
| ! /* Already calling nfa_regmatch() recursively. Save the lastlist[1] |
| ! * values and clear them. */ |
| ! if (*listids == NULL) |
| ! { |
| ! *listids = (int *)lalloc(sizeof(int) * nstate, TRUE); |
| ! if (*listids == NULL) |
| ! { |
| ! EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!")); |
| ! return 0; |
| ! } |
| ! } |
| ! nfa_save_listids(prog, *listids); |
| ! need_restore = TRUE; |
| ! /* any value of nfa_listid will do */ |
| ! } |
| ! else |
| ! { |
| ! /* First recursive nfa_regmatch() call, switch to the second lastlist |
| ! * entry. Make sure nfa_listid is different from a previous recursive |
| ! * call, because some states may still have this ID. */ |
| ! ++nfa_ll_index; |
| ! if (nfa_listid <= nfa_alt_listid) |
| ! nfa_listid = nfa_alt_listid; |
| ! } |
| ! |
| ! /* Call nfa_regmatch() to check if the current concat matches at this |
| ! * position. The concat ends with the node NFA_END_INVISIBLE */ |
| nfa_endp = endposp; |
| result = nfa_regmatch(prog, state->out, submatch, m); |
| ! |
| ! if (need_restore) |
| ! nfa_restore_listids(prog, *listids); |
| ! else |
| ! { |
| ! --nfa_ll_index; |
| ! nfa_alt_listid = nfa_listid; |
| ! } |
| |
| /* restore position in input text */ |
| reginput = save_reginput; |
| |
| *** 3776,3781 **** |
| --- 3811,3817 ---- |
| reglnum = save_reglnum; |
| nfa_match = save_nfa_match; |
| nfa_endp = save_nfa_endp; |
| + nfa_listid = save_nfa_listid; |
| |
| #ifdef ENABLE_LOG |
| log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); |
| |
| *** 3821,3827 **** |
| nfa_list_T list[3]; |
| nfa_list_T *listtbl[2][2]; |
| nfa_list_T *ll; |
| - int listid = 1; |
| int listidx; |
| nfa_list_T *thislist; |
| nfa_list_T *nextlist; |
| --- 3857,3862 ---- |
| |
| *** 3875,3881 **** |
| #ifdef ENABLE_LOG |
| fprintf(log_fd, "(---) STARTSTATE\n"); |
| #endif |
| ! thislist->id = listid; |
| addstate(thislist, start, m, 0); |
| |
| /* There are two cases when the NFA advances: 1. input char matches the |
| --- 3910,3916 ---- |
| #ifdef ENABLE_LOG |
| fprintf(log_fd, "(---) STARTSTATE\n"); |
| #endif |
| ! thislist->id = nfa_listid + 1; |
| addstate(thislist, start, m, 0); |
| |
| /* There are two cases when the NFA advances: 1. input char matches the |
| |
| *** 3923,3932 **** |
| nextlist = &list[flag ^= 1]; |
| nextlist->n = 0; /* clear nextlist */ |
| listtbl[1][0] = nextlist; |
| ! ++listid; |
| ! thislist->id = listid; |
| ! nextlist->id = listid + 1; |
| ! neglist->id = listid + 1; |
| |
| #ifdef ENABLE_LOG |
| fprintf(log_fd, "------------------------------------------\n"); |
| --- 3958,3967 ---- |
| nextlist = &list[flag ^= 1]; |
| nextlist->n = 0; /* clear nextlist */ |
| listtbl[1][0] = nextlist; |
| ! ++nfa_listid; |
| ! thislist->id = nfa_listid; |
| ! nextlist->id = nfa_listid + 1; |
| ! neglist->id = nfa_listid + 1; |
| |
| #ifdef ENABLE_LOG |
| fprintf(log_fd, "------------------------------------------\n"); |
| |
| *** 4843,4848 **** |
| --- 4878,4885 ---- |
| nfa_has_zend = prog->has_zend; |
| nfa_has_backref = prog->has_backref; |
| nfa_nsubexpr = prog->nsubexp; |
| + nfa_listid = 1; |
| + nfa_alt_listid = 2; |
| #ifdef DEBUG |
| nfa_regengine.expr = prog->pattern; |
| #endif |
| |
| *** 4851,4857 **** |
| for (i = 0; i < nstate; ++i) |
| { |
| prog->state[i].id = i; |
| ! prog->state[i].lastlist = 0; |
| } |
| |
| retval = nfa_regtry(prog, col); |
| --- 4888,4895 ---- |
| for (i = 0; i < nstate; ++i) |
| { |
| prog->state[i].id = i; |
| ! prog->state[i].lastlist[0] = 0; |
| ! prog->state[i].lastlist[1] = 0; |
| } |
| |
| retval = nfa_regtry(prog, col); |
| |
| |
| |
| *** 730,731 **** |
| --- 730,733 ---- |
| { /* Add new patch number below this line */ |
| + /**/ |
| + 1106, |
| /**/ |
| |
| -- |
| hundred-and-one symptoms of being an internet addict: |
| 59. Your wife says communication is important in a marriage...so you buy |
| another computer and install a second phone line so the two of you can |
| chat. |
| |
| /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
| /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
| \\\ an exciting new programming language -- http://www.Zimbu.org /// |
| \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |