| To: vim_dev@googlegroups.com |
| Subject: Patch 7.3.1105 |
| Fcc: outbox |
| From: Bram Moolenaar <Bram@moolenaar.net> |
| Mime-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| |
| Patch 7.3.1105 |
| Problem: New regexp engine: too much code in one function. Dead code. |
| Solution: Move the recursive nfa_regmatch call to a separate function. |
| Remove the dead code. |
| Files: src/regexp_nfa.c |
| |
| |
| |
| |
| |
| *** 3665,3673 **** |
| --- 3665,3802 ---- |
| return val == pos; |
| } |
| |
| + static int recursive_regmatch __ARGS((nfa_state_T *state, nfa_regprog_T *prog, regsubs_T *submatch, regsubs_T *m, int **listids)); |
| static int nfa_regmatch __ARGS((nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m)); |
| |
| /* |
| + * Recursively call nfa_regmatch() for an invisible (zero-width) group; the input position is restored afterwards. |
| + */ |
| + static int |
| + recursive_regmatch(state, prog, submatch, m, listids) |
| + nfa_state_T *state; |
| + nfa_regprog_T *prog; |
| + regsubs_T *submatch; |
| + regsubs_T *m; |
| + int **listids; |
| + { |
| + char_u *save_reginput = reginput; |
| + char_u *save_regline = regline; |
| + int save_reglnum = reglnum; |
| + int save_nfa_match = nfa_match; |
| + save_se_T *save_nfa_endp = nfa_endp; |
| + save_se_T endpos; |
| + save_se_T *endposp = NULL; |
| + int result; |
| + |
| + if (state->c == NFA_START_INVISIBLE_BEFORE) |
| + { |
| + /* The recursive match must end at the current position. */ |
| + endposp = &endpos; |
| + if (REG_MULTI) |
| + { |
| + endpos.se_u.pos.col = (int)(reginput - regline); |
| + endpos.se_u.pos.lnum = reglnum; |
| + } |
| + else |
| + endpos.se_u.ptr = reginput; |
| + |
| + /* Go back the specified number of bytes, or as far as the |
| + * start of the previous line, to try matching "\@<=" or |
| + * not matching "\@<!". |
| + * TODO: This is very inefficient! Would be better to |
| + * first check for a match with what follows. */ |
| + if (state->val <= 0) |
| + { |
| + if (REG_MULTI) |
| + { |
| + regline = reg_getline(--reglnum); |
| + if (regline == NULL) |
| + /* can't go before the first line */ |
| + regline = reg_getline(++reglnum); |
| + } |
| + reginput = regline; |
| + } |
| + else |
| + { |
| + if (REG_MULTI && (int)(reginput - regline) < state->val) |
| + { |
| + /* Not enough bytes in this line, go to end of |
| + * previous line. */ |
| + regline = reg_getline(--reglnum); |
| + if (regline == NULL) |
| + { |
| + /* can't go before the first line */ |
| + regline = reg_getline(++reglnum); |
| + reginput = regline; |
| + } |
| + else |
| + reginput = regline + STRLEN(regline); |
| + } |
| + if ((int)(reginput - regline) >= state->val) |
| + { |
| + reginput -= state->val; |
| + #ifdef FEAT_MBYTE |
| + if (has_mbyte) |
| + reginput -= mb_head_off(regline, reginput); |
| + #endif |
| + } |
| + else |
| + reginput = regline; |
| + } |
| + } |
| + |
| + /* Call nfa_regmatch() to check if the current concat matches |
| + * at this position. The concat ends with the node |
| + * NFA_END_INVISIBLE */ |
| + if (*listids == NULL) |
| + { |
| + *listids = (int *)lalloc(sizeof(int) * nstate, TRUE); |
| + if (*listids == NULL) |
| + { |
| + EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!")); |
| + return 0; |
| + } |
| + } |
| + #ifdef ENABLE_LOG |
| + if (log_fd != stderr) |
| + fclose(log_fd); |
| + log_fd = NULL; |
| + #endif |
| + /* Have to clear the listid field of the NFA nodes, so that |
| + * nfa_regmatch() and addstate() can run properly after |
| + * recursion. */ |
| + nfa_save_listids(prog, *listids); |
| + nfa_endp = endposp; |
| + result = nfa_regmatch(prog, state->out, submatch, m); |
| + nfa_restore_listids(prog, *listids); |
| + |
| + /* restore position in input text */ |
| + reginput = save_reginput; |
| + regline = save_regline; |
| + reglnum = save_reglnum; |
| + nfa_match = save_nfa_match; |
| + nfa_endp = save_nfa_endp; |
| + |
| + #ifdef ENABLE_LOG |
| + log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); |
| + if (log_fd != NULL) |
| + { |
| + fprintf(log_fd, "****************************\n"); |
| + fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n"); |
| + fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE"); |
| + fprintf(log_fd, "****************************\n"); |
| + } |
| + else |
| + { |
| + EMSG(_("Could not open temporary log file for writing, displaying on stderr ... ")); |
| + log_fd = stderr; |
| + } |
| + #endif |
| + |
| + return result; |
| + } |
| + |
| + /* |
| * Main matching routine. |
| * |
| * Run NFA to determine whether it matches reginput. |
| |
| *** 3881,4051 **** |
| } |
| |
| case NFA_END_INVISIBLE: |
| ! /* This is only encountered after a NFA_START_INVISIBLE or |
| * NFA_START_INVISIBLE_BEFORE node. |
| * They surround a zero-width group, used with "\@=", "\&", |
| * "\@!", "\@<=" and "\@<!". |
| * If we got here, it means that the current "invisible" group |
| * finished successfully, so return control to the parent |
| ! * nfa_regmatch(). Submatches are stored in *m, and used in |
| ! * the parent call. */ |
| ! if (start->c == NFA_MOPEN) |
| ! /* TODO: do we ever get here? */ |
| ! addstate_here(thislist, t->state->out, &t->subs, &listidx); |
| ! else |
| ! { |
| #ifdef ENABLE_LOG |
| ! if (nfa_endp != NULL) |
| ! { |
| ! if (REG_MULTI) |
| ! fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n", |
| ! (int)reglnum, |
| ! (int)nfa_endp->se_u.pos.lnum, |
| ! (int)(reginput - regline), |
| ! nfa_endp->se_u.pos.col); |
| ! else |
| ! fprintf(log_fd, "Current col: %d, endp col: %d\n", |
| ! (int)(reginput - regline), |
| ! (int)(nfa_endp->se_u.ptr - reginput)); |
| ! } |
| ! #endif |
| ! /* It's only a match if it ends at "nfa_endp" */ |
| ! if (nfa_endp != NULL && (REG_MULTI |
| ! ? (reglnum != nfa_endp->se_u.pos.lnum |
| ! || (int)(reginput - regline) |
| ! != nfa_endp->se_u.pos.col) |
| ! : reginput != nfa_endp->se_u.ptr)) |
| ! break; |
| ! |
| ! /* do not set submatches for \@! */ |
| ! if (!t->state->negated) |
| ! { |
| ! copy_sub(&m->norm, &t->subs.norm); |
| ! #ifdef FEAT_SYN_HL |
| ! if (nfa_has_zsubexpr) |
| ! copy_sub(&m->synt, &t->subs.synt); |
| ! #endif |
| ! } |
| ! nfa_match = TRUE; |
| ! } |
| ! break; |
| ! |
| ! case NFA_START_INVISIBLE: |
| ! case NFA_START_INVISIBLE_BEFORE: |
| ! { |
| ! char_u *save_reginput = reginput; |
| ! char_u *save_regline = regline; |
| ! int save_reglnum = reglnum; |
| ! int save_nfa_match = nfa_match; |
| ! save_se_T *save_nfa_endp = nfa_endp; |
| ! save_se_T endpos; |
| ! save_se_T *endposp = NULL; |
| ! |
| ! if (t->state->c == NFA_START_INVISIBLE_BEFORE) |
| { |
| - /* The recursive match must end at the current position. */ |
| - endposp = &endpos; |
| if (REG_MULTI) |
| ! { |
| ! endpos.se_u.pos.col = (int)(reginput - regline); |
| ! endpos.se_u.pos.lnum = reglnum; |
| ! } |
| else |
| ! endpos.se_u.ptr = reginput; |
| ! |
| ! /* Go back the specified number of bytes, or as far as the |
| ! * start of the previous line, to try matching "\@<=" or |
| ! * not matching "\@<!". |
| ! * TODO: This is very inefficient! Would be better to |
| ! * first check for a match with what follows. */ |
| ! if (t->state->val <= 0) |
| ! { |
| ! if (REG_MULTI) |
| ! { |
| ! regline = reg_getline(--reglnum); |
| ! if (regline == NULL) |
| ! /* can't go before the first line */ |
| ! regline = reg_getline(++reglnum); |
| ! } |
| ! reginput = regline; |
| ! } |
| ! else |
| ! { |
| ! if (REG_MULTI |
| ! && (int)(reginput - regline) < t->state->val) |
| ! { |
| ! /* Not enough bytes in this line, go to end of |
| ! * previous line. */ |
| ! regline = reg_getline(--reglnum); |
| ! if (regline == NULL) |
| ! { |
| ! /* can't go before the first line */ |
| ! regline = reg_getline(++reglnum); |
| ! reginput = regline; |
| ! } |
| ! else |
| ! reginput = regline + STRLEN(regline); |
| ! } |
| ! if ((int)(reginput - regline) >= t->state->val) |
| ! { |
| ! reginput -= t->state->val; |
| ! #ifdef FEAT_MBYTE |
| ! if (has_mbyte) |
| ! reginput -= mb_head_off(regline, reginput); |
| ! #endif |
| ! } |
| ! else |
| ! reginput = regline; |
| ! } |
| } |
| |
| ! /* Call nfa_regmatch() to check if the current concat matches |
| ! * at this position. The concat ends with the node |
| ! * NFA_END_INVISIBLE */ |
| ! if (listids == NULL) |
| { |
| ! listids = (int *)lalloc(sizeof(int) * nstate, TRUE); |
| ! if (listids == NULL) |
| ! { |
| ! EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!")); |
| ! return 0; |
| ! } |
| } |
| ! #ifdef ENABLE_LOG |
| ! if (log_fd != stderr) |
| ! fclose(log_fd); |
| ! log_fd = NULL; |
| ! #endif |
| ! /* Have to clear the listid field of the NFA nodes, so that |
| ! * nfa_regmatch() and addstate() can run properly after |
| ! * recursion. */ |
| ! nfa_save_listids(prog, listids); |
| ! nfa_endp = endposp; |
| ! result = nfa_regmatch(prog, t->state->out, submatch, m); |
| ! nfa_restore_listids(prog, listids); |
| ! |
| ! /* restore position in input text */ |
| ! reginput = save_reginput; |
| ! regline = save_regline; |
| ! reglnum = save_reglnum; |
| ! nfa_match = save_nfa_match; |
| ! nfa_endp = save_nfa_endp; |
| |
| - #ifdef ENABLE_LOG |
| - log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); |
| - if (log_fd != NULL) |
| - { |
| - fprintf(log_fd, "****************************\n"); |
| - fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n"); |
| - fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE"); |
| - fprintf(log_fd, "****************************\n"); |
| - } |
| - else |
| - { |
| - EMSG(_("Could not open temporary log file for writing, displaying on stderr ... ")); |
| - log_fd = stderr; |
| - } |
| - #endif |
| /* for \@! it is a match when result is FALSE */ |
| if (result != t->state->negated) |
| { |
| --- 4010,4066 ---- |
| } |
| |
| case NFA_END_INVISIBLE: |
| ! /* |
| ! * This is only encountered after a NFA_START_INVISIBLE or |
| * NFA_START_INVISIBLE_BEFORE node. |
| * They surround a zero-width group, used with "\@=", "\&", |
| * "\@!", "\@<=" and "\@<!". |
| * If we got here, it means that the current "invisible" group |
| * finished successfully, so return control to the parent |
| ! * nfa_regmatch(). For a look-behind it is only a match when it |
| ! * ends at the position in "nfa_endp". |
| ! * Submatches are stored in *m, and used in the parent call. |
| ! */ |
| #ifdef ENABLE_LOG |
| ! if (nfa_endp != NULL) |
| { |
| if (REG_MULTI) |
| ! fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n", |
| ! (int)reglnum, |
| ! (int)nfa_endp->se_u.pos.lnum, |
| ! (int)(reginput - regline), |
| ! nfa_endp->se_u.pos.col); |
| else |
| ! fprintf(log_fd, "Current col: %d, endp col: %d\n", |
| ! (int)(reginput - regline), |
| ! (int)(nfa_endp->se_u.ptr - reginput)); |
| } |
| + #endif |
| + /* It's only a match if it ends at "nfa_endp" */ |
| + if (nfa_endp != NULL && (REG_MULTI |
| + ? (reglnum != nfa_endp->se_u.pos.lnum |
| + || (int)(reginput - regline) |
| + != nfa_endp->se_u.pos.col) |
| + : reginput != nfa_endp->se_u.ptr)) |
| + break; |
| |
| ! /* do not set submatches for \@! */ |
| ! if (!t->state->negated) |
| { |
| ! copy_sub(&m->norm, &t->subs.norm); |
| ! #ifdef FEAT_SYN_HL |
| ! if (nfa_has_zsubexpr) |
| ! copy_sub(&m->synt, &t->subs.synt); |
| ! #endif |
| } |
| ! nfa_match = TRUE; |
| ! break; |
| ! |
| ! case NFA_START_INVISIBLE: |
| ! case NFA_START_INVISIBLE_BEFORE: |
| ! result = recursive_regmatch(t->state, prog, submatch, m, |
| ! &listids); |
| |
| /* for \@! it is a match when result is FALSE */ |
| if (result != t->state->negated) |
| { |
| |
| *** 4056,4067 **** |
| #endif |
| |
| /* t->state->out1 is the corresponding END_INVISIBLE node; |
| ! * Add it to the current list (zero-width match). */ |
| addstate_here(thislist, t->state->out1->out, &t->subs, |
| &listidx); |
| } |
| break; |
| - } |
| |
| case NFA_BOL: |
| if (reginput == regline) |
| --- 4071,4081 ---- |
| #endif |
| |
| /* t->state->out1 is the corresponding END_INVISIBLE node; |
| ! * Add its out to the current list (zero-width match). */ |
| addstate_here(thislist, t->state->out1->out, &t->subs, |
| &listidx); |
| } |
| break; |
| |
| case NFA_BOL: |
| if (reginput == regline) |
| |
| |
| |
| *** 730,731 **** |
| --- 730,733 ---- |
| { /* Add new patch number below this line */ |
| + /**/ |
| + 1105, |
| /**/ |
| |
| -- |
| "I can't complain, but sometimes I still do." (Joe Walsh) |
| |
| /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
| /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
| \\\ an exciting new programming language -- http://www.Zimbu.org /// |
| \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |