diff --git a/src/dfa.c b/src/dfa.c index b9b7103..cd11db6 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -3894,13 +3894,13 @@ struct must }; static must * -allocmust (must *mp) +allocmust (must *mp, size_t size) { must *new_mp = xmalloc (sizeof *new_mp); new_mp->in = xzalloc (sizeof *new_mp->in); - new_mp->left = xzalloc (2); - new_mp->right = xzalloc (2); - new_mp->is = xzalloc (2); + new_mp->left = xzalloc (size); + new_mp->right = xzalloc (size); + new_mp->is = xzalloc (size); new_mp->begline = false; new_mp->endline = false; new_mp->prev = mp; @@ -3933,24 +3933,23 @@ dfamust (struct dfa *d) { must *mp = NULL; char const *result = ""; - size_t ri; size_t i; bool exact = false; bool begline = false; bool endline = false; struct dfamust *dm; - for (ri = 0; ri < d->tindex; ++ri) + for (size_t ri = 0; ri < d->tindex; ++ri) { token t = d->tokens[ri]; switch (t) { case BEGLINE: - mp = allocmust (mp); + mp = allocmust (mp, 2); mp->begline = true; break; case ENDLINE: - mp = allocmust (mp); + mp = allocmust (mp, 2); mp->endline = true; break; case LPAREN: @@ -3965,7 +3964,7 @@ dfamust (struct dfa *d) case BACKREF: case ANYCHAR: case MBCSET: - mp = allocmust (mp); + mp = allocmust (mp, 2); break; case STAR: @@ -4082,7 +4081,6 @@ dfamust (struct dfa *d) goto done; default: - mp = allocmust (mp); if (CSET <= t) { /* If T is a singleton, or if case-folding in a unibyte @@ -4095,7 +4093,10 @@ dfamust (struct dfa *d) if (tstbit (j, *ccl)) break; if (! (j < NOTCHAR)) - break; + { + mp = allocmust (mp, 2); + break; + } t = j; while (++j < NOTCHAR) if (tstbit (j, *ccl) @@ -4103,12 +4104,36 @@ dfamust (struct dfa *d) && toupper (j) == toupper (t))) break; if (j < NOTCHAR) - break; + { + mp = allocmust (mp, 2); + break; + } } + + size_t rj = ri + 2; + if (d->tokens[ri + 1] == CAT) + { + for (; rj < d->tindex - 1; rj += 2) + { + if ((rj != ri && (d->tokens[rj] <= 0 + || NOTCHAR <= d->tokens[rj])) + || d->tokens[rj + 1] != CAT) + break; + } + } + mp = allocmust (mp, ((rj - ri) >> 1) + 1); mp->is[0] = mp->left[0] = mp->right[0] = case_fold && !d->multibyte ? toupper (t) : t; - mp->is[1] = mp->left[1] = mp->right[1] = '\0'; - mp->in = enlist (mp->in, mp->is, 1); + + for (i = 1; ri + 2 < rj; i++) + { + ri += 2; + t = d->tokens[ri]; + mp->is[i] = mp->left[i] = mp->right[i] + = case_fold && MB_CUR_MAX == 1 ? toupper (t) : t; + } + mp->is[i] = mp->left[i] = mp->right[i] = '\0'; + mp->in = enlist (mp->in, mp->is, i - 1); break; } }