commit 9795a1801eb1a4f3ae6346e32666d3d05f006115 Author: Siddhesh Poyarekar Date: Sun Jun 30 20:45:05 2013 +0530 Fall back to non-cached sequence traversal and comparison strcoll currently falls back to alloca if malloc fails, resulting in a possible stack overflow. This patch implements sequence traversal and comparison without caching indeces and rules. diff --git glibc-2.17-c758a686/string/strcoll_l.c glibc-2.17-c758a686/string/strcoll_l.c index 1bb9e23..1be6874 100644 --- glibc-2.17-c758a686/string/strcoll_l.c +++ glibc-2.17-c758a686/string/strcoll_l.c @@ -55,6 +55,12 @@ typedef struct const USTRING_TYPE *us; /* The string. */ int32_t *idxarr; /* Array to cache weight indeces. */ unsigned char *rulearr; /* Array to cache rules. */ + unsigned char rule; /* Saved rule for the first sequence. */ + int32_t idx; /* Index to weight of the current sequence. */ + int32_t save_idx; /* Save looked up index of a forward + sequence after the last backward + sequence. */ + const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */ } coll_seq; /* Get next sequence. The weight indeces are cached, so we don't need to @@ -227,7 +233,191 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets, seq->us = us; } -/* Compare two sequences. */ +/* Get next sequence. Traverse the string as required. This function does not + set or use any index or rule cache. */ +static void +get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, + const USTRING_TYPE *weights, const int32_t *table, + const USTRING_TYPE *extra, const int32_t *indirect, + int pass) +{ +#include WEIGHT_H + int val = seq->val = 0; + int len = seq->len; + size_t backw_stop = seq->backw_stop; + size_t backw = seq->backw; + size_t idxcnt = seq->idxcnt; + size_t idxmax = seq->idxmax; + int32_t idx = seq->idx; + const USTRING_TYPE *us = seq->us; + + while (len == 0) + { + ++val; + if (backw_stop != ~0ul) + { + /* The is something pushed. */ + if (backw == backw_stop) + { + /* The last pushed character was handled. Continue + with forward characters. */ + if (idxcnt < idxmax) + { + idx = seq->save_idx; + backw_stop = ~0ul; + } + else + { + /* Nothing anymore. The backward sequence ended with + the last sequence in the string. Note that len is + still zero. */ + idx = 0; + break; + } + } + else + { + /* XXX Traverse BACKW sequences from the beginning of + BACKW_STOP to get the next sequence. Is ther a quicker way + to do this? */ + int i = backw_stop; + us = seq->back_us; + while (i < backw) + { + int32_t tmp = findidx (&us, -1); + idx = tmp & 0xffffff; + i++; + } + --backw; + us = seq->us; + } + } + else + { + backw_stop = idxmax; + int32_t prev_idx = idx; + + while (*us != L('\0')) + { + int32_t tmp = findidx (&us, -1); + unsigned char rule = tmp >> 24; + prev_idx = idx; + idx = tmp & 0xffffff; + idxcnt = idxmax++; + + /* Save the rule for the first sequence. */ + if (__glibc_unlikely (idxcnt == 0)) + seq->rule = rule; + + if ((rulesets[rule * nrules + pass] + & sort_backward) == 0) + /* No more backward characters to push. */ + break; + ++idxcnt; + } + + if (backw_stop >= idxcnt) + { + /* No sequence at all or just one. */ + if (idxcnt == idxmax || backw_stop > idxcnt) + /* Note that len is still zero. */ + break; + + backw_stop = ~0ul; + } + else + { + /* We pushed backward sequences. If the stream ended with the + backward sequence, then we process the last sequence we + found. Otherwise we process the sequence before the last + one since the last one was a forward sequence. */ + seq->back_us = seq->us; + seq->us = us; + backw = idxcnt; + if (idxmax > idxcnt) + { + backw--; + seq->save_idx = idx; + idx = prev_idx; + } + if (backw > backw_stop) + backw--; + } + } + + len = weights[idx++]; + /* Skip over indeces of previous levels. */ + for (int i = 0; i < pass; i++) + { + idx += len; + len = weights[idx]; + idx++; + } + } + + /* Update the structure. */ + seq->val = val; + seq->len = len; + seq->backw_stop = backw_stop; + seq->backw = backw; + seq->idxcnt = idxcnt; + seq->idxmax = idxmax; + seq->us = us; + seq->idx = idx; +} + +/* Compare two sequences. This version does not use the index and rules + cache. */ +static int +do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position, + const USTRING_TYPE *weights) +{ + int seq1len = seq1->len; + int seq2len = seq2->len; + int val1 = seq1->val; + int val2 = seq2->val; + int idx1 = seq1->idx; + int idx2 = seq2->idx; + int result = 0; + + /* Test for position if necessary. */ + if (position && val1 != val2) + { + result = val1 - val2; + goto out; + } + + /* Compare the two sequences. */ + do + { + if (weights[idx1] != weights[idx2]) + { + /* The sequences differ. */ + result = weights[idx1] - weights[idx2]; + goto out; + } + + /* Increment the offsets. */ + ++idx1; + ++idx2; + + --seq1len; + --seq2len; + } + while (seq1len > 0 && seq2len > 0); + + if (position && seq1len != seq2len) + result = seq1len - seq2len; + +out: + seq1->len = seq1len; + seq2->len = seq2len; + seq1->idx = idx1; + seq2->idx = idx2; + return result; +} + +/* Compare two sequences using the index cache. */ static int do_compare (coll_seq *seq1, coll_seq *seq2, int position, const USTRING_TYPE *weights) @@ -334,57 +524,62 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) memset (&seq1, 0, sizeof (seq1)); seq2 = seq1; - /* We need the elements of the strings as unsigned values since they - are used as indeces. */ - seq1.us = (const USTRING_TYPE *) s1; - seq2.us = (const USTRING_TYPE *) s2; - if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1))) { seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1)); - seq2.idxarr = &seq1.idxarr[s1len]; - seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len]; - seq2.rulearr = &seq1.rulearr[s1len]; - - if (seq1.idxarr == NULL) - /* No memory. Well, go with the stack then. - - XXX Once this implementation is stable we will handle this - differently. Instead of precomputing the indeces we will - do this in time. This means, though, that this happens for - every pass again. */ - goto try_stack; - use_malloc = true; + + /* If we failed to allocate memory, we leave everything as NULL so that + we use the nocache version of traversal and comparison functions. */ + if (seq1.idxarr != NULL) + { + seq2.idxarr = &seq1.idxarr[s1len]; + seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len]; + seq2.rulearr = &seq1.rulearr[s1len]; + use_malloc = true; + } } else { - try_stack: seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t)); seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t)); seq1.rulearr = (unsigned char *) alloca (s1len); seq2.rulearr = (unsigned char *) alloca (s2len); } - seq1.rulearr[0] = 0; + int rule = 0; /* Cache values in the first pass and if needed, use them in subsequent passes. */ for (int pass = 0; pass < nrules; ++pass) { seq1.idxcnt = 0; + seq1.idx = 0; + seq2.idx = 0; seq1.backw_stop = ~0ul; seq1.backw = ~0ul; seq2.idxcnt = 0; seq2.backw_stop = ~0ul; seq2.backw = ~0ul; + /* We need the elements of the strings as unsigned values since they + are used as indeces. */ + seq1.us = (const USTRING_TYPE *) s1; + seq2.us = (const USTRING_TYPE *) s2; + /* We assume that if a rule has defined `position' in one section this is true for all of them. */ - int position = rulesets[seq1.rulearr[0] * nrules + pass] & sort_position; + int position = rulesets[rule * nrules + pass] & sort_position; while (1) { - if (pass == 0) + if (__glibc_unlikely (seq1.idxarr == NULL)) + { + get_next_seq_nocache (&seq1, nrules, rulesets, weights, table, + extra, indirect, pass); + get_next_seq_nocache (&seq2, nrules, rulesets, weights, table, + extra, indirect, pass); + } + else if (pass == 0) { get_next_seq (&seq1, nrules, rulesets, weights, table, extra, indirect); @@ -411,10 +606,18 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) goto free_and_return; } - result = do_compare (&seq1, &seq2, position, weights); + if (__glibc_unlikely (seq1.idxarr == NULL)) + result = do_compare_nocache (&seq1, &seq2, position, weights); + else + result = do_compare (&seq1, &seq2, position, weights); if (result != 0) goto free_and_return; } + + if (__glibc_likely (seq1.rulearr != NULL)) + rule = seq1.rulearr[0]; + else + rule = seq1.rule; } /* Free the memory if needed. */