Tree - rpms/glibc - CentOS Git server

rpms / glibc

Blame SOURCES/glibc-rh989862-2.patch

Blob History Raw

		ce426f	`commit 9795a1801eb1a4f3ae6346e32666d3d05f006115`
		ce426f	`Author: Siddhesh Poyarekar <siddhesh@redhat.com>`
		ce426f	`Date: Sun Jun 30 20:45:05 2013 +0530`
		ce426f
		ce426f	`Fall back to non-cached sequence traversal and comparison`
		ce426f
		ce426f	`strcoll currently falls back to alloca if malloc fails, resulting in a`
		ce426f	`possible stack overflow. This patch implements sequence traversal and`
		ce426f	`comparison without caching indeces and rules.`
		ce426f
		ce426f	`diff --git glibc-2.17-c758a686/string/strcoll_l.c glibc-2.17-c758a686/string/strcoll_l.c`
		ce426f	`index 1bb9e23..1be6874 100644`
		ce426f	`--- glibc-2.17-c758a686/string/strcoll_l.c`
		ce426f	`+++ glibc-2.17-c758a686/string/strcoll_l.c`
		ce426f	`@@ -55,6 +55,12 @@ typedef struct`
		ce426f	`const USTRING_TYPE *us; /* The string. */`
		ce426f	`int32_t *idxarr; /* Array to cache weight indeces. */`
		ce426f	`unsigned char *rulearr; /* Array to cache rules. */`
		ce426f	`+ unsigned char rule; /* Saved rule for the first sequence. */`
		ce426f	`+ int32_t idx; /* Index to weight of the current sequence. */`
		ce426f	`+ int32_t save_idx; /* Save looked up index of a forward`
		ce426f	`+ sequence after the last backward`
		ce426f	`+ sequence. */`
		ce426f	`+ const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */`
		ce426f	`} coll_seq;`
		ce426f
		ce426f	`/* Get next sequence. The weight indeces are cached, so we don't need to`
		ce426f	`@@ -227,7 +233,191 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,`
		ce426f	`seq->us = us;`
		ce426f	`}`
		ce426f
		ce426f	`-/* Compare two sequences. */`
		ce426f	`+/* Get next sequence. Traverse the string as required. This function does not`
		ce426f	`+ set or use any index or rule cache. */`
		ce426f	`+static void`
		ce426f	`+get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets,`
		ce426f	`+ const USTRING_TYPE *weights, const int32_t *table,`
		ce426f	`+ const USTRING_TYPE *extra, const int32_t *indirect,`
		ce426f	`+ int pass)`
		ce426f	`+{`
		ce426f	`+#include WEIGHT_H`
		ce426f	`+ int val = seq->val = 0;`
		ce426f	`+ int len = seq->len;`
		ce426f	`+ size_t backw_stop = seq->backw_stop;`
		ce426f	`+ size_t backw = seq->backw;`
		ce426f	`+ size_t idxcnt = seq->idxcnt;`
		ce426f	`+ size_t idxmax = seq->idxmax;`
		ce426f	`+ int32_t idx = seq->idx;`
		ce426f	`+ const USTRING_TYPE *us = seq->us;`
		ce426f	`+`
		ce426f	`+ while (len == 0)`
		ce426f	`+ {`
		ce426f	`+ ++val;`
		ce426f	`+ if (backw_stop != ~0ul)`
		ce426f	`+ {`
		ce426f	`+ /* The is something pushed. */`
		ce426f	`+ if (backw == backw_stop)`
		ce426f	`+ {`
		ce426f	`+ /* The last pushed character was handled. Continue`
		ce426f	`+ with forward characters. */`
		ce426f	`+ if (idxcnt < idxmax)`
		ce426f	`+ {`
		ce426f	`+ idx = seq->save_idx;`
		ce426f	`+ backw_stop = ~0ul;`
		ce426f	`+ }`
		ce426f	`+ else`
		ce426f	`+ {`
		ce426f	`+ /* Nothing anymore. The backward sequence ended with`
		ce426f	`+ the last sequence in the string. Note that len is`
		ce426f	`+ still zero. */`
		ce426f	`+ idx = 0;`
		ce426f	`+ break;`
		ce426f	`+ }`
		ce426f	`+ }`
		ce426f	`+ else`
		ce426f	`+ {`
		ce426f	`+ /* XXX Traverse BACKW sequences from the beginning of`
		ce426f	`+ BACKW_STOP to get the next sequence. Is ther a quicker way`
		ce426f	`+ to do this? */`
		ce426f	`+ int i = backw_stop;`
		ce426f	`+ us = seq->back_us;`
		ce426f	`+ while (i < backw)`
		ce426f	`+ {`
		ce426f	`+ int32_t tmp = findidx (&us, -1);`
		ce426f	`+ idx = tmp & 0xffffff;`
		ce426f	`+ i++;`
		ce426f	`+ }`
		ce426f	`+ --backw;`
		ce426f	`+ us = seq->us;`
		ce426f	`+ }`
		ce426f	`+ }`
		ce426f	`+ else`
		ce426f	`+ {`
		ce426f	`+ backw_stop = idxmax;`
		ce426f	`+ int32_t prev_idx = idx;`
		ce426f	`+`
		ce426f	`+ while (*us != L('\0'))`
		ce426f	`+ {`
		ce426f	`+ int32_t tmp = findidx (&us, -1);`
		ce426f	`+ unsigned char rule = tmp >> 24;`
		ce426f	`+ prev_idx = idx;`
		ce426f	`+ idx = tmp & 0xffffff;`
		ce426f	`+ idxcnt = idxmax++;`
		ce426f	`+`
		ce426f	`+ /* Save the rule for the first sequence. */`
		ce426f	`+ if (__glibc_unlikely (idxcnt == 0))`
		ce426f	`+ seq->rule = rule;`
		ce426f	`+`
		ce426f	`+ if ((rulesets[rule * nrules + pass]`
		ce426f	`+ & sort_backward) == 0)`
		ce426f	`+ /* No more backward characters to push. */`
		ce426f	`+ break;`
		ce426f	`+ ++idxcnt;`
		ce426f	`+ }`
		ce426f	`+`
		ce426f	`+ if (backw_stop >= idxcnt)`
		ce426f	`+ {`
		ce426f	`+ /* No sequence at all or just one. */`
		ce426f	`+ if (idxcnt == idxmax || backw_stop > idxcnt)`
		ce426f	`+ /* Note that len is still zero. */`
		ce426f	`+ break;`
		ce426f	`+`
		ce426f	`+ backw_stop = ~0ul;`
		ce426f	`+ }`
		ce426f	`+ else`
		ce426f	`+ {`
		ce426f	`+ /* We pushed backward sequences. If the stream ended with the`
		ce426f	`+ backward sequence, then we process the last sequence we`
		ce426f	`+ found. Otherwise we process the sequence before the last`
		ce426f	`+ one since the last one was a forward sequence. */`
		ce426f	`+ seq->back_us = seq->us;`
		ce426f	`+ seq->us = us;`
		ce426f	`+ backw = idxcnt;`
		ce426f	`+ if (idxmax > idxcnt)`
		ce426f	`+ {`
		ce426f	`+ backw--;`
		ce426f	`+ seq->save_idx = idx;`
		ce426f	`+ idx = prev_idx;`
		ce426f	`+ }`
		ce426f	`+ if (backw > backw_stop)`
		ce426f	`+ backw--;`
		ce426f	`+ }`
		ce426f	`+ }`
		ce426f	`+`
		ce426f	`+ len = weights[idx++];`
		ce426f	`+ /* Skip over indeces of previous levels. */`
		ce426f	`+ for (int i = 0; i < pass; i++)`
		ce426f	`+ {`
		ce426f	`+ idx += len;`
		ce426f	`+ len = weights[idx];`
		ce426f	`+ idx++;`
		ce426f	`+ }`
		ce426f	`+ }`
		ce426f	`+`
		ce426f	`+ /* Update the structure. */`
		ce426f	`+ seq->val = val;`
		ce426f	`+ seq->len = len;`
		ce426f	`+ seq->backw_stop = backw_stop;`
		ce426f	`+ seq->backw = backw;`
		ce426f	`+ seq->idxcnt = idxcnt;`
		ce426f	`+ seq->idxmax = idxmax;`
		ce426f	`+ seq->us = us;`
		ce426f	`+ seq->idx = idx;`
		ce426f	`+}`
		ce426f	`+`
		ce426f	`+/* Compare two sequences. This version does not use the index and rules`
		ce426f	`+ cache. */`
		ce426f	`+static int`
		ce426f	`+do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position,`
		ce426f	`+ const USTRING_TYPE *weights)`
		ce426f	`+{`
		ce426f	`+ int seq1len = seq1->len;`
		ce426f	`+ int seq2len = seq2->len;`
		ce426f	`+ int val1 = seq1->val;`
		ce426f	`+ int val2 = seq2->val;`
		ce426f	`+ int idx1 = seq1->idx;`
		ce426f	`+ int idx2 = seq2->idx;`
		ce426f	`+ int result = 0;`
		ce426f	`+`
		ce426f	`+ /* Test for position if necessary. */`
		ce426f	`+ if (position && val1 != val2)`
		ce426f	`+ {`
		ce426f	`+ result = val1 - val2;`
		ce426f	`+ goto out;`
		ce426f	`+ }`
		ce426f	`+`
		ce426f	`+ /* Compare the two sequences. */`
		ce426f	`+ do`
		ce426f	`+ {`
		ce426f	`+ if (weights[idx1] != weights[idx2])`
		ce426f	`+ {`
		ce426f	`+ /* The sequences differ. */`
		ce426f	`+ result = weights[idx1] - weights[idx2];`
		ce426f	`+ goto out;`
		ce426f	`+ }`
		ce426f	`+`
		ce426f	`+ /* Increment the offsets. */`
		ce426f	`+ ++idx1;`
		ce426f	`+ ++idx2;`
		ce426f	`+`
		ce426f	`+ --seq1len;`
		ce426f	`+ --seq2len;`
		ce426f	`+ }`
		ce426f	`+ while (seq1len > 0 && seq2len > 0);`
		ce426f	`+`
		ce426f	`+ if (position && seq1len != seq2len)`
		ce426f	`+ result = seq1len - seq2len;`
		ce426f	`+`
		ce426f	`+out:`
		ce426f	`+ seq1->len = seq1len;`
		ce426f	`+ seq2->len = seq2len;`
		ce426f	`+ seq1->idx = idx1;`
		ce426f	`+ seq2->idx = idx2;`
		ce426f	`+ return result;`
		ce426f	`+}`
		ce426f	`+`
		ce426f	`+/* Compare two sequences using the index cache. */`
		ce426f	`static int`
		ce426f	`do_compare (coll_seq *seq1, coll_seq *seq2, int position,`
		ce426f	`const USTRING_TYPE *weights)`
		ce426f	`@@ -334,57 +524,62 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)`
		ce426f	`memset (&seq1, 0, sizeof (seq1));`
		ce426f	`seq2 = seq1;`
		ce426f
		ce426f	`- /* We need the elements of the strings as unsigned values since they`
		ce426f	`- are used as indeces. */`
		ce426f	`- seq1.us = (const USTRING_TYPE *) s1;`
		ce426f	`- seq2.us = (const USTRING_TYPE *) s2;`
		ce426f	`-`
		ce426f	`if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))`
		ce426f	`{`
		ce426f	`seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));`
		ce426f	`- seq2.idxarr = &seq1.idxarr[s1len];`
		ce426f	`- seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];`
		ce426f	`- seq2.rulearr = &seq1.rulearr[s1len];`
		ce426f	`-`
		ce426f	`- if (seq1.idxarr == NULL)`
		ce426f	`- /* No memory. Well, go with the stack then.`
		ce426f	`-`
		ce426f	`- XXX Once this implementation is stable we will handle this`
		ce426f	`- differently. Instead of precomputing the indeces we will`
		ce426f	`- do this in time. This means, though, that this happens for`
		ce426f	`- every pass again. */`
		ce426f	`- goto try_stack;`
		ce426f	`- use_malloc = true;`
		ce426f	`+`
		ce426f	`+ /* If we failed to allocate memory, we leave everything as NULL so that`
		ce426f	`+ we use the nocache version of traversal and comparison functions. */`
		ce426f	`+ if (seq1.idxarr != NULL)`
		ce426f	`+ {`
		ce426f	`+ seq2.idxarr = &seq1.idxarr[s1len];`
		ce426f	`+ seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];`
		ce426f	`+ seq2.rulearr = &seq1.rulearr[s1len];`
		ce426f	`+ use_malloc = true;`
		ce426f	`+ }`
		ce426f	`}`
		ce426f	`else`
		ce426f	`{`
		ce426f	`- try_stack:`
		ce426f	`seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t));`
		ce426f	`seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t));`
		ce426f	`seq1.rulearr = (unsigned char *) alloca (s1len);`
		ce426f	`seq2.rulearr = (unsigned char *) alloca (s2len);`
		ce426f	`}`
		ce426f
		ce426f	`- seq1.rulearr[0] = 0;`
		ce426f	`+ int rule = 0;`
		ce426f
		ce426f	`/* Cache values in the first pass and if needed, use them in subsequent`
		ce426f	`passes. */`
		ce426f	`for (int pass = 0; pass < nrules; ++pass)`
		ce426f	`{`
		ce426f	`seq1.idxcnt = 0;`
		ce426f	`+ seq1.idx = 0;`
		ce426f	`+ seq2.idx = 0;`
		ce426f	`seq1.backw_stop = ~0ul;`
		ce426f	`seq1.backw = ~0ul;`
		ce426f	`seq2.idxcnt = 0;`
		ce426f	`seq2.backw_stop = ~0ul;`
		ce426f	`seq2.backw = ~0ul;`
		ce426f
		ce426f	`+ /* We need the elements of the strings as unsigned values since they`
		ce426f	`+ are used as indeces. */`
		ce426f	`+ seq1.us = (const USTRING_TYPE *) s1;`
		ce426f	`+ seq2.us = (const USTRING_TYPE *) s2;`
		ce426f	`+`
		ce426f	/* We assume that if a rule has defined `position' in one section
		ce426f	`this is true for all of them. */`
		ce426f	`- int position = rulesets[seq1.rulearr[0] * nrules + pass] & sort_position;`
		ce426f	`+ int position = rulesets[rule * nrules + pass] & sort_position;`
		ce426f
		ce426f	`while (1)`
		ce426f	`{`
		ce426f	`- if (pass == 0)`
		ce426f	`+ if (__glibc_unlikely (seq1.idxarr == NULL))`
		ce426f	`+ {`
		ce426f	`+ get_next_seq_nocache (&seq1, nrules, rulesets, weights, table,`
		ce426f	`+ extra, indirect, pass);`
		ce426f	`+ get_next_seq_nocache (&seq2, nrules, rulesets, weights, table,`
		ce426f	`+ extra, indirect, pass);`
		ce426f	`+ }`
		ce426f	`+ else if (pass == 0)`
		ce426f	`{`
		ce426f	`get_next_seq (&seq1, nrules, rulesets, weights, table, extra,`
		ce426f	`indirect);`
		ce426f	`@@ -411,10 +606,18 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)`
		ce426f	`goto free_and_return;`
		ce426f	`}`
		ce426f
		ce426f	`- result = do_compare (&seq1, &seq2, position, weights);`
		ce426f	`+ if (__glibc_unlikely (seq1.idxarr == NULL))`
		ce426f	`+ result = do_compare_nocache (&seq1, &seq2, position, weights);`
		ce426f	`+ else`
		ce426f	`+ result = do_compare (&seq1, &seq2, position, weights);`
		ce426f	`if (result != 0)`
		ce426f	`goto free_and_return;`
		ce426f	`}`
		ce426f	`+`
		ce426f	`+ if (__glibc_likely (seq1.rulearr != NULL))`
		ce426f	`+ rule = seq1.rulearr[0];`
		ce426f	`+ else`
		ce426f	`+ rule = seq1.rule;`
		ce426f	`}`
		ce426f
		ce426f	`/* Free the memory if needed. */`

rpms / glibc

Source Code

Blame SOURCES/glibc-rh989862-2.patch