Tree - rpms/glibc - CentOS Git server

olga / rpms / glibc

Forked from rpms/glibc 5 years ago

Source
Stats

Files

Commit: 2569bea5e4262a8ae5f07f090c17c4d145c4d73f

Blob Blame History Raw

 commit 9795a1801eb1a4f3ae6346e32666d3d05f006115
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Sun Jun 30 20:45:05 2013 +0530
 
    Fall back to non-cached sequence traversal and comparison
    
    strcoll currently falls back to alloca if malloc fails, resulting in a
    possible stack overflow.  This patch implements sequence traversal and
    comparison without caching indeces and rules.
 
diff --git glibc-2.17-c758a686/string/strcoll_l.c glibc-2.17-c758a686/string/strcoll_l.c
index 1bb9e23..1be6874 100644
--- glibc-2.17-c758a686/string/strcoll_l.c
+++ glibc-2.17-c758a686/string/strcoll_l.c
@@ -55,6 +55,12 @@ typedef struct
   const USTRING_TYPE *us;	/* The string.  */
   int32_t *idxarr;		/* Array to cache weight indeces.  */
   unsigned char *rulearr;	/* Array to cache rules.  */
+  unsigned char rule;		/* Saved rule for the first sequence.  */
+  int32_t idx;			/* Index to weight of the current sequence.  */
+  int32_t save_idx;		/* Save looked up index of a forward
+				   sequence after the last backward
+				   sequence.  */
+  const USTRING_TYPE *back_us;	/* Beginning of the backward sequence.  */
 } coll_seq;
 
 /* Get next sequence.  The weight indeces are cached, so we don't need to
@@ -227,7 +233,191 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
   seq->us = us;
 }
 
-/* Compare two sequences.  */
+/* Get next sequence.  Traverse the string as required.  This function does not
+   set or use any index or rule cache.  */
+static void
+get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets,
+		      const USTRING_TYPE *weights, const int32_t *table,
+		      const USTRING_TYPE *extra, const int32_t *indirect,
+		      int pass)
+{
+#include WEIGHT_H
+  int val = seq->val = 0;
+  int len = seq->len;
+  size_t backw_stop = seq->backw_stop;
+  size_t backw = seq->backw;
+  size_t idxcnt = seq->idxcnt;
+  size_t idxmax = seq->idxmax;
+  int32_t idx = seq->idx;
+  const USTRING_TYPE *us = seq->us;
+
+  while (len == 0)
+    {
+      ++val;
+      if (backw_stop != ~0ul)
+	{
+	  /* The is something pushed.  */
+	  if (backw == backw_stop)
+	    {
+	      /* The last pushed character was handled.  Continue
+		 with forward characters.  */
+	      if (idxcnt < idxmax)
+		{
+		  idx = seq->save_idx;
+		  backw_stop = ~0ul;
+		}
+	      else
+		{
+		  /* Nothing anymore.  The backward sequence ended with
+		     the last sequence in the string.  Note that len is
+		     still zero.  */
+		  idx = 0;
+		  break;
+	        }
+	    }
+	  else
+	    {
+	      /* XXX Traverse BACKW sequences from the beginning of
+		 BACKW_STOP to get the next sequence.  Is ther a quicker way
+	         to do this?  */
+	      int i = backw_stop;
+	      us = seq->back_us;
+	      while (i < backw)
+		{
+		  int32_t tmp = findidx (&us, -1);
+		  idx = tmp & 0xffffff;
+		  i++;
+		}
+	      --backw;
+	      us = seq->us;
+	    }
+	}
+      else
+	{
+	  backw_stop = idxmax;
+	  int32_t prev_idx = idx;
+
+	  while (*us != L('\0'))
+	    {
+	      int32_t tmp = findidx (&us, -1);
+	      unsigned char rule = tmp >> 24;
+	      prev_idx = idx;
+	      idx = tmp & 0xffffff;
+	      idxcnt = idxmax++;
+
+	      /* Save the rule for the first sequence.  */
+	      if (__glibc_unlikely (idxcnt == 0))
+	        seq->rule = rule;
+
+	      if ((rulesets[rule * nrules + pass]
+		   & sort_backward) == 0)
+		/* No more backward characters to push.  */
+		break;
+	      ++idxcnt;
+	    }
+
+	  if (backw_stop >= idxcnt)
+	    {
+	      /* No sequence at all or just one.  */
+	      if (idxcnt == idxmax || backw_stop > idxcnt)
+		/* Note that len is still zero.  */
+		break;
+
+	      backw_stop = ~0ul;
+	    }
+	  else
+	    {
+	      /* We pushed backward sequences.  If the stream ended with the
+		 backward sequence, then we process the last sequence we
+		 found.  Otherwise we process the sequence before the last
+		 one since the last one was a forward sequence.  */
+	      seq->back_us = seq->us;
+	      seq->us = us;
+	      backw = idxcnt;
+	      if (idxmax > idxcnt)
+		{
+		  backw--;
+		  seq->save_idx = idx;
+		  idx = prev_idx;
+		}
+	      if (backw > backw_stop)
+		backw--;
+	    }
+	}
+
+      len = weights[idx++];
+      /* Skip over indeces of previous levels.  */
+      for (int i = 0; i < pass; i++)
+	{
+	  idx += len;
+	  len = weights[idx];
+	  idx++;
+	}
+    }
+
+  /* Update the structure.  */
+  seq->val = val;
+  seq->len = len;
+  seq->backw_stop = backw_stop;
+  seq->backw = backw;
+  seq->idxcnt = idxcnt;
+  seq->idxmax = idxmax;
+  seq->us = us;
+  seq->idx = idx;
+}
+
+/* Compare two sequences.  This version does not use the index and rules
+   cache.  */
+static int
+do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position,
+		    const USTRING_TYPE *weights)
+{
+  int seq1len = seq1->len;
+  int seq2len = seq2->len;
+  int val1 = seq1->val;
+  int val2 = seq2->val;
+  int idx1 = seq1->idx;
+  int idx2 = seq2->idx;
+  int result = 0;
+
+  /* Test for position if necessary.  */
+  if (position && val1 != val2)
+    {
+      result = val1 - val2;
+      goto out;
+    }
+
+  /* Compare the two sequences.  */
+  do
+    {
+      if (weights[idx1] != weights[idx2])
+	{
+	  /* The sequences differ.  */
+	  result = weights[idx1] - weights[idx2];
+	  goto out;
+	}
+
+      /* Increment the offsets.  */
+      ++idx1;
+      ++idx2;
+
+      --seq1len;
+      --seq2len;
+    }
+  while (seq1len > 0 && seq2len > 0);
+
+  if (position && seq1len != seq2len)
+    result = seq1len - seq2len;
+
+out:
+  seq1->len = seq1len;
+  seq2->len = seq2len;
+  seq1->idx = idx1;
+  seq2->idx = idx2;
+  return result;
+}
+
+/* Compare two sequences using the index cache.  */
 static int
 do_compare (coll_seq *seq1, coll_seq *seq2, int position,
 	    const USTRING_TYPE *weights)
@@ -334,57 +524,62 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
   memset (&seq1, 0, sizeof (seq1));
   seq2 = seq1;
 
-  /* We need the elements of the strings as unsigned values since they
-     are used as indeces.  */
-  seq1.us = (const USTRING_TYPE *) s1;
-  seq2.us = (const USTRING_TYPE *) s2;
-
   if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
     {
       seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
-      seq2.idxarr = &seq1.idxarr[s1len];
-      seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
-      seq2.rulearr = &seq1.rulearr[s1len];
-
-      if (seq1.idxarr == NULL)
-	/* No memory.  Well, go with the stack then.
-
-	   XXX Once this implementation is stable we will handle this
-	   differently.  Instead of precomputing the indeces we will
-	   do this in time.  This means, though, that this happens for
-	   every pass again.  */
-	goto try_stack;
-      use_malloc = true;
+
+      /* If we failed to allocate memory, we leave everything as NULL so that
+	 we use the nocache version of traversal and comparison functions.  */
+      if (seq1.idxarr != NULL)
+	{
+	  seq2.idxarr = &seq1.idxarr[s1len];
+	  seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
+	  seq2.rulearr = &seq1.rulearr[s1len];
+	  use_malloc = true;
+	}
     }
   else
     {
-    try_stack:
       seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t));
       seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t));
       seq1.rulearr = (unsigned char *) alloca (s1len);
       seq2.rulearr = (unsigned char *) alloca (s2len);
     }
 
-  seq1.rulearr[0] = 0;
+  int rule = 0;
 
   /* Cache values in the first pass and if needed, use them in subsequent
      passes.  */
   for (int pass = 0; pass < nrules; ++pass)
     {
       seq1.idxcnt = 0;
+      seq1.idx = 0;
+      seq2.idx = 0;
       seq1.backw_stop = ~0ul;
       seq1.backw = ~0ul;
       seq2.idxcnt = 0;
       seq2.backw_stop = ~0ul;
       seq2.backw = ~0ul;
 
+      /* We need the elements of the strings as unsigned values since they
+	 are used as indeces.  */
+      seq1.us = (const USTRING_TYPE *) s1;
+      seq2.us = (const USTRING_TYPE *) s2;
+
       /* We assume that if a rule has defined `position' in one section
 	 this is true for all of them.  */
-      int position = rulesets[seq1.rulearr[0] * nrules + pass] & sort_position;
+      int position = rulesets[rule * nrules + pass] & sort_position;
 
       while (1)
 	{
-	  if (pass == 0)
+	  if (__glibc_unlikely (seq1.idxarr == NULL))
+	    {
+	      get_next_seq_nocache (&seq1, nrules, rulesets, weights, table,
+				    extra, indirect, pass);
+	      get_next_seq_nocache (&seq2, nrules, rulesets, weights, table,
+				    extra, indirect, pass);
+	    }
+	  else if (pass == 0)
 	    {
 	      get_next_seq (&seq1, nrules, rulesets, weights, table, extra,
 			    indirect);
@@ -411,10 +606,18 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
 	      goto free_and_return;
 	    }
 
-	  result = do_compare (&seq1, &seq2, position, weights);
+	  if (__glibc_unlikely (seq1.idxarr == NULL))
+	    result = do_compare_nocache (&seq1, &seq2, position, weights);
+	  else
+	    result = do_compare (&seq1, &seq2, position, weights);
 	  if (result != 0)
 	    goto free_and_return;
 	}
+
+      if (__glibc_likely (seq1.rulearr != NULL))
+	rule = seq1.rulearr[0];
+      else
+	rule = seq1.rule;
     }
 
   /* Free the memory if needed.  */

	commit 9795a1801eb1a4f3ae6346e32666d3d05f006115
	Author: Siddhesh Poyarekar <siddhesh@redhat.com>
	Date: Sun Jun 30 20:45:05 2013 +0530

	Fall back to non-cached sequence traversal and comparison

	strcoll currently falls back to alloca if malloc fails, resulting in a
	possible stack overflow. This patch implements sequence traversal and
	comparison without caching indeces and rules.

	diff --git glibc-2.17-c758a686/string/strcoll_l.c glibc-2.17-c758a686/string/strcoll_l.c
	index 1bb9e23..1be6874 100644
	--- glibc-2.17-c758a686/string/strcoll_l.c
	+++ glibc-2.17-c758a686/string/strcoll_l.c
	@@ -55,6 +55,12 @@ typedef struct
	const USTRING_TYPE us; / The string. */
	int32_t idxarr; / Array to cache weight indeces. */
	unsigned char rulearr; / Array to cache rules. */
	+ unsigned char rule; /* Saved rule for the first sequence. */
	+ int32_t idx; /* Index to weight of the current sequence. */
	+ int32_t save_idx; /* Save looked up index of a forward
	+ sequence after the last backward
	+ sequence. */
	+ const USTRING_TYPE back_us; / Beginning of the backward sequence. */
	} coll_seq;

	/* Get next sequence. The weight indeces are cached, so we don't need to
	@@ -227,7 +233,191 @@ get_next_seq (coll_seq seq, int nrules, const unsigned char rulesets,
	seq->us = us;
	}

	-/* Compare two sequences. */
	+/* Get next sequence. Traverse the string as required. This function does not
	+ set or use any index or rule cache. */
	+static void
	+get_next_seq_nocache (coll_seq seq, int nrules, const unsigned char rulesets,
	+ const USTRING_TYPE weights, const int32_t table,
	+ const USTRING_TYPE extra, const int32_t indirect,
	+ int pass)
	+{
	+#include WEIGHT_H
	+ int val = seq->val = 0;
	+ int len = seq->len;
	+ size_t backw_stop = seq->backw_stop;
	+ size_t backw = seq->backw;
	+ size_t idxcnt = seq->idxcnt;
	+ size_t idxmax = seq->idxmax;
	+ int32_t idx = seq->idx;
	+ const USTRING_TYPE *us = seq->us;
	+
	+ while (len == 0)
	+ {
	+ ++val;
	+ if (backw_stop != ~0ul)
	+ {
	+ /* The is something pushed. */
	+ if (backw == backw_stop)
	+ {
	+ /* The last pushed character was handled. Continue
	+ with forward characters. */
	+ if (idxcnt < idxmax)
	+ {
	+ idx = seq->save_idx;
	+ backw_stop = ~0ul;
	+ }
	+ else
	+ {
	+ /* Nothing anymore. The backward sequence ended with
	+ the last sequence in the string. Note that len is
	+ still zero. */
	+ idx = 0;
	+ break;
	+ }
	+ }
	+ else
	+ {
	+ /* XXX Traverse BACKW sequences from the beginning of
	+ BACKW_STOP to get the next sequence. Is ther a quicker way
	+ to do this? */
	+ int i = backw_stop;
	+ us = seq->back_us;
	+ while (i < backw)
	+ {
	+ int32_t tmp = findidx (&us, -1);
	+ idx = tmp & 0xffffff;
	+ i++;
	+ }
	+ --backw;
	+ us = seq->us;
	+ }
	+ }
	+ else
	+ {
	+ backw_stop = idxmax;
	+ int32_t prev_idx = idx;
	+
	+ while (*us != L('\0'))
	+ {
	+ int32_t tmp = findidx (&us, -1);
	+ unsigned char rule = tmp >> 24;
	+ prev_idx = idx;
	+ idx = tmp & 0xffffff;
	+ idxcnt = idxmax++;
	+
	+ /* Save the rule for the first sequence. */
	+ if (__glibc_unlikely (idxcnt == 0))
	+ seq->rule = rule;
	+
	+ if ((rulesets[rule * nrules + pass]
	+ & sort_backward) == 0)
	+ /* No more backward characters to push. */
	+ break;
	+ ++idxcnt;
	+ }
	+
	+ if (backw_stop >= idxcnt)
	+ {
	+ /* No sequence at all or just one. */
	+ if (idxcnt == idxmax \|\| backw_stop > idxcnt)
	+ /* Note that len is still zero. */
	+ break;
	+
	+ backw_stop = ~0ul;
	+ }
	+ else
	+ {
	+ /* We pushed backward sequences. If the stream ended with the
	+ backward sequence, then we process the last sequence we
	+ found. Otherwise we process the sequence before the last
	+ one since the last one was a forward sequence. */
	+ seq->back_us = seq->us;
	+ seq->us = us;
	+ backw = idxcnt;
	+ if (idxmax > idxcnt)
	+ {
	+ backw--;
	+ seq->save_idx = idx;
	+ idx = prev_idx;
	+ }
	+ if (backw > backw_stop)
	+ backw--;
	+ }
	+ }
	+
	+ len = weights[idx++];
	+ /* Skip over indeces of previous levels. */
	+ for (int i = 0; i < pass; i++)
	+ {
	+ idx += len;
	+ len = weights[idx];
	+ idx++;
	+ }
	+ }
	+
	+ /* Update the structure. */
	+ seq->val = val;
	+ seq->len = len;
	+ seq->backw_stop = backw_stop;
	+ seq->backw = backw;
	+ seq->idxcnt = idxcnt;
	+ seq->idxmax = idxmax;
	+ seq->us = us;
	+ seq->idx = idx;
	+}
	+
	+/* Compare two sequences. This version does not use the index and rules
	+ cache. */
	+static int
	+do_compare_nocache (coll_seq seq1, coll_seq seq2, int position,
	+ const USTRING_TYPE *weights)
	+{
	+ int seq1len = seq1->len;
	+ int seq2len = seq2->len;
	+ int val1 = seq1->val;
	+ int val2 = seq2->val;
	+ int idx1 = seq1->idx;
	+ int idx2 = seq2->idx;
	+ int result = 0;
	+
	+ /* Test for position if necessary. */
	+ if (position && val1 != val2)
	+ {
	+ result = val1 - val2;
	+ goto out;
	+ }
	+
	+ /* Compare the two sequences. */
	+ do
	+ {
	+ if (weights[idx1] != weights[idx2])
	+ {
	+ /* The sequences differ. */
	+ result = weights[idx1] - weights[idx2];
	+ goto out;
	+ }
	+
	+ /* Increment the offsets. */
	+ ++idx1;
	+ ++idx2;
	+
	+ --seq1len;
	+ --seq2len;
	+ }
	+ while (seq1len > 0 && seq2len > 0);
	+
	+ if (position && seq1len != seq2len)
	+ result = seq1len - seq2len;
	+
	+out:
	+ seq1->len = seq1len;
	+ seq2->len = seq2len;
	+ seq1->idx = idx1;
	+ seq2->idx = idx2;
	+ return result;
	+}
	+
	+/* Compare two sequences using the index cache. */
	static int
	do_compare (coll_seq seq1, coll_seq seq2, int position,
	const USTRING_TYPE *weights)
	@@ -334,57 +524,62 @@ STRCOLL (const STRING_TYPE s1, const STRING_TYPE s2, __locale_t l)
	memset (&seq1, 0, sizeof (seq1));
	seq2 = seq1;

	- /* We need the elements of the strings as unsigned values since they
	- are used as indeces. */
	- seq1.us = (const USTRING_TYPE *) s1;
	- seq2.us = (const USTRING_TYPE *) s2;
	-
	if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
	{
	seq1.idxarr = (int32_t ) malloc ((s1len + s2len) (sizeof (int32_t) + 1));
	- seq2.idxarr = &seq1.idxarr[s1len];
	- seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
	- seq2.rulearr = &seq1.rulearr[s1len];
	-
	- if (seq1.idxarr == NULL)
	- /* No memory. Well, go with the stack then.
	-
	- XXX Once this implementation is stable we will handle this
	- differently. Instead of precomputing the indeces we will
	- do this in time. This means, though, that this happens for
	- every pass again. */
	- goto try_stack;
	- use_malloc = true;
	+
	+ /* If we failed to allocate memory, we leave everything as NULL so that
	+ we use the nocache version of traversal and comparison functions. */
	+ if (seq1.idxarr != NULL)
	+ {
	+ seq2.idxarr = &seq1.idxarr[s1len];
	+ seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
	+ seq2.rulearr = &seq1.rulearr[s1len];
	+ use_malloc = true;
	+ }
	}
	else
	{
	- try_stack:
	seq1.idxarr = (int32_t ) alloca (s1len sizeof (int32_t));
	seq2.idxarr = (int32_t ) alloca (s2len sizeof (int32_t));
	seq1.rulearr = (unsigned char *) alloca (s1len);
	seq2.rulearr = (unsigned char *) alloca (s2len);
	}

	- seq1.rulearr[0] = 0;
	+ int rule = 0;

	/* Cache values in the first pass and if needed, use them in subsequent
	passes. */
	for (int pass = 0; pass < nrules; ++pass)
	{
	seq1.idxcnt = 0;
	+ seq1.idx = 0;
	+ seq2.idx = 0;
	seq1.backw_stop = ~0ul;
	seq1.backw = ~0ul;
	seq2.idxcnt = 0;
	seq2.backw_stop = ~0ul;
	seq2.backw = ~0ul;

	+ /* We need the elements of the strings as unsigned values since they
	+ are used as indeces. */
	+ seq1.us = (const USTRING_TYPE *) s1;
	+ seq2.us = (const USTRING_TYPE *) s2;
	+
	/* We assume that if a rule has defined `position' in one section
	this is true for all of them. */
	- int position = rulesets[seq1.rulearr[0] * nrules + pass] & sort_position;
	+ int position = rulesets[rule * nrules + pass] & sort_position;

	while (1)
	{
	- if (pass == 0)
	+ if (__glibc_unlikely (seq1.idxarr == NULL))
	+ {
	+ get_next_seq_nocache (&seq1, nrules, rulesets, weights, table,
	+ extra, indirect, pass);
	+ get_next_seq_nocache (&seq2, nrules, rulesets, weights, table,
	+ extra, indirect, pass);
	+ }
	+ else if (pass == 0)
	{
	get_next_seq (&seq1, nrules, rulesets, weights, table, extra,
	indirect);
	@@ -411,10 +606,18 @@ STRCOLL (const STRING_TYPE s1, const STRING_TYPE s2, __locale_t l)
	goto free_and_return;
	}

	- result = do_compare (&seq1, &seq2, position, weights);
	+ if (__glibc_unlikely (seq1.idxarr == NULL))
	+ result = do_compare_nocache (&seq1, &seq2, position, weights);
	+ else
	+ result = do_compare (&seq1, &seq2, position, weights);
	if (result != 0)
	goto free_and_return;
	}
	+
	+ if (__glibc_likely (seq1.rulearr != NULL))
	+ rule = seq1.rulearr[0];
	+ else
	+ rule = seq1.rule;
	}

	/* Free the memory if needed. */

olga / rpms / glibc

Source Code

Files