Blame SOURCES/gcc34-rh172117.patch

4e62ec
2005-12-18  Alexandre Oliva  <aoliva@redhat.com>
4e62ec
4e62ec
	* optabs.c (expand_vector_binop): Do not use a SUBREG to modify
4e62ec
	a subword in the output if it matches any of the inputs.
4e62ec
4e62ec
2006-04-20  Jakub Jelinek  <jakub@redhat.com>
4e62ec
4e62ec
	* gcc.c-torture/execute/20060420-1.c: New test.
4e62ec
4e62ec
--- gcc/optabs.c.orig	2005-11-21 11:43:20.000000000 -0200
4e62ec
+++ gcc/optabs.c	2005-12-18 18:35:14.000000000 -0200
4e62ec
@@ -1933,16 +1933,19 @@
4e62ec
 
4e62ec
       for (i = 0; i < elts; ++i)
4e62ec
 	{
4e62ec
-	  /* If this is part of a register, and not the first item in the
4e62ec
-	     word, we can't store using a SUBREG - that would clobber
4e62ec
-	     previous results.
4e62ec
+	  /* If this is part of a register, and not the first item in
4e62ec
+	     the word, we can't store using a SUBREG - that would
4e62ec
+	     clobber previous results, or even the input operands, if
4e62ec
+	     target matches any of them.
4e62ec
 	     And storing with a SUBREG is only possible for the least
4e62ec
 	     significant part, hence we can't do it for big endian
4e62ec
 	     (unless we want to permute the evaluation order.  */
4e62ec
 	  if (GET_CODE (target) == REG
4e62ec
 	      && (BYTES_BIG_ENDIAN
4e62ec
 		  ? subsize < UNITS_PER_WORD
4e62ec
-		  : ((i * subsize) % UNITS_PER_WORD) != 0))
4e62ec
+		  : (((i * subsize) % UNITS_PER_WORD) != 0
4e62ec
+		     || (subsize < UNITS_PER_WORD
4e62ec
+			 && (target == op0 || target == op1)))))
4e62ec
 	    t = NULL_RTX;
4e62ec
 	  else
4e62ec
 	    t = simplify_gen_subreg (submode, target, mode, i * subsize);
4e62ec
--- gcc/testsuite/gcc.c-torture/execute/20060420-1.c.jj	2006-04-20 18:47:19.000000000 +0200
4e62ec
+++ gcc/testsuite/gcc.c-torture/execute/20060420-1.c	2006-04-20 19:07:20.000000000 +0200
4e62ec
@@ -0,0 +1,71 @@
4e62ec
+extern void abort (void);
4e62ec
+
4e62ec
+typedef float v4flt __attribute__ ((vector_size (16)));
4e62ec
+
4e62ec
+void __attribute__ ((noinline)) foo (float *dst, float **src, int a, int n)
4e62ec
+{
4e62ec
+  int i, j;
4e62ec
+  int z = sizeof (v4flt) / sizeof (float);
4e62ec
+  unsigned m = sizeof (v4flt) - 1;
4e62ec
+
4e62ec
+  for (j = 0; j < n && (((unsigned long) dst + j) & m); ++j)
4e62ec
+    {
4e62ec
+      float t = src[0][j];
4e62ec
+      for (i = 1; i < a; ++i)
4e62ec
+	t += src[i][j];
4e62ec
+      dst[j] = t;
4e62ec
+    }
4e62ec
+
4e62ec
+  for (; j < (n - (4 * z - 1)); j += 4 * z)
4e62ec
+    {
4e62ec
+      v4flt t0 = *(v4flt *) (src[0] + j + 0 * z);
4e62ec
+      v4flt t1 = *(v4flt *) (src[0] + j + 1 * z);
4e62ec
+      v4flt t2 = *(v4flt *) (src[0] + j + 2 * z);
4e62ec
+      v4flt t3 = *(v4flt *) (src[0] + j + 3 * z);
4e62ec
+      for (i = 1; i < a; ++i)
4e62ec
+	{
4e62ec
+	  t0 += *(v4flt *) (src[i] + j + 0 * z);
4e62ec
+	  t1 += *(v4flt *) (src[i] + j + 1 * z);
4e62ec
+	  t2 += *(v4flt *) (src[i] + j + 2 * z);
4e62ec
+	  t3 += *(v4flt *) (src[i] + j + 3 * z);
4e62ec
+	}
4e62ec
+      *(v4flt *) (dst + j + 0 * z) = t0;
4e62ec
+      *(v4flt *) (dst + j + 1 * z) = t1;
4e62ec
+      *(v4flt *) (dst + j + 2 * z) = t2;
4e62ec
+      *(v4flt *) (dst + j + 3 * z) = t3;
4e62ec
+    }
4e62ec
+  for (; j < n; ++j)
4e62ec
+    {
4e62ec
+      float t = src[0][j];
4e62ec
+      for (i = 1; i < a; ++i)
4e62ec
+	t += src[i][j];
4e62ec
+      dst[j] = t;
4e62ec
+    }
4e62ec
+}
4e62ec
+
4e62ec
+float buffer[64];
4e62ec
+
4e62ec
+int
4e62ec
+main (void)
4e62ec
+{
4e62ec
+  int i;
4e62ec
+  float *dst, *src[2];
4e62ec
+
4e62ec
+  dst = buffer;
4e62ec
+  dst += (-(long int) buffer & (16 * sizeof (float) - 1)) / sizeof (float);
4e62ec
+  src[0] = dst + 16;
4e62ec
+  src[1] = dst + 32;
4e62ec
+  for (i = 0; i < 16; ++i)
4e62ec
+    {
4e62ec
+      src[0][i] = (float) i + 11 * (float) i;
4e62ec
+      src[1][i] = (float) i + 12 * (float) i;
4e62ec
+    }
4e62ec
+  foo (dst, src, 2, 16);
4e62ec
+  for (i = 0; i < 16; ++i)
4e62ec
+    {
4e62ec
+      float e = (float) i + 11 * (float) i + (float) i + 12 * (float) i;
4e62ec
+      if (dst[i] != e)
4e62ec
+	abort ();
4e62ec
+    }
4e62ec
+  return 0;
4e62ec
+}