|
|
4e62ec |
2005-12-18 Alexandre Oliva <aoliva@redhat.com>
|
|
|
4e62ec |
|
|
|
4e62ec |
* optabs.c (expand_vector_binop): Do not use a SUBREG to modify
|
|
|
4e62ec |
a subword in the output if it matches any of the inputs.
|
|
|
4e62ec |
|
|
|
4e62ec |
2006-04-20 Jakub Jelinek <jakub@redhat.com>
|
|
|
4e62ec |
|
|
|
4e62ec |
* gcc.c-torture/execute/20060420-1.c: New test.
|
|
|
4e62ec |
|
|
|
4e62ec |
--- gcc/optabs.c.orig 2005-11-21 11:43:20.000000000 -0200
|
|
|
4e62ec |
+++ gcc/optabs.c 2005-12-18 18:35:14.000000000 -0200
|
|
|
4e62ec |
@@ -1933,16 +1933,19 @@
|
|
|
4e62ec |
|
|
|
4e62ec |
for (i = 0; i < elts; ++i)
|
|
|
4e62ec |
{
|
|
|
4e62ec |
- /* If this is part of a register, and not the first item in the
|
|
|
4e62ec |
- word, we can't store using a SUBREG - that would clobber
|
|
|
4e62ec |
- previous results.
|
|
|
4e62ec |
+ /* If this is part of a register, and not the first item in
|
|
|
4e62ec |
+ the word, we can't store using a SUBREG - that would
|
|
|
4e62ec |
+ clobber previous results, or even the input operands, if
|
|
|
4e62ec |
+ target matches any of them.
|
|
|
4e62ec |
And storing with a SUBREG is only possible for the least
|
|
|
4e62ec |
significant part, hence we can't do it for big endian
|
|
|
4e62ec |
(unless we want to permute the evaluation order. */
|
|
|
4e62ec |
if (GET_CODE (target) == REG
|
|
|
4e62ec |
&& (BYTES_BIG_ENDIAN
|
|
|
4e62ec |
? subsize < UNITS_PER_WORD
|
|
|
4e62ec |
- : ((i * subsize) % UNITS_PER_WORD) != 0))
|
|
|
4e62ec |
+ : (((i * subsize) % UNITS_PER_WORD) != 0
|
|
|
4e62ec |
+ || (subsize < UNITS_PER_WORD
|
|
|
4e62ec |
+ && (target == op0 || target == op1)))))
|
|
|
4e62ec |
t = NULL_RTX;
|
|
|
4e62ec |
else
|
|
|
4e62ec |
t = simplify_gen_subreg (submode, target, mode, i * subsize);
|
|
|
4e62ec |
--- gcc/testsuite/gcc.c-torture/execute/20060420-1.c.jj 2006-04-20 18:47:19.000000000 +0200
|
|
|
4e62ec |
+++ gcc/testsuite/gcc.c-torture/execute/20060420-1.c 2006-04-20 19:07:20.000000000 +0200
|
|
|
4e62ec |
@@ -0,0 +1,71 @@
|
|
|
4e62ec |
+extern void abort (void);
|
|
|
4e62ec |
+
|
|
|
4e62ec |
+typedef float v4flt __attribute__ ((vector_size (16)));
|
|
|
4e62ec |
+
|
|
|
4e62ec |
+void __attribute__ ((noinline)) foo (float *dst, float **src, int a, int n)
|
|
|
4e62ec |
+{
|
|
|
4e62ec |
+ int i, j;
|
|
|
4e62ec |
+ int z = sizeof (v4flt) / sizeof (float);
|
|
|
4e62ec |
+ unsigned m = sizeof (v4flt) - 1;
|
|
|
4e62ec |
+
|
|
|
4e62ec |
+ for (j = 0; j < n && (((unsigned long) dst + j) & m); ++j)
|
|
|
4e62ec |
+ {
|
|
|
4e62ec |
+ float t = src[0][j];
|
|
|
4e62ec |
+ for (i = 1; i < a; ++i)
|
|
|
4e62ec |
+ t += src[i][j];
|
|
|
4e62ec |
+ dst[j] = t;
|
|
|
4e62ec |
+ }
|
|
|
4e62ec |
+
|
|
|
4e62ec |
+ for (; j < (n - (4 * z - 1)); j += 4 * z)
|
|
|
4e62ec |
+ {
|
|
|
4e62ec |
+ v4flt t0 = *(v4flt *) (src[0] + j + 0 * z);
|
|
|
4e62ec |
+ v4flt t1 = *(v4flt *) (src[0] + j + 1 * z);
|
|
|
4e62ec |
+ v4flt t2 = *(v4flt *) (src[0] + j + 2 * z);
|
|
|
4e62ec |
+ v4flt t3 = *(v4flt *) (src[0] + j + 3 * z);
|
|
|
4e62ec |
+ for (i = 1; i < a; ++i)
|
|
|
4e62ec |
+ {
|
|
|
4e62ec |
+ t0 += *(v4flt *) (src[i] + j + 0 * z);
|
|
|
4e62ec |
+ t1 += *(v4flt *) (src[i] + j + 1 * z);
|
|
|
4e62ec |
+ t2 += *(v4flt *) (src[i] + j + 2 * z);
|
|
|
4e62ec |
+ t3 += *(v4flt *) (src[i] + j + 3 * z);
|
|
|
4e62ec |
+ }
|
|
|
4e62ec |
+ *(v4flt *) (dst + j + 0 * z) = t0;
|
|
|
4e62ec |
+ *(v4flt *) (dst + j + 1 * z) = t1;
|
|
|
4e62ec |
+ *(v4flt *) (dst + j + 2 * z) = t2;
|
|
|
4e62ec |
+ *(v4flt *) (dst + j + 3 * z) = t3;
|
|
|
4e62ec |
+ }
|
|
|
4e62ec |
+ for (; j < n; ++j)
|
|
|
4e62ec |
+ {
|
|
|
4e62ec |
+ float t = src[0][j];
|
|
|
4e62ec |
+ for (i = 1; i < a; ++i)
|
|
|
4e62ec |
+ t += src[i][j];
|
|
|
4e62ec |
+ dst[j] = t;
|
|
|
4e62ec |
+ }
|
|
|
4e62ec |
+}
|
|
|
4e62ec |
+
|
|
|
4e62ec |
+float buffer[64];
|
|
|
4e62ec |
+
|
|
|
4e62ec |
+int
|
|
|
4e62ec |
+main (void)
|
|
|
4e62ec |
+{
|
|
|
4e62ec |
+ int i;
|
|
|
4e62ec |
+ float *dst, *src[2];
|
|
|
4e62ec |
+
|
|
|
4e62ec |
+ dst = buffer;
|
|
|
4e62ec |
+ dst += (-(long int) buffer & (16 * sizeof (float) - 1)) / sizeof (float);
|
|
|
4e62ec |
+ src[0] = dst + 16;
|
|
|
4e62ec |
+ src[1] = dst + 32;
|
|
|
4e62ec |
+ for (i = 0; i < 16; ++i)
|
|
|
4e62ec |
+ {
|
|
|
4e62ec |
+ src[0][i] = (float) i + 11 * (float) i;
|
|
|
4e62ec |
+ src[1][i] = (float) i + 12 * (float) i;
|
|
|
4e62ec |
+ }
|
|
|
4e62ec |
+ foo (dst, src, 2, 16);
|
|
|
4e62ec |
+ for (i = 0; i < 16; ++i)
|
|
|
4e62ec |
+ {
|
|
|
4e62ec |
+ float e = (float) i + 11 * (float) i + (float) i + 12 * (float) i;
|
|
|
4e62ec |
+ if (dst[i] != e)
|
|
|
4e62ec |
+ abort ();
|
|
|
4e62ec |
+ }
|
|
|
4e62ec |
+ return 0;
|
|
|
4e62ec |
+}
|