|
|
ac4b94 |
2019-11-22 Jonathan Wakely <jwakely@redhat.com>
|
|
|
ac4b94 |
|
|
|
ac4b94 |
Backport from mainline
|
|
|
ac4b94 |
2019-10-29 Jonathan Wakely <jwakely@redhat.com>
|
|
|
ac4b94 |
|
|
|
ac4b94 |
PR libstdc++/92267
|
|
|
ac4b94 |
* include/bits/stl_deque.h (_Deque_iterator(const _Deque_iterator&)):
|
|
|
ac4b94 |
Do not define as defaulted.
|
|
|
ac4b94 |
* testsuite/23_containers/deque/types/92267.cc: New test.
|
|
|
ac4b94 |
|
|
|
ac4b94 |
2019-11-21 Jakub Jelinek <jakub@redhat.com>
|
|
|
ac4b94 |
|
|
|
ac4b94 |
PR tree-optimization/91355
|
|
|
ac4b94 |
* tree-ssa-sink.c (select_best_block): Use >= rather than >
|
|
|
ac4b94 |
for early_bb scaled count with best_bb count comparison.
|
|
|
ac4b94 |
|
|
|
ac4b94 |
2019-11-21 Richard Biener <rguenther@suse.de>
|
|
|
ac4b94 |
|
|
|
ac4b94 |
Revert
|
|
|
ac4b94 |
2019-09-17 Richard Biener <rguenther@suse.de>
|
|
|
ac4b94 |
|
|
|
ac4b94 |
PR tree-optimization/91790
|
|
|
ac4b94 |
* tree-vect-stmts.c (vectorizable_load): For BB vectorization
|
|
|
ac4b94 |
use the correct DR for setting up realignment.
|
|
|
ac4b94 |
|
|
|
ac4b94 |
2019-11-20 Peter Bergner <bergner@linux.ibm.com>
|
|
|
ac4b94 |
|
|
|
ac4b94 |
Backport from mainline
|
|
|
ac4b94 |
2019-11-07 Peter Bergner <bergner@linux.ibm.com>
|
|
|
ac4b94 |
|
|
|
ac4b94 |
PR other/92090
|
|
|
ac4b94 |
* config/rs6000/predicates.md (input_operand): Allow MODE_PARTIAL_INT
|
|
|
ac4b94 |
modes for integer constants.
|
|
|
ac4b94 |
|
|
|
ac4b94 |
2019-11-20 Michael Matz <matz@suse.de>
|
|
|
ac4b94 |
|
|
|
ac4b94 |
Backport from mainline
|
|
|
ac4b94 |
PR middle-end/90796
|
|
|
ac4b94 |
* gimple-loop-jam.c (any_access_function_variant_p): New function.
|
|
|
ac4b94 |
(adjust_unroll_factor): Use it to constrain safety, new parameter.
|
|
|
ac4b94 |
(tree_loop_unroll_and_jam): Adjust call and profitable unroll factor.
|
|
|
ac4b94 |
|
|
|
ac4b94 |
2019-11-20 Joseph Myers <joseph@codesourcery.com>
|
|
|
ac4b94 |
|
|
|
ac4b94 |
* doc/invoke.texi (-Wc11-c2x-compat): Document.
|
|
|
ac4b94 |
|
|
|
ac4b94 |
--- libstdc++-v3/include/bits/stl_deque.h (revision 278492)
|
|
|
ac4b94 |
+++ libstdc++-v3/include/bits/stl_deque.h (revision 278614)
|
|
|
ac4b94 |
@@ -158,13 +158,16 @@
|
|
|
ac4b94 |
#else
|
|
|
ac4b94 |
// Conversion from iterator to const_iterator.
|
|
|
ac4b94 |
template<typename _Iter,
|
|
|
ac4b94 |
- typename = _Require<is_same<_Self, const_iterator>,
|
|
|
ac4b94 |
- is_same<_Iter, iterator>>>
|
|
|
ac4b94 |
+ typename = _Require<is_same<_Self, const_iterator>,
|
|
|
ac4b94 |
+ is_same<_Iter, iterator>>>
|
|
|
ac4b94 |
_Deque_iterator(const _Iter& __x) noexcept
|
|
|
ac4b94 |
: _M_cur(__x._M_cur), _M_first(__x._M_first),
|
|
|
ac4b94 |
- _M_last(__x._M_last), _M_node(__x._M_node) { }
|
|
|
ac4b94 |
+ _M_last(__x._M_last), _M_node(__x._M_node) { }
|
|
|
ac4b94 |
|
|
|
ac4b94 |
- _Deque_iterator(const _Deque_iterator&) = default;
|
|
|
ac4b94 |
+ _Deque_iterator(const _Deque_iterator& __x) noexcept
|
|
|
ac4b94 |
+ : _M_cur(__x._M_cur), _M_first(__x._M_first),
|
|
|
ac4b94 |
+ _M_last(__x._M_last), _M_node(__x._M_node) { }
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
_Deque_iterator& operator=(const _Deque_iterator&) = default;
|
|
|
ac4b94 |
#endif
|
|
|
ac4b94 |
|
|
|
ac4b94 |
--- libstdc++-v3/testsuite/23_containers/deque/types/92267.cc (nonexistent)
|
|
|
ac4b94 |
+++ libstdc++-v3/testsuite/23_containers/deque/types/92267.cc (revision 278614)
|
|
|
ac4b94 |
@@ -0,0 +1,27 @@
|
|
|
ac4b94 |
+// Copyright (C) 2019 Free Software Foundation, Inc.
|
|
|
ac4b94 |
+//
|
|
|
ac4b94 |
+// This file is part of the GNU ISO C++ Library. This library is free
|
|
|
ac4b94 |
+// software; you can redistribute it and/or modify it under the
|
|
|
ac4b94 |
+// terms of the GNU General Public License as published by the
|
|
|
ac4b94 |
+// Free Software Foundation; either version 3, or (at your option)
|
|
|
ac4b94 |
+// any later version.
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+// This library is distributed in the hope that it will be useful,
|
|
|
ac4b94 |
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
ac4b94 |
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
ac4b94 |
+// GNU General Public License for more details.
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+// You should have received a copy of the GNU General Public License along
|
|
|
ac4b94 |
+// with this library; see the file COPYING3. If not see
|
|
|
ac4b94 |
+// <http://www.gnu.org/licenses/>.
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+// { dg-do compile { target c++11 } }
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+#include <deque>
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+using std::deque;
|
|
|
ac4b94 |
+using std::is_trivially_copy_constructible;
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+// PR libstdc++/92267
|
|
|
ac4b94 |
+static_assert(!is_trivially_copy_constructible<deque<int>::iterator>::value);
|
|
|
ac4b94 |
+static_assert(!is_trivially_copy_constructible<deque<int>::const_iterator>::value);
|
|
|
ac4b94 |
--- gcc/doc/invoke.texi (revision 278492)
|
|
|
ac4b94 |
+++ gcc/doc/invoke.texi (revision 278614)
|
|
|
ac4b94 |
@@ -292,6 +292,7 @@
|
|
|
ac4b94 |
-Wbool-compare -Wbool-operation @gol
|
|
|
ac4b94 |
-Wno-builtin-declaration-mismatch @gol
|
|
|
ac4b94 |
-Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol
|
|
|
ac4b94 |
+-Wc11-c2x-compat @gol
|
|
|
ac4b94 |
-Wc++-compat -Wc++11-compat -Wc++14-compat -Wc++17-compat @gol
|
|
|
ac4b94 |
-Wcast-align -Wcast-align=strict -Wcast-function-type -Wcast-qual @gol
|
|
|
ac4b94 |
-Wchar-subscripts -Wcatch-value -Wcatch-value=@var{n} @gol
|
|
|
ac4b94 |
@@ -6698,6 +6699,14 @@
|
|
|
ac4b94 |
and so on. This option is independent of the standards mode. Warnings are
|
|
|
ac4b94 |
disabled in the expression that follows @code{__extension__}.
|
|
|
ac4b94 |
|
|
|
ac4b94 |
+@item -Wc11-c2x-compat @r{(C and Objective-C only)}
|
|
|
ac4b94 |
+@opindex Wc11-c2x-compat
|
|
|
ac4b94 |
+@opindex Wno-c11-c2x-compat
|
|
|
ac4b94 |
+Warn about features not present in ISO C11, but present in ISO C2X.
|
|
|
ac4b94 |
+For instance, warn about omitting the string in @code{_Static_assert}.
|
|
|
ac4b94 |
+This option is independent of the standards mode. Warnings are
|
|
|
ac4b94 |
+disabled in the expression that follows @code{__extension__}.
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
@item -Wc++-compat @r{(C and Objective-C only)}
|
|
|
ac4b94 |
@opindex Wc++-compat
|
|
|
ac4b94 |
@opindex Wno-c++-compat
|
|
|
ac4b94 |
--- gcc/testsuite/gcc.target/powerpc/pr92090-2.c (nonexistent)
|
|
|
ac4b94 |
+++ gcc/testsuite/gcc.target/powerpc/pr92090-2.c (revision 278614)
|
|
|
ac4b94 |
@@ -0,0 +1,45 @@
|
|
|
ac4b94 |
+/* { dg-do compile } */
|
|
|
ac4b94 |
+/* { dg-options "-mdejagnu-cpu=power8 -Os -w" } */
|
|
|
ac4b94 |
+/* { dg-additional-options "-mbig" { target powerpc64le-*-* } } */
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+/* Verify that we don't ICE. */
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+int a;
|
|
|
ac4b94 |
+static _Atomic long double b, c, d, m;
|
|
|
ac4b94 |
+double n;
|
|
|
ac4b94 |
+extern int foo (void);
|
|
|
ac4b94 |
+extern void bar (int, int, int, int);
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+void
|
|
|
ac4b94 |
+bug (void)
|
|
|
ac4b94 |
+{
|
|
|
ac4b94 |
+ b = 1.79769313486231580793728971405301199e308L;
|
|
|
ac4b94 |
+ for (int i = 0; i < 10000; i++)
|
|
|
ac4b94 |
+ if (__builtin_isinf (n))
|
|
|
ac4b94 |
+ b;
|
|
|
ac4b94 |
+ c = 1;
|
|
|
ac4b94 |
+ int e, f, g, h;
|
|
|
ac4b94 |
+ while (a)
|
|
|
ac4b94 |
+ ;
|
|
|
ac4b94 |
+ for (int i; i; i++)
|
|
|
ac4b94 |
+ {
|
|
|
ac4b94 |
+ double j = c /= foo ();
|
|
|
ac4b94 |
+ if (__builtin_isinf (j))
|
|
|
ac4b94 |
+ {
|
|
|
ac4b94 |
+ if (foo == 1 << 31)
|
|
|
ac4b94 |
+ e++;
|
|
|
ac4b94 |
+ f++;
|
|
|
ac4b94 |
+ c = 0;
|
|
|
ac4b94 |
+ }
|
|
|
ac4b94 |
+ else
|
|
|
ac4b94 |
+ {
|
|
|
ac4b94 |
+ if (foo == 1 << 30)
|
|
|
ac4b94 |
+ g++;
|
|
|
ac4b94 |
+ h++;
|
|
|
ac4b94 |
+ c = 1;
|
|
|
ac4b94 |
+ }
|
|
|
ac4b94 |
+ }
|
|
|
ac4b94 |
+ bar (e, f, g, h);
|
|
|
ac4b94 |
+ d = 1.79769313486231580793728971405301199e308L;
|
|
|
ac4b94 |
+ m = 1;
|
|
|
ac4b94 |
+}
|
|
|
ac4b94 |
--- gcc/testsuite/gcc.target/powerpc/pr92090.c (nonexistent)
|
|
|
ac4b94 |
+++ gcc/testsuite/gcc.target/powerpc/pr92090.c (revision 278614)
|
|
|
ac4b94 |
@@ -0,0 +1,43 @@
|
|
|
ac4b94 |
+/* { dg-do compile } */
|
|
|
ac4b94 |
+/* { dg-options "-mdejagnu-cpu=power8 -Os" } */
|
|
|
ac4b94 |
+/* { dg-additional-options "-mbig" { target powerpc64le-*-* } } */
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+/* Verify that we don't ICE. */
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+_Atomic int a;
|
|
|
ac4b94 |
+_Atomic long double b, c;
|
|
|
ac4b94 |
+int j;
|
|
|
ac4b94 |
+void foo (void);
|
|
|
ac4b94 |
+void bar (int, int, int, int);
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+void
|
|
|
ac4b94 |
+bug (void)
|
|
|
ac4b94 |
+{
|
|
|
ac4b94 |
+ b = 1;
|
|
|
ac4b94 |
+ int d, e, f, g;
|
|
|
ac4b94 |
+ while (a)
|
|
|
ac4b94 |
+ ;
|
|
|
ac4b94 |
+ for (int h = 0; h < 10000; h++)
|
|
|
ac4b94 |
+ {
|
|
|
ac4b94 |
+ double i = b /= 3;
|
|
|
ac4b94 |
+ foo ();
|
|
|
ac4b94 |
+ if (i)
|
|
|
ac4b94 |
+ {
|
|
|
ac4b94 |
+ if (i == 1)
|
|
|
ac4b94 |
+ d++;
|
|
|
ac4b94 |
+ e++;
|
|
|
ac4b94 |
+ b = 0;
|
|
|
ac4b94 |
+ }
|
|
|
ac4b94 |
+ else
|
|
|
ac4b94 |
+ {
|
|
|
ac4b94 |
+ if (i == 2)
|
|
|
ac4b94 |
+ f++;
|
|
|
ac4b94 |
+ g++;
|
|
|
ac4b94 |
+ b = 1;
|
|
|
ac4b94 |
+ }
|
|
|
ac4b94 |
+ }
|
|
|
ac4b94 |
+ bar (d, e, f, g);
|
|
|
ac4b94 |
+ c = 1;
|
|
|
ac4b94 |
+ for (int h; h; h++)
|
|
|
ac4b94 |
+ j = 0;
|
|
|
ac4b94 |
+}
|
|
|
ac4b94 |
--- gcc/testsuite/gcc.dg/unroll-and-jam.c (revision 278492)
|
|
|
ac4b94 |
+++ gcc/testsuite/gcc.dg/unroll-and-jam.c (revision 278614)
|
|
|
ac4b94 |
@@ -1,5 +1,5 @@
|
|
|
ac4b94 |
/* { dg-do run } */
|
|
|
ac4b94 |
-/* { dg-options "-O3 -floop-unroll-and-jam --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */
|
|
|
ac4b94 |
+/* { dg-options "-O3 -floop-unroll-and-jam -fno-tree-loop-im --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */
|
|
|
ac4b94 |
/* { dg-require-effective-target int32plus } */
|
|
|
ac4b94 |
|
|
|
ac4b94 |
#include <stdio.h>
|
|
|
ac4b94 |
@@ -34,7 +34,7 @@
|
|
|
ac4b94 |
#define TEST(name, body, test) \
|
|
|
ac4b94 |
static void __attribute__((noinline,noclone)) name (unsigned long n, unsigned long m) \
|
|
|
ac4b94 |
{ \
|
|
|
ac4b94 |
- unsigned long i, j; \
|
|
|
ac4b94 |
+ unsigned i, j; \
|
|
|
ac4b94 |
for (i = 1; i < m; i++) { \
|
|
|
ac4b94 |
for (j = 1; j < n; j++) { \
|
|
|
ac4b94 |
body; \
|
|
|
ac4b94 |
@@ -58,9 +58,14 @@
|
|
|
ac4b94 |
TEST(foo4, aa[i][j] = aa[i-1][j+1] * aa[i-1][j+1] / 2, checkaa()) //notok, -1,1
|
|
|
ac4b94 |
TEST(foo5, aa[i][j] = aa[i+1][j+1] * aa[i+1][j+1] / 2, checkaa()) //ok, 1,1
|
|
|
ac4b94 |
TEST(foo6, aa[i][j] = aa[i+1][j] * aa[i+1][j] / 2, checkaa()) //ok, -1,0
|
|
|
ac4b94 |
+TEST(foo61, aa[i][0] = aa[i+1][0] * aa[i+1][0] / 2, checkaa()) //notok, -1,0
|
|
|
ac4b94 |
+TEST(foo62, aa[i][j/2] = aa[i+1][j/2] * aa[i+1][j/2] / 2, checkaa()) //notok, not affine
|
|
|
ac4b94 |
+TEST(foo63, aa[i][j%2] = aa[i+1][j%2] * aa[i+1][j%2] / 2, checkaa()) //notok, not affine
|
|
|
ac4b94 |
TEST(foo7, aa[i+1][j] = aa[i][j] * aa[i][j] / 2, checkaa()) //ok, 1,0
|
|
|
ac4b94 |
TEST(foo9, b[j] = 3*b[j+1] + 1, checkb()) //notok, 0,-1
|
|
|
ac4b94 |
TEST(foo10, b[j] = 3*b[j] + 1, checkb()) //ok, 0,0
|
|
|
ac4b94 |
+extern int f;
|
|
|
ac4b94 |
+TEST(foo11, f = b[i-1] = 1 + 3* b[i+1], checkb()) //ok, 2,0 but must reduce unroll factor to 2, (it would be incorrect with unroll-by-3, which the profitability would suggest)
|
|
|
ac4b94 |
|
|
|
ac4b94 |
/* foo8 should work as well, but currently doesn't because the distance
|
|
|
ac4b94 |
vectors we compute are too pessimistic. We compute
|
|
|
ac4b94 |
@@ -68,6 +73,7 @@
|
|
|
ac4b94 |
and the last one causes us to lose. */
|
|
|
ac4b94 |
TEST(foo8, b[j+1] = 3*b[j] + 1, checkb()) //ok, 0,1
|
|
|
ac4b94 |
|
|
|
ac4b94 |
+int f;
|
|
|
ac4b94 |
unsigned int a[1024];
|
|
|
ac4b94 |
unsigned int b[1024];
|
|
|
ac4b94 |
unsigned int aa[16][1024];
|
|
|
ac4b94 |
@@ -88,10 +94,12 @@
|
|
|
ac4b94 |
printf(" %s\n", #name); \
|
|
|
ac4b94 |
init();for(i=0;i<4;i++)name##noopt(32,8); checka = checksum; \
|
|
|
ac4b94 |
init();for(i=0;i<4;i++)name(32,8); \
|
|
|
ac4b94 |
+ if (checka != checksum) fail = 1; \
|
|
|
ac4b94 |
printf("%sok %s\n", checka != checksum ? "NOT " : "", #name);
|
|
|
ac4b94 |
|
|
|
ac4b94 |
int main()
|
|
|
ac4b94 |
{
|
|
|
ac4b94 |
+ int fail = 0;
|
|
|
ac4b94 |
int i;
|
|
|
ac4b94 |
unsigned checka;
|
|
|
ac4b94 |
RUN(foo1);
|
|
|
ac4b94 |
@@ -100,12 +108,18 @@
|
|
|
ac4b94 |
RUN(foo4);
|
|
|
ac4b94 |
RUN(foo5);
|
|
|
ac4b94 |
RUN(foo6);
|
|
|
ac4b94 |
+ RUN(foo61);
|
|
|
ac4b94 |
+ RUN(foo62);
|
|
|
ac4b94 |
+ RUN(foo63);
|
|
|
ac4b94 |
RUN(foo7);
|
|
|
ac4b94 |
RUN(foo8);
|
|
|
ac4b94 |
RUN(foo9);
|
|
|
ac4b94 |
RUN(foo10);
|
|
|
ac4b94 |
- return 0;
|
|
|
ac4b94 |
+ RUN(foo11);
|
|
|
ac4b94 |
+ if (fail)
|
|
|
ac4b94 |
+ __builtin_abort();
|
|
|
ac4b94 |
+ return fail;
|
|
|
ac4b94 |
}
|
|
|
ac4b94 |
|
|
|
ac4b94 |
-/* Five loops should be unroll-jammed (actually six, but see above). */
|
|
|
ac4b94 |
-/* { dg-final { scan-tree-dump-times "applying unroll and jam" 5 "unrolljam" } } */
|
|
|
ac4b94 |
+/* Six loops should be unroll-jammed (actually seven, but see above). */
|
|
|
ac4b94 |
+/* { dg-final { scan-tree-dump-times "applying unroll and jam" 6 "unrolljam" } } */
|
|
|
ac4b94 |
--- gcc/testsuite/g++.dg/torture/pr91355.C (nonexistent)
|
|
|
ac4b94 |
+++ gcc/testsuite/g++.dg/torture/pr91355.C (revision 278614)
|
|
|
ac4b94 |
@@ -0,0 +1,28 @@
|
|
|
ac4b94 |
+// PR tree-optimization/91355
|
|
|
ac4b94 |
+// { dg-do run }
|
|
|
ac4b94 |
+// { dg-options "-std=c++14" }
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+unsigned int d = 0;
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+struct S {
|
|
|
ac4b94 |
+ S () { d++; }
|
|
|
ac4b94 |
+ S (const S &) { d++; }
|
|
|
ac4b94 |
+ ~S () { d--; }
|
|
|
ac4b94 |
+};
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+void
|
|
|
ac4b94 |
+foo (int i) throw (int) // { dg-warning "dynamic exception specifications are deprecated" }
|
|
|
ac4b94 |
+{
|
|
|
ac4b94 |
+ if (i == 0)
|
|
|
ac4b94 |
+ throw 3;
|
|
|
ac4b94 |
+ S d;
|
|
|
ac4b94 |
+ throw 3;
|
|
|
ac4b94 |
+}
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+int
|
|
|
ac4b94 |
+main ()
|
|
|
ac4b94 |
+{
|
|
|
ac4b94 |
+ try { foo (1); } catch (...) {}
|
|
|
ac4b94 |
+ if (d)
|
|
|
ac4b94 |
+ __builtin_abort ();
|
|
|
ac4b94 |
+}
|
|
|
ac4b94 |
--- gcc/tree-ssa-sink.c (revision 278492)
|
|
|
ac4b94 |
+++ gcc/tree-ssa-sink.c (revision 278614)
|
|
|
ac4b94 |
@@ -229,7 +229,7 @@
|
|
|
ac4b94 |
/* If result of comparsion is unknown, preffer EARLY_BB.
|
|
|
ac4b94 |
Thus use !(...>=..) rather than (...<...) */
|
|
|
ac4b94 |
&& !(best_bb->count.apply_scale (100, 1)
|
|
|
ac4b94 |
- > (early_bb->count.apply_scale (threshold, 1))))
|
|
|
ac4b94 |
+ >= early_bb->count.apply_scale (threshold, 1)))
|
|
|
ac4b94 |
return best_bb;
|
|
|
ac4b94 |
|
|
|
ac4b94 |
/* No better block found, so return EARLY_BB, which happens to be the
|
|
|
ac4b94 |
--- gcc/tree-vect-stmts.c (revision 278492)
|
|
|
ac4b94 |
+++ gcc/tree-vect-stmts.c (revision 278614)
|
|
|
ac4b94 |
@@ -8276,9 +8276,7 @@
|
|
|
ac4b94 |
|| alignment_support_scheme == dr_explicit_realign)
|
|
|
ac4b94 |
&& !compute_in_loop)
|
|
|
ac4b94 |
{
|
|
|
ac4b94 |
- msq = vect_setup_realignment (first_stmt_info_for_drptr
|
|
|
ac4b94 |
- ? first_stmt_info_for_drptr
|
|
|
ac4b94 |
- : first_stmt_info, gsi, &realignment_token,
|
|
|
ac4b94 |
+ msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
|
|
|
ac4b94 |
alignment_support_scheme, NULL_TREE,
|
|
|
ac4b94 |
&at_loop);
|
|
|
ac4b94 |
if (alignment_support_scheme == dr_explicit_realign_optimized)
|
|
|
ac4b94 |
--- gcc/gimple-loop-jam.c (revision 278492)
|
|
|
ac4b94 |
+++ gcc/gimple-loop-jam.c (revision 278614)
|
|
|
ac4b94 |
@@ -360,9 +360,26 @@
|
|
|
ac4b94 |
rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
|
|
|
ac4b94 |
}
|
|
|
ac4b94 |
|
|
|
ac4b94 |
+/* Return true if any of the access functions for dataref A
|
|
|
ac4b94 |
+ isn't invariant with respect to loop LOOP_NEST. */
|
|
|
ac4b94 |
+static bool
|
|
|
ac4b94 |
+any_access_function_variant_p (const struct data_reference *a,
|
|
|
ac4b94 |
+ const class loop *loop_nest)
|
|
|
ac4b94 |
+{
|
|
|
ac4b94 |
+ unsigned int i;
|
|
|
ac4b94 |
+ vec<tree> fns = DR_ACCESS_FNS (a);
|
|
|
ac4b94 |
+ tree t;
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+ FOR_EACH_VEC_ELT (fns, i, t)
|
|
|
ac4b94 |
+ if (!evolution_function_is_invariant_p (t, loop_nest->num))
|
|
|
ac4b94 |
+ return true;
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
+ return false;
|
|
|
ac4b94 |
+}
|
|
|
ac4b94 |
+
|
|
|
ac4b94 |
/* Returns true if the distance in DDR can be determined and adjusts
|
|
|
ac4b94 |
the unroll factor in *UNROLL to make unrolling valid for that distance.
|
|
|
ac4b94 |
- Otherwise return false.
|
|
|
ac4b94 |
+ Otherwise return false. DDR is with respect to the outer loop of INNER.
|
|
|
ac4b94 |
|
|
|
ac4b94 |
If this data dep can lead to a removed memory reference, increment
|
|
|
ac4b94 |
*REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
|
|
|
ac4b94 |
@@ -369,7 +386,7 @@
|
|
|
ac4b94 |
for this to happen. */
|
|
|
ac4b94 |
|
|
|
ac4b94 |
static bool
|
|
|
ac4b94 |
-adjust_unroll_factor (struct data_dependence_relation *ddr,
|
|
|
ac4b94 |
+adjust_unroll_factor (class loop *inner, struct data_dependence_relation *ddr,
|
|
|
ac4b94 |
unsigned *unroll, unsigned *profit_unroll,
|
|
|
ac4b94 |
unsigned *removed)
|
|
|
ac4b94 |
{
|
|
|
ac4b94 |
@@ -392,9 +409,59 @@
|
|
|
ac4b94 |
gcc_unreachable ();
|
|
|
ac4b94 |
else if ((unsigned)dist >= *unroll)
|
|
|
ac4b94 |
;
|
|
|
ac4b94 |
- else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
|
|
|
ac4b94 |
- || (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
|
|
|
ac4b94 |
- && dist > 0))
|
|
|
ac4b94 |
+ else if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
|
|
|
ac4b94 |
+ {
|
|
|
ac4b94 |
+ /* We have (a,0) with a < N, so this will be transformed into
|
|
|
ac4b94 |
+ (0,0) after unrolling by N. This might potentially be a
|
|
|
ac4b94 |
+ problem, if it's not a read-read dependency. */
|
|
|
ac4b94 |
+ if (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))
|
|
|
ac4b94 |
+ ;
|
|
|
ac4b94 |
+ else
|
|
|
ac4b94 |
+ {
|
|
|
ac4b94 |
+ /* So, at least one is a write, and we might reduce the
|
|
|
ac4b94 |
+ distance vector to (0,0). This is still no problem
|
|
|
ac4b94 |
+ if both data-refs are affine with respect to the inner
|
|
|
ac4b94 |
+ loops. But if one of them is invariant with respect
|
|
|
ac4b94 |
+ to an inner loop our reordering implicit in loop fusion
|
|
|
ac4b94 |
+ corrupts the program, as our data dependences don't
|
|
|
ac4b94 |
+ capture this. E.g. for:
|
|
|
ac4b94 |
+ for (0 <= i < n)
|
|
|
ac4b94 |
+ for (0 <= j < m)
|
|
|
ac4b94 |
+ a[i][0] = a[i+1][0] + 2; // (1)
|
|
|
ac4b94 |
+ b[i][j] = b[i+1][j] + 2; // (2)
|
|
|
ac4b94 |
+ the distance vector for both statements is (-1,0),
|
|
|
ac4b94 |
+ but exchanging the order for (2) is okay, while
|
|
|
ac4b94 |
+ for (1) it is not. To see this, write out the original
|
|
|
ac4b94 |
+ accesses (assume m is 2):
|
|
|
ac4b94 |
+ a i j original
|
|
|
ac4b94 |
+ 0 0 0 r a[1][0] b[1][0]
|
|
|
ac4b94 |
+ 1 0 0 w a[0][0] b[0][0]
|
|
|
ac4b94 |
+ 2 0 1 r a[1][0] b[1][1]
|
|
|
ac4b94 |
+ 3 0 1 w a[0][0] b[0][1]
|
|
|
ac4b94 |
+ 4 1 0 r a[2][0] b[2][0]
|
|
|
ac4b94 |
+ 5 1 0 w a[1][0] b[1][0]
|
|
|
ac4b94 |
+ after unroll-by-2 and fusion the accesses are done in
|
|
|
ac4b94 |
+ this order (from column a): 0,1, 4,5, 2,3, i.e. this:
|
|
|
ac4b94 |
+ a i j transformed
|
|
|
ac4b94 |
+ 0 0 0 r a[1][0] b[1][0]
|
|
|
ac4b94 |
+ 1 0 0 w a[0][0] b[0][0]
|
|
|
ac4b94 |
+ 4 1 0 r a[2][0] b[2][0]
|
|
|
ac4b94 |
+ 5 1 0 w a[1][0] b[1][0]
|
|
|
ac4b94 |
+ 2 0 1 r a[1][0] b[1][1]
|
|
|
ac4b94 |
+ 3 0 1 w a[0][0] b[0][1]
|
|
|
ac4b94 |
+ Note how access 2 accesses the same element as access 5
|
|
|
ac4b94 |
+ for array 'a' but not for array 'b'. */
|
|
|
ac4b94 |
+ if (any_access_function_variant_p (DDR_A (ddr), inner)
|
|
|
ac4b94 |
+ && any_access_function_variant_p (DDR_B (ddr), inner))
|
|
|
ac4b94 |
+ ;
|
|
|
ac4b94 |
+ else
|
|
|
ac4b94 |
+ /* And if any dataref of this pair is invariant with
|
|
|
ac4b94 |
+ respect to the inner loop, we have no choice but
|
|
|
ac4b94 |
+ to reduce the unroll factor. */
|
|
|
ac4b94 |
+ *unroll = dist;
|
|
|
ac4b94 |
+ }
|
|
|
ac4b94 |
+ }
|
|
|
ac4b94 |
+ else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
|
|
|
ac4b94 |
;
|
|
|
ac4b94 |
else
|
|
|
ac4b94 |
*unroll = dist;
|
|
|
ac4b94 |
@@ -486,7 +553,7 @@
|
|
|
ac4b94 |
/* Now check the distance vector, for determining a sensible
|
|
|
ac4b94 |
outer unroll factor, and for validity of merging the inner
|
|
|
ac4b94 |
loop copies. */
|
|
|
ac4b94 |
- if (!adjust_unroll_factor (ddr, &unroll_factor, &profit_unroll,
|
|
|
ac4b94 |
+ if (!adjust_unroll_factor (loop, ddr, &unroll_factor, &profit_unroll,
|
|
|
ac4b94 |
&removed))
|
|
|
ac4b94 |
{
|
|
|
ac4b94 |
/* Couldn't get the distance vector. For two reads that's
|
|
|
ac4b94 |
@@ -506,7 +573,7 @@
|
|
|
ac4b94 |
to ignore all profitability concerns and apply the transformation
|
|
|
ac4b94 |
always. */
|
|
|
ac4b94 |
if (!PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
|
|
|
ac4b94 |
- profit_unroll = 2;
|
|
|
ac4b94 |
+ profit_unroll = MAX(2, profit_unroll);
|
|
|
ac4b94 |
else if (removed * 100 / datarefs.length ()
|
|
|
ac4b94 |
< (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
|
|
|
ac4b94 |
profit_unroll = 1;
|
|
|
ac4b94 |
--- gcc/config/rs6000/predicates.md (revision 278492)
|
|
|
ac4b94 |
+++ gcc/config/rs6000/predicates.md (revision 278614)
|
|
|
ac4b94 |
@@ -1053,8 +1053,7 @@
|
|
|
ac4b94 |
return 1;
|
|
|
ac4b94 |
|
|
|
ac4b94 |
/* Allow any integer constant. */
|
|
|
ac4b94 |
- if (GET_MODE_CLASS (mode) == MODE_INT
|
|
|
ac4b94 |
- && CONST_SCALAR_INT_P (op))
|
|
|
ac4b94 |
+ if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
|
|
|
ac4b94 |
return 1;
|
|
|
ac4b94 |
|
|
|
ac4b94 |
/* Allow easy vector constants. */
|