|
|
dc03fd |
commit 6001db79c477b03eacc7e7049560921fb54b7845
|
|
|
dc03fd |
Author: Richard Sandiford <richard.sandiford@arm.com>
|
|
|
dc03fd |
Date: Mon Sep 7 20:15:36 2020 +0100
|
|
|
dc03fd |
|
|
|
dc03fd |
lra: Avoid cycling on certain subreg reloads [PR96796]
|
|
|
dc03fd |
|
|
|
dc03fd |
This PR is about LRA cycling for a reload of the form:
|
|
|
dc03fd |
|
|
|
dc03fd |
----------------------------------------------------------------------------
|
|
|
dc03fd |
Changing pseudo 196 in operand 1 of insn 103 on equiv [r105:DI*0x8+r140:DI]
|
|
|
dc03fd |
Creating newreg=287, assigning class ALL_REGS to slow/invalid mem r287
|
|
|
dc03fd |
Creating newreg=288, assigning class ALL_REGS to slow/invalid mem r288
|
|
|
dc03fd |
103: r203:SI=r288:SI<<0x1+r196:DI#0
|
|
|
dc03fd |
REG_DEAD r196:DI
|
|
|
dc03fd |
Inserting slow/invalid mem reload before:
|
|
|
dc03fd |
316: r287:DI=[r105:DI*0x8+r140:DI]
|
|
|
dc03fd |
317: r288:SI=r287:DI#0
|
|
|
dc03fd |
----------------------------------------------------------------------------
|
|
|
dc03fd |
|
|
|
dc03fd |
The problem is with r287. We rightly give it a broad starting class of
|
|
|
dc03fd |
POINTER_AND_FP_REGS (reduced from ALL_REGS by preferred_reload_class).
|
|
|
dc03fd |
However, we never make forward progress towards narrowing it down to
|
|
|
dc03fd |
a specific choice of class (POINTER_REGS or FP_REGS).
|
|
|
dc03fd |
|
|
|
dc03fd |
I think in practice we rely on two things to narrow a reload pseudo's
|
|
|
dc03fd |
class down to a specific choice:
|
|
|
dc03fd |
|
|
|
dc03fd |
(1) a restricted class is specified when the pseudo is created
|
|
|
dc03fd |
|
|
|
dc03fd |
This happens for input address reloads, where the class is taken
|
|
|
dc03fd |
from the target's chosen base register class. It also happens
|
|
|
dc03fd |
for simple REG reloads, where the class is taken from the chosen
|
|
|
dc03fd |
alternative's constraints.
|
|
|
dc03fd |
|
|
|
dc03fd |
(2) uses of the reload pseudo as a direct input operand
|
|
|
dc03fd |
|
|
|
dc03fd |
In this case get_reload_reg tries to reuse the existing register
|
|
|
dc03fd |
and narrow its class, instead of creating a new reload pseudo.
|
|
|
dc03fd |
|
|
|
dc03fd |
However, neither occurs here. As described above, r287 rightly
|
|
|
dc03fd |
starts out with a wide choice of class, ultimately derived from
|
|
|
dc03fd |
ALL_REGS, so we don't get (1). And as the comments in the PR
|
|
|
dc03fd |
explain, r287 is never used as an input reload, only the subreg is,
|
|
|
dc03fd |
so we don't get (2):
|
|
|
dc03fd |
|
|
|
dc03fd |
----------------------------------------------------------------------------
|
|
|
dc03fd |
Choosing alt 13 in insn 317: (0) r (1) w {*movsi_aarch64}
|
|
|
dc03fd |
Creating newreg=291, assigning class FP_REGS to r291
|
|
|
dc03fd |
317: r288:SI=r291:SI
|
|
|
dc03fd |
Inserting insn reload before:
|
|
|
dc03fd |
320: r291:SI=r287:DI#0
|
|
|
dc03fd |
----------------------------------------------------------------------------
|
|
|
dc03fd |
|
|
|
dc03fd |
IMO, in this case we should rely on the reload of r316 to narrow
|
|
|
dc03fd |
down the class of r278. Currently we do:
|
|
|
dc03fd |
|
|
|
dc03fd |
----------------------------------------------------------------------------
|
|
|
dc03fd |
Choosing alt 7 in insn 316: (0) r (1) m {*movdi_aarch64}
|
|
|
dc03fd |
Creating newreg=289 from oldreg=287, assigning class GENERAL_REGS to r289
|
|
|
dc03fd |
316: r289:DI=[r105:DI*0x8+r140:DI]
|
|
|
dc03fd |
Inserting insn reload after:
|
|
|
dc03fd |
318: r287:DI=r289:DI
|
|
|
dc03fd |
---------------------------------------------------
|
|
|
dc03fd |
|
|
|
dc03fd |
i.e. we create a new pseudo register r289 and give *that* pseudo
|
|
|
dc03fd |
GENERAL_REGS instead. This is because get_reload_reg only narrows
|
|
|
dc03fd |
down the existing class for OP_IN and OP_INOUT, not OP_OUT.
|
|
|
dc03fd |
|
|
|
dc03fd |
But if we have a reload pseudo in a reload instruction and have chosen
|
|
|
dc03fd |
a specific class for the reload pseudo, I think we should simply install
|
|
|
dc03fd |
it for OP_OUT reloads too, if the class is a subset of the existing class.
|
|
|
dc03fd |
We will need to pick such a register whatever happens (for r289 in the
|
|
|
dc03fd |
example above). And as explained in the PR, doing this actually avoids
|
|
|
dc03fd |
an unnecessary move via the FP registers too.
|
|
|
dc03fd |
|
|
|
dc03fd |
The patch is quite aggressive in that it does this for all reload
|
|
|
dc03fd |
pseudos in all reload instructions. I wondered about reusing the
|
|
|
dc03fd |
condition for a reload move in in_class_p:
|
|
|
dc03fd |
|
|
|
dc03fd |
INSN_UID (curr_insn) >= new_insn_uid_start
|
|
|
dc03fd |
&& curr_insn_set != NULL
|
|
|
dc03fd |
&& ((OBJECT_P (SET_SRC (curr_insn_set))
|
|
|
dc03fd |
&& ! CONSTANT_P (SET_SRC (curr_insn_set)))
|
|
|
dc03fd |
|| (GET_CODE (SET_SRC (curr_insn_set)) == SUBREG
|
|
|
dc03fd |
&& OBJECT_P (SUBREG_REG (SET_SRC (curr_insn_set)))
|
|
|
dc03fd |
&& ! CONSTANT_P (SUBREG_REG (SET_SRC (curr_insn_set)))))))
|
|
|
dc03fd |
|
|
|
dc03fd |
but I can't really justify that on first principles. I think we
|
|
|
dc03fd |
should apply the rule consistently until we have a specific reason
|
|
|
dc03fd |
for doing otherwise.
|
|
|
dc03fd |
|
|
|
dc03fd |
gcc/
|
|
|
dc03fd |
PR rtl-optimization/96796
|
|
|
dc03fd |
* lra-constraints.c (in_class_p): Add a default-false
|
|
|
dc03fd |
allow_all_reload_class_changes_p parameter. Do not treat
|
|
|
dc03fd |
reload moves specially when the parameter is true.
|
|
|
dc03fd |
(get_reload_reg): Try to narrow the class of an existing OP_OUT
|
|
|
dc03fd |
reload if we're reloading a reload pseudo in a reload instruction.
|
|
|
dc03fd |
|
|
|
dc03fd |
gcc/testsuite/
|
|
|
dc03fd |
PR rtl-optimization/96796
|
|
|
dc03fd |
* gcc.c-torture/compile/pr96796.c: New test.
|
|
|
dc03fd |
|
|
|
dc03fd |
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
|
|
|
dc03fd |
index 580da9c3ed6..161b721efb1 100644
|
|
|
dc03fd |
--- a/gcc/lra-constraints.c
|
|
|
dc03fd |
+++ b/gcc/lra-constraints.c
|
|
|
dc03fd |
@@ -236,12 +236,17 @@ get_reg_class (int regno)
|
|
|
dc03fd |
CL. Use elimination first if REG is a hard register. If REG is a
|
|
|
dc03fd |
reload pseudo created by this constraints pass, assume that it will
|
|
|
dc03fd |
be allocated a hard register from its allocno class, but allow that
|
|
|
dc03fd |
- class to be narrowed to CL if it is currently a superset of CL.
|
|
|
dc03fd |
+ class to be narrowed to CL if it is currently a superset of CL and
|
|
|
dc03fd |
+ if either:
|
|
|
dc03fd |
+
|
|
|
dc03fd |
+ - ALLOW_ALL_RELOAD_CLASS_CHANGES_P is true or
|
|
|
dc03fd |
+ - the instruction we're processing is not a reload move.
|
|
|
dc03fd |
|
|
|
dc03fd |
If NEW_CLASS is nonnull, set *NEW_CLASS to the new allocno class of
|
|
|
dc03fd |
REGNO (reg), or NO_REGS if no change in its class was needed. */
|
|
|
dc03fd |
static bool
|
|
|
dc03fd |
-in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class)
|
|
|
dc03fd |
+in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class,
|
|
|
dc03fd |
+ bool allow_all_reload_class_changes_p = false)
|
|
|
dc03fd |
{
|
|
|
dc03fd |
enum reg_class rclass, common_class;
|
|
|
dc03fd |
machine_mode reg_mode;
|
|
|
dc03fd |
@@ -266,7 +271,8 @@ in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class)
|
|
|
dc03fd |
typically moves that have many alternatives, and restricting
|
|
|
dc03fd |
reload pseudos for one alternative may lead to situations
|
|
|
dc03fd |
where other reload pseudos are no longer allocatable. */
|
|
|
dc03fd |
- || (INSN_UID (curr_insn) >= new_insn_uid_start
|
|
|
dc03fd |
+ || (!allow_all_reload_class_changes_p
|
|
|
dc03fd |
+ && INSN_UID (curr_insn) >= new_insn_uid_start
|
|
|
dc03fd |
&& curr_insn_set != NULL
|
|
|
dc03fd |
&& ((OBJECT_P (SET_SRC (curr_insn_set))
|
|
|
dc03fd |
&& ! CONSTANT_P (SET_SRC (curr_insn_set)))
|
|
|
dc03fd |
@@ -551,13 +557,12 @@ init_curr_insn_input_reloads (void)
|
|
|
dc03fd |
curr_insn_input_reloads_num = 0;
|
|
|
dc03fd |
}
|
|
|
dc03fd |
|
|
|
dc03fd |
-/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already
|
|
|
dc03fd |
- created input reload pseudo (only if TYPE is not OP_OUT). Don't
|
|
|
dc03fd |
- reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be
|
|
|
dc03fd |
- wrapped up in SUBREG. The result pseudo is returned through
|
|
|
dc03fd |
- RESULT_REG. Return TRUE if we created a new pseudo, FALSE if we
|
|
|
dc03fd |
- reused the already created input reload pseudo. Use TITLE to
|
|
|
dc03fd |
- describe new registers for debug purposes. */
|
|
|
dc03fd |
+/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse an existing
|
|
|
dc03fd |
+ reload pseudo. Don't reuse an existing reload pseudo if IN_SUBREG_P
|
|
|
dc03fd |
+ is true and the reused pseudo should be wrapped up in a SUBREG.
|
|
|
dc03fd |
+ The result pseudo is returned through RESULT_REG. Return TRUE if we
|
|
|
dc03fd |
+ created a new pseudo, FALSE if we reused an existing reload pseudo.
|
|
|
dc03fd |
+ Use TITLE to describe new registers for debug purposes. */
|
|
|
dc03fd |
static bool
|
|
|
dc03fd |
get_reload_reg (enum op_type type, machine_mode mode, rtx original,
|
|
|
dc03fd |
enum reg_class rclass, bool in_subreg_p,
|
|
|
dc03fd |
@@ -616,6 +621,35 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
|
|
|
dc03fd |
|
|
|
dc03fd |
if (type == OP_OUT)
|
|
|
dc03fd |
{
|
|
|
dc03fd |
+ /* Output reload registers tend to start out with a conservative
|
|
|
dc03fd |
+ choice of register class. Usually this is ALL_REGS, although
|
|
|
dc03fd |
+ a target might narrow it (for performance reasons) through
|
|
|
dc03fd |
+ targetm.preferred_reload_class. It's therefore quite common
|
|
|
dc03fd |
+ for a reload instruction to require a more restrictive class
|
|
|
dc03fd |
+ than the class that was originally assigned to the reload register.
|
|
|
dc03fd |
+
|
|
|
dc03fd |
+ In these situations, it's more efficient to refine the choice
|
|
|
dc03fd |
+ of register class rather than create a second reload register.
|
|
|
dc03fd |
+ This also helps to avoid cycling for registers that are only
|
|
|
dc03fd |
+ used by reload instructions. */
|
|
|
dc03fd |
+ if (REG_P (original)
|
|
|
dc03fd |
+ && (int) REGNO (original) >= new_regno_start
|
|
|
dc03fd |
+ && INSN_UID (curr_insn) >= new_insn_uid_start
|
|
|
dc03fd |
+ && in_class_p (original, rclass, &new_class, true))
|
|
|
dc03fd |
+ {
|
|
|
dc03fd |
+ unsigned int regno = REGNO (original);
|
|
|
dc03fd |
+ if (lra_dump_file != NULL)
|
|
|
dc03fd |
+ {
|
|
|
dc03fd |
+ fprintf (lra_dump_file, " Reuse r%d for output ", regno);
|
|
|
dc03fd |
+ dump_value_slim (lra_dump_file, original, 1);
|
|
|
dc03fd |
+ }
|
|
|
dc03fd |
+ if (new_class != lra_get_allocno_class (regno))
|
|
|
dc03fd |
+ lra_change_class (regno, new_class, ", change to", false);
|
|
|
dc03fd |
+ if (lra_dump_file != NULL)
|
|
|
dc03fd |
+ fprintf (lra_dump_file, "\n");
|
|
|
dc03fd |
+ *result_reg = original;
|
|
|
dc03fd |
+ return false;
|
|
|
dc03fd |
+ }
|
|
|
dc03fd |
*result_reg
|
|
|
dc03fd |
= lra_create_new_reg_with_unique_value (mode, original, rclass, title);
|
|
|
dc03fd |
return true;
|
|
|
dc03fd |
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96796.c b/gcc/testsuite/gcc.c-torture/compile/pr96796.c
|
|
|
dc03fd |
new file mode 100644
|
|
|
dc03fd |
index 00000000000..8808e62fe77
|
|
|
dc03fd |
--- /dev/null
|
|
|
dc03fd |
+++ b/gcc/testsuite/gcc.c-torture/compile/pr96796.c
|
|
|
dc03fd |
@@ -0,0 +1,55 @@
|
|
|
dc03fd |
+/* { dg-additional-options "-fcommon" } */
|
|
|
dc03fd |
+
|
|
|
dc03fd |
+struct S0 {
|
|
|
dc03fd |
+ signed f0 : 8;
|
|
|
dc03fd |
+ unsigned f1;
|
|
|
dc03fd |
+ unsigned f4;
|
|
|
dc03fd |
+};
|
|
|
dc03fd |
+struct S1 {
|
|
|
dc03fd |
+ long f3;
|
|
|
dc03fd |
+ char f4;
|
|
|
dc03fd |
+} g_3_4;
|
|
|
dc03fd |
+
|
|
|
dc03fd |
+int g_5, func_1_l_32, func_50___trans_tmp_31;
|
|
|
dc03fd |
+static struct S0 g_144, g_834, g_1255, g_1261;
|
|
|
dc03fd |
+
|
|
|
dc03fd |
+int g_273[120] = {};
|
|
|
dc03fd |
+int *g_555;
|
|
|
dc03fd |
+char **g_979;
|
|
|
dc03fd |
+static int g_1092_0;
|
|
|
dc03fd |
+static int g_1193;
|
|
|
dc03fd |
+int safe_mul_func_int16_t_s_s(int si1, int si2) { return si1 * si2; }
|
|
|
dc03fd |
+static struct S0 *func_50();
|
|
|
dc03fd |
+int func_1() { func_50(g_3_4, g_5, func_1_l_32, 8, 3); }
|
|
|
dc03fd |
+void safe_div_func_int64_t_s_s(int *);
|
|
|
dc03fd |
+void safe_mod_func_uint32_t_u_u(struct S0);
|
|
|
dc03fd |
+struct S0 *func_50(int p_51, struct S0 p_52, struct S1 p_53, int p_54,
|
|
|
dc03fd |
+ int p_55) {
|
|
|
dc03fd |
+ int __trans_tmp_30;
|
|
|
dc03fd |
+ char __trans_tmp_22;
|
|
|
dc03fd |
+ short __trans_tmp_19;
|
|
|
dc03fd |
+ long l_985_1;
|
|
|
dc03fd |
+ long l_1191[8];
|
|
|
dc03fd |
+ safe_div_func_int64_t_s_s(g_273);
|
|
|
dc03fd |
+ __builtin_printf((char*)g_1261.f4);
|
|
|
dc03fd |
+ safe_mod_func_uint32_t_u_u(g_834);
|
|
|
dc03fd |
+ g_144.f0 += 1;
|
|
|
dc03fd |
+ for (;;) {
|
|
|
dc03fd |
+ struct S1 l_1350 = {&l_1350};
|
|
|
dc03fd |
+ for (; p_53.f3; p_53.f3 -= 1)
|
|
|
dc03fd |
+ for (; g_1193 <= 2; g_1193 += 1) {
|
|
|
dc03fd |
+ __trans_tmp_19 = safe_mul_func_int16_t_s_s(l_1191[l_985_1 + p_53.f3],
|
|
|
dc03fd |
+ p_55 % (**g_979 = 10));
|
|
|
dc03fd |
+ __trans_tmp_22 = g_1255.f1 * p_53.f4;
|
|
|
dc03fd |
+ __trans_tmp_30 = __trans_tmp_19 + __trans_tmp_22;
|
|
|
dc03fd |
+ if (__trans_tmp_30)
|
|
|
dc03fd |
+ g_1261.f0 = p_51;
|
|
|
dc03fd |
+ else {
|
|
|
dc03fd |
+ g_1255.f0 = p_53.f3;
|
|
|
dc03fd |
+ int *l_1422 = g_834.f0 = g_144.f4 != (*l_1422)++ > 0 < 0 ^ 51;
|
|
|
dc03fd |
+ g_555 = ~0;
|
|
|
dc03fd |
+ g_1092_0 |= func_50___trans_tmp_31;
|
|
|
dc03fd |
+ }
|
|
|
dc03fd |
+ }
|
|
|
dc03fd |
+ }
|
|
|
dc03fd |
+}
|