diff --git a/SOURCES/gcc8-pr96796.patch b/SOURCES/gcc8-pr96796.patch
new file mode 100644
index 0000000..46d1d2c
--- /dev/null
+++ b/SOURCES/gcc8-pr96796.patch
@@ -0,0 +1,254 @@
+commit 6001db79c477b03eacc7e7049560921fb54b7845
+Author: Richard Sandiford <richard.sandiford@arm.com>
+Date:   Mon Sep 7 20:15:36 2020 +0100
+
+    lra: Avoid cycling on certain subreg reloads [PR96796]
+    
+    This PR is about LRA cycling for a reload of the form:
+    
+    ----------------------------------------------------------------------------
+    Changing pseudo 196 in operand 1 of insn 103 on equiv [r105:DI*0x8+r140:DI]
+          Creating newreg=287, assigning class ALL_REGS to slow/invalid mem r287
+          Creating newreg=288, assigning class ALL_REGS to slow/invalid mem r288
+      103: r203:SI=r288:SI<<0x1+r196:DI#0
+          REG_DEAD r196:DI
+        Inserting slow/invalid mem reload before:
+      316: r287:DI=[r105:DI*0x8+r140:DI]
+      317: r288:SI=r287:DI#0
+    ----------------------------------------------------------------------------
+    
+    The problem is with r287.  We rightly give it a broad starting class of
+    POINTER_AND_FP_REGS (reduced from ALL_REGS by preferred_reload_class).
+    However, we never make forward progress towards narrowing it down to
+    a specific choice of class (POINTER_REGS or FP_REGS).
+    
+    I think in practice we rely on two things to narrow a reload pseudo's
+    class down to a specific choice:
+    
+    (1) a restricted class is specified when the pseudo is created
+    
+        This happens for input address reloads, where the class is taken
+        from the target's chosen base register class.  It also happens
+        for simple REG reloads, where the class is taken from the chosen
+        alternative's constraints.
+    
+    (2) uses of the reload pseudo as a direct input operand
+    
+        In this case get_reload_reg tries to reuse the existing register
+        and narrow its class, instead of creating a new reload pseudo.
+    
+    However, neither occurs here.  As described above, r287 rightly
+    starts out with a wide choice of class, ultimately derived from
+    ALL_REGS, so we don't get (1).  And as the comments in the PR
+    explain, r287 is never used as an input reload, only the subreg is,
+    so we don't get (2):
+    
+    ----------------------------------------------------------------------------
+             Choosing alt 13 in insn 317:  (0) r  (1) w {*movsi_aarch64}
+          Creating newreg=291, assigning class FP_REGS to r291
+      317: r288:SI=r291:SI
+        Inserting insn reload before:
+      320: r291:SI=r287:DI#0
+    ----------------------------------------------------------------------------
+    
+    IMO, in this case we should rely on the reload of r316 to narrow
+    down the class of r278.  Currently we do:
+    
+    ----------------------------------------------------------------------------
+             Choosing alt 7 in insn 316:  (0) r  (1) m {*movdi_aarch64}
+          Creating newreg=289 from oldreg=287, assigning class GENERAL_REGS to r289
+      316: r289:DI=[r105:DI*0x8+r140:DI]
+        Inserting insn reload after:
+      318: r287:DI=r289:DI
+    ---------------------------------------------------
+    
+    i.e. we create a new pseudo register r289 and give *that* pseudo
+    GENERAL_REGS instead.  This is because get_reload_reg only narrows
+    down the existing class for OP_IN and OP_INOUT, not OP_OUT.
+    
+    But if we have a reload pseudo in a reload instruction and have chosen
+    a specific class for the reload pseudo, I think we should simply install
+    it for OP_OUT reloads too, if the class is a subset of the existing class.
+    We will need to pick such a register whatever happens (for r289 in the
+    example above).  And as explained in the PR, doing this actually avoids
+    an unnecessary move via the FP registers too.
+    
+    The patch is quite aggressive in that it does this for all reload
+    pseudos in all reload instructions.  I wondered about reusing the
+    condition for a reload move in in_class_p:
+    
+              INSN_UID (curr_insn) >= new_insn_uid_start
+              && curr_insn_set != NULL
+              && ((OBJECT_P (SET_SRC (curr_insn_set))
+                   && ! CONSTANT_P (SET_SRC (curr_insn_set)))
+                  || (GET_CODE (SET_SRC (curr_insn_set)) == SUBREG
+                      && OBJECT_P (SUBREG_REG (SET_SRC (curr_insn_set)))
+                      && ! CONSTANT_P (SUBREG_REG (SET_SRC (curr_insn_set)))))))
+    
+    but I can't really justify that on first principles.  I think we
+    should apply the rule consistently until we have a specific reason
+    for doing otherwise.
+    
+    gcc/
+            PR rtl-optimization/96796
+            * lra-constraints.c (in_class_p): Add a default-false
+            allow_all_reload_class_changes_p parameter.  Do not treat
+            reload moves specially when the parameter is true.
+            (get_reload_reg): Try to narrow the class of an existing OP_OUT
+            reload if we're reloading a reload pseudo in a reload instruction.
+    
+    gcc/testsuite/
+            PR rtl-optimization/96796
+            * gcc.c-torture/compile/pr96796.c: New test.
+
+diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
+index 580da9c3ed6..161b721efb1 100644
+--- a/gcc/lra-constraints.c
++++ b/gcc/lra-constraints.c
+@@ -236,12 +236,17 @@ get_reg_class (int regno)
+    CL.  Use elimination first if REG is a hard register.  If REG is a
+    reload pseudo created by this constraints pass, assume that it will
+    be allocated a hard register from its allocno class, but allow that
+-   class to be narrowed to CL if it is currently a superset of CL.
++   class to be narrowed to CL if it is currently a superset of CL and
++   if either:
++
++   - ALLOW_ALL_RELOAD_CLASS_CHANGES_P is true or
++   - the instruction we're processing is not a reload move.
+ 
+    If NEW_CLASS is nonnull, set *NEW_CLASS to the new allocno class of
+    REGNO (reg), or NO_REGS if no change in its class was needed.  */
+ static bool
+-in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class)
++in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class,
++	    bool allow_all_reload_class_changes_p = false)
+ {
+   enum reg_class rclass, common_class;
+   machine_mode reg_mode;
+@@ -266,7 +271,8 @@ in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class)
+ 	 typically moves that have many alternatives, and restricting
+ 	 reload pseudos for one alternative may lead to situations
+ 	 where other reload pseudos are no longer allocatable.  */
+-      || (INSN_UID (curr_insn) >= new_insn_uid_start
++      || (!allow_all_reload_class_changes_p
++	  && INSN_UID (curr_insn) >= new_insn_uid_start
+ 	  && curr_insn_set != NULL
+ 	  && ((OBJECT_P (SET_SRC (curr_insn_set))
+ 	       && ! CONSTANT_P (SET_SRC (curr_insn_set)))
+@@ -551,13 +557,12 @@ init_curr_insn_input_reloads (void)
+   curr_insn_input_reloads_num = 0;
+ }
+ 
+-/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already
+-   created input reload pseudo (only if TYPE is not OP_OUT).  Don't
+-   reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be
+-   wrapped up in SUBREG.  The result pseudo is returned through
+-   RESULT_REG.  Return TRUE if we created a new pseudo, FALSE if we
+-   reused the already created input reload pseudo.  Use TITLE to
+-   describe new registers for debug purposes.  */
++/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse an existing
++   reload pseudo.  Don't reuse an existing reload pseudo if IN_SUBREG_P
++   is true and the reused pseudo should be wrapped up in a SUBREG.
++   The result pseudo is returned through RESULT_REG.  Return TRUE if we
++   created a new pseudo, FALSE if we reused an existing reload pseudo.
++   Use TITLE to describe new registers for debug purposes.  */
+ static bool
+ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
+ 		enum reg_class rclass, bool in_subreg_p,
+@@ -616,6 +621,35 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
+ 
+   if (type == OP_OUT)
+     {
++      /* Output reload registers tend to start out with a conservative
++	 choice of register class.  Usually this is ALL_REGS, although
++	 a target might narrow it (for performance reasons) through
++	 targetm.preferred_reload_class.  It's therefore quite common
++	 for a reload instruction to require a more restrictive class
++	 than the class that was originally assigned to the reload register.
++
++	 In these situations, it's more efficient to refine the choice
++	 of register class rather than create a second reload register.
++	 This also helps to avoid cycling for registers that are only
++	 used by reload instructions.  */
++      if (REG_P (original)
++	  && (int) REGNO (original) >= new_regno_start
++	  && INSN_UID (curr_insn) >= new_insn_uid_start
++	  && in_class_p (original, rclass, &new_class, true))
++	{
++	  unsigned int regno = REGNO (original);
++	  if (lra_dump_file != NULL)
++	    {
++	      fprintf (lra_dump_file, "	 Reuse r%d for output ", regno);
++	      dump_value_slim (lra_dump_file, original, 1);
++	    }
++	  if (new_class != lra_get_allocno_class (regno))
++	    lra_change_class (regno, new_class, ", change to", false);
++	  if (lra_dump_file != NULL)
++	    fprintf (lra_dump_file, "\n");
++	  *result_reg = original;
++	  return false;
++	}
+       *result_reg
+ 	= lra_create_new_reg_with_unique_value (mode, original, rclass, title);
+       return true;
+diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96796.c b/gcc/testsuite/gcc.c-torture/compile/pr96796.c
+new file mode 100644
+index 00000000000..8808e62fe77
+--- /dev/null
++++ b/gcc/testsuite/gcc.c-torture/compile/pr96796.c
+@@ -0,0 +1,55 @@
++/* { dg-additional-options "-fcommon" } */
++
++struct S0 {
++  signed f0 : 8;
++  unsigned f1;
++  unsigned f4;
++};
++struct S1 {
++  long f3;
++  char f4;
++} g_3_4;
++
++int g_5, func_1_l_32, func_50___trans_tmp_31;
++static struct S0 g_144, g_834, g_1255, g_1261;
++
++int g_273[120] = {};
++int *g_555;
++char **g_979;
++static int g_1092_0;
++static int g_1193;
++int safe_mul_func_int16_t_s_s(int si1, int si2) { return si1 * si2; }
++static struct S0 *func_50();
++int func_1() { func_50(g_3_4, g_5, func_1_l_32, 8, 3); }
++void safe_div_func_int64_t_s_s(int *);
++void safe_mod_func_uint32_t_u_u(struct S0);
++struct S0 *func_50(int p_51, struct S0 p_52, struct S1 p_53, int p_54,
++                   int p_55) {
++  int __trans_tmp_30;
++  char __trans_tmp_22;
++  short __trans_tmp_19;
++  long l_985_1;
++  long l_1191[8];
++  safe_div_func_int64_t_s_s(g_273);
++  __builtin_printf((char*)g_1261.f4);
++  safe_mod_func_uint32_t_u_u(g_834);
++  g_144.f0 += 1;
++  for (;;) {
++    struct S1 l_1350 = {&l_1350};
++    for (; p_53.f3; p_53.f3 -= 1)
++      for (; g_1193 <= 2; g_1193 += 1) {
++        __trans_tmp_19 = safe_mul_func_int16_t_s_s(l_1191[l_985_1 + p_53.f3],
++                                                   p_55 % (**g_979 = 10));
++        __trans_tmp_22 = g_1255.f1 * p_53.f4;
++        __trans_tmp_30 = __trans_tmp_19 + __trans_tmp_22;
++        if (__trans_tmp_30)
++          g_1261.f0 = p_51;
++        else {
++          g_1255.f0 = p_53.f3;
++          int *l_1422 = g_834.f0 = g_144.f4 != (*l_1422)++ > 0 < 0 ^ 51;
++          g_555 = ~0;
++          g_1092_0 |= func_50___trans_tmp_31;
++        }
++      }
++  }
++}
diff --git a/SPECS/gcc.spec b/SPECS/gcc.spec
index a6bfab2..efba515 100644
--- a/SPECS/gcc.spec
+++ b/SPECS/gcc.spec
@@ -4,7 +4,7 @@
 %global gcc_major 8
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 5
+%global gcc_release 6
 %global nvptx_tools_gitrev c28050f60193b3b95a18866a96f03334e874e78f
 %global nvptx_newlib_gitrev aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24
 %global _unpackaged_files_terminate_build 0
@@ -280,6 +280,7 @@ Patch19: gcc8-rh1960701.patch
 Patch20: gcc8-pr100797.patch
 Patch21: gcc8-rh1981822.patch
 Patch22: gcc8-Wbidi-chars.patch
+Patch23: gcc8-pr96796.patch
 
 Patch30: gcc8-rh1668903-1.patch
 Patch31: gcc8-rh1668903-2.patch
@@ -861,6 +862,7 @@ to NVidia PTX capable devices if available.
 %patch20 -p0 -b .pr100797~
 %patch21 -p0 -b .rh1981822~
 %patch22 -p1 -b .bidi~
+%patch23 -p1 -b .pr96796~
 
 %patch30 -p0 -b .rh1668903-1~
 %patch31 -p0 -b .rh1668903-2~
@@ -3177,6 +3179,9 @@ fi
 %endif
 
 %changelog
+* Fri Dec  3 2021 Marek Polacek <polacek@redhat.com> 8.5.0-6
+- avoid cycling on certain subreg reloads (PR rtl-optimization/96796, #2028798)
+
 * Tue Nov 30 2021 Marek Polacek <polacek@redhat.com> 8.5.0-5
 - when linking against libgcc_s, link libgcc.a too (#2022588)