Blame SOURCES/gcc8-rh1512529-aarch64.patch

ba3c21
--- gcc/config/aarch64/aarch64.c
ba3c21
+++ gcc/config/aarch64/aarch64.c
ba3c21
@@ -3799,7 +3799,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
ba3c21
   output_asm_insn ("sub\t%0, %0, %1", xops);
ba3c21
 
ba3c21
   /* Probe at TEST_ADDR.  */
ba3c21
-  output_asm_insn ("str\txzr, [%0]", xops);
ba3c21
+  if (flag_stack_clash_protection)
ba3c21
+    {
ba3c21
+      gcc_assert (xops[0] == stack_pointer_rtx);
ba3c21
+      xops[1] = GEN_INT (PROBE_INTERVAL - 8);
ba3c21
+      output_asm_insn ("str\txzr, [%0, %1]", xops);
ba3c21
+    }
ba3c21
+  else
ba3c21
+    output_asm_insn ("str\txzr, [%0]", xops);
ba3c21
 
ba3c21
   /* Test if TEST_ADDR == LAST_ADDR.  */
ba3c21
   xops[1] = reg2;
ba3c21
@@ -4589,6 +4596,133 @@ aarch64_set_handled_components (sbitmap components)
ba3c21
       cfun->machine->reg_is_wrapped_separately[regno] = true;
ba3c21
 }
ba3c21
 
ba3c21
+/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
ba3c21
+   registers.  */
ba3c21
+
ba3c21
+static void
ba3c21
+aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
ba3c21
+					poly_int64 poly_size)
ba3c21
+{
ba3c21
+  HOST_WIDE_INT size;
ba3c21
+  if (!poly_size.is_constant (&size))
ba3c21
+    {
ba3c21
+      sorry ("stack probes for SVE frames");
ba3c21
+      return;
ba3c21
+    }
ba3c21
+
ba3c21
+  HOST_WIDE_INT probe_interval
ba3c21
+    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
ba3c21
+  HOST_WIDE_INT guard_size
ba3c21
+    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
ba3c21
+  HOST_WIDE_INT guard_used_by_caller = 1024;
ba3c21
+
ba3c21
+  /* SIZE should be large enough to require probing here.  ie, it
ba3c21
+     must be larger than GUARD_SIZE - GUARD_USED_BY_CALLER.
ba3c21
+
ba3c21
+     We can allocate GUARD_SIZE - GUARD_USED_BY_CALLER as a single chunk
ba3c21
+     without any probing.  */
ba3c21
+  gcc_assert (size >= guard_size - guard_used_by_caller);
ba3c21
+  aarch64_sub_sp (temp1, temp2, guard_size - guard_used_by_caller, true);
ba3c21
+  HOST_WIDE_INT orig_size = size;
ba3c21
+  size -= (guard_size - guard_used_by_caller);
ba3c21
+
ba3c21
+  HOST_WIDE_INT rounded_size = size & -probe_interval;
ba3c21
+  HOST_WIDE_INT residual = size - rounded_size;
ba3c21
+
ba3c21
+  /* We can handle a small number of allocations/probes inline.  Otherwise
ba3c21
+     punt to a loop.  */
ba3c21
+  if (rounded_size && rounded_size <= 4 * probe_interval)
ba3c21
+    {
ba3c21
+      /* We don't use aarch64_sub_sp here because we don't want to
ba3c21
+	 repeatedly load TEMP1.  */
ba3c21
+      rtx step = GEN_INT (-probe_interval);
ba3c21
+      if (probe_interval > ARITH_FACTOR)
ba3c21
+	{
ba3c21
+	  emit_move_insn (temp1, step);
ba3c21
+	  step = temp1;
ba3c21
+	}
ba3c21
+
ba3c21
+      for (HOST_WIDE_INT i = 0; i < rounded_size; i += probe_interval)
ba3c21
+	{
ba3c21
+	  rtx_insn *insn = emit_insn (gen_add2_insn (stack_pointer_rtx, step));
ba3c21
+          add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
ba3c21
+
ba3c21
+	  if (probe_interval > ARITH_FACTOR)
ba3c21
+	    {
ba3c21
+	      RTX_FRAME_RELATED_P (insn) = 1;
ba3c21
+	      rtx adj = plus_constant (Pmode, stack_pointer_rtx, -probe_interval);
ba3c21
+	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
ba3c21
+			    gen_rtx_SET (stack_pointer_rtx, adj));
ba3c21
+	    }
ba3c21
+
ba3c21
+	  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
ba3c21
+					   (probe_interval
ba3c21
+					    - GET_MODE_SIZE (word_mode))));
ba3c21
+	  emit_insn (gen_blockage ());
ba3c21
+	}
ba3c21
+      dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
ba3c21
+    }
ba3c21
+  else if (rounded_size)
ba3c21
+    {
ba3c21
+      /* Compute the ending address.  */
ba3c21
+      unsigned int scratchreg = REGNO (temp1);
ba3c21
+      emit_move_insn (temp1, GEN_INT (-rounded_size));
ba3c21
+      rtx_insn *insn
ba3c21
+	 = emit_insn (gen_add3_insn (temp1, stack_pointer_rtx, temp1));
ba3c21
+
ba3c21
+      /* For the initial allocation, we don't have a frame pointer
ba3c21
+	 set up, so we always need CFI notes.  If we're doing the
ba3c21
+	 final allocation, then we may have a frame pointer, in which
ba3c21
+	 case it is the CFA, otherwise we need CFI notes.
ba3c21
+
ba3c21
+	 We can determine which allocation we are doing by looking at
ba3c21
+	 the temporary register.  IP0 is the initial allocation, IP1
ba3c21
+	 is the final allocation.  */
ba3c21
+      if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
ba3c21
+	{
ba3c21
+	  /* We want the CFA independent of the stack pointer for the
ba3c21
+	     duration of the loop.  */
ba3c21
+	  add_reg_note (insn, REG_CFA_DEF_CFA,
ba3c21
+			plus_constant (Pmode, temp1,
ba3c21
+				       (rounded_size + (orig_size - size))));
ba3c21
+	  RTX_FRAME_RELATED_P (insn) = 1;
ba3c21
+	}
ba3c21
+
ba3c21
+      /* This allocates and probes the stack.
ba3c21
+
ba3c21
+	 It also probes at a 4k interval regardless of the value of
ba3c21
+	 PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL.  */
ba3c21
+      insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
ba3c21
+					       stack_pointer_rtx, temp1));
ba3c21
+
ba3c21
+      /* Now reset the CFA register if needed.  */
ba3c21
+      if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
ba3c21
+	{
ba3c21
+	  add_reg_note (insn, REG_CFA_DEF_CFA,
ba3c21
+			plus_constant (Pmode, stack_pointer_rtx,
ba3c21
+				       (rounded_size + (orig_size - size))));
ba3c21
+	  RTX_FRAME_RELATED_P (insn) = 1;
ba3c21
+	}
ba3c21
+
ba3c21
+      emit_insn (gen_blockage ());
ba3c21
+      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
ba3c21
+    }
ba3c21
+  else
ba3c21
+    dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
ba3c21
+
ba3c21
+  /* Handle any residuals.
ba3c21
+     Note that any residual must be probed.  */
ba3c21
+  if (residual)
ba3c21
+    {
ba3c21
+      aarch64_sub_sp (temp1, temp2, residual, true);
ba3c21
+      add_reg_note (get_last_insn (), REG_STACK_CHECK, const0_rtx);
ba3c21
+      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
ba3c21
+				       (residual - GET_MODE_SIZE (word_mode))));
ba3c21
+      emit_insn (gen_blockage ());
ba3c21
+    }
ba3c21
+  return;
ba3c21
+}
ba3c21
+
ba3c21
 /* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
ba3c21
    is saved at BASE + OFFSET.  */
ba3c21
 
ba3c21
@@ -4686,7 +4820,54 @@ aarch64_expand_prologue (void)
ba3c21
   rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
ba3c21
   rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
ba3c21
 
ba3c21
-  aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
ba3c21
+  /* We do not fully protect aarch64 against stack clash style attacks
ba3c21
+     as doing so would be prohibitively expensive with less utility over
ba3c21
+     time as newer compilers are deployed.
ba3c21
+
ba3c21
+     We assume the guard is at least 64k.  Furthermore, we assume that
ba3c21
+     the caller has not pushed the stack pointer more than 1k into
ba3c21
+     the guard.  A caller that pushes the stack pointer more than 1k into
ba3c21
+     the guard is considered invalid.
ba3c21
+
ba3c21
+     Note that the caller's ability to push the stack pointer into the
ba3c21
+     guard is a function of the number and size of outgoing arguments and/or
ba3c21
+     dynamic stack allocations due to the mandatory save of the link register
ba3c21
+     in the caller's frame.
ba3c21
+
ba3c21
+     With those assumptions the callee can allocate up to 63k of stack
ba3c21
+     space without probing.
ba3c21
+
ba3c21
+     When probing is needed, we emit a probe at the start of the prologue
ba3c21
+     and every PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes thereafter.
ba3c21
+
ba3c21
+     We have to track how much space has been allocated, but we do not
ba3c21
+     track stores into the stack as implicit probes except for the
ba3c21
+     fp/lr store.  */
ba3c21
+  HOST_WIDE_INT guard_size
ba3c21
+    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
ba3c21
+  HOST_WIDE_INT guard_used_by_caller = 1024;
ba3c21
+  if (flag_stack_clash_protection)
ba3c21
+    {
ba3c21
+      if (known_eq (frame_size, 0))
ba3c21
+	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
ba3c21
+      else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
ba3c21
+	       && known_lt (final_adjust, guard_size - guard_used_by_caller))
ba3c21
+	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
ba3c21
+    }
ba3c21
+
ba3c21
+  /* In theory we should never have both an initial adjustment
ba3c21
+     and a callee save adjustment.  Verify that is the case since the
ba3c21
+     code below does not handle it for -fstack-clash-protection.  */
ba3c21
+  gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
ba3c21
+
ba3c21
+  /* Only probe if the initial adjustment is larger than the guard
ba3c21
+     less the amount of the guard reserved for use by the caller's
ba3c21
+     outgoing args.  */
ba3c21
+  if (flag_stack_clash_protection
ba3c21
+      && maybe_ge (initial_adjust, guard_size - guard_used_by_caller))
ba3c21
+    aarch64_allocate_and_probe_stack_space (ip0_rtx, ip1_rtx, initial_adjust);
ba3c21
+  else
ba3c21
+    aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
ba3c21
 
ba3c21
   if (callee_adjust != 0)
ba3c21
     aarch64_push_regs (reg1, reg2, callee_adjust);
ba3c21
@@ -4742,7 +4923,31 @@ aarch64_expand_prologue (void)
ba3c21
 			     callee_adjust != 0 || emit_frame_chain);
ba3c21
   aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
ba3c21
 			     callee_adjust != 0 || emit_frame_chain);
ba3c21
-  aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
ba3c21
+
ba3c21
+  /* We may need to probe the final adjustment as well.  */
ba3c21
+  if (flag_stack_clash_protection && maybe_ne (final_adjust, 0))
ba3c21
+    {
ba3c21
+      /* First probe if the final adjustment is larger than the guard size
ba3c21
+	 less the amount of the guard reserved for use by the caller's
ba3c21
+	 outgoing args.  */
ba3c21
+      if (maybe_ge (final_adjust, guard_size - guard_used_by_caller))
ba3c21
+	aarch64_allocate_and_probe_stack_space (ip1_rtx, ip0_rtx,
ba3c21
+						final_adjust);
ba3c21
+      else
ba3c21
+	aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
ba3c21
+
ba3c21
+      /* We must also probe if the final adjustment is larger than the guard
ba3c21
+	 that is assumed used by the caller.  This may be sub-optimal.  */
ba3c21
+      if (maybe_ge (final_adjust, guard_used_by_caller))
ba3c21
+	{
ba3c21
+	  if (dump_file)
ba3c21
+	    fprintf (dump_file,
ba3c21
+		     "Stack clash aarch64 large outgoing arg, probing\n");
ba3c21
+	  emit_stack_probe (stack_pointer_rtx);
ba3c21
+	}
ba3c21
+    }
ba3c21
+  else
ba3c21
+    aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
ba3c21
 }
ba3c21
 
ba3c21
 /* Return TRUE if we can use a simple_return insn.
ba3c21
@@ -10476,6 +10681,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
ba3c21
       && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
ba3c21
     opts->x_flag_prefetch_loop_arrays = 1;
ba3c21
 
ba3c21
+  /* We assume the guard page is 64k.  */
ba3c21
+  maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
ba3c21
+			 16,
ba3c21
+			 opts->x_param_values,
ba3c21
+			 global_options_set.x_param_values);
ba3c21
+
ba3c21
   aarch64_override_options_after_change_1 (opts);
ba3c21
 }
ba3c21
 
ba3c21
@@ -17161,6 +17372,28 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn)
ba3c21
     }
ba3c21
 }
ba3c21
 
ba3c21
+/* It has been decided to allow up to 1kb of outgoing argument
ba3c21
+   space to be allocated w/o probing.  If more than 1kb of outgoing
ba3c21
+   argument space is allocated, then it must be probed and the last
ba3c21
+   probe must occur no more than 1kbyte away from the end of the
ba3c21
+   allocated space.
ba3c21
+
ba3c21
+   This implies that the residual part of an alloca allocation may
ba3c21
+   need probing in cases where the generic code might not otherwise
ba3c21
+   think a probe is needed.
ba3c21
+
ba3c21
+   This target hook returns TRUE when allocating RESIDUAL bytes of
ba3c21
+   alloca space requires an additional probe, otherwise FALSE is
ba3c21
+   returned.  */
ba3c21
+
ba3c21
+static bool
ba3c21
+aarch64_stack_clash_protection_final_dynamic_probe (rtx residual)
ba3c21
+{
ba3c21
+  return (residual == CONST0_RTX (Pmode)
ba3c21
+	  || GET_CODE (residual) != CONST_INT
ba3c21
+	  || INTVAL (residual) >= 1024);
ba3c21
+}
ba3c21
+
ba3c21
 /* Implement TARGET_COMPUTE_PRESSURE_CLASSES.  */
ba3c21
 
ba3c21
 static int
ba3c21
@@ -17669,6 +17902,10 @@ aarch64_libgcc_floating_mode_supported_p
ba3c21
 #undef TARGET_CONSTANT_ALIGNMENT
ba3c21
 #define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
ba3c21
 
ba3c21
+#undef TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
ba3c21
+#define TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE \
ba3c21
+  aarch64_stack_clash_protection_final_dynamic_probe
ba3c21
+
ba3c21
 #undef TARGET_COMPUTE_PRESSURE_CLASSES
ba3c21
 #define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
ba3c21
 
ba3c21
--- gcc/config/aarch64/aarch64.md
ba3c21
+++ gcc/config/aarch64/aarch64.md
ba3c21
@@ -5812,7 +5812,7 @@
ba3c21
 )
ba3c21
 
ba3c21
 (define_insn "probe_stack_range"
ba3c21
-  [(set (match_operand:DI 0 "register_operand" "=r")
ba3c21
+  [(set (match_operand:DI 0 "register_operand" "=rk")
ba3c21
 	(unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
ba3c21
 			     (match_operand:DI 2 "register_operand" "r")]
ba3c21
 			      UNSPECV_PROBE_STACK_RANGE))]
ba3c21
--- gcc/testsuite/gcc.target/aarch64/stack-check-12.c
ba3c21
+++ gcc/testsuite/gcc.target/aarch64/stack-check-12.c
ba3c21
@@ -0,0 +1,20 @@
ba3c21
+/* { dg-do compile } */
ba3c21
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
ba3c21
+/* { dg-require-effective-target supports_stack_clash_protection } */
ba3c21
+
ba3c21
+extern void arf (unsigned long int *, unsigned long int *);
ba3c21
+void
ba3c21
+frob ()
ba3c21
+{
ba3c21
+  unsigned long int num[1000];
ba3c21
+  unsigned long int den[1000];
ba3c21
+  arf (den, num);
ba3c21
+}
ba3c21
+
ba3c21
+/* This verifies that the scheduler did not break the dependencies
ba3c21
+   by adjusting the offsets within the probe and that the scheduler
ba3c21
+   did not reorder around the stack probes.  */
ba3c21
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 3 } } */
ba3c21
+
ba3c21
+
ba3c21
+
ba3c21
--- gcc/testsuite/gcc.target/aarch64/stack-check-13.c
ba3c21
+++ gcc/testsuite/gcc.target/aarch64/stack-check-13.c
ba3c21
@@ -0,0 +1,28 @@
ba3c21
+/* { dg-do compile } */
ba3c21
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
ba3c21
+/* { dg-require-effective-target supports_stack_clash_protection } */
ba3c21
+
ba3c21
+#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
ba3c21
+#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X)
ba3c21
+void out1(ARG192(__int128));
ba3c21
+int t1(int);
ba3c21
+
ba3c21
+int t3(int x)
ba3c21
+{
ba3c21
+  if (x < 1000)
ba3c21
+    return t1 (x) + 1;
ba3c21
+
ba3c21
+  out1 (ARG192(1));
ba3c21
+  return 0;
ba3c21
+}
ba3c21
+
ba3c21
+
ba3c21
+
ba3c21
+/* This test creates a large (> 1k) outgoing argument area that needs
ba3c21
+   to be probed.  We don't test the exact size of the space or the
ba3c21
+   exact offset to make the test a little less sensitive to trivial
ba3c21
+   output changes.  */
ba3c21
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */
ba3c21
+
ba3c21
+
ba3c21
+
ba3c21
--- gcc/testsuite/gcc.target/aarch64/stack-check-14.c
ba3c21
+++ gcc/testsuite/gcc.target/aarch64/stack-check-14.c
ba3c21
@@ -0,0 +1,25 @@
ba3c21
+/* { dg-do compile } */
ba3c21
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
ba3c21
+/* { dg-require-effective-target supports_stack_clash_protection } */
ba3c21
+
ba3c21
+int t1(int);
ba3c21
+
ba3c21
+int t2(int x)
ba3c21
+{
ba3c21
+  char *p = __builtin_alloca (4050);
ba3c21
+  x = t1 (x);
ba3c21
+  return p[x];
ba3c21
+}
ba3c21
+
ba3c21
+
ba3c21
+/* This test has a constant sized alloca that is smaller than the
ba3c21
+   probe interval.  But it actually requires two probes instead
ba3c21
+   of one because of the optimistic assumptions we made in the
ba3c21
+   aarch64 prologue code WRT probing state. 
ba3c21
+
ba3c21
+   The form can change quite a bit so we just check for two
ba3c21
+   probes without looking at the actual address.  */
ba3c21
+/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */
ba3c21
+
ba3c21
+
ba3c21
+
ba3c21
--- gcc/testsuite/gcc.target/aarch64/stack-check-15.c
ba3c21
+++ gcc/testsuite/gcc.target/aarch64/stack-check-15.c
ba3c21
@@ -0,0 +1,24 @@
ba3c21
+/* { dg-do compile } */
ba3c21
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
ba3c21
+/* { dg-require-effective-target supports_stack_clash_protection } */
ba3c21
+
ba3c21
+int t1(int);
ba3c21
+
ba3c21
+int t2(int x)
ba3c21
+{
ba3c21
+  char *p = __builtin_alloca (x);
ba3c21
+  x = t1 (x);
ba3c21
+  return p[x];
ba3c21
+}
ba3c21
+
ba3c21
+
ba3c21
+/* This test has a variable sized alloca.  It requires 3 probes.
ba3c21
+   One in the loop, one for the residual and one at the end of the
ba3c21
+   alloca area. 
ba3c21
+
ba3c21
+   The form can change quite a bit so we just check for three
ba3c21
+   probes without looking at the actual address.  */
ba3c21
+/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */
ba3c21
+
ba3c21
+
ba3c21
+
ba3c21
--- gcc/testsuite/lib/target-supports.exp
ba3c21
+++ gcc/testsuite/lib/target-supports.exp
ba3c21
@@ -9201,14 +9201,9 @@ proc check_effective_target_autoincdec { } {
ba3c21
 # 
ba3c21
 proc check_effective_target_supports_stack_clash_protection { } {
ba3c21
 
ba3c21
-   # Temporary until the target bits are fully ACK'd.
ba3c21
-#  if { [istarget aarch*-*-*] } {
ba3c21
-#	return 1
ba3c21
-#  }
ba3c21
-
ba3c21
     if { [istarget x86_64-*-*] || [istarget i?86-*-*] 
ba3c21
 	  || [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
ba3c21
-	  || [istarget s390*-*-*] } {
ba3c21
+	  || [istarget aarch64*-**] || [istarget s390*-*-*] } {
ba3c21
 	return 1
ba3c21
     }
ba3c21
   return 0
ba3c21
@@ -9217,9 +9212,9 @@ proc check_effective_target_supports_stack_clash_protection { } {
ba3c21
 # Return 1 if the target creates a frame pointer for non-leaf functions
ba3c21
 # Note we ignore cases where we apply tail call optimization here.
ba3c21
 proc check_effective_target_frame_pointer_for_non_leaf { } {
ba3c21
-  if { [istarget aarch*-*-*] } {
ba3c21
-	return 1
ba3c21
-  }
ba3c21
+#  if { [istarget aarch*-*-*] } {
ba3c21
+#	return 1
ba3c21
+#  }
ba3c21
 
ba3c21
   # Solaris/x86 defaults to -fno-omit-frame-pointer.
ba3c21
   if { [istarget i?86-*-solaris*] || [istarget x86_64-*-solaris*] } {