Blame SOURCES/gcc48-rh1469697-16.patch

56d343
commit 5d7a77ede3e91948ee125bd82533d7e692543fff
56d343
Author: Jeff Law <law@redhat.com>
56d343
Date:   Mon Oct 2 13:43:01 2017 -0600
56d343
56d343
    aarch64 support
56d343
56d343
diff --git a/gcc/config/aarch64/.aarch64.c.rej.swp b/gcc/config/aarch64/.aarch64.c.rej.swp
56d343
new file mode 100644
56d343
index 00000000000..b899e21b855
56d343
Binary files /dev/null and b/gcc/config/aarch64/.aarch64.c.rej.swp differ
56d343
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
56d343
index cadf193cfcf..e08632ffa88 100644
56d343
--- a/gcc/config/aarch64/aarch64.c
56d343
+++ b/gcc/config/aarch64/aarch64.c
56d343
@@ -45,6 +45,8 @@
56d343
 #include "gimple.h"
56d343
 #include "optabs.h"
56d343
 #include "dwarf2.h"
56d343
+#include "params.h"
56d343
+#include "dumpfile.h"
56d343
 
56d343
 /* Classifies an address.
56d343
 
56d343
@@ -1696,7 +1698,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
56d343
   output_asm_insn ("sub\t%0, %0, %1", xops);
56d343
 
56d343
   /* Probe at TEST_ADDR.  */
56d343
-  output_asm_insn ("str\txzr, [%0]", xops);
56d343
+  if (flag_stack_clash_protection)
56d343
+    {
56d343
+      gcc_assert (xops[0] == stack_pointer_rtx);
56d343
+      xops[1] = GEN_INT (PROBE_INTERVAL - 8);
56d343
+      output_asm_insn ("str\txzr, [%0, %1]", xops);
56d343
+    }
56d343
+  else
56d343
+    output_asm_insn ("str\txzr, [%0]", xops);
56d343
 
56d343
   /* Test if TEST_ADDR == LAST_ADDR.  */
56d343
   xops[1] = reg2;
56d343
@@ -2001,6 +2010,123 @@ aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
56d343
 				base_rtx, cfi_ops);
56d343
 }
56d343
 
56d343
+/* Allocate SIZE bytes of stack space using SCRATCHREG as a scratch
56d343
+   register.  */
56d343
+
56d343
+static void
56d343
+aarch64_allocate_and_probe_stack_space (int scratchreg, HOST_WIDE_INT size)
56d343
+{
56d343
+  HOST_WIDE_INT probe_interval
56d343
+    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
56d343
+  HOST_WIDE_INT guard_size
56d343
+    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
56d343
+  HOST_WIDE_INT guard_used_by_caller = 1024;
56d343
+
56d343
+  /* SIZE should be large enough to require probing here.  ie, it
56d343
+     must be larger than GUARD_SIZE - GUARD_USED_BY_CALLER.
56d343
+
56d343
+     We can allocate GUARD_SIZE - GUARD_USED_BY_CALLER as a single chunk
56d343
+     without any probing.  */
56d343
+  gcc_assert (size >= guard_size - guard_used_by_caller);
56d343
+  aarch64_sub_sp (scratchreg, guard_size - guard_used_by_caller, true);
56d343
+  HOST_WIDE_INT orig_size = size;
56d343
+  size -= (guard_size - guard_used_by_caller);
56d343
+
56d343
+  HOST_WIDE_INT rounded_size = size & -probe_interval;
56d343
+  HOST_WIDE_INT residual = size - rounded_size;
56d343
+
56d343
+  /* We can handle a small number of allocations/probes inline.  Otherwise
56d343
+     punt to a loop.  */
56d343
+  if (rounded_size && rounded_size <= 4 * probe_interval)
56d343
+    {
56d343
+      /* We don't use aarch64_sub_sp here because we don't want to
56d343
+	 repeatedly load SCRATCHREG.  */
56d343
+      rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
56d343
+      if (probe_interval > ARITH_FACTOR)
56d343
+	emit_move_insn (scratch_rtx, GEN_INT (-probe_interval));
56d343
+      else
56d343
+	scratch_rtx = GEN_INT (-probe_interval);
56d343
+
56d343
+      for (HOST_WIDE_INT i = 0; i < rounded_size; i += probe_interval)
56d343
+	{
56d343
+	  rtx insn = emit_insn (gen_add2_insn (stack_pointer_rtx, scratch_rtx));
56d343
+          add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
56d343
+
56d343
+	  if (probe_interval > ARITH_FACTOR)
56d343
+	    {
56d343
+	      RTX_FRAME_RELATED_P (insn) = 1;
56d343
+	      rtx adj = plus_constant (Pmode, stack_pointer_rtx, -probe_interval);
56d343
+	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
56d343
+			    gen_rtx_SET (VOIDmode, stack_pointer_rtx, adj));
56d343
+	    }
56d343
+
56d343
+	  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
56d343
+					   (probe_interval
56d343
+					    - GET_MODE_SIZE (word_mode))));
56d343
+	  emit_insn (gen_blockage ());
56d343
+	}
56d343
+      dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
56d343
+    }
56d343
+  else if (rounded_size)
56d343
+    {
56d343
+      /* Compute the ending address.  */
56d343
+      rtx temp = gen_rtx_REG (word_mode, scratchreg);
56d343
+      emit_move_insn (temp, GEN_INT (-rounded_size));
56d343
+      rtx insn = emit_insn (gen_add3_insn (temp, stack_pointer_rtx, temp));
56d343
+
56d343
+      /* For the initial allocation, we don't have a frame pointer
56d343
+	 set up, so we always need CFI notes.  If we're doing the
56d343
+	 final allocation, then we may have a frame pointer, in which
56d343
+	 case it is the CFA, otherwise we need CFI notes.
56d343
+
56d343
+	 We can determine which allocation we are doing by looking at
56d343
+	 the temporary register.  IP0 is the initial allocation, IP1
56d343
+	 is the final allocation.  */
56d343
+      if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
56d343
+	{
56d343
+	  /* We want the CFA independent of the stack pointer for the
56d343
+	     duration of the loop.  */
56d343
+	  add_reg_note (insn, REG_CFA_DEF_CFA,
56d343
+			plus_constant (Pmode, temp,
56d343
+				       (rounded_size + (orig_size - size))));
56d343
+	  RTX_FRAME_RELATED_P (insn) = 1;
56d343
+	}
56d343
+
56d343
+      /* This allocates and probes the stack.
56d343
+
56d343
+	 It also probes at a 4k interval regardless of the value of
56d343
+	 PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL.  */
56d343
+      insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
56d343
+					       stack_pointer_rtx, temp));
56d343
+
56d343
+      /* Now reset the CFA register if needed.  */
56d343
+      if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
56d343
+	{
56d343
+	  add_reg_note (insn, REG_CFA_DEF_CFA,
56d343
+			plus_constant (Pmode, stack_pointer_rtx,
56d343
+				       (rounded_size + (orig_size - size))));
56d343
+	  RTX_FRAME_RELATED_P (insn) = 1;
56d343
+	}
56d343
+
56d343
+      emit_insn (gen_blockage ());
56d343
+      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
56d343
+    }
56d343
+  else
56d343
+    dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
56d343
+
56d343
+  /* Handle any residuals.
56d343
+     Note that any residual must be probed.  */
56d343
+  if (residual)
56d343
+    {
56d343
+      aarch64_sub_sp (scratchreg, residual, true);
56d343
+      add_reg_note (get_last_insn (), REG_STACK_CHECK, const0_rtx);
56d343
+      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
56d343
+				       (residual - GET_MODE_SIZE (word_mode))));
56d343
+      emit_insn (gen_blockage ());
56d343
+    }
56d343
+  return;
56d343
+}
56d343
+
56d343
 /* AArch64 stack frames generated by this compiler look like:
56d343
 
56d343
 	+-------------------------------+
56d343
@@ -2073,6 +2199,44 @@ aarch64_expand_prologue (void)
56d343
 	       - original_frame_size
56d343
 	       - cfun->machine->frame.saved_regs_size);
56d343
 
56d343
+  /* We do not fully protect aarch64 against stack clash style attacks
56d343
+     as doing so would be prohibitively expensive with less utility over
56d343
+     time as newer compilers are deployed.
56d343
+
56d343
+     We assume the guard is at least 64k.  Furthermore, we assume that
56d343
+     the caller has not pushed the stack pointer more than 1k into
56d343
+     the guard.  A caller that pushes the stack pointer more than 1k into
56d343
+     the guard is considered invalid.
56d343
+
56d343
+     Note that the caller's ability to push the stack pointer into the
56d343
+     guard is a function of the number and size of outgoing arguments and/or
56d343
+     dynamic stack allocations due to the mandatory save of the link register
56d343
+     in the caller's frame.
56d343
+
56d343
+     With those assumptions the callee can allocate up to 63k of stack
56d343
+     space without probing.
56d343
+
56d343
+     When probing is needed, we emit a probe at the start of the prologue
56d343
+     and every PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes thereafter.
56d343
+
56d343
+     We have to track how much space has been allocated, but we do not
56d343
+     track stores into the stack as implicit probes except for the
56d343
+     fp/lr store.  */
56d343
+  HOST_WIDE_INT guard_size
56d343
+    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
56d343
+  HOST_WIDE_INT guard_used_by_caller = 1024;
56d343
+  HOST_WIDE_INT final_adjust = crtl->outgoing_args_size;
56d343
+  HOST_WIDE_INT initial_adjust = frame_size;
56d343
+
56d343
+  if (flag_stack_clash_protection)
56d343
+    {
56d343
+      if (initial_adjust == 0)
56d343
+	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
56d343
+      else if (offset < guard_size - guard_used_by_caller
56d343
+	       && final_adjust < guard_size - guard_used_by_caller)
56d343
+	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
56d343
+    }
56d343
+
56d343
   /* Store pairs and load pairs have a range only -512 to 504.  */
56d343
   if (offset >= 512)
56d343
     {
56d343
@@ -2089,7 +2253,10 @@ aarch64_expand_prologue (void)
56d343
       frame_size -= (offset + crtl->outgoing_args_size);
56d343
       fp_offset = 0;
56d343
 
56d343
-      if (frame_size >= 0x1000000)
56d343
+      if (flag_stack_clash_protection
56d343
+	  && frame_size >= guard_size - guard_used_by_caller)
56d343
+	aarch64_allocate_and_probe_stack_space (IP0_REGNUM, frame_size);
56d343
+      else if (frame_size >= 0x1000000)
56d343
 	{
56d343
 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
56d343
 	  emit_move_insn (op0, GEN_INT (-frame_size));
56d343
@@ -2206,10 +2373,30 @@ aarch64_expand_prologue (void)
56d343
     {
56d343
       if (crtl->outgoing_args_size > 0)
56d343
 	{
56d343
-	  insn = emit_insn (gen_add2_insn
56d343
-			    (stack_pointer_rtx,
56d343
-			     GEN_INT (- crtl->outgoing_args_size)));
56d343
-	  RTX_FRAME_RELATED_P (insn) = 1;
56d343
+	  if (flag_stack_clash_protection)
56d343
+	    {
56d343
+	      /* First probe if the final adjustment is larger than the
56d343
+		 guard size less the amount of guard reserved for use by
56d343
+		 the caller's outgoing args.  */
56d343
+	      if (final_adjust >= guard_size - guard_used_by_caller)
56d343
+		aarch64_allocate_and_probe_stack_space (IP1_REGNUM,
56d343
+						        final_adjust);
56d343
+	      else
56d343
+		aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
56d343
+
56d343
+	      /* We must also probe if the final adjustment is larger than the
56d343
+		 guard that is assumed used by the caller.  This may be
56d343
+		 sub-optimal.  */
56d343
+	      if (final_adjust >= guard_used_by_caller)
56d343
+		{
56d343
+		  if (dump_file)
56d343
+		    fprintf (dump_file,
56d343
+			     "Stack clash aarch64 large outgoing arg, probing\n");
56d343
+		  emit_stack_probe (stack_pointer_rtx);
56d343
+		}
56d343
+	    }
56d343
+	  else
56d343
+	    aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
56d343
 	}
56d343
     }
56d343
 }
56d343
@@ -5088,6 +5275,12 @@ aarch64_override_options (void)
56d343
 #endif
56d343
     }
56d343
 
56d343
+  /* We assume the guard page is 64k.  */
56d343
+  maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
56d343
+			 16,
56d343
+			 global_options.x_param_values,
56d343
+			 global_options_set.x_param_values);
56d343
+
56d343
   aarch64_override_options_after_change ();
56d343
 }
56d343
 
56d343
@@ -8161,6 +8354,28 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
56d343
   return ret;
56d343
 }
56d343
 
56d343
+/* It has been decided to allow up to 1kb of outgoing argument
56d343
+   space to be allocated w/o probing.  If more than 1kb of outgoing
56d343
+   argument space is allocated, then it must be probed and the last
56d343
+   probe must occur no more than 1kbyte away from the end of the
56d343
+   allocated space.
56d343
+
56d343
+   This implies that the residual part of an alloca allocation may
56d343
+   need probing in cases where the generic code might not otherwise
56d343
+   think a probe is needed.
56d343
+
56d343
+   This target hook returns TRUE when allocating RESIDUAL bytes of
56d343
+   alloca space requires an additional probe, otherwise FALSE is
56d343
+   returned.  */
56d343
+
56d343
+static bool
56d343
+aarch64_stack_clash_protection_final_dynamic_probe (rtx residual)
56d343
+{
56d343
+  return (residual == CONST0_RTX (Pmode)
56d343
+	  || GET_CODE (residual) != CONST_INT
56d343
+	  || INTVAL (residual) >= 1024);
56d343
+}
56d343
+
56d343
 #undef TARGET_ADDRESS_COST
56d343
 #define TARGET_ADDRESS_COST aarch64_address_cost
56d343
 
56d343
@@ -8378,6 +8593,10 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
56d343
 #undef TARGET_FIXED_CONDITION_CODE_REGS
56d343
 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
56d343
 
56d343
+#undef TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
56d343
+#define TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE \
56d343
+  aarch64_stack_clash_protection_final_dynamic_probe
56d343
+
56d343
 struct gcc_target targetm = TARGET_INITIALIZER;
56d343
 
56d343
 #include "gt-aarch64.h"
56d343
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
56d343
index a085c6acaf5..5485a5f70b1 100644
56d343
--- a/gcc/config/aarch64/aarch64.md
56d343
+++ b/gcc/config/aarch64/aarch64.md
56d343
@@ -3401,7 +3401,7 @@
56d343
 )
56d343
 
56d343
 (define_insn "probe_stack_range"
56d343
-  [(set (match_operand:DI 0 "register_operand" "=r")
56d343
+  [(set (match_operand:DI 0 "register_operand" "=rk")
56d343
 	(unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
56d343
 			     (match_operand:DI 2 "register_operand" "r")]
56d343
 			      UNSPECV_PROBE_STACK_RANGE))]
56d343
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c
56d343
new file mode 100644
56d343
index 00000000000..2ce38483b6b
56d343
--- /dev/null
56d343
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c
56d343
@@ -0,0 +1,20 @@
56d343
+/* { dg-do compile } */
56d343
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
56d343
+/* { dg-require-effective-target supports_stack_clash_protection } */
56d343
+
56d343
+extern void arf (unsigned long int *, unsigned long int *);
56d343
+void
56d343
+frob ()
56d343
+{
56d343
+  unsigned long int num[1000];
56d343
+  unsigned long int den[1000];
56d343
+  arf (den, num);
56d343
+}
56d343
+
56d343
+/* This verifies that the scheduler did not break the dependencies
56d343
+   by adjusting the offsets within the probe and that the scheduler
56d343
+   did not reorder around the stack probes.  */
56d343
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 3 } } */
56d343
+
56d343
+
56d343
+
56d343
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-13.c b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c
56d343
new file mode 100644
56d343
index 00000000000..d8886835989
56d343
--- /dev/null
56d343
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c
56d343
@@ -0,0 +1,28 @@
56d343
+/* { dg-do compile } */
56d343
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
56d343
+/* { dg-require-effective-target supports_stack_clash_protection } */
56d343
+
56d343
+#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
56d343
+#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X)
56d343
+void out1(ARG192(__int128));
56d343
+int t1(int);
56d343
+
56d343
+int t3(int x)
56d343
+{
56d343
+  if (x < 1000)
56d343
+    return t1 (x) + 1;
56d343
+
56d343
+  out1 (ARG192(1));
56d343
+  return 0;
56d343
+}
56d343
+
56d343
+
56d343
+
56d343
+/* This test creates a large (> 1k) outgoing argument area that needs
56d343
+   to be probed.  We don't test the exact size of the space or the
56d343
+   exact offset to make the test a little less sensitive to trivial
56d343
+   output changes.  */
56d343
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */
56d343
+
56d343
+
56d343
+
56d343
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-14.c b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c
56d343
new file mode 100644
56d343
index 00000000000..59ffe01376d
56d343
--- /dev/null
56d343
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c
56d343
@@ -0,0 +1,25 @@
56d343
+/* { dg-do compile } */
56d343
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
56d343
+/* { dg-require-effective-target supports_stack_clash_protection } */
56d343
+
56d343
+int t1(int);
56d343
+
56d343
+int t2(int x)
56d343
+{
56d343
+  char *p = __builtin_alloca (4050);
56d343
+  x = t1 (x);
56d343
+  return p[x];
56d343
+}
56d343
+
56d343
+
56d343
+/* This test has a constant sized alloca that is smaller than the
56d343
+   probe interval.  But it actually requires two probes instead
56d343
+   of one because of the optimistic assumptions we made in the
56d343
+   aarch64 prologue code WRT probing state. 
56d343
+
56d343
+   The form can change quite a bit so we just check for two
56d343
+   probes without looking at the actual address.  */
56d343
+/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */
56d343
+
56d343
+
56d343
+
56d343
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-15.c b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c
56d343
new file mode 100644
56d343
index 00000000000..e06db6dc2f0
56d343
--- /dev/null
56d343
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c
56d343
@@ -0,0 +1,24 @@
56d343
+/* { dg-do compile } */
56d343
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
56d343
+/* { dg-require-effective-target supports_stack_clash_protection } */
56d343
+
56d343
+int t1(int);
56d343
+
56d343
+int t2(int x)
56d343
+{
56d343
+  char *p = __builtin_alloca (x);
56d343
+  x = t1 (x);
56d343
+  return p[x];
56d343
+}
56d343
+
56d343
+
56d343
+/* This test has a variable sized alloca.  It requires 3 probes.
56d343
+   One in the loop, one for the residual and one at the end of the
56d343
+   alloca area. 
56d343
+
56d343
+   The form can change quite a bit so we just check for three
56d343
+   probes without looking at the actual address.  */
56d343
+/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */
56d343
+
56d343
+
56d343
+
56d343
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
56d343
index aba99513ed0..a8451c98b08 100644
56d343
--- a/gcc/testsuite/lib/target-supports.exp
56d343
+++ b/gcc/testsuite/lib/target-supports.exp
56d343
@@ -5420,14 +5420,9 @@ proc check_effective_target_autoincdec { } {
56d343
 # 
56d343
 proc check_effective_target_supports_stack_clash_protection { } {
56d343
 
56d343
-   # Temporary until the target bits are fully ACK'd.
56d343
-#  if { [istarget aarch*-*-*] } {
56d343
-#	return 1
56d343
-#  }
56d343
-
56d343
     if { [istarget x86_64-*-*] || [istarget i?86-*-*] 
56d343
 	  || [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
56d343
-	  || [istarget s390*-*-*] } {
56d343
+	  || [istarget aarch64*-**] || [istarget s390*-*-*] } {
56d343
 	return 1
56d343
     }
56d343
   return 0