Blame SOURCES/gcc48-rh1469697-11.patch

8178f7
commit 27d2a2d27f3e0060ade9a1a82ce2292aad6c6931
8178f7
Author: law <law@138bc75d-0d04-0410-961f-82ee72b054a4>
8178f7
Date:   Mon Sep 25 23:13:55 2017 +0000
8178f7
8178f7
            * config/rs6000/rs6000-protos.h (output_probe_stack_range): Update
8178f7
            prototype for new argument.
8178f7
            * config/rs6000/rs6000.c (rs6000_emit_allocate_stack_1): New function,
8178f7
            mostly extracted from rs6000_emit_allocate_stack.
8178f7
            (rs6000_emit_probe_stack_range_stack_clash): New function.
8178f7
            (rs6000_emit_allocate_stack): Call
8178f7
            rs6000_emit_probe_stack_range_stack_clash as needed.
8178f7
            (rs6000_emit_probe_stack_range): Add additional argument
8178f7
            to call to gen_probe_stack_range{si,di}.
8178f7
            (output_probe_stack_range): New.
8178f7
            (output_probe_stack_range_1): Renamed from output_probe_stack_range.
8178f7
            (output_probe_stack_range_stack_clash): New.
8178f7
            (rs6000_emit_prologue): Emit notes into dump file as requested.
8178f7
            * rs6000.md (allocate_stack): Handle -fstack-clash-protection.
8178f7
            (probe_stack_range<P:mode>): Operand 0 is now early-clobbered.
8178f7
            Add additional operand and pass it to output_probe_stack_range.
8178f7
    
8178f7
            * lib/target-supports.exp
8178f7
            (check_effective_target_supports_stack_clash_protection): Enable for
8178f7
            rs6000 and powerpc targets.
8178f7
    
8178f7
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253179 138bc75d-0d04-0410-961f-82ee72b054a4
8178f7
8178f7
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
8178f7
index d4b93d9970d..cfb23ab80cc 100644
8178f7
--- a/gcc/config/rs6000/rs6000-protos.h
8178f7
+++ b/gcc/config/rs6000/rs6000-protos.h
8178f7
@@ -114,7 +114,7 @@ extern void rs6000_emit_sCOND (enum machine_mode, rtx[]);
8178f7
 extern void rs6000_emit_cbranch (enum machine_mode, rtx[]);
8178f7
 extern char * output_cbranch (rtx, const char *, int, rtx);
8178f7
 extern char * output_e500_flip_gt_bit (rtx, rtx);
8178f7
-extern const char * output_probe_stack_range (rtx, rtx);
8178f7
+extern const char * output_probe_stack_range (rtx, rtx, rtx);
8178f7
 extern rtx rs6000_emit_set_const (rtx, enum machine_mode, rtx, int);
8178f7
 extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx);
8178f7
 extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
8178f7
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
8178f7
index a9052c6becf..c5d9988c1d9 100644
8178f7
--- a/gcc/config/rs6000/rs6000.c
8178f7
+++ b/gcc/config/rs6000/rs6000.c
8178f7
@@ -22320,6 +22320,220 @@ rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
8178f7
   emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
8178f7
 }
8178f7
 
8178f7
+/* Allocate SIZE_INT bytes on the stack using a store with update style insn
8178f7
+   and set the appropriate attributes for the generated insn.  Return the
8178f7
+   first insn which adjusts the stack pointer or the last insn before
8178f7
+   the stack adjustment loop. 
8178f7
+
8178f7
+   SIZE_INT is used to create the CFI note for the allocation.
8178f7
+
8178f7
+   SIZE_RTX is built locally from SIZE_INT (it is not a parameter).  Since
8178f7
+   stacks grow to lower addresses its runtime value is -SIZE_INT.
8178f7
+
8178f7
+   ORIG_SP contains the backchain value that must be stored at *sp.  */
8178f7
+
8178f7
+static rtx
8178f7
+rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
8178f7
+{
8178f7
+  rtx insn;
8178f7
+
8178f7
+  rtx size_rtx = GEN_INT (-size_int);
8178f7
+  if (size_int > 32767)
8178f7
+    {
8178f7
+      rtx tmp_reg = gen_rtx_REG (Pmode, 0);
8178f7
+      /* Need a note here so that try_split doesn't get confused.  */
8178f7
+      if (get_last_insn () == NULL_RTX)
8178f7
+	emit_note (NOTE_INSN_DELETED);
8178f7
+      insn = emit_move_insn (tmp_reg, size_rtx);
8178f7
+      try_split (PATTERN (insn), insn, 0);
8178f7
+      size_rtx = tmp_reg;
8178f7
+    }
8178f7
+  
8178f7
+  if (Pmode == SImode)
8178f7
+    insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
8178f7
+					      stack_pointer_rtx,
8178f7
+					      size_rtx,
8178f7
+					      orig_sp));
8178f7
+  else
8178f7
+    insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
8178f7
+						 stack_pointer_rtx,
8178f7
+						 size_rtx,
8178f7
+						 orig_sp));
8178f7
+  rtx par = PATTERN (insn);
8178f7
+  gcc_assert (GET_CODE (par) == PARALLEL);
8178f7
+  rtx set = XVECEXP (par, 0, 0);
8178f7
+  gcc_assert (GET_CODE (set) == SET);
8178f7
+  rtx mem = SET_DEST (set);
8178f7
+  gcc_assert (MEM_P (mem));
8178f7
+  MEM_NOTRAP_P (mem) = 1;
8178f7
+  set_mem_alias_set (mem, get_frame_alias_set ());
8178f7
+
8178f7
+  RTX_FRAME_RELATED_P (insn) = 1;
8178f7
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8178f7
+		gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8178f7
+			     gen_rtx_PLUS (Pmode,
8178f7
+					   stack_pointer_rtx,
8178f7
+					   GEN_INT (-size_int))));
8178f7
+
8178f7
+  /* Emit a blockage to ensure the allocation/probing insns are
8178f7
+     not optimized, combined, removed, etc.  Add REG_STACK_CHECK
8178f7
+     note for similar reasons.  */
8178f7
+  if (flag_stack_clash_protection)
8178f7
+    {
8178f7
+      add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
8178f7
+      emit_insn (gen_blockage ());
8178f7
+    }
8178f7
+
8178f7
+  return insn;
8178f7
+}
8178f7
+
8178f7
+static HOST_WIDE_INT
8178f7
+get_stack_clash_protection_probe_interval (void)
8178f7
+{
8178f7
+  return (HOST_WIDE_INT_1U
8178f7
+	  << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
8178f7
+}
8178f7
+
8178f7
+static HOST_WIDE_INT
8178f7
+get_stack_clash_protection_guard_size (void)
8178f7
+{
8178f7
+  return (HOST_WIDE_INT_1U
8178f7
+	  << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
8178f7
+}
8178f7
+
8178f7
+/* Allocate ORIG_SIZE bytes on the stack and probe the newly
8178f7
+   allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
8178f7
+
8178f7
+   COPY_REG, if non-null, should contain a copy of the original
8178f7
+   stack pointer at exit from this function.
8178f7
+
8178f7
+   This is subtly different than the Ada probing in that it tries hard to
8178f7
+   prevent attacks that jump the stack guard.  Thus it is never allowed to
8178f7
+   allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
8178f7
+   space without a suitable probe.  */
8178f7
+static rtx
8178f7
+rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
8178f7
+					   rtx copy_reg)
8178f7
+{
8178f7
+  rtx orig_sp = copy_reg;
8178f7
+
8178f7
+  HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
8178f7
+
8178f7
+  /* Round the size down to a multiple of PROBE_INTERVAL.  */
8178f7
+  HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
8178f7
+
8178f7
+  /* If explicitly requested,
8178f7
+       or the rounded size is not the same as the original size
8178f7
+       or the rounded size is greater than a page,
8178f7
+     then we will need a copy of the original stack pointer.  */
8178f7
+  if (rounded_size != orig_size
8178f7
+      || rounded_size > probe_interval
8178f7
+      || copy_reg)
8178f7
+    {
8178f7
+      /* If the caller did not request a copy of the incoming stack
8178f7
+	 pointer, then we use r0 to hold the copy.  */
8178f7
+      if (!copy_reg)
8178f7
+	orig_sp = gen_rtx_REG (Pmode, 0);
8178f7
+      emit_move_insn (orig_sp, stack_pointer_rtx);
8178f7
+    }
8178f7
+
8178f7
+  /* There are three cases here.
8178f7
+
8178f7
+     One is a single probe which is the most common and most efficiently
8178f7
+     implemented as it does not have to have a copy of the original
8178f7
+     stack pointer if there are no residuals.
8178f7
+
8178f7
+     Second is unrolled allocation/probes which we use if there's just
8178f7
+     a few of them.  It needs to save the original stack pointer into a
8178f7
+     temporary for use as a source register in the allocation/probe.
8178f7
+
8178f7
+     Last is a loop.  This is the most uncommon case and least efficient.  */
8178f7
+  rtx retval = NULL;
8178f7
+  if (rounded_size == probe_interval)
8178f7
+    {
8178f7
+      retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
8178f7
+
8178f7
+      dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
8178f7
+    }
8178f7
+  else if (rounded_size <= 8 * probe_interval)
8178f7
+    {
8178f7
+      /* The ABI requires using the store with update insns to allocate
8178f7
+	 space and store the backchain into the stack.
8178f7
+
8178f7
+	 So we save the current stack pointer into a temporary, then
8178f7
+	 emit the store-with-update insns to store the saved stack pointer
8178f7
+	 into the right location in each new page.  */
8178f7
+      for (int i = 0; i < rounded_size; i += probe_interval)
8178f7
+	{
8178f7
+	  rtx insn = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
8178f7
+
8178f7
+	  /* Save the first stack adjustment in RETVAL.  */
8178f7
+	  if (i == 0)
8178f7
+	    retval = insn;
8178f7
+	}
8178f7
+
8178f7
+      dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
8178f7
+    }
8178f7
+  else
8178f7
+    {
8178f7
+      /* Compute the ending address.  */
8178f7
+      rtx end_addr
8178f7
+	= copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
8178f7
+      rtx rs = GEN_INT (-rounded_size);
8178f7
+      rtx insn;
8178f7
+      if (add_operand (rs, Pmode))
8178f7
+	insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
8178f7
+      else
8178f7
+	{
8178f7
+	  emit_move_insn (end_addr, GEN_INT (-rounded_size));
8178f7
+	  insn = emit_insn (gen_add3_insn (end_addr, end_addr,
8178f7
+					   stack_pointer_rtx));
8178f7
+	  /* Describe the effect of INSN to the CFI engine.  */
8178f7
+	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8178f7
+			gen_rtx_SET (VOIDmode, end_addr,
8178f7
+				     gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8178f7
+						   rs)));
8178f7
+	}
8178f7
+      RTX_FRAME_RELATED_P (insn) = 1;
8178f7
+
8178f7
+      /* Emit the loop.  */
8178f7
+      if (TARGET_64BIT)
8178f7
+	retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
8178f7
+						     stack_pointer_rtx, orig_sp,
8178f7
+						     end_addr));
8178f7
+      else
8178f7
+	retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
8178f7
+						     stack_pointer_rtx, orig_sp,
8178f7
+						     end_addr));
8178f7
+      RTX_FRAME_RELATED_P (retval) = 1;
8178f7
+      /* Describe the effect of INSN to the CFI engine.  */
8178f7
+      add_reg_note (retval, REG_FRAME_RELATED_EXPR,
8178f7
+		    gen_rtx_SET (VOIDmode, stack_pointer_rtx, end_addr));
8178f7
+
8178f7
+      /* Emit a blockage to ensure the allocation/probing insns are
8178f7
+	 not optimized, combined, removed, etc.  Other cases handle this
8178f7
+	 within their call to rs6000_emit_allocate_stack_1.  */
8178f7
+      emit_insn (gen_blockage ());
8178f7
+
8178f7
+      dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
8178f7
+    }
8178f7
+
8178f7
+  if (orig_size != rounded_size)
8178f7
+    {
8178f7
+      /* Allocate (and implicitly probe) any residual space.   */
8178f7
+      HOST_WIDE_INT residual = orig_size - rounded_size;
8178f7
+
8178f7
+      rtx insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
8178f7
+
8178f7
+      /* If the residual was the only allocation, then we can return the
8178f7
+	 allocating insn.  */
8178f7
+      if (!retval)
8178f7
+	retval = insn;
8178f7
+    }
8178f7
+
8178f7
+  return retval;
8178f7
+}
8178f7
+
8178f7
 /* Emit the correct code for allocating stack space, as insns.
8178f7
    If COPY_REG, make sure a copy of the old frame is left there.
8178f7
    The generated code may use hard register 0 as a temporary.  */
8178f7
@@ -22331,7 +22545,6 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
8178f7
   rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
8178f7
   rtx tmp_reg = gen_rtx_REG (Pmode, 0);
8178f7
   rtx todec = gen_int_mode (-size, Pmode);
8178f7
-  rtx par, set, mem;
8178f7
 
8178f7
   if (INTVAL (todec) != -size)
8178f7
     {
8178f7
@@ -22368,6 +22581,22 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
8178f7
 	warning (0, "stack limit expression is not supported");
8178f7
     }
8178f7
 
8178f7
+  if (flag_stack_clash_protection)
8178f7
+    {
8178f7
+      if (size < get_stack_clash_protection_guard_size ())
8178f7
+	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8178f7
+      else
8178f7
+	{
8178f7
+	  rtx insn = rs6000_emit_probe_stack_range_stack_clash (size, copy_reg);
8178f7
+
8178f7
+	  /* If we asked for a copy with an offset, then we still need to add
8178f7
+	     in the offset.  */
8178f7
+	  if (copy_reg && copy_off)
8178f7
+	    emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
8178f7
+	  return;
8178f7
+	}
8178f7
+    }
8178f7
+
8178f7
   if (copy_reg)
8178f7
     {
8178f7
       if (copy_off != 0)
8178f7
@@ -22376,39 +22605,12 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
8178f7
 	emit_move_insn (copy_reg, stack_reg);
8178f7
     }
8178f7
 
8178f7
-  if (size > 32767)
8178f7
-    {
8178f7
-      /* Need a note here so that try_split doesn't get confused.  */
8178f7
-      if (get_last_insn () == NULL_RTX)
8178f7
-	emit_note (NOTE_INSN_DELETED);
8178f7
-      insn = emit_move_insn (tmp_reg, todec);
8178f7
-      try_split (PATTERN (insn), insn, 0);
8178f7
-      todec = tmp_reg;
8178f7
-    }
8178f7
-  
8178f7
-  insn = emit_insn (TARGET_32BIT
8178f7
-		    ? gen_movsi_update_stack (stack_reg, stack_reg,
8178f7
-					todec, stack_reg)
8178f7
-		    : gen_movdi_di_update_stack (stack_reg, stack_reg,
8178f7
-					   todec, stack_reg));
8178f7
   /* Since we didn't use gen_frame_mem to generate the MEM, grab
8178f7
      it now and set the alias set/attributes. The above gen_*_update
8178f7
      calls will generate a PARALLEL with the MEM set being the first
8178f7
      operation. */
8178f7
-  par = PATTERN (insn);
8178f7
-  gcc_assert (GET_CODE (par) == PARALLEL);
8178f7
-  set = XVECEXP (par, 0, 0);
8178f7
-  gcc_assert (GET_CODE (set) == SET);
8178f7
-  mem = SET_DEST (set);
8178f7
-  gcc_assert (MEM_P (mem));
8178f7
-  MEM_NOTRAP_P (mem) = 1;
8178f7
-  set_mem_alias_set (mem, get_frame_alias_set ());
8178f7
-
8178f7
-  RTX_FRAME_RELATED_P (insn) = 1;
8178f7
-  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8178f7
-		gen_rtx_SET (VOIDmode, stack_reg,
8178f7
-			     gen_rtx_PLUS (Pmode, stack_reg,
8178f7
-					   GEN_INT (-size))));
8178f7
+  insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
8178f7
+  return;
8178f7
 }
8178f7
 
8178f7
 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
8178f7
@@ -22490,9 +22692,9 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
8178f7
 	 until it is equal to ROUNDED_SIZE.  */
8178f7
 
8178f7
       if (TARGET_64BIT)
8178f7
-	emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
8178f7
+	emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
8178f7
       else
8178f7
-	emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
8178f7
+	emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
8178f7
 
8178f7
 
8178f7
       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8178f7
@@ -22504,10 +22706,10 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
8178f7
 }
8178f7
 
8178f7
 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
8178f7
-   absolute addresses.  */
8178f7
+   addresses, not offsets.  */
8178f7
 
8178f7
-const char *
8178f7
-output_probe_stack_range (rtx reg1, rtx reg2)
8178f7
+static const char *
8178f7
+output_probe_stack_range_1 (rtx reg1, rtx reg2)
8178f7
 {
8178f7
   static int labelno = 0;
8178f7
   char loop_lab[32], end_lab[32];
8178f7
@@ -22546,6 +22748,63 @@ output_probe_stack_range (rtx reg1, rtx reg2)
8178f7
   return "";
8178f7
 }
8178f7
 
8178f7
+/* Probe a range of stack addresses from REG1 to REG3 inclusive.  These are
8178f7
+   addresses, not offsets.
8178f7
+
8178f7
+   REG2 contains the backchain that must be stored into *sp at each allocation.
8178f7
+
8178f7
+   This is subtly different than the Ada probing above in that it tries hard
8178f7
+   to prevent attacks that jump the stack guard.  Thus, it is never allowed
8178f7
+   to allocate more than PROBE_INTERVAL bytes of stack space without a
8178f7
+   suitable probe.  */
8178f7
+
8178f7
+static const char *
8178f7
+output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
8178f7
+{
8178f7
+  static int labelno = 0;
8178f7
+  char loop_lab[32];
8178f7
+  rtx xops[3];
8178f7
+
8178f7
+  HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
8178f7
+
8178f7
+  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8178f7
+
8178f7
+  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8178f7
+
8178f7
+  /* This allocates and probes.  */
8178f7
+  xops[0] = reg1;
8178f7
+  xops[1] = reg2;
8178f7
+  xops[2] = GEN_INT (-probe_interval);
8178f7
+  if (TARGET_64BIT)
8178f7
+    output_asm_insn ("stdu %1,%2(%0)", xops);
8178f7
+  else
8178f7
+    output_asm_insn ("stwu %1,%2(%0)", xops);
8178f7
+
8178f7
+  /* Jump to LOOP_LAB if REG1 != REG3.  */
8178f7
+  xops[0] = reg1;
8178f7
+  xops[1] = reg3;
8178f7
+  if (TARGET_64BIT)
8178f7
+    output_asm_insn ("cmpd 0,%0,%1", xops);
8178f7
+  else
8178f7
+    output_asm_insn ("cmpw 0,%0,%1", xops);
8178f7
+
8178f7
+  fputs ("\tbne 0,", asm_out_file);
8178f7
+  assemble_name_raw (asm_out_file, loop_lab);
8178f7
+  fputc ('\n', asm_out_file);
8178f7
+
8178f7
+  return "";
8178f7
+}
8178f7
+
8178f7
+/* Wrapper around the output_probe_stack_range routines.  */
8178f7
+const char *
8178f7
+output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
8178f7
+{
8178f7
+  if (flag_stack_clash_protection)
8178f7
+    return output_probe_stack_range_stack_clash (reg1, reg2, reg3);
8178f7
+  else
8178f7
+    return output_probe_stack_range_1 (reg1, reg3);
8178f7
+}
8178f7
+
8178f7
 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
8178f7
    with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
8178f7
    is not NULL.  It would be nice if dwarf2out_frame_debug_expr could
8178f7
@@ -23857,6 +24116,13 @@ rs6000_emit_prologue (void)
8178f7
 	  }
8178f7
     }
8178f7
 
8178f7
+  /* If we are emitting stack probes, but allocate no stack, then
8178f7
+     just note that in the dump file.  */
8178f7
+  if (flag_stack_clash_protection
8178f7
+      && dump_file
8178f7
+      && !info->push_p)
8178f7
+    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
8178f7
+
8178f7
   /* Update stack and set back pointer unless this is V.4,
8178f7
      for which it was done previously.  */
8178f7
   if (!WORLD_SAVE_P (info) && info->push_p
8178f7
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
8178f7
index cd197213480..3cd70e592c1 100644
8178f7
--- a/gcc/config/rs6000/rs6000.md
8178f7
+++ b/gcc/config/rs6000/rs6000.md
8178f7
@@ -11822,10 +11822,20 @@
8178f7
 ;;
8178f7
 ;; First, an insn to allocate new stack space for dynamic use (e.g., alloca).
8178f7
 ;; We move the back-chain and decrement the stack pointer.
8178f7
-
8178f7
+;;
8178f7
+;; Operand1 is more naturally reg_or_short_operand.  However, for a large
8178f7
+;; constant alloca, using that predicate will force the generic code to put
8178f7
+;; the constant size into a register before calling the expander.
8178f7
+;;
8178f7
+;; As a result the expander would not have the constant size information
8178f7
+;; in those cases and would have to generate less efficient code.
8178f7
+;;
8178f7
+;; Thus we allow reg_or_cint_operand instead so that the expander can see
8178f7
+;; the constant size.  The value is forced into a register if necessary.
8178f7
+;;
8178f7
 (define_expand "allocate_stack"
8178f7
   [(set (match_operand 0 "gpc_reg_operand" "")
8178f7
-	(minus (reg 1) (match_operand 1 "reg_or_short_operand" "")))
8178f7
+	(minus (reg 1) (match_operand 1 "reg_or_cint_operand" "")))
8178f7
    (set (reg 1)
8178f7
 	(minus (reg 1) (match_dup 1)))]
8178f7
   ""
8178f7
@@ -11835,6 +11845,15 @@
8178f7
   rtx neg_op0;
8178f7
   rtx insn, par, set, mem;
8178f7
 
8178f7
+  /* By allowing reg_or_cint_operand as the predicate we can get
8178f7
+     better code for stack-clash-protection because we do not lose
8178f7
+     size information.  But the rest of the code expects the operand
8178f7
+     to be reg_or_short_operand.  If it isn't, then force it into
8178f7
+     a register.  */
8178f7
+  rtx orig_op1 = operands[1];
8178f7
+  if (!reg_or_short_operand (operands[1], Pmode))
8178f7
+    operands[1] = force_reg (Pmode, operands[1]);
8178f7
+
8178f7
   emit_move_insn (chain, stack_bot);
8178f7
 
8178f7
   /* Check stack bounds if necessary.  */
8178f7
@@ -11847,6 +11866,51 @@
8178f7
       emit_insn (gen_cond_trap (LTU, available, operands[1], const0_rtx));
8178f7
     }
8178f7
 
8178f7
+  /* Allocate and probe if requested.
8178f7
+     This may look similar to the loop we use for prologue allocations,
8178f7
+     but it is critically different.  For the former we know the loop
8178f7
+     will iterate, but do not know that generally here.  The former
8178f7
+     uses that knowledge to rotate the loop.  Combining them would be
8178f7
+     possible with some performance cost.  */
8178f7
+  if (flag_stack_clash_protection)
8178f7
+    {
8178f7
+      rtx rounded_size, last_addr, residual;
8178f7
+      HOST_WIDE_INT probe_interval;
8178f7
+      compute_stack_clash_protection_loop_data (&rounded_size, &last_addr,
8178f7
+						&residual, &probe_interval,
8178f7
+						orig_op1);
8178f7
+      
8178f7
+      /* We do occasionally get in here with constant sizes, so we might
8178f7
+	 as well do a reasonable job when we obviously can.  */
8178f7
+      if (rounded_size != const0_rtx)
8178f7
+	{
8178f7
+	  rtx loop_lab, end_loop;
8178f7
+	  bool rotated = CONST_INT_P (rounded_size);
8178f7
+
8178f7
+	  emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop,
8178f7
+							last_addr, rotated);
8178f7
+
8178f7
+	  if (Pmode == SImode)
8178f7
+	    emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
8178f7
+					       stack_pointer_rtx,
8178f7
+					       GEN_INT (-probe_interval),
8178f7
+					       chain));
8178f7
+	  else
8178f7
+	    emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
8178f7
+					          stack_pointer_rtx,
8178f7
+					          GEN_INT (-probe_interval),
8178f7
+					          chain));
8178f7
+	  emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop,
8178f7
+						      last_addr, rotated);
8178f7
+	}
8178f7
+
8178f7
+      /* Now handle residuals.  We just have to set operands[1] correctly
8178f7
+	 and let the rest of the expander run.  */
8178f7
+      operands[1] = residual;
8178f7
+      if (!CONST_INT_P (residual))
8178f7
+	operands[1] = force_reg (Pmode, operands[1]);
8178f7
+    }
8178f7
+
8178f7
   if (GET_CODE (operands[1]) != CONST_INT
8178f7
       || INTVAL (operands[1]) < -32767
8178f7
       || INTVAL (operands[1]) > 32768)
8178f7
@@ -12994,12 +13058,13 @@
8178f7
    (set_attr "length" "4")])
8178f7
 
8178f7
 (define_insn "probe_stack_range<P:mode>"
8178f7
-  [(set (match_operand:P 0 "register_operand" "=r")
8178f7
+  [(set (match_operand:P 0 "register_operand" "=&r")
8178f7
 	(unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
8178f7
-			    (match_operand:P 2 "register_operand" "r")]
8178f7
+			    (match_operand:P 2 "register_operand" "r")
8178f7
+			    (match_operand:P 3 "register_operand" "r")]
8178f7
 			   UNSPECV_PROBE_STACK_RANGE))]
8178f7
   ""
8178f7
-  "* return output_probe_stack_range (operands[0], operands[2]);"
8178f7
+  "* return output_probe_stack_range (operands[0], operands[2], operands[3]);"
8178f7
   [(set_attr "type" "three")])
8178f7
 
8178f7
 ;; Compare insns are next.  Note that the RS/6000 has two types of compares,
8178f7
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
8178f7
index 7c126e4122b..aba99513ed0 100644
8178f7
--- a/gcc/testsuite/lib/target-supports.exp
8178f7
+++ b/gcc/testsuite/lib/target-supports.exp
8178f7
@@ -5421,12 +5421,12 @@ proc check_effective_target_autoincdec { } {
8178f7
 proc check_effective_target_supports_stack_clash_protection { } {
8178f7
 
8178f7
    # Temporary until the target bits are fully ACK'd.
8178f7
-#  if { [istarget aarch*-*-*]
8178f7
-#       || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } {
8178f7
+#  if { [istarget aarch*-*-*] } {
8178f7
 #	return 1
8178f7
 #  }
8178f7
 
8178f7
     if { [istarget x86_64-*-*] || [istarget i?86-*-*] 
8178f7
+	  || [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
8178f7
 	  || [istarget s390*-*-*] } {
8178f7
 	return 1
8178f7
     }