Blame SOURCES/gcc48-rh1469697-14.patch

22033d
commit 21397732bbcef3347c0d5ff8a0ee5163e803e2fb
22033d
Author: Jeff Law <law@redhat.com>
22033d
Date:   Mon Oct 2 12:30:26 2017 -0600
22033d
22033d
    Dependencies for aarch64 work
22033d
22033d
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
22033d
index 07ff7031b35..91dd5b7fc02 100644
22033d
--- a/gcc/config/aarch64/aarch64-protos.h
22033d
+++ b/gcc/config/aarch64/aarch64-protos.h
22033d
@@ -181,6 +181,7 @@ unsigned aarch64_dbx_register_number (unsigned);
22033d
 unsigned aarch64_trampoline_size (void);
22033d
 void aarch64_asm_output_labelref (FILE *, const char *);
22033d
 void aarch64_elf_asm_named_section (const char *, unsigned, tree);
22033d
+const char * aarch64_output_probe_stack_range (rtx, rtx);
22033d
 void aarch64_expand_epilogue (bool);
22033d
 void aarch64_expand_mov_immediate (rtx, rtx);
22033d
 void aarch64_expand_prologue (void);
22033d
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
22033d
index 5afc167d569..cadf193cfcf 100644
22033d
--- a/gcc/config/aarch64/aarch64.c
22033d
+++ b/gcc/config/aarch64/aarch64.c
22033d
@@ -969,6 +969,199 @@ aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
22033d
   return true;
22033d
 }
22033d
 
22033d
+static int
22033d
+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
22033d
+				enum machine_mode mode)
22033d
+{
22033d
+  int i;
22033d
+  unsigned HOST_WIDE_INT val, val2, mask;
22033d
+  int one_match, zero_match;
22033d
+  int num_insns;
22033d
+
22033d
+  val = INTVAL (imm);
22033d
+
22033d
+  if (aarch64_move_imm (val, mode))
22033d
+    {
22033d
+      if (generate)
22033d
+	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
22033d
+      return 1;
22033d
+    }
22033d
+
22033d
+  /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
22033d
+     (with XXXX non-zero). In that case check to see if the move can be done in
22033d
+     a smaller mode.  */
22033d
+  val2 = val & 0xffffffff;
22033d
+  if (mode == DImode
22033d
+      && aarch64_move_imm (val2, SImode)
22033d
+      && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
22033d
+    {
22033d
+      if (generate)
22033d
+	emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val2)));
22033d
+
22033d
+      /* Check if we have to emit a second instruction by checking to see
22033d
+         if any of the upper 32 bits of the original DI mode value is set.  */
22033d
+      if (val == val2)
22033d
+	return 1;
22033d
+
22033d
+      i = (val >> 48) ? 48 : 32;
22033d
+
22033d
+      if (generate)
22033d
+	 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
22033d
+				    GEN_INT ((val >> i) & 0xffff)));
22033d
+
22033d
+      return 2;
22033d
+    }
22033d
+
22033d
+  if ((val >> 32) == 0 || mode == SImode)
22033d
+    {
22033d
+      if (generate)
22033d
+	{
22033d
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val & 0xffff)));
22033d
+	  if (mode == SImode)
22033d
+	    emit_insn (gen_insv_immsi (dest, GEN_INT (16),
22033d
+				       GEN_INT ((val >> 16) & 0xffff)));
22033d
+	  else
22033d
+	    emit_insn (gen_insv_immdi (dest, GEN_INT (16),
22033d
+				       GEN_INT ((val >> 16) & 0xffff)));
22033d
+	}
22033d
+      return 2;
22033d
+    }
22033d
+
22033d
+  /* Remaining cases are all for DImode.  */
22033d
+
22033d
+  mask = 0xffff;
22033d
+  zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
22033d
+    ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
22033d
+  one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
22033d
+    ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
22033d
+
22033d
+  if (zero_match != 2 && one_match != 2)
22033d
+    {
22033d
+      /* Try emitting a bitmask immediate with a movk replacing 16 bits.
22033d
+	 For a 64-bit bitmask try whether changing 16 bits to all ones or
22033d
+	 zeroes creates a valid bitmask.  To check any repeated bitmask,
22033d
+	 try using 16 bits from the other 32-bit half of val.  */
22033d
+
22033d
+      for (i = 0; i < 64; i += 16, mask <<= 16)
22033d
+	{
22033d
+	  val2 = val & ~mask;
22033d
+	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
22033d
+	    break;
22033d
+	  val2 = val | mask;
22033d
+	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
22033d
+	    break;
22033d
+	  val2 = val2 & ~mask;
22033d
+	  val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
22033d
+	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
22033d
+	    break;
22033d
+	}
22033d
+      if (i != 64)
22033d
+	{
22033d
+	  if (generate)
22033d
+	    {
22033d
+	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val2)));
22033d
+	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
22033d
+					 GEN_INT ((val >> i) & 0xffff)));
22033d
+	    }
22033d
+	  return 2;
22033d
+	}
22033d
+    }
22033d
+
22033d
+  /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
22033d
+     are emitted by the initial mov.  If one_match > zero_match, skip set bits,
22033d
+     otherwise skip zero bits.  */
22033d
+
22033d
+  num_insns = 1;
22033d
+  mask = 0xffff;
22033d
+  val2 = one_match > zero_match ? ~val : val;
22033d
+  i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
22033d
+
22033d
+  if (generate)
22033d
+    emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (one_match > zero_match
22033d
+					   ? (val | ~(mask << i))
22033d
+					   : (val & (mask << i)))));
22033d
+  for (i += 16; i < 64; i += 16)
22033d
+    {
22033d
+      if ((val2 & (mask << i)) == 0)
22033d
+	continue;
22033d
+      if (generate)
22033d
+	emit_insn (gen_insv_immdi (dest, GEN_INT (i),
22033d
+				   GEN_INT ((val >> i) & 0xffff)));
22033d
+      num_insns ++;
22033d
+    }
22033d
+
22033d
+  return num_insns;
22033d
+}
22033d
+
22033d
+/* Add DELTA to REGNUM in mode MODE.  SCRATCHREG can be used to hold a
22033d
+   temporary value if necessary.  FRAME_RELATED_P should be true if
22033d
+   the RTX_FRAME_RELATED flag should be set and CFA adjustments added
22033d
+   to the generated instructions.  If SCRATCHREG is known to hold
22033d
+   abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
22033d
+   immediate again.
22033d
+
22033d
+   Since this function may be used to adjust the stack pointer, we must
22033d
+   ensure that it cannot cause transient stack deallocation (for example
22033d
+   by first incrementing SP and then decrementing when adjusting by a
22033d
+   large immediate).  */
22033d
+
22033d
+static void
22033d
+aarch64_add_constant_internal (enum machine_mode mode, int regnum,
22033d
+			       int scratchreg, HOST_WIDE_INT delta,
22033d
+			       bool frame_related_p, bool emit_move_imm)
22033d
+{
22033d
+  HOST_WIDE_INT mdelta = abs_hwi (delta);
22033d
+  rtx this_rtx = gen_rtx_REG (mode, regnum);
22033d
+  rtx insn;
22033d
+
22033d
+  if (!mdelta)
22033d
+    return;
22033d
+
22033d
+  /* Single instruction adjustment.  */
22033d
+  if (aarch64_uimm12_shift (mdelta))
22033d
+    {
22033d
+      insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
22033d
+      RTX_FRAME_RELATED_P (insn) = frame_related_p;
22033d
+      return;
22033d
+    }
22033d
+
22033d
+  /* Emit 2 additions/subtractions if the absolute adjustment fits in 24 bits.
22033d
+     Only do this if mdelta is not a 16-bit move as adjusting using a move
22033d
+     is better.  */
22033d
+  if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
22033d
+    {
22033d
+      HOST_WIDE_INT low_off = mdelta & 0xfff;
22033d
+
22033d
+      low_off = delta < 0 ? -low_off : low_off;
22033d
+      insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
22033d
+      RTX_FRAME_RELATED_P (insn) = frame_related_p;
22033d
+      insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
22033d
+      RTX_FRAME_RELATED_P (insn) = frame_related_p;
22033d
+      return;
22033d
+    }
22033d
+
22033d
+  /* Emit a move immediate if required and an addition/subtraction.  */
22033d
+  rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
22033d
+  if (emit_move_imm)
22033d
+    aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
22033d
+  insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
22033d
+			      : gen_add2_insn (this_rtx, scratch_rtx));
22033d
+  if (frame_related_p)
22033d
+    {
22033d
+      RTX_FRAME_RELATED_P (insn) = frame_related_p;
22033d
+      rtx adj = plus_constant (mode, this_rtx, delta);
22033d
+      add_reg_note (insn , REG_CFA_ADJUST_CFA,
22033d
+		    gen_rtx_SET (VOIDmode, this_rtx, adj));
22033d
+    }
22033d
+}
22033d
+
22033d
+static inline void
22033d
+aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
22033d
+{
22033d
+  aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
22033d
+				 frame_related_p, true);
22033d
+}
22033d
+
22033d
 /* Implement TARGET_PASS_BY_REFERENCE.  */
22033d
 
22033d
 static bool
22033d
@@ -1476,6 +1669,47 @@ aarch64_libgcc_cmp_return_mode (void)
22033d
   return SImode;
22033d
 }
22033d
 
22033d
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22033d
+
22033d
+/* We use the 12-bit shifted immediate arithmetic instructions so values
22033d
+   must be a multiple of (1 << 12), i.e. 4096.  */
22033d
+#define ARITH_FACTOR 4096
22033d
+
22033d
+/* Output the asm loop that probes the stack at PROBE_INTERVAL steps below
22033d
+   REG1 until REG1 equals REG2.  These are absolute addresses.  Returns "".  */
22033d
+
22033d
+const char *
22033d
+aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
22033d
+{
22033d
+  static int labelno = 0;
22033d
+  char loop_lab[32];
22033d
+  rtx xops[2];
22033d
+
22033d
+  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
22033d
+
22033d
+  /* Emit the loop head label.  */
22033d
+  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22033d
+
22033d
+  /* TEST_ADDR = TEST_ADDR - PROBE_INTERVAL (REG1 serves as TEST_ADDR).  */
22033d
+  xops[0] = reg1;
22033d
+  xops[1] = GEN_INT (PROBE_INTERVAL);
22033d
+  output_asm_insn ("sub\t%0, %0, %1", xops);
22033d
+
22033d
+  /* Probe at TEST_ADDR by storing xzr there.  */
22033d
+  output_asm_insn ("str\txzr, [%0]", xops);
22033d
+
22033d
+  /* Test if TEST_ADDR == LAST_ADDR (REG2).  */
22033d
+  xops[1] = reg2;
22033d
+  output_asm_insn ("cmp\t%0, %1", xops);
22033d
+
22033d
+  /* Branch back to the loop label while they differ.  */
22033d
+  fputs ("\tb.ne\t", asm_out_file);
22033d
+  assemble_name_raw (asm_out_file, loop_lab);
22033d
+  fputc ('\n', asm_out_file);
22033d
+
22033d
+  return "";
22033d
+}
22033d
+
22033d
 static bool
22033d
 aarch64_frame_pointer_required (void)
22033d
 {
22033d
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
22033d
index 91299901bbf..17082486ac8 100644
22033d
--- a/gcc/config/aarch64/aarch64.md
22033d
+++ b/gcc/config/aarch64/aarch64.md
22033d
@@ -88,6 +88,7 @@
22033d
     UNSPEC_ST4
22033d
     UNSPEC_TLS
22033d
     UNSPEC_TLSDESC
22033d
+    UNSPECV_PROBE_STACK_RANGE   ; Represent stack range probing.
22033d
     UNSPEC_VSTRUCTDUMMY
22033d
 ])
22033d
 
22033d
@@ -3399,6 +3400,18 @@
22033d
   [(set_attr "length" "0")]
22033d
 )
22033d
 
22033d
+(define_insn "probe_stack_range"
22033d
+  [(set (match_operand:DI 0 "register_operand" "=r")
22033d
+	(unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
22033d
+			     (match_operand:DI 2 "register_operand" "r")]
22033d
+			      UNSPECV_PROBE_STACK_RANGE))]
22033d
+  ""
22033d
+{
22033d
+  return aarch64_output_probe_stack_range (operands[0], operands[2]);
22033d
+}
22033d
+  [(set_attr "length" "32")]
22033d
+)
22033d
+
22033d
 ;; Named pattern for expanding thread pointer reference.
22033d
 (define_expand "get_thread_pointerdi"
22033d
   [(match_operand:DI 0 "register_operand" "=r")]