diff -Nrup a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
--- a/gcc/config/i386/i386.c 2018-01-03 16:10:46.278171086 -0700
+++ b/gcc/config/i386/i386.c 2018-01-03 16:12:32.022220166 -0700
@@ -9862,14 +9862,13 @@ static void
ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size)
{
struct machine_function *m = cfun->machine;
+ struct ix86_frame frame;
+ ix86_compute_frame_layout (&frame);
/* If this function does not statically allocate stack space, then
no probes are needed. */
if (!size)
{
- struct ix86_frame frame;
- ix86_compute_frame_layout (&frame);
-
/* However, the allocation of space via pushes for register
saves could be viewed as allocating space, but without the
need to probe. */
@@ -9888,21 +9887,40 @@ ix86_adjust_stack_and_probe_stack_clash
pointer could be anywhere in the guard page. The safe thing
to do is emit a probe now.
+ The probe can be avoided if we have already emitted any callee
+ register saves into the stack or have a frame pointer (which will
+ have been saved as well). Those saves will function as implicit
+ probes.
+
?!? This should be revamped to work like aarch64 and s390 where
we track the offset from the most recent probe. Normally that
offset would be zero. For a non-return function we would reset
it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
we just probe when we cross PROBE_INTERVAL. */
- if (TREE_THIS_VOLATILE (cfun->decl))
+ if (TREE_THIS_VOLATILE (cfun->decl)
+ && !(frame.nregs || frame.nsseregs || frame_pointer_needed))
+
{
/* We can safely use any register here since we're just going to push
its value and immediately pop it back. But we do try and avoid
argument passing registers so as not to introduce dependencies in
the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
- rtx insn = emit_insn (gen_push (dummy_reg));
- RTX_FRAME_RELATED_P (insn) = 1;
- ix86_emit_restore_reg_using_pop (dummy_reg);
+ rtx insn_push = emit_insn (gen_push (dummy_reg));
+ rtx insn_pop = emit_insn (gen_pop (dummy_reg));
+ m->fs.sp_offset -= UNITS_PER_WORD;
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ {
+ m->fs.cfa_offset -= UNITS_PER_WORD;
+ rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
+ RTX_FRAME_RELATED_P (insn_push) = 1;
+ x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
+ RTX_FRAME_RELATED_P (insn_pop) = 1;
+ }
emit_insn (gen_blockage ());
}
diff -Nrup a/gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-12.c b/gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-12.c
--- gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-12.c 2018-01-03 15:42:40.849530670 -0700
+++ gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-12.c 2018-01-03 15:36:12.528488596 -0700
@@ -7,7 +7,6 @@ __attribute__ ((noreturn)) void exit (in
__attribute__ ((noreturn)) void
f (void)
{
- asm volatile ("nop" ::: "edi");
exit (1);
}
diff -Nrup a/gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-17.c b/gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-17.c
--- gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-17.c 1969-12-31 17:00:00.000000000 -0700
+++ gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-17.c 2018-01-03 15:36:12.528488596 -0700
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fomit-frame-pointer" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+
+
+int x0, x1;
+void f1 (void);
+void f2 (int, int);
+
+__attribute__ ((noreturn))
+void
+f3 (void)
+{
+ int y0 = x0;
+ int y1 = x1;
+ f1 ();
+ f2 (y0, y1);
+ while (1);
+}
+
+/* Verify no explicit probes. */
+/* { dg-final { scan-assembler-not "or\[ql\]" } } */
+
+/* We also want to verify we did not use a push/pop sequence
+ to probe *sp as the callee register saves are sufficient
+ to probe *sp.
+
+ y0/y1 are live across the call and thus must be allocated
+ into either a stack slot or callee saved register. The former
+ would be rather dumb. So assume it does not happen.
+
+ So search for two/four pushes for the callee register saves/argument
+ pushes and no pops (since the function has no reachable epilogue). */
+/* { dg-final { scan-assembler-times "push\[ql\]" 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "push\[ql\]" 4 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-not "pop" } } */
+