a2cf7d
commit f1c56cdff09f650ad721fae026eb6a3651631f3d
a2cf7d
Author: Paul A. Clarke <pc@us.ibm.com>
a2cf7d
Date:   Thu Sep 19 08:35:16 2019 -0500
a2cf7d
a2cf7d
    [powerpc] SET_RESTORE_ROUND optimizations and bug fix
a2cf7d
    
a2cf7d
    SET_RESTORE_ROUND brackets a block of code, temporarily setting and
a2cf7d
    restoring the rounding mode and letting everything else, including
a2cf7d
    exceptions generated within the block, pass through.
a2cf7d
    
a2cf7d
    On powerpc, the current code clears the exception enables, which will hide
a2cf7d
    exceptions generated within the block.  This issue was introduced by me
a2cf7d
    in commit e905212627350d54b58426214b5a54ddc852b0c9.
a2cf7d
    
a2cf7d
    Fix this by not clearing exception enable bits in the prologue.
a2cf7d
    
a2cf7d
    Also, since we are no longer changing the enable bits in either the
a2cf7d
    prologue or the epilogue, there is no need to test for entering/exiting
a2cf7d
    non-stop mode.
a2cf7d
    
a2cf7d
    Also, optimize the prologue get/save/set rounding mode operations for
a2cf7d
    POWER9 and later by using 'mffscrn' when possible.
a2cf7d
    
a2cf7d
    Suggested-by: Paul E. Murphy <murphyp@linux.ibm.com>
a2cf7d
    Reviewed-by: Paul E. Murphy <murphyp@linux.ibm.com>
a2cf7d
    Fixes: e905212627350d54b58426214b5a54ddc852b0c9
a2cf7d
    
a2cf7d
    2019-09-19  Paul A. Clarke  <pc@us.ibm.com>
a2cf7d
    
a2cf7d
            * sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_and_set_rn): New.
a2cf7d
            (__fe_mffscrn): New.
a2cf7d
            * sysdeps/powerpc/fpu/fenv_private.h (libc_feholdsetround_ppc_ctx):
a2cf7d
            Do not clear enable bits, remove obsolete code, use
a2cf7d
            fegetenv_and_set_rn.
a2cf7d
            (libc_feresetround_ppc): Remove obsolete code, use
a2cf7d
            fegetenv_and_set_rn.
a2cf7d
a2cf7d
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
a2cf7d
index e8d40ea256b6c5bc..b10b6a141ded4bfd 100644
a2cf7d
--- a/sysdeps/powerpc/fpu/fenv_libc.h
a2cf7d
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
a2cf7d
@@ -49,6 +49,38 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
a2cf7d
     __fr;								\
a2cf7d
   })
a2cf7d
 
a2cf7d
+#define __fe_mffscrn(rn)						\
a2cf7d
+  ({register fenv_union_t __fr;						\
a2cf7d
+    if (__builtin_constant_p (rn))					\
a2cf7d
+      __asm__ __volatile__ (						\
a2cf7d
+        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
a2cf7d
+        : "=f" (__fr.fenv) : "i" (rn));					\
a2cf7d
+    else								\
a2cf7d
+    {									\
a2cf7d
+      __fr.l = (rn);							\
a2cf7d
+      __asm__ __volatile__ (						\
a2cf7d
+        ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
a2cf7d
+        : "=f" (__fr.fenv) : "f" (__fr.fenv));				\
a2cf7d
+    }									\
a2cf7d
+    __fr.fenv;								\
a2cf7d
+  })
a2cf7d
+
a2cf7d
+/* Like fegetenv_status, but also sets the rounding mode.  */
a2cf7d
+#ifdef _ARCH_PWR9
a2cf7d
+#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
a2cf7d
+#else
a2cf7d
+/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
a2cf7d
+   but not sufficient, because it does not set the rounding mode.
a2cf7d
+   Explicitly set the rounding mode when 'mffscrn' actually doesn't.  */
a2cf7d
+#define fegetenv_and_set_rn(rn)						\
a2cf7d
+  ({register fenv_union_t __fr;						\
a2cf7d
+    __fr.fenv = __fe_mffscrn (rn);					\
a2cf7d
+    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)))	\
a2cf7d
+      __fesetround_inline (rn);						\
a2cf7d
+    __fr.fenv;								\
a2cf7d
+  })
a2cf7d
+#endif
a2cf7d
+
a2cf7d
 /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
a2cf7d
 #define fesetenv_register(env) \
a2cf7d
 	do { \
a2cf7d
diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
a2cf7d
index b0149aa243e69f5a..30df92c9a4700dee 100644
a2cf7d
--- a/sysdeps/powerpc/fpu/fenv_private.h
a2cf7d
+++ b/sysdeps/powerpc/fpu/fenv_private.h
a2cf7d
@@ -133,16 +133,7 @@ static __always_inline void
a2cf7d
 libc_feresetround_ppc (fenv_t *envp)
a2cf7d
 {
a2cf7d
   fenv_union_t new = { .fenv = *envp };
a2cf7d
-
a2cf7d
-  /* If the old env has no enabled exceptions and the new env has any enabled
a2cf7d
-     exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits.  This will put the
a2cf7d
-     hardware into "precise mode" and may cause the FPU to run slower on some
a2cf7d
-     hardware.  */
a2cf7d
-  if ((new.l & _FPU_ALL_TRAPS) != 0)
a2cf7d
-    (void) __fe_nomask_env_priv ();
a2cf7d
-
a2cf7d
-  /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
a2cf7d
-  fesetenv_mode (new.fenv);
a2cf7d
+  fegetenv_and_set_rn (new.l & FPSCR_RN_MASK);
a2cf7d
 }
a2cf7d
 
a2cf7d
 static __always_inline int
a2cf7d
@@ -184,22 +175,10 @@ libc_feupdateenv_ppc (fenv_t *e)
a2cf7d
 static __always_inline void
a2cf7d
 libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
a2cf7d
 {
a2cf7d
-  fenv_union_t old, new;
a2cf7d
+  fenv_union_t old;
a2cf7d
 
a2cf7d
-  old.fenv = fegetenv_status ();
a2cf7d
-
a2cf7d
-  new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
a2cf7d
-
a2cf7d
-  ctx->env = old.fenv;
a2cf7d
-  if (__glibc_unlikely (new.l != old.l))
a2cf7d
-    {
a2cf7d
-      if ((old.l & _FPU_ALL_TRAPS) != 0)
a2cf7d
-	(void) __fe_mask_env ();
a2cf7d
-      fesetenv_mode (new.fenv);
a2cf7d
-      ctx->updated_status = true;
a2cf7d
-    }
a2cf7d
-  else
a2cf7d
-    ctx->updated_status = false;
a2cf7d
+  ctx->env = old.fenv = fegetenv_and_set_rn (r);
a2cf7d
+  ctx->updated_status = (r != (old.l & FPSCR_RN_MASK));
a2cf7d
 }
a2cf7d
 
a2cf7d
 static __always_inline void