|
|
a2cf7d |
commit f1c56cdff09f650ad721fae026eb6a3651631f3d
|
|
|
a2cf7d |
Author: Paul A. Clarke <pc@us.ibm.com>
|
|
|
a2cf7d |
Date: Thu Sep 19 08:35:16 2019 -0500
|
|
|
a2cf7d |
|
|
|
a2cf7d |
[powerpc] SET_RESTORE_ROUND optimizations and bug fix
|
|
|
a2cf7d |
|
|
|
a2cf7d |
SET_RESTORE_ROUND brackets a block of code, temporarily setting and
|
|
|
a2cf7d |
restoring the rounding mode and letting everything else, including
|
|
|
a2cf7d |
exceptions generated within the block, pass through.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
On powerpc, the current code clears the exception enables, which will hide
|
|
|
a2cf7d |
exceptions generated within the block. This issue was introduced by me
|
|
|
a2cf7d |
in commit e905212627350d54b58426214b5a54ddc852b0c9.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
Fix this by not clearing exception enable bits in the prologue.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
Also, since we are no longer changing the enable bits in either the
|
|
|
a2cf7d |
prologue or the epilogue, there is no need to test for entering/exiting
|
|
|
a2cf7d |
non-stop mode.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
Also, optimize the prologue get/save/set rounding mode operations for
|
|
|
a2cf7d |
POWER9 and later by using 'mffscrn' when possible.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
Suggested-by: Paul E. Murphy <murphyp@linux.ibm.com>
|
|
|
a2cf7d |
Reviewed-by: Paul E. Murphy <murphyp@linux.ibm.com>
|
|
|
a2cf7d |
Fixes: e905212627350d54b58426214b5a54ddc852b0c9
|
|
|
a2cf7d |
|
|
|
a2cf7d |
2019-09-19 Paul A. Clarke <pc@us.ibm.com>
|
|
|
a2cf7d |
|
|
|
a2cf7d |
* sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_and_set_rn): New.
|
|
|
a2cf7d |
(__fe_mffscrn): New.
|
|
|
a2cf7d |
* sysdeps/powerpc/fpu/fenv_private.h (libc_feholdsetround_ppc_ctx):
|
|
|
a2cf7d |
Do not clear enable bits, remove obsolete code, use
|
|
|
a2cf7d |
fegetenv_and_set_rn.
|
|
|
a2cf7d |
(libc_feresetround_ppc): Remove obsolete code, use
|
|
|
a2cf7d |
fegetenv_and_set_rn.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
|
|
|
a2cf7d |
index e8d40ea256b6c5bc..b10b6a141ded4bfd 100644
|
|
|
a2cf7d |
--- a/sysdeps/powerpc/fpu/fenv_libc.h
|
|
|
a2cf7d |
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
|
|
|
a2cf7d |
@@ -49,6 +49,38 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
|
|
|
a2cf7d |
__fr; \
|
|
|
a2cf7d |
})
|
|
|
a2cf7d |
|
|
|
a2cf7d |
+#define __fe_mffscrn(rn) \
|
|
|
a2cf7d |
+ ({register fenv_union_t __fr; \
|
|
|
a2cf7d |
+ if (__builtin_constant_p (rn)) \
|
|
|
a2cf7d |
+ __asm__ __volatile__ ( \
|
|
|
a2cf7d |
+ ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
|
|
|
a2cf7d |
+ : "=f" (__fr.fenv) : "i" (rn)); \
|
|
|
a2cf7d |
+ else \
|
|
|
a2cf7d |
+ { \
|
|
|
a2cf7d |
+ __fr.l = (rn); \
|
|
|
a2cf7d |
+ __asm__ __volatile__ ( \
|
|
|
a2cf7d |
+ ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
|
|
|
a2cf7d |
+ : "=f" (__fr.fenv) : "f" (__fr.fenv)); \
|
|
|
a2cf7d |
+ } \
|
|
|
a2cf7d |
+ __fr.fenv; \
|
|
|
a2cf7d |
+ })
|
|
|
a2cf7d |
+
|
|
|
a2cf7d |
+/* Like fegetenv_status, but also sets the rounding mode. */
|
|
|
a2cf7d |
+#ifdef _ARCH_PWR9
|
|
|
a2cf7d |
+#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
|
|
|
a2cf7d |
+#else
|
|
|
a2cf7d |
+/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
|
|
|
a2cf7d |
+ but not sufficient, because it does not set the rounding mode.
|
|
|
a2cf7d |
+ Explicitly set the rounding mode when 'mffscrn' actually doesn't. */
|
|
|
a2cf7d |
+#define fegetenv_and_set_rn(rn) \
|
|
|
a2cf7d |
+ ({register fenv_union_t __fr; \
|
|
|
a2cf7d |
+ __fr.fenv = __fe_mffscrn (rn); \
|
|
|
a2cf7d |
+ if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))) \
|
|
|
a2cf7d |
+ __fesetround_inline (rn); \
|
|
|
a2cf7d |
+ __fr.fenv; \
|
|
|
a2cf7d |
+ })
|
|
|
a2cf7d |
+#endif
|
|
|
a2cf7d |
+
|
|
|
a2cf7d |
/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer. */
|
|
|
a2cf7d |
#define fesetenv_register(env) \
|
|
|
a2cf7d |
do { \
|
|
|
a2cf7d |
diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
|
|
|
a2cf7d |
index b0149aa243e69f5a..30df92c9a4700dee 100644
|
|
|
a2cf7d |
--- a/sysdeps/powerpc/fpu/fenv_private.h
|
|
|
a2cf7d |
+++ b/sysdeps/powerpc/fpu/fenv_private.h
|
|
|
a2cf7d |
@@ -133,16 +133,7 @@ static __always_inline void
|
|
|
a2cf7d |
libc_feresetround_ppc (fenv_t *envp)
|
|
|
a2cf7d |
{
|
|
|
a2cf7d |
fenv_union_t new = { .fenv = *envp };
|
|
|
a2cf7d |
-
|
|
|
a2cf7d |
- /* If the old env has no enabled exceptions and the new env has any enabled
|
|
|
a2cf7d |
- exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
|
|
|
a2cf7d |
- hardware into "precise mode" and may cause the FPU to run slower on some
|
|
|
a2cf7d |
- hardware. */
|
|
|
a2cf7d |
- if ((new.l & _FPU_ALL_TRAPS) != 0)
|
|
|
a2cf7d |
- (void) __fe_nomask_env_priv ();
|
|
|
a2cf7d |
-
|
|
|
a2cf7d |
- /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
|
|
|
a2cf7d |
- fesetenv_mode (new.fenv);
|
|
|
a2cf7d |
+ fegetenv_and_set_rn (new.l & FPSCR_RN_MASK);
|
|
|
a2cf7d |
}
|
|
|
a2cf7d |
|
|
|
a2cf7d |
static __always_inline int
|
|
|
a2cf7d |
@@ -184,22 +175,10 @@ libc_feupdateenv_ppc (fenv_t *e)
|
|
|
a2cf7d |
static __always_inline void
|
|
|
a2cf7d |
libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
|
|
|
a2cf7d |
{
|
|
|
a2cf7d |
- fenv_union_t old, new;
|
|
|
a2cf7d |
+ fenv_union_t old;
|
|
|
a2cf7d |
|
|
|
a2cf7d |
- old.fenv = fegetenv_status ();
|
|
|
a2cf7d |
-
|
|
|
a2cf7d |
- new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
|
|
|
a2cf7d |
-
|
|
|
a2cf7d |
- ctx->env = old.fenv;
|
|
|
a2cf7d |
- if (__glibc_unlikely (new.l != old.l))
|
|
|
a2cf7d |
- {
|
|
|
a2cf7d |
- if ((old.l & _FPU_ALL_TRAPS) != 0)
|
|
|
a2cf7d |
- (void) __fe_mask_env ();
|
|
|
a2cf7d |
- fesetenv_mode (new.fenv);
|
|
|
a2cf7d |
- ctx->updated_status = true;
|
|
|
a2cf7d |
- }
|
|
|
a2cf7d |
- else
|
|
|
a2cf7d |
- ctx->updated_status = false;
|
|
|
a2cf7d |
+ ctx->env = old.fenv = fegetenv_and_set_rn (r);
|
|
|
a2cf7d |
+ ctx->updated_status = (r != (old.l & FPSCR_RN_MASK));
|
|
|
a2cf7d |
}
|
|
|
a2cf7d |
|
|
|
a2cf7d |
static __always_inline void
|