|
|
a2cf7d |
commit e905212627350d54b58426214b5a54ddc852b0c9
|
|
|
a2cf7d |
Author: Paul A. Clarke <pc@us.ibm.com>
|
|
|
a2cf7d |
Date: Fri Aug 2 22:47:57 2019 -0400
|
|
|
a2cf7d |
|
|
|
a2cf7d |
[powerpc] SET_RESTORE_ROUND improvements
|
|
|
a2cf7d |
|
|
|
a2cf7d |
SET_RESTORE_ROUND uses libc_feholdsetround_ppc_ctx and
|
|
|
a2cf7d |
libc_feresetround_ppc_ctx to bracket a block of code where the floating point
|
|
|
a2cf7d |
rounding mode must be set to a certain value.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
For the *prologue*, libc_feholdsetround_ppc_ctx is used and performs:
|
|
|
a2cf7d |
1. Read/save FPSCR.
|
|
|
a2cf7d |
2. Create new value for FPSCR with new rounding mode and enables cleared.
|
|
|
a2cf7d |
3. If new value is different than current value,
|
|
|
a2cf7d |
a. If transitioning from a state where some exceptions enabled,
|
|
|
a2cf7d |
enter "ignore exceptions / non-stop" mode.
|
|
|
a2cf7d |
b. Write new value to FPSCR.
|
|
|
a2cf7d |
c. Put a mark on the wall indicating the FPSCR was changed.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
(1) uses the 'mffs' instruction. On POWER9, the lighter weight 'mffsl'
|
|
|
a2cf7d |
instruction can be used, but it doesn't return all of the bits in the FPSCR.
|
|
|
a2cf7d |
fegetenv_status uses 'mffsl' on POWER9, 'mffs' otherwise, and can thus be
|
|
|
a2cf7d |
used instead of fegetenv_register.
|
|
|
a2cf7d |
(3b) uses 'mtfsf 0b11111111' to write the entire FPSCR, so it must
|
|
|
a2cf7d |
instead use 'mtfsf 0b00000011' to write just the enables and the mode,
|
|
|
a2cf7d |
because some of the rest of the bits are not valid if 'mffsl' was used.
|
|
|
a2cf7d |
fesetenv_mode uses 'mtfsf 0b00000011' on POWER9, 'mtfsf 0b11111111'
|
|
|
a2cf7d |
otherwise.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
For the *epilogue*, libc_feresetround_ppc_ctx checks the mark on the wall, then
|
|
|
a2cf7d |
calls libc_feresetround_ppc, which just calls __libc_femergeenv_ppc with
|
|
|
a2cf7d |
parameters such that it performs:
|
|
|
a2cf7d |
1. Retrieve saved value of FPSCR, saved in prologue above.
|
|
|
a2cf7d |
2. Read FPSCR.
|
|
|
a2cf7d |
3. Create new value of FPSCR where:
|
|
|
a2cf7d |
- Summary bits and exception indicators = current OR saved.
|
|
|
a2cf7d |
- Rounding mode and enables = saved.
|
|
|
a2cf7d |
- Status bits = current.
|
|
|
a2cf7d |
4. If transitioning from some exceptions enabled to none,
|
|
|
a2cf7d |
enter "ignore exceptions / non-stop" mode.
|
|
|
a2cf7d |
5. If transitioning from no exceptions enabled to some,
|
|
|
a2cf7d |
enter "catch exceptions" mode.
|
|
|
a2cf7d |
6. Write new value to FPSCR.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
The summary bits are hardwired to the exception indicators, so there is no
|
|
|
a2cf7d |
need to restore any saved summary bits.
|
|
|
a2cf7d |
The exception indicator bits, which are sticky and remain set unless
|
|
|
a2cf7d |
explicitly cleared, would only need to be restored if the code block
|
|
|
a2cf7d |
might explicitly clear any of them. This is certainly not expected.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
So, the only bits that need to be restored are the enables and the mode.
|
|
|
a2cf7d |
If it is the case that only those bits are to be restored, there is no need to
|
|
|
a2cf7d |
read the FPSCR. Steps (2) and (3) are unnecessary, and step (6) only needs to
|
|
|
a2cf7d |
write the bits being restored.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
We know we are transitioning out of "ignore exceptions" mode, so step (4) is
|
|
|
a2cf7d |
unnecessary, and in step (5), we only need to check the state we are
|
|
|
a2cf7d |
entering.
|
|
|
a2cf7d |
|
|
|
a2cf7d |
diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
|
|
|
a2cf7d |
index 945ab98018450092..b0149aa243e69f5a 100644
|
|
|
a2cf7d |
--- a/sysdeps/powerpc/fpu/fenv_private.h
|
|
|
a2cf7d |
+++ b/sysdeps/powerpc/fpu/fenv_private.h
|
|
|
a2cf7d |
@@ -132,7 +132,17 @@ libc_fesetenv_ppc (const fenv_t *envp)
|
|
|
a2cf7d |
static __always_inline void
|
|
|
a2cf7d |
libc_feresetround_ppc (fenv_t *envp)
|
|
|
a2cf7d |
{
|
|
|
a2cf7d |
- __libc_femergeenv_ppc (envp, _FPU_MASK_TRAPS_RN, _FPU_MASK_FRAC_INEX_RET_CC);
|
|
|
a2cf7d |
+ fenv_union_t new = { .fenv = *envp };
|
|
|
a2cf7d |
+
|
|
|
a2cf7d |
+ /* If the old env has no enabled exceptions and the new env has any enabled
|
|
|
a2cf7d |
+ exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
|
|
|
a2cf7d |
+ hardware into "precise mode" and may cause the FPU to run slower on some
|
|
|
a2cf7d |
+ hardware. */
|
|
|
a2cf7d |
+ if ((new.l & _FPU_ALL_TRAPS) != 0)
|
|
|
a2cf7d |
+ (void) __fe_nomask_env_priv ();
|
|
|
a2cf7d |
+
|
|
|
a2cf7d |
+ /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
|
|
|
a2cf7d |
+ fesetenv_mode (new.fenv);
|
|
|
a2cf7d |
}
|
|
|
a2cf7d |
|
|
|
a2cf7d |
static __always_inline int
|
|
|
a2cf7d |
@@ -176,9 +186,30 @@ libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
|
|
|
a2cf7d |
{
|
|
|
a2cf7d |
fenv_union_t old, new;
|
|
|
a2cf7d |
|
|
|
a2cf7d |
+ old.fenv = fegetenv_status ();
|
|
|
a2cf7d |
+
|
|
|
a2cf7d |
+ new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
|
|
|
a2cf7d |
+
|
|
|
a2cf7d |
+ ctx->env = old.fenv;
|
|
|
a2cf7d |
+ if (__glibc_unlikely (new.l != old.l))
|
|
|
a2cf7d |
+ {
|
|
|
a2cf7d |
+ if ((old.l & _FPU_ALL_TRAPS) != 0)
|
|
|
a2cf7d |
+ (void) __fe_mask_env ();
|
|
|
a2cf7d |
+ fesetenv_mode (new.fenv);
|
|
|
a2cf7d |
+ ctx->updated_status = true;
|
|
|
a2cf7d |
+ }
|
|
|
a2cf7d |
+ else
|
|
|
a2cf7d |
+ ctx->updated_status = false;
|
|
|
a2cf7d |
+}
|
|
|
a2cf7d |
+
|
|
|
a2cf7d |
+static __always_inline void
|
|
|
a2cf7d |
+libc_feholdsetround_noex_ppc_ctx (struct rm_ctx *ctx, int r)
|
|
|
a2cf7d |
+{
|
|
|
a2cf7d |
+ fenv_union_t old, new;
|
|
|
a2cf7d |
+
|
|
|
a2cf7d |
old.fenv = fegetenv_register ();
|
|
|
a2cf7d |
|
|
|
a2cf7d |
- new.l = (old.l & _FPU_MASK_TRAPS_RN) | r;
|
|
|
a2cf7d |
+ new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
|
|
|
a2cf7d |
|
|
|
a2cf7d |
ctx->env = old.fenv;
|
|
|
a2cf7d |
if (__glibc_unlikely (new.l != old.l))
|
|
|
a2cf7d |
@@ -218,6 +249,9 @@ libc_feresetround_ppc_ctx (struct rm_ctx *ctx)
|
|
|
a2cf7d |
#define libc_feholdsetround_ctx libc_feholdsetround_ppc_ctx
|
|
|
a2cf7d |
#define libc_feholdsetroundf_ctx libc_feholdsetround_ppc_ctx
|
|
|
a2cf7d |
#define libc_feholdsetroundl_ctx libc_feholdsetround_ppc_ctx
|
|
|
a2cf7d |
+#define libc_feholdsetround_noex_ctx libc_feholdsetround_noex_ppc_ctx
|
|
|
a2cf7d |
+#define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ppc_ctx
|
|
|
a2cf7d |
+#define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ppc_ctx
|
|
|
a2cf7d |
#define libc_feresetround_ctx libc_feresetround_ppc_ctx
|
|
|
a2cf7d |
#define libc_feresetroundf_ctx libc_feresetround_ppc_ctx
|
|
|
a2cf7d |
#define libc_feresetroundl_ctx libc_feresetround_ppc_ctx
|