|
|
dfa500 |
commit e905212627350d54b58426214b5a54ddc852b0c9
|
|
|
dfa500 |
Author: Paul A. Clarke <pc@us.ibm.com>
|
|
|
dfa500 |
Date: Fri Aug 2 22:47:57 2019 -0400
|
|
|
dfa500 |
|
|
|
dfa500 |
[powerpc] SET_RESTORE_ROUND improvements
|
|
|
dfa500 |
|
|
|
dfa500 |
SET_RESTORE_ROUND uses libc_feholdsetround_ppc_ctx and
|
|
|
dfa500 |
libc_feresetround_ppc_ctx to bracket a block of code where the floating point
|
|
|
dfa500 |
rounding mode must be set to a certain value.
|
|
|
dfa500 |
|
|
|
dfa500 |
For the *prologue*, libc_feholdsetround_ppc_ctx is used and performs:
|
|
|
dfa500 |
1. Read/save FPSCR.
|
|
|
dfa500 |
2. Create new value for FPSCR with new rounding mode and enables cleared.
|
|
|
dfa500 |
3. If new value is different than current value,
|
|
|
dfa500 |
a. If transitioning from a state where some exceptions enabled,
|
|
|
dfa500 |
enter "ignore exceptions / non-stop" mode.
|
|
|
dfa500 |
b. Write new value to FPSCR.
|
|
|
dfa500 |
c. Put a mark on the wall indicating the FPSCR was changed.
|
|
|
dfa500 |
|
|
|
dfa500 |
(1) uses the 'mffs' instruction. On POWER9, the lighter weight 'mffsl'
|
|
|
dfa500 |
instruction can be used, but it doesn't return all of the bits in the FPSCR.
|
|
|
dfa500 |
fegetenv_status uses 'mffsl' on POWER9, 'mffs' otherwise, and can thus be
|
|
|
dfa500 |
used instead of fegetenv_register.
|
|
|
dfa500 |
(3b) uses 'mtfsf 0b11111111' to write the entire FPSCR, so it must
|
|
|
dfa500 |
instead use 'mtfsf 0b00000011' to write just the enables and the mode,
|
|
|
dfa500 |
because some of the rest of the bits are not valid if 'mffsl' was used.
|
|
|
dfa500 |
fesetenv_mode uses 'mtfsf 0b00000011' on POWER9, 'mtfsf 0b11111111'
|
|
|
dfa500 |
otherwise.
|
|
|
dfa500 |
|
|
|
dfa500 |
For the *epilogue*, libc_feresetround_ppc_ctx checks the mark on the wall, then
|
|
|
dfa500 |
calls libc_feresetround_ppc, which just calls __libc_femergeenv_ppc with
|
|
|
dfa500 |
parameters such that it performs:
|
|
|
dfa500 |
1. Retrieve saved value of FPSCR, saved in prologue above.
|
|
|
dfa500 |
2. Read FPSCR.
|
|
|
dfa500 |
3. Create new value of FPSCR where:
|
|
|
dfa500 |
- Summary bits and exception indicators = current OR saved.
|
|
|
dfa500 |
- Rounding mode and enables = saved.
|
|
|
dfa500 |
- Status bits = current.
|
|
|
dfa500 |
4. If transitioning from some exceptions enabled to none,
|
|
|
dfa500 |
enter "ignore exceptions / non-stop" mode.
|
|
|
dfa500 |
5. If transitioning from no exceptions enabled to some,
|
|
|
dfa500 |
enter "catch exceptions" mode.
|
|
|
dfa500 |
6. Write new value to FPSCR.
|
|
|
dfa500 |
|
|
|
dfa500 |
The summary bits are hardwired to the exception indicators, so there is no
|
|
|
dfa500 |
need to restore any saved summary bits.
|
|
|
dfa500 |
The exception indicator bits, which are sticky and remain set unless
|
|
|
dfa500 |
explicitly cleared, would only need to be restored if the code block
|
|
|
dfa500 |
might explicitly clear any of them. This is certainly not expected.
|
|
|
dfa500 |
|
|
|
dfa500 |
So, the only bits that need to be restored are the enables and the mode.
|
|
|
dfa500 |
If it is the case that only those bits are to be restored, there is no need to
|
|
|
dfa500 |
read the FPSCR. Steps (2) and (3) are unnecessary, and step (6) only needs to
|
|
|
dfa500 |
write the bits being restored.
|
|
|
dfa500 |
|
|
|
dfa500 |
We know we are transitioning out of "ignore exceptions" mode, so step (4) is
|
|
|
dfa500 |
unnecessary, and in step (5), we only need to check the state we are
|
|
|
dfa500 |
entering.
|
|
|
dfa500 |
|
|
|
dfa500 |
diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
|
|
|
dfa500 |
index 945ab98018450092..b0149aa243e69f5a 100644
|
|
|
dfa500 |
--- a/sysdeps/powerpc/fpu/fenv_private.h
|
|
|
dfa500 |
+++ b/sysdeps/powerpc/fpu/fenv_private.h
|
|
|
dfa500 |
@@ -132,7 +132,17 @@ libc_fesetenv_ppc (const fenv_t *envp)
|
|
|
dfa500 |
static __always_inline void
|
|
|
dfa500 |
libc_feresetround_ppc (fenv_t *envp)
|
|
|
dfa500 |
{
|
|
|
dfa500 |
- __libc_femergeenv_ppc (envp, _FPU_MASK_TRAPS_RN, _FPU_MASK_FRAC_INEX_RET_CC);
|
|
|
dfa500 |
+ fenv_union_t new = { .fenv = *envp };
|
|
|
dfa500 |
+
|
|
|
dfa500 |
+ /* If the old env has no enabled exceptions and the new env has any enabled
|
|
|
dfa500 |
+ exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
|
|
|
dfa500 |
+ hardware into "precise mode" and may cause the FPU to run slower on some
|
|
|
dfa500 |
+ hardware. */
|
|
|
dfa500 |
+ if ((new.l & _FPU_ALL_TRAPS) != 0)
|
|
|
dfa500 |
+ (void) __fe_nomask_env_priv ();
|
|
|
dfa500 |
+
|
|
|
dfa500 |
+ /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
|
|
|
dfa500 |
+ fesetenv_mode (new.fenv);
|
|
|
dfa500 |
}
|
|
|
dfa500 |
|
|
|
dfa500 |
static __always_inline int
|
|
|
dfa500 |
@@ -176,9 +186,30 @@ libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
|
|
|
dfa500 |
{
|
|
|
dfa500 |
fenv_union_t old, new;
|
|
|
dfa500 |
|
|
|
dfa500 |
+ old.fenv = fegetenv_status ();
|
|
|
dfa500 |
+
|
|
|
dfa500 |
+ new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
|
|
|
dfa500 |
+
|
|
|
dfa500 |
+ ctx->env = old.fenv;
|
|
|
dfa500 |
+ if (__glibc_unlikely (new.l != old.l))
|
|
|
dfa500 |
+ {
|
|
|
dfa500 |
+ if ((old.l & _FPU_ALL_TRAPS) != 0)
|
|
|
dfa500 |
+ (void) __fe_mask_env ();
|
|
|
dfa500 |
+ fesetenv_mode (new.fenv);
|
|
|
dfa500 |
+ ctx->updated_status = true;
|
|
|
dfa500 |
+ }
|
|
|
dfa500 |
+ else
|
|
|
dfa500 |
+ ctx->updated_status = false;
|
|
|
dfa500 |
+}
|
|
|
dfa500 |
+
|
|
|
dfa500 |
+static __always_inline void
|
|
|
dfa500 |
+libc_feholdsetround_noex_ppc_ctx (struct rm_ctx *ctx, int r)
|
|
|
dfa500 |
+{
|
|
|
dfa500 |
+ fenv_union_t old, new;
|
|
|
dfa500 |
+
|
|
|
dfa500 |
old.fenv = fegetenv_register ();
|
|
|
dfa500 |
|
|
|
dfa500 |
- new.l = (old.l & _FPU_MASK_TRAPS_RN) | r;
|
|
|
dfa500 |
+ new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
|
|
|
dfa500 |
|
|
|
dfa500 |
ctx->env = old.fenv;
|
|
|
dfa500 |
if (__glibc_unlikely (new.l != old.l))
|
|
|
dfa500 |
@@ -218,6 +249,9 @@ libc_feresetround_ppc_ctx (struct rm_ctx *ctx)
|
|
|
dfa500 |
#define libc_feholdsetround_ctx libc_feholdsetround_ppc_ctx
|
|
|
dfa500 |
#define libc_feholdsetroundf_ctx libc_feholdsetround_ppc_ctx
|
|
|
dfa500 |
#define libc_feholdsetroundl_ctx libc_feholdsetround_ppc_ctx
|
|
|
dfa500 |
+#define libc_feholdsetround_noex_ctx libc_feholdsetround_noex_ppc_ctx
|
|
|
dfa500 |
+#define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ppc_ctx
|
|
|
dfa500 |
+#define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ppc_ctx
|
|
|
dfa500 |
#define libc_feresetround_ctx libc_feresetround_ppc_ctx
|
|
|
dfa500 |
#define libc_feresetroundf_ctx libc_feresetround_ppc_ctx
|
|
|
dfa500 |
#define libc_feresetroundl_ctx libc_feresetround_ppc_ctx
|