|
|
7c0489 |
commit 3c1766ea10043f2e9625f3cba3bda37c84b32cf0
|
|
|
7c0489 |
Author: Paul A. Clarke <pc@us.ibm.com>
|
|
|
7c0489 |
Date: Thu Jul 18 19:37:13 2019 -0500
|
|
|
7c0489 |
|
|
|
7c0489 |
[powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses
|
|
|
7c0489 |
|
|
|
7c0489 |
Since fe{en,dis}ableexcept() and fesetmode() read-modify-write just the
|
|
|
7c0489 |
"mode" (exception enable and rounding mode) bits of the Floating Point Status
|
|
|
7c0489 |
and Control Register (FPSCR), the lighter weight 'mffsl' instruction can be used
|
|
|
7c0489 |
to read the FPSCR (enables and rounding mode), and 'mtfsf 0b00000011' can be
|
|
|
7c0489 |
used to write just those bits back to the FPSCR. The net result is better performance.
|
|
|
7c0489 |
|
|
|
7c0489 |
In addition, fe{en,dis}ableexcept() read the FPSCR again after writing it, or
|
|
|
7c0489 |
they determine that it doesn't need to be written because it is not changing.
|
|
|
7c0489 |
In either case, the local variable holds the current values of the enable
|
|
|
7c0489 |
bits in the FPSCR. This local variable can be used instead of again reading
|
|
|
7c0489 |
the FPSCR.
|
|
|
7c0489 |
|
|
|
7c0489 |
Also, the value of the FPSCR that is read the second time is validated
|
|
|
7c0489 |
against the requested enables. Since the write can't fail, this validation
|
|
|
7c0489 |
step is unnecessary, and can be removed. Instead, the exceptions to be
|
|
|
7c0489 |
enabled (or disabled) are transformed into available bits in the FPSCR,
|
|
|
7c0489 |
then validated after being transformed back, to ensure that all requested
|
|
|
7c0489 |
bits are actually being set. For example, FE_INVALID_SQRT can be
|
|
|
7c0489 |
requested, but cannot actually be set. This bit is not mapped during the
|
|
|
7c0489 |
transformations, so a test for that bit being set before and after
|
|
|
7c0489 |
transformations will show the bit would not be set, and the function will
|
|
|
7c0489 |
return -1 for failure.
|
|
|
7c0489 |
|
|
|
7c0489 |
Finally, convert the local macros in fesetmode.c to more generally useful
|
|
|
7c0489 |
macros in fenv_libc.h.
|
|
|
7c0489 |
|
|
|
7c0489 |
diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
|
|
|
7c0489 |
index 90bc3d12c6d8558c..2a776c72fb5a2b70 100644
|
|
|
7c0489 |
--- a/sysdeps/powerpc/fpu/fedisblxcpt.c
|
|
|
7c0489 |
+++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
|
|
|
7c0489 |
@@ -26,23 +26,25 @@ fedisableexcept (int excepts)
|
|
|
7c0489 |
int result, new;
|
|
|
7c0489 |
|
|
|
7c0489 |
/* Get current exception mask to return. */
|
|
|
7c0489 |
- fe.fenv = curr.fenv = fegetenv_register ();
|
|
|
7c0489 |
+ fe.fenv = curr.fenv = fegetenv_status ();
|
|
|
7c0489 |
result = fenv_reg_to_exceptions (fe.l);
|
|
|
7c0489 |
|
|
|
7c0489 |
if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
|
|
|
7c0489 |
excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
|
|
|
7c0489 |
|
|
|
7c0489 |
+ new = fenv_exceptions_to_reg (excepts);
|
|
|
7c0489 |
+
|
|
|
7c0489 |
+ if (fenv_reg_to_exceptions (new) != excepts)
|
|
|
7c0489 |
+ return -1;
|
|
|
7c0489 |
+
|
|
|
7c0489 |
/* Sets the new exception mask. */
|
|
|
7c0489 |
- fe.l &= ~ fenv_exceptions_to_reg (excepts);
|
|
|
7c0489 |
+ fe.l &= ~new;
|
|
|
7c0489 |
|
|
|
7c0489 |
if (fe.l != curr.l)
|
|
|
7c0489 |
- fesetenv_register (fe.fenv);
|
|
|
7c0489 |
+ fesetenv_mode (fe.fenv);
|
|
|
7c0489 |
|
|
|
7c0489 |
- new = __fegetexcept ();
|
|
|
7c0489 |
if (new == 0 && result != 0)
|
|
|
7c0489 |
(void)__fe_mask_env ();
|
|
|
7c0489 |
|
|
|
7c0489 |
- if ((new & excepts) != 0)
|
|
|
7c0489 |
- result = -1;
|
|
|
7c0489 |
return result;
|
|
|
7c0489 |
}
|
|
|
7c0489 |
diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
|
|
|
7c0489 |
index e029971b9a460c28..6f5a828e80965bfa 100644
|
|
|
7c0489 |
--- a/sysdeps/powerpc/fpu/feenablxcpt.c
|
|
|
7c0489 |
+++ b/sysdeps/powerpc/fpu/feenablxcpt.c
|
|
|
7c0489 |
@@ -26,24 +26,25 @@ feenableexcept (int excepts)
|
|
|
7c0489 |
int result, new;
|
|
|
7c0489 |
|
|
|
7c0489 |
/* Get current exception mask to return. */
|
|
|
7c0489 |
- fe.fenv = curr.fenv = fegetenv_register ();
|
|
|
7c0489 |
+ fe.fenv = curr.fenv = fegetenv_status ();
|
|
|
7c0489 |
result = fenv_reg_to_exceptions (fe.l);
|
|
|
7c0489 |
|
|
|
7c0489 |
if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
|
|
|
7c0489 |
excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
|
|
|
7c0489 |
|
|
|
7c0489 |
+ new = fenv_exceptions_to_reg (excepts);
|
|
|
7c0489 |
+
|
|
|
7c0489 |
+ if (fenv_reg_to_exceptions (new) != excepts)
|
|
|
7c0489 |
+ return -1;
|
|
|
7c0489 |
+
|
|
|
7c0489 |
/* Sets the new exception mask. */
|
|
|
7c0489 |
- fe.l |= fenv_exceptions_to_reg (excepts);
|
|
|
7c0489 |
+ fe.l |= new;
|
|
|
7c0489 |
|
|
|
7c0489 |
if (fe.l != curr.l)
|
|
|
7c0489 |
- fesetenv_register (fe.fenv);
|
|
|
7c0489 |
+ fesetenv_mode (fe.fenv);
|
|
|
7c0489 |
|
|
|
7c0489 |
- new = __fegetexcept ();
|
|
|
7c0489 |
if (new != 0 && result == 0)
|
|
|
7c0489 |
(void) __fe_nomask_env_priv ();
|
|
|
7c0489 |
|
|
|
7c0489 |
- if ((new & excepts) != excepts)
|
|
|
7c0489 |
- result = -1;
|
|
|
7c0489 |
-
|
|
|
7c0489 |
return result;
|
|
|
7c0489 |
}
|
|
|
7c0489 |
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
|
|
|
7c0489 |
index f9634a64d186c076..b244770d115ea7bb 100644
|
|
|
7c0489 |
--- a/sysdeps/powerpc/fpu/fenv_libc.h
|
|
|
7c0489 |
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
|
|
|
7c0489 |
@@ -71,6 +71,11 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
|
|
|
7c0489 |
asm volatile ("mtfsf 0xff,%0" : : "f" (d)); \
|
|
|
7c0489 |
} while(0)
|
|
|
7c0489 |
|
|
|
7c0489 |
+/* Set the last 2 nibbles of the FPSCR, which contain the
|
|
|
7c0489 |
+ exception enables and the rounding mode.
|
|
|
7c0489 |
+ 'fegetenv_status' retrieves these bits by reading the FPSCR. */
|
|
|
7c0489 |
+#define fesetenv_mode(env) __builtin_mtfsf (0b00000011, (env));
|
|
|
7c0489 |
+
|
|
|
7c0489 |
/* This very handy macro:
|
|
|
7c0489 |
- Sets the rounding mode to 'round to nearest';
|
|
|
7c0489 |
- Sets the processor into IEEE mode; and
|
|
|
7c0489 |
@@ -209,8 +214,11 @@ enum {
|
|
|
7c0489 |
(FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
|
|
|
7c0489 |
#define FPSCR_BASIC_EXCEPTIONS_MASK \
|
|
|
7c0489 |
(FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
|
|
|
7c0489 |
-
|
|
|
7c0489 |
+#define FPSCR_FPRF_MASK \
|
|
|
7c0489 |
+ (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
|
|
|
7c0489 |
+ FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
|
|
|
7c0489 |
#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
|
|
|
7c0489 |
+#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
|
|
|
7c0489 |
|
|
|
7c0489 |
/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
|
|
|
7c0489 |
in the FPSCR, albeit shifted to different but corresponding locations.
|
|
|
7c0489 |
diff --git a/sysdeps/powerpc/fpu/fesetmode.c b/sysdeps/powerpc/fpu/fesetmode.c
|
|
|
7c0489 |
index 32203a24ff434a32..29e088d5ab1c0d93 100644
|
|
|
7c0489 |
--- a/sysdeps/powerpc/fpu/fesetmode.c
|
|
|
7c0489 |
+++ b/sysdeps/powerpc/fpu/fesetmode.c
|
|
|
7c0489 |
@@ -19,11 +19,6 @@
|
|
|
7c0489 |
#include <fenv_libc.h>
|
|
|
7c0489 |
#include <fpu_control.h>
|
|
|
7c0489 |
|
|
|
7c0489 |
-#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \
|
|
|
7c0489 |
- | _FPU_MASK_XM | _FPU_MASK_IM)
|
|
|
7c0489 |
-
|
|
|
7c0489 |
-#define FPU_STATUS 0xbffff700ULL
|
|
|
7c0489 |
-
|
|
|
7c0489 |
int
|
|
|
7c0489 |
fesetmode (const femode_t *modep)
|
|
|
7c0489 |
{
|
|
|
7c0489 |
@@ -32,18 +27,18 @@ fesetmode (const femode_t *modep)
|
|
|
7c0489 |
/* Logic regarding enabled exceptions as in fesetenv. */
|
|
|
7c0489 |
|
|
|
7c0489 |
new.fenv = *modep;
|
|
|
7c0489 |
- old.fenv = fegetenv_register ();
|
|
|
7c0489 |
- new.l = (new.l & ~FPU_STATUS) | (old.l & FPU_STATUS);
|
|
|
7c0489 |
+ old.fenv = fegetenv_status ();
|
|
|
7c0489 |
+ new.l = (new.l & ~FPSCR_STATUS_MASK) | (old.l & FPSCR_STATUS_MASK);
|
|
|
7c0489 |
|
|
|
7c0489 |
if (old.l == new.l)
|
|
|
7c0489 |
return 0;
|
|
|
7c0489 |
|
|
|
7c0489 |
- if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
|
|
|
7c0489 |
+ if ((old.l & FPSCR_ENABLES_MASK) == 0 && (new.l & FPSCR_ENABLES_MASK) != 0)
|
|
|
7c0489 |
(void) __fe_nomask_env_priv ();
|
|
|
7c0489 |
|
|
|
7c0489 |
- if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
|
|
|
7c0489 |
+ if ((old.l & FPSCR_ENABLES_MASK) != 0 && (new.l & FPSCR_ENABLES_MASK) == 0)
|
|
|
7c0489 |
(void) __fe_mask_env ();
|
|
|
7c0489 |
|
|
|
7c0489 |
- fesetenv_register (new.fenv);
|
|
|
7c0489 |
+ fesetenv_mode (new.fenv);
|
|
|
7c0489 |
return 0;
|
|
|
7c0489 |
}
|