a2cf7d
commit 3c1766ea10043f2e9625f3cba3bda37c84b32cf0
a2cf7d
Author: Paul A. Clarke <pc@us.ibm.com>
a2cf7d
Date:   Thu Jul 18 19:37:13 2019 -0500
a2cf7d
a2cf7d
    [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses
a2cf7d
    
a2cf7d
    Since fe{en,dis}ableexcept() and fesetmode() read-modify-write just the
a2cf7d
    "mode" (exception enable and rounding mode) bits of the Floating Point Status
a2cf7d
    Control Register (FPSCR), the lighter weight 'mffsl' instruction can be used
a2cf7d
    to read the FPSCR (enables and rounding mode), and 'mtfsf 0b00000011' can be
a2cf7d
    used to write just those bits back to the FPSCR.  The net result is better performance.
a2cf7d
    
a2cf7d
    In addition, fe{en,dis}ableexcept() read the FPSCR again after writing it, or
a2cf7d
    they determine that it doesn't need to be written because it is not changing.
a2cf7d
    In either case, the local variable holds the current values of the enable
a2cf7d
    bits in the FPSCR.  This local variable can be used instead of again reading
a2cf7d
    the FPSCR.
a2cf7d
    
a2cf7d
    Also, the value of the FPSCR that is read the second time is validated
a2cf7d
    against the requested enables.  Since the write can't fail, this validation
a2cf7d
    step is unnecessary, and can be removed.  Instead, the exceptions to be
a2cf7d
    enabled (or disabled) are transformed into available bits in the FPSCR,
a2cf7d
    then validated after being transformed back, to ensure that all requested
a2cf7d
    bits are actually being set.  For example, FE_INVALID_SQRT can be
a2cf7d
    requested, but cannot actually be set.  This bit is not mapped during the
a2cf7d
    transformations, so a test for that bit being set before and after
a2cf7d
    transformations will show the bit would not be set, and the function will
a2cf7d
    return -1 for failure.
a2cf7d
    
a2cf7d
    Finally, convert the local macros in fesetmode.c to more generally useful
a2cf7d
    macros in fenv_libc.h.
a2cf7d
a2cf7d
diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
a2cf7d
index 90bc3d12c6d8558c..2a776c72fb5a2b70 100644
a2cf7d
--- a/sysdeps/powerpc/fpu/fedisblxcpt.c
a2cf7d
+++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
a2cf7d
@@ -26,23 +26,25 @@ fedisableexcept (int excepts)
a2cf7d
   int result, new;
a2cf7d
 
a2cf7d
   /* Get current exception mask to return.  */
a2cf7d
-  fe.fenv = curr.fenv = fegetenv_register ();
a2cf7d
+  fe.fenv = curr.fenv = fegetenv_status ();
a2cf7d
   result = fenv_reg_to_exceptions (fe.l);
a2cf7d
 
a2cf7d
   if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
a2cf7d
     excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
a2cf7d
 
a2cf7d
+  new = fenv_exceptions_to_reg (excepts);
a2cf7d
+
a2cf7d
+  if (fenv_reg_to_exceptions (new) != excepts)
a2cf7d
+    return -1;
a2cf7d
+
a2cf7d
   /* Sets the new exception mask.  */
a2cf7d
-  fe.l &= ~ fenv_exceptions_to_reg (excepts);
a2cf7d
+  fe.l &= ~new;
a2cf7d
 
a2cf7d
   if (fe.l != curr.l)
a2cf7d
-    fesetenv_register (fe.fenv);
a2cf7d
+    fesetenv_mode (fe.fenv);
a2cf7d
 
a2cf7d
-  new = __fegetexcept ();
a2cf7d
   if (new == 0 && result != 0)
a2cf7d
     (void)__fe_mask_env ();
a2cf7d
 
a2cf7d
-  if ((new & excepts) != 0)
a2cf7d
-    result = -1;
a2cf7d
   return result;
a2cf7d
 }
a2cf7d
diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
a2cf7d
index e029971b9a460c28..6f5a828e80965bfa 100644
a2cf7d
--- a/sysdeps/powerpc/fpu/feenablxcpt.c
a2cf7d
+++ b/sysdeps/powerpc/fpu/feenablxcpt.c
a2cf7d
@@ -26,24 +26,25 @@ feenableexcept (int excepts)
a2cf7d
   int result, new;
a2cf7d
 
a2cf7d
   /* Get current exception mask to return.  */
a2cf7d
-  fe.fenv = curr.fenv = fegetenv_register ();
a2cf7d
+  fe.fenv = curr.fenv = fegetenv_status ();
a2cf7d
   result = fenv_reg_to_exceptions (fe.l);
a2cf7d
 
a2cf7d
   if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
a2cf7d
     excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
a2cf7d
 
a2cf7d
+  new = fenv_exceptions_to_reg (excepts);
a2cf7d
+
a2cf7d
+  if (fenv_reg_to_exceptions (new) != excepts)
a2cf7d
+    return -1;
a2cf7d
+
a2cf7d
   /* Sets the new exception mask.  */
a2cf7d
-  fe.l |= fenv_exceptions_to_reg (excepts);
a2cf7d
+  fe.l |= new;
a2cf7d
 
a2cf7d
   if (fe.l != curr.l)
a2cf7d
-    fesetenv_register (fe.fenv);
a2cf7d
+    fesetenv_mode (fe.fenv);
a2cf7d
 
a2cf7d
-  new = __fegetexcept ();
a2cf7d
   if (new != 0 && result == 0)
a2cf7d
     (void) __fe_nomask_env_priv ();
a2cf7d
 
a2cf7d
-  if ((new & excepts) != excepts)
a2cf7d
-    result = -1;
a2cf7d
-
a2cf7d
   return result;
a2cf7d
 }
a2cf7d
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
a2cf7d
index f9634a64d186c076..b244770d115ea7bb 100644
a2cf7d
--- a/sysdeps/powerpc/fpu/fenv_libc.h
a2cf7d
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
a2cf7d
@@ -71,6 +71,11 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
a2cf7d
 	    asm volatile ("mtfsf 0xff,%0" : : "f" (d)); \
a2cf7d
 	} while(0)
a2cf7d
 
a2cf7d
+/* Set the last 2 nibbles of the FPSCR, which contain the exception
a2cf7d
+   enables and the rounding mode.  Expands to an expression, so the
a2cf7d
+   caller adds the ';'.  'fegetenv_status' retrieves these bits.  */
a2cf7d
+#define fesetenv_mode(env) __builtin_mtfsf (0b00000011, (env))
a2cf7d
+
a2cf7d
 /* This very handy macro:
a2cf7d
    - Sets the rounding mode to 'round to nearest';
a2cf7d
    - Sets the processor into IEEE mode; and
a2cf7d
@@ -209,8 +214,11 @@ enum {
a2cf7d
   (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
a2cf7d
 #define FPSCR_BASIC_EXCEPTIONS_MASK \
a2cf7d
   (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
a2cf7d
-
a2cf7d
+#define FPSCR_FPRF_MASK \
a2cf7d
+  (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
a2cf7d
+   FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
a2cf7d
 #define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
a2cf7d
+#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
a2cf7d
 
a2cf7d
 /* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
a2cf7d
    in the FPSCR, albeit shifted to different but corresponding locations.
a2cf7d
diff --git a/sysdeps/powerpc/fpu/fesetmode.c b/sysdeps/powerpc/fpu/fesetmode.c
a2cf7d
index 32203a24ff434a32..29e088d5ab1c0d93 100644
a2cf7d
--- a/sysdeps/powerpc/fpu/fesetmode.c
a2cf7d
+++ b/sysdeps/powerpc/fpu/fesetmode.c
a2cf7d
@@ -19,11 +19,6 @@
a2cf7d
 #include <fenv_libc.h>
a2cf7d
 #include <fpu_control.h>
a2cf7d
 
a2cf7d
-#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM	\
a2cf7d
-		       | _FPU_MASK_XM | _FPU_MASK_IM)
a2cf7d
-
a2cf7d
-#define FPU_STATUS 0xbffff700ULL
a2cf7d
-
a2cf7d
 int
a2cf7d
 fesetmode (const femode_t *modep)
a2cf7d
 {
a2cf7d
@@ -32,18 +27,18 @@ fesetmode (const femode_t *modep)
a2cf7d
   /* Logic regarding enabled exceptions as in fesetenv.  */
a2cf7d
 
a2cf7d
   new.fenv = *modep;
a2cf7d
-  old.fenv = fegetenv_register ();
a2cf7d
-  new.l = (new.l & ~FPU_STATUS) | (old.l & FPU_STATUS);
a2cf7d
+  old.fenv = fegetenv_status ();
a2cf7d
+  new.l = (new.l & ~FPSCR_STATUS_MASK) | (old.l & FPSCR_STATUS_MASK);
a2cf7d
 
a2cf7d
   if (old.l == new.l)
a2cf7d
     return 0;
a2cf7d
 
a2cf7d
-  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
a2cf7d
+  if ((old.l & FPSCR_ENABLES_MASK) == 0 && (new.l & FPSCR_ENABLES_MASK) != 0)
a2cf7d
     (void) __fe_nomask_env_priv ();
a2cf7d
 
a2cf7d
-  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
a2cf7d
+  if ((old.l & FPSCR_ENABLES_MASK) != 0 && (new.l & FPSCR_ENABLES_MASK) == 0)
a2cf7d
     (void) __fe_mask_env ();
a2cf7d
 
a2cf7d
-  fesetenv_register (new.fenv);
a2cf7d
+  fesetenv_mode (new.fenv);
a2cf7d
   return 0;
a2cf7d
 }