dfa500
commit 3c1766ea10043f2e9625f3cba3bda37c84b32cf0
dfa500
Author: Paul A. Clarke <pc@us.ibm.com>
dfa500
Date:   Thu Jul 18 19:37:13 2019 -0500
dfa500
dfa500
    [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses
dfa500
    
dfa500
    Since fe{en,dis}ableexcept() and fesetmode() read-modify-write just the
dfa500
    "mode" (exception enable and rounding mode) bits of the Floating Point Status
dfa500
    and Control Register (FPSCR), the lighter-weight 'mffsl' instruction can be used
dfa500
    to read the FPSCR (enables and rounding mode), and 'mtfsf 0b00000011' can be
dfa500
    used to write just those bits back to the FPSCR.  The net result is better performance.
dfa500
    
dfa500
    In addition, fe{en,dis}ableexcept() read the FPSCR again after writing it, or
dfa500
    they determine that it doesn't need to be written because it is not changing.
dfa500
    In either case, the local variable holds the current values of the enable
dfa500
    bits in the FPSCR.  This local variable can be used instead of again reading
dfa500
    the FPSCR.
dfa500
    
dfa500
    Also, the value of the FPSCR that is read the second time is validated
dfa500
    against the requested enables.  Since the write can't fail, this validation
dfa500
    step is unnecessary, and can be removed.  Instead, the exceptions to be
dfa500
    enabled (or disabled) are transformed into available bits in the FPSCR,
dfa500
    then validated after being transformed back, to ensure that all requested
dfa500
    bits are actually being set.  For example, FE_INVALID_SQRT can be
dfa500
    requested, but cannot actually be set.  This bit is not mapped during the
dfa500
    transformations, so a test for that bit being set before and after
dfa500
    transformations will show the bit would not be set, and the function will
dfa500
    return -1 for failure.
dfa500
    
dfa500
    Finally, convert the local macros in fesetmode.c to more generally useful
dfa500
    macros in fenv_libc.h.
dfa500
dfa500
diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
dfa500
index 90bc3d12c6d8558c..2a776c72fb5a2b70 100644
dfa500
--- a/sysdeps/powerpc/fpu/fedisblxcpt.c
dfa500
+++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
dfa500
@@ -26,23 +26,25 @@ fedisableexcept (int excepts)
dfa500
   int result, new;
dfa500
 
dfa500
   /* Get current exception mask to return.  */
dfa500
-  fe.fenv = curr.fenv = fegetenv_register ();
dfa500
+  fe.fenv = curr.fenv = fegetenv_status ();
dfa500
   result = fenv_reg_to_exceptions (fe.l);
dfa500
 
dfa500
   if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
dfa500
     excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
dfa500
 
dfa500
+  new = fenv_exceptions_to_reg (excepts);
dfa500
+
dfa500
+  if (fenv_reg_to_exceptions (new) != excepts)
dfa500
+    return -1;
dfa500
+
dfa500
   /* Sets the new exception mask.  */
dfa500
-  fe.l &= ~ fenv_exceptions_to_reg (excepts);
dfa500
+  fe.l &= ~new;
dfa500
 
dfa500
   if (fe.l != curr.l)
dfa500
-    fesetenv_register (fe.fenv);
dfa500
+    fesetenv_mode (fe.fenv);
dfa500
 
dfa500
-  new = __fegetexcept ();
dfa500
   if (new == 0 && result != 0)
dfa500
     (void)__fe_mask_env ();
dfa500
 
dfa500
-  if ((new & excepts) != 0)
dfa500
-    result = -1;
dfa500
   return result;
dfa500
 }
dfa500
diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
dfa500
index e029971b9a460c28..6f5a828e80965bfa 100644
dfa500
--- a/sysdeps/powerpc/fpu/feenablxcpt.c
dfa500
+++ b/sysdeps/powerpc/fpu/feenablxcpt.c
dfa500
@@ -26,24 +26,25 @@ feenableexcept (int excepts)
dfa500
   int result, new;
dfa500
 
dfa500
   /* Get current exception mask to return.  */
dfa500
-  fe.fenv = curr.fenv = fegetenv_register ();
dfa500
+  fe.fenv = curr.fenv = fegetenv_status ();
dfa500
   result = fenv_reg_to_exceptions (fe.l);
dfa500
 
dfa500
   if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
dfa500
     excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
dfa500
 
dfa500
+  new = fenv_exceptions_to_reg (excepts);
dfa500
+
dfa500
+  if (fenv_reg_to_exceptions (new) != excepts)
dfa500
+    return -1;
dfa500
+
dfa500
   /* Sets the new exception mask.  */
dfa500
-  fe.l |= fenv_exceptions_to_reg (excepts);
dfa500
+  fe.l |= new;
dfa500
 
dfa500
   if (fe.l != curr.l)
dfa500
-    fesetenv_register (fe.fenv);
dfa500
+    fesetenv_mode (fe.fenv);
dfa500
 
dfa500
-  new = __fegetexcept ();
dfa500
   if (new != 0 && result == 0)
dfa500
     (void) __fe_nomask_env_priv ();
dfa500
 
dfa500
-  if ((new & excepts) != excepts)
dfa500
-    result = -1;
dfa500
-
dfa500
   return result;
dfa500
 }
dfa500
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
dfa500
index f9634a64d186c076..b244770d115ea7bb 100644
dfa500
--- a/sysdeps/powerpc/fpu/fenv_libc.h
dfa500
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
dfa500
@@ -71,6 +71,11 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
dfa500
 	    asm volatile ("mtfsf 0xff,%0" : : "f" (d)); \
dfa500
 	} while(0)
dfa500
 
dfa500
+/* Set the last 2 nibbles of the FPSCR, which contain the
dfa500
+   exception enables and the rounding mode.
dfa500
+   'fegetenv_status' retrieves these bits by reading the FPSCR.  */
dfa500
+#define fesetenv_mode(env) __builtin_mtfsf (0b00000011, (env));
dfa500
+
dfa500
 /* This very handy macro:
dfa500
    - Sets the rounding mode to 'round to nearest';
dfa500
    - Sets the processor into IEEE mode; and
dfa500
@@ -209,8 +214,11 @@ enum {
dfa500
   (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
dfa500
 #define FPSCR_BASIC_EXCEPTIONS_MASK \
dfa500
   (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
dfa500
-
dfa500
+#define FPSCR_FPRF_MASK \
dfa500
+  (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
dfa500
+   FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
dfa500
 #define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
dfa500
+#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
dfa500
 
dfa500
 /* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
dfa500
    in the FPSCR, albeit shifted to different but corresponding locations.
dfa500
diff --git a/sysdeps/powerpc/fpu/fesetmode.c b/sysdeps/powerpc/fpu/fesetmode.c
dfa500
index 32203a24ff434a32..29e088d5ab1c0d93 100644
dfa500
--- a/sysdeps/powerpc/fpu/fesetmode.c
dfa500
+++ b/sysdeps/powerpc/fpu/fesetmode.c
dfa500
@@ -19,11 +19,6 @@
dfa500
 #include <fenv_libc.h>
dfa500
 #include <fpu_control.h>
dfa500
 
dfa500
-#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM	\
dfa500
-		       | _FPU_MASK_XM | _FPU_MASK_IM)
dfa500
-
dfa500
-#define FPU_STATUS 0xbffff700ULL
dfa500
-
dfa500
 int
dfa500
 fesetmode (const femode_t *modep)
dfa500
 {
dfa500
@@ -32,18 +27,18 @@ fesetmode (const femode_t *modep)
dfa500
   /* Logic regarding enabled exceptions as in fesetenv.  */
dfa500
 
dfa500
   new.fenv = *modep;
dfa500
-  old.fenv = fegetenv_register ();
dfa500
-  new.l = (new.l & ~FPU_STATUS) | (old.l & FPU_STATUS);
dfa500
+  old.fenv = fegetenv_status ();
dfa500
+  new.l = (new.l & ~FPSCR_STATUS_MASK) | (old.l & FPSCR_STATUS_MASK);
dfa500
 
dfa500
   if (old.l == new.l)
dfa500
     return 0;
dfa500
 
dfa500
-  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
dfa500
+  if ((old.l & FPSCR_ENABLES_MASK) == 0 && (new.l & FPSCR_ENABLES_MASK) != 0)
dfa500
     (void) __fe_nomask_env_priv ();
dfa500
 
dfa500
-  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
dfa500
+  if ((old.l & FPSCR_ENABLES_MASK) != 0 && (new.l & FPSCR_ENABLES_MASK) == 0)
dfa500
     (void) __fe_mask_env ();
dfa500
 
dfa500
-  fesetenv_register (new.fenv);
dfa500
+  fesetenv_mode (new.fenv);
dfa500
   return 0;
dfa500
 }