7c0489
commit 3c1766ea10043f2e9625f3cba3bda37c84b32cf0
7c0489
Author: Paul A. Clarke <pc@us.ibm.com>
7c0489
Date:   Thu Jul 18 19:37:13 2019 -0500
7c0489
7c0489
    [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses
7c0489
    
7c0489
    Since fe{en,dis}ableexcept() and fesetmode() read-modify-write just the
7c0489
    "mode" (exception enable and rounding mode) bits of the Floating Point Status
7c0489
    Control Register (FPSCR), the lighter weight 'mffsl' instruction can be used
7c0489
    to read the FPSCR (enables and rounding mode), and 'mtfsf 0b00000011' can be
7c0489
    used to write just those bits back to the FPSCR.  The net result is better performance.
7c0489
    
7c0489
    In addition, fe{en,dis}ableexcept() read the FPSCR again after writing it, or
7c0489
    they determine that it doesn't need to be written because it is not changing.
7c0489
    In either case, the local variable holds the current values of the enable
7c0489
    bits in the FPSCR.  This local variable can be used instead of again reading
7c0489
    the FPSCR.
7c0489
    
7c0489
    Also, the value of the FPSCR that is read the second time is validated
7c0489
    against the requested enables.  Since the write can't fail, this validation
7c0489
    step is unnecessary, and can be removed.  Instead, the exceptions to be
7c0489
    enabled (or disabled) are transformed into available bits in the FPSCR,
7c0489
    then validated after being transformed back, to ensure that all requested
7c0489
    bits are actually being set.  For example, FE_INVALID_SQRT can be
7c0489
    requested, but cannot actually be set.  This bit is not mapped during the
7c0489
    transformations, so a test for that bit being set before and after
7c0489
    transformations will show the bit would not be set, and the function will
7c0489
    return -1 for failure.
7c0489
    
7c0489
    Finally, convert the local macros in fesetmode.c to more generally useful
7c0489
    macros in fenv_libc.h.
7c0489
7c0489
diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
7c0489
index 90bc3d12c6d8558c..2a776c72fb5a2b70 100644
7c0489
--- a/sysdeps/powerpc/fpu/fedisblxcpt.c
7c0489
+++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
7c0489
@@ -26,23 +26,25 @@ fedisableexcept (int excepts)
7c0489
   int result, new;
7c0489
 
7c0489
   /* Get current exception mask to return.  */
7c0489
-  fe.fenv = curr.fenv = fegetenv_register ();
7c0489
+  fe.fenv = curr.fenv = fegetenv_status ();
7c0489
   result = fenv_reg_to_exceptions (fe.l);
7c0489
 
7c0489
   if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
7c0489
     excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
7c0489
 
7c0489
+  new = fenv_exceptions_to_reg (excepts);
7c0489
+
7c0489
+  if (fenv_reg_to_exceptions (new) != excepts)
7c0489
+    return -1;
7c0489
+
7c0489
   /* Sets the new exception mask.  */
7c0489
-  fe.l &= ~ fenv_exceptions_to_reg (excepts);
7c0489
+  fe.l &= ~new;
7c0489
 
7c0489
   if (fe.l != curr.l)
7c0489
-    fesetenv_register (fe.fenv);
7c0489
+    fesetenv_mode (fe.fenv);
7c0489
 
7c0489
-  new = __fegetexcept ();
7c0489
   if (new == 0 && result != 0)
7c0489
     (void)__fe_mask_env ();
7c0489
 
7c0489
-  if ((new & excepts) != 0)
7c0489
-    result = -1;
7c0489
   return result;
7c0489
 }
7c0489
diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
7c0489
index e029971b9a460c28..6f5a828e80965bfa 100644
7c0489
--- a/sysdeps/powerpc/fpu/feenablxcpt.c
7c0489
+++ b/sysdeps/powerpc/fpu/feenablxcpt.c
7c0489
@@ -26,24 +26,25 @@ feenableexcept (int excepts)
7c0489
   int result, new;
7c0489
 
7c0489
   /* Get current exception mask to return.  */
7c0489
-  fe.fenv = curr.fenv = fegetenv_register ();
7c0489
+  fe.fenv = curr.fenv = fegetenv_status ();
7c0489
   result = fenv_reg_to_exceptions (fe.l);
7c0489
 
7c0489
   if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
7c0489
     excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
7c0489
 
7c0489
+  new = fenv_exceptions_to_reg (excepts);
7c0489
+
7c0489
+  if (fenv_reg_to_exceptions (new) != excepts)
7c0489
+    return -1;
7c0489
+
7c0489
   /* Sets the new exception mask.  */
7c0489
-  fe.l |= fenv_exceptions_to_reg (excepts);
7c0489
+  fe.l |= new;
7c0489
 
7c0489
   if (fe.l != curr.l)
7c0489
-    fesetenv_register (fe.fenv);
7c0489
+    fesetenv_mode (fe.fenv);
7c0489
 
7c0489
-  new = __fegetexcept ();
7c0489
   if (new != 0 && result == 0)
7c0489
     (void) __fe_nomask_env_priv ();
7c0489
 
7c0489
-  if ((new & excepts) != excepts)
7c0489
-    result = -1;
7c0489
-
7c0489
   return result;
7c0489
 }
7c0489
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
7c0489
index f9634a64d186c076..b244770d115ea7bb 100644
7c0489
--- a/sysdeps/powerpc/fpu/fenv_libc.h
7c0489
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
7c0489
@@ -71,6 +71,11 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
7c0489
 	    asm volatile ("mtfsf 0xff,%0" : : "f" (d)); \
7c0489
 	} while(0)
7c0489
 
7c0489
+/* Set the last 2 nibbles of the FPSCR, which contain the
7c0489
+   exception enables and the rounding mode.
7c0489
+   'fegetenv_status' retrieves these bits by reading the FPSCR.  */
7c0489
+#define fesetenv_mode(env) __builtin_mtfsf (0b00000011, (env));
7c0489
+
7c0489
 /* This very handy macro:
7c0489
    - Sets the rounding mode to 'round to nearest';
7c0489
    - Sets the processor into IEEE mode; and
7c0489
@@ -209,8 +214,11 @@ enum {
7c0489
   (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
7c0489
 #define FPSCR_BASIC_EXCEPTIONS_MASK \
7c0489
   (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
7c0489
-
7c0489
+#define FPSCR_FPRF_MASK \
7c0489
+  (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
7c0489
+   FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
7c0489
 #define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
7c0489
+#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
7c0489
 
7c0489
 /* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
7c0489
    in the FPSCR, albeit shifted to different but corresponding locations.
7c0489
diff --git a/sysdeps/powerpc/fpu/fesetmode.c b/sysdeps/powerpc/fpu/fesetmode.c
7c0489
index 32203a24ff434a32..29e088d5ab1c0d93 100644
7c0489
--- a/sysdeps/powerpc/fpu/fesetmode.c
7c0489
+++ b/sysdeps/powerpc/fpu/fesetmode.c
7c0489
@@ -19,11 +19,6 @@
7c0489
 #include <fenv_libc.h>
7c0489
 #include <fpu_control.h>
7c0489
 
7c0489
-#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM	\
7c0489
-		       | _FPU_MASK_XM | _FPU_MASK_IM)
7c0489
-
7c0489
-#define FPU_STATUS 0xbffff700ULL
7c0489
-
7c0489
 int
7c0489
 fesetmode (const femode_t *modep)
7c0489
 {
7c0489
@@ -32,18 +27,18 @@ fesetmode (const femode_t *modep)
7c0489
   /* Logic regarding enabled exceptions as in fesetenv.  */
7c0489
 
7c0489
   new.fenv = *modep;
7c0489
-  old.fenv = fegetenv_register ();
7c0489
-  new.l = (new.l & ~FPU_STATUS) | (old.l & FPU_STATUS);
7c0489
+  old.fenv = fegetenv_status ();
7c0489
+  new.l = (new.l & ~FPSCR_STATUS_MASK) | (old.l & FPSCR_STATUS_MASK);
7c0489
 
7c0489
   if (old.l == new.l)
7c0489
     return 0;
7c0489
 
7c0489
-  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
7c0489
+  if ((old.l & FPSCR_ENABLES_MASK) == 0 && (new.l & FPSCR_ENABLES_MASK) != 0)
7c0489
     (void) __fe_nomask_env_priv ();
7c0489
 
7c0489
-  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
7c0489
+  if ((old.l & FPSCR_ENABLES_MASK) != 0 && (new.l & FPSCR_ENABLES_MASK) == 0)
7c0489
     (void) __fe_mask_env ();
7c0489
 
7c0489
-  fesetenv_register (new.fenv);
7c0489
+  fesetenv_mode (new.fenv);
7c0489
   return 0;
7c0489
 }