a2cf7d
commit 3db85a9814784a74536a1f0e7b7ddbfef7dc84bb
a2cf7d
Author: Paul A. Clarke <pc@us.ibm.com>
a2cf7d
Date:   Thu Jun 20 11:57:18 2019 -0500
a2cf7d
a2cf7d
    powerpc: Use faster means to access FPSCR when possible
a2cf7d
    
a2cf7d
    Using the 'mffs' instruction to read the Floating Point Status Control Register
a2cf7d
    (FPSCR) can force a processor flush in some cases, with undesirable
a2cf7d
    performance impact.  If the values of the bits in the FPSCR which force the
a2cf7d
    flush are not needed, an instruction that is new to POWER9 (ISA version 3.0),
a2cf7d
    'mffsl', can be used instead.
a2cf7d
    
a2cf7d
    Cases included:  get_rounding_mode, fegetround, fegetmode, fegetexcept.
a2cf7d
    
a2cf7d
            * sysdeps/powerpc/bits/fenvinline.h (__fegetround): Use
a2cf7d
            __fegetround_ISA300() or __fegetround_ISA2() as appropriate.
a2cf7d
            (__fegetround_ISA300) New.
a2cf7d
            (__fegetround_ISA2) New.
a2cf7d
            * sysdeps/powerpc/fpu_control.h (IS_ISA300): New.
a2cf7d
            (__FPU_MFFS): Move implementation...
a2cf7d
            (_FPU_GETCW): Here.
a2cf7d
            (__FPU_MFFSL): Move implementation...
a2cf7d
            (_FPU_GET_RC_ISA300): Here. New.
a2cf7d
            (_FPU_GET_RC): Use _FPU_GET_RC_ISA300() or _FPU_GETCW() as appropriate.
a2cf7d
            * sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_status_ISA300): New.
a2cf7d
            (fegetenv_status): New.
a2cf7d
            * sysdeps/powerpc/fpu/fegetmode.c (fegetmode): Use fegetenv_status()
a2cf7d
            instead of fegetenv_register().
a2cf7d
            * sysdeps/powerpc/fpu/fegetexcept.c (__fegetexcept): Likewise.
a2cf7d
    
a2cf7d
    Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
a2cf7d
a2cf7d
diff --git a/sysdeps/powerpc/bits/fenvinline.h b/sysdeps/powerpc/bits/fenvinline.h
a2cf7d
index 41316386ba75e903..caec8ead6e17219d 100644
a2cf7d
--- a/sysdeps/powerpc/bits/fenvinline.h
a2cf7d
+++ b/sysdeps/powerpc/bits/fenvinline.h
a2cf7d
@@ -18,13 +18,36 @@
a2cf7d
 
a2cf7d
 #if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__
a2cf7d
 
a2cf7d
-/* Inline definition for fegetround.  */
a2cf7d
-# define __fegetround() \
a2cf7d
-  (__extension__  ({ int __fegetround_result;				      \
a2cf7d
-		     __asm__ __volatile__				      \
a2cf7d
-		       ("mcrfs 7,7 ; mfcr %0"				      \
a2cf7d
-			: "=r"(__fegetround_result) : : "cr7");		      \
a2cf7d
-		     __fegetround_result & 3; }))
a2cf7d
+/* Inline definitions for fegetround.  */
a2cf7d
+# define __fegetround_ISA300()						\
a2cf7d
+  (__extension__  ({							\
a2cf7d
+    union { double __d; unsigned long long __ll; } __u;			\
a2cf7d
+    __asm__ __volatile__ (						\
a2cf7d
+      ".machine push; .machine \"power9\"; mffsl %0; .machine pop"	\
a2cf7d
+      : "=f" (__u.__d));						\
a2cf7d
+    __u.__ll & 0x0000000000000003LL;					\
a2cf7d
+  }))
a2cf7d
+
a2cf7d
+# define __fegetround_ISA2()						\
a2cf7d
+  (__extension__  ({							\
a2cf7d
+     int __fegetround_result;						\
a2cf7d
+     __asm__ __volatile__ ("mcrfs 7,7 ; mfcr %0"			\
a2cf7d
+			   : "=r"(__fegetround_result) : : "cr7");	\
a2cf7d
+     __fegetround_result & 3;						\
a2cf7d
+  }))
a2cf7d
+
a2cf7d
+# ifdef _ARCH_PWR9
a2cf7d
+#  define __fegetround() __fegetround_ISA300()
a2cf7d
+# elif defined __BUILTIN_CPU_SUPPORTS__
a2cf7d
+#  define __fegetround()						\
a2cf7d
+  (__glibc_likely (__builtin_cpu_supports ("arch_3_00"))		\
a2cf7d
+   ? __fegetround_ISA300()						\
a2cf7d
+   : __fegetround_ISA2()						\
a2cf7d
+  )
a2cf7d
+# else
a2cf7d
+#  define __fegetround() __fegetround_ISA2()
a2cf7d
+# endif
a2cf7d
+
a2cf7d
 # define fegetround() __fegetround ()
a2cf7d
 
a2cf7d
 # ifndef __NO_MATH_INLINES
a2cf7d
diff --git a/sysdeps/powerpc/fpu/fegetexcept.c b/sysdeps/powerpc/fpu/fegetexcept.c
a2cf7d
index a053a32bfe11c0d4..9d77adea59939ece 100644
a2cf7d
--- a/sysdeps/powerpc/fpu/fegetexcept.c
a2cf7d
+++ b/sysdeps/powerpc/fpu/fegetexcept.c
a2cf7d
@@ -25,7 +25,7 @@ __fegetexcept (void)
a2cf7d
   fenv_union_t fe;
a2cf7d
   int result = 0;
a2cf7d
 
a2cf7d
-  fe.fenv = fegetenv_register ();
a2cf7d
+  fe.fenv = fegetenv_status ();
a2cf7d
 
a2cf7d
   if (fe.l & (1 << (31 - FPSCR_XE)))
a2cf7d
       result |= FE_INEXACT;
a2cf7d
diff --git a/sysdeps/powerpc/fpu/fegetmode.c b/sysdeps/powerpc/fpu/fegetmode.c
a2cf7d
index b83dc9f625d2248a..75493e5f24c8b05b 100644
a2cf7d
--- a/sysdeps/powerpc/fpu/fegetmode.c
a2cf7d
+++ b/sysdeps/powerpc/fpu/fegetmode.c
a2cf7d
@@ -21,6 +21,6 @@
a2cf7d
 int
a2cf7d
 fegetmode (femode_t *modep)
a2cf7d
 {
a2cf7d
-  *modep = fegetenv_register ();
a2cf7d
+  *modep = fegetenv_status ();
a2cf7d
   return 0;
a2cf7d
 }
a2cf7d
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
a2cf7d
index d6945903b525748e..cc00df033da47c1a 100644
a2cf7d
--- a/sysdeps/powerpc/fpu/fenv_libc.h
a2cf7d
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
a2cf7d
@@ -35,6 +35,27 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
a2cf7d
 #define fegetenv_register() \
a2cf7d
         ({ fenv_t env; asm volatile ("mffs %0" : "=f" (env)); env; })
a2cf7d
 
a2cf7d
+/* Equivalent to fegetenv_register, but only returns bits for
a2cf7d
+   status, exception enables, and mode.  */
a2cf7d
+
a2cf7d
+#define fegetenv_status_ISA300()					\
a2cf7d
+  ({register double __fr;						\
a2cf7d
+    __asm__ __volatile__ (						\
a2cf7d
+      ".machine push; .machine \"power9\"; mffsl %0; .machine pop"	\
a2cf7d
+      : "=f" (__fr));							\
a2cf7d
+    __fr;								\
a2cf7d
+  })
a2cf7d
+
a2cf7d
+#ifdef _ARCH_PWR9
a2cf7d
+# define fegetenv_status() fegetenv_status_ISA300()
a2cf7d
+#else
a2cf7d
+# define fegetenv_status()						\
a2cf7d
+  (__glibc_likely (__builtin_cpu_supports ("arch_3_00"))		\
a2cf7d
+   ? fegetenv_status_ISA300()						\
a2cf7d
+   : fegetenv_register()						\
a2cf7d
+  )
a2cf7d
+#endif
a2cf7d
+
a2cf7d
 /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
a2cf7d
 #define fesetenv_register(env) \
a2cf7d
 	do { \
a2cf7d
diff --git a/sysdeps/powerpc/fpu_control.h b/sysdeps/powerpc/fpu_control.h
a2cf7d
index 90063d77bbbf794f..e0ee622e246c0d61 100644
a2cf7d
--- a/sysdeps/powerpc/fpu_control.h
a2cf7d
+++ b/sysdeps/powerpc/fpu_control.h
a2cf7d
@@ -96,35 +96,37 @@ extern fpu_control_t __fpu_control;
a2cf7d
 typedef unsigned int fpu_control_t;
a2cf7d
 
a2cf7d
 /* Macros for accessing the hardware control word.  */
a2cf7d
-# define __FPU_MFFS()						\
a2cf7d
-  ({register double __fr;					\
a2cf7d
-    __asm__ __volatile__("mffs %0" : "=f" (__fr));		\
a2cf7d
-    __fr;							\
a2cf7d
-  })
a2cf7d
-
a2cf7d
 # define _FPU_GETCW(cw)						\
a2cf7d
   ({union { double __d; unsigned long long __ll; } __u;		\
a2cf7d
-    __u.__d = __FPU_MFFS();					\
a2cf7d
+    __asm__ __volatile__("mffs %0" : "=f" (__u.__d));		\
a2cf7d
     (cw) = (fpu_control_t) __u.__ll;				\
a2cf7d
     (fpu_control_t) __u.__ll;					\
a2cf7d
   })
a2cf7d
 
a2cf7d
-#ifdef _ARCH_PWR9
a2cf7d
-# define __FPU_MFFSL()						\
a2cf7d
-  ({register double __fr;					\
a2cf7d
-    __asm__ __volatile__("mffsl %0" : "=f" (__fr));		\
a2cf7d
-    __fr;							\
a2cf7d
+# define _FPU_GET_RC_ISA300()						\
a2cf7d
+  ({union { double __d; unsigned long long __ll; } __u;			\
a2cf7d
+    __asm__ __volatile__(						\
a2cf7d
+      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" 	\
a2cf7d
+      : "=f" (__u.__d));						\
a2cf7d
+    (fpu_control_t) (__u.__ll & _FPU_MASK_RC);				\
a2cf7d
   })
a2cf7d
-#else
a2cf7d
-# define __FPU_MFFSL() __FPU_MFFS()
a2cf7d
-#endif
a2cf7d
-    
a2cf7d
-# define _FPU_GET_RC()						\
a2cf7d
-  ({union { double __d; unsigned long long __ll; } __u;		\
a2cf7d
-    __u.__d = __FPU_MFFSL();					\
a2cf7d
-    __u.__ll &= _FPU_MASK_RC;					\
a2cf7d
-    (fpu_control_t) __u.__ll;					\
a2cf7d
+
a2cf7d
+# ifdef _ARCH_PWR9
a2cf7d
+#  define _FPU_GET_RC() _FPU_GET_RC_ISA300()
a2cf7d
+# elif defined __BUILTIN_CPU_SUPPORTS__
a2cf7d
+#  define _FPU_GET_RC()							\
a2cf7d
+  ({fpu_control_t __rc;							\
a2cf7d
+    __rc = __glibc_likely (__builtin_cpu_supports ("arch_3_00"))	\
a2cf7d
+      ? _FPU_GET_RC_ISA300 ()						\
a2cf7d
+      : _FPU_GETCW (__rc) & _FPU_MASK_RC;				\
a2cf7d
+    __rc;								\
a2cf7d
+  })
a2cf7d
+# else
a2cf7d
+#  define _FPU_GET_RC()						\
a2cf7d
+  ({fpu_control_t __rc = _FPU_GETCW (__rc) & _FPU_MASK_RC;	\
a2cf7d
+    __rc;							\
a2cf7d
   })
a2cf7d
+# endif
a2cf7d
 
a2cf7d
 # define _FPU_SETCW(cw)						\
a2cf7d
   { union { double __d; unsigned long long __ll; } __u;		\