|
|
00db10 |
commit e6d90d675d4cae810be76a5ff41b8ae8bd6bc914
|
|
|
00db10 |
Author: Wilco Dijkstra <wdijkstr@arm.com>
|
|
|
00db10 |
Date: Mon Jun 23 17:15:41 2014 +0100
|
|
|
00db10 |
|
|
|
00db10 |
Add generic HAVE_RM_CTX implementation
|
|
|
00db10 |
|
|
|
00db10 |
This patch adds a generic implementation of HAVE_RM_CTX using standard
|
|
|
00db10 |
fenv calls. As a result math functions using SET_RESTORE_ROUND* macros
|
|
|
00db10 |
do not suffer from a large slowdown on targets which do not implement
|
|
|
00db10 |
optimized libc_fe*_ctx inline functions. Most of the libc_fe* inline
|
|
|
00db10 |
functions are now unused and could be removed in the future (there are
|
|
|
00db10 |
a few math functions left which use a mixture of standard fenv calls
|
|
|
00db10 |
and libc_fe* inline functions - they could be updated to use
|
|
|
00db10 |
SET_RESTORE_ROUND or improved to avoid expensive fenv manipulations
|
|
|
00db10 |
across just a few FP instructions).
|
|
|
00db10 |
|
|
|
00db10 |
libc_feholdsetround*_noex_ctx is added to enable better optimization of
|
|
|
00db10 |
SET_RESTORE_ROUND_NOEX* implementations.
|
|
|
00db10 |
|
|
|
00db10 |
Performance measurements on ARM and x86 of sin() show significant gains
|
|
|
00db10 |
over the current default, fairly close to a highly optimized fenv_private:
|
|
|
00db10 |
|
|
|
00db10 |
                         ARM    x86
|
|
|
00db10 |
no fenv_private     :   100%   100%
|
|
|
00db10 |
generic HAVE_RM_CTX :   250%   350%
|
|
|
00db10 |
fenv_private (CTX)  :   250%   450%
|
|
|
00db10 |
|
|
|
00db10 |
2014-06-23 Will Newton <will.newton@linaro.org>
|
|
|
00db10 |
Wilco Dijkstra <wdijkstr@arm.com>
|
|
|
00db10 |
|
|
|
00db10 |
* sysdeps/generic/math_private.h: Add generic HAVE_RM_CTX
|
|
|
00db10 |
implementation. Include get-rounding-mode.h.
|
|
|
00db10 |
[!HAVE_RM_CTX]: Define HAVE_RM_CTX to zero.
|
|
|
00db10 |
[!libc_feholdsetround_noex_ctx]: Define
|
|
|
00db10 |
libc_feholdsetround_noex_ctx.
|
|
|
00db10 |
[!libc_feholdsetround_noexf_ctx]: Define
|
|
|
00db10 |
libc_feholdsetround_noexf_ctx.
|
|
|
00db10 |
[!libc_feholdsetround_noexl_ctx]: Define
|
|
|
00db10 |
libc_feholdsetround_noexl_ctx.
|
|
|
00db10 |
(libc_feholdsetround_ctx): New function.
|
|
|
00db10 |
(libc_feresetround_ctx): New function.
|
|
|
00db10 |
(libc_feholdsetround_noex_ctx): New function.
|
|
|
00db10 |
(libc_feresetround_noex_ctx): New function.
|
|
|
00db10 |
|
|
|
00db10 |
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
|
|
|
00db10 |
index 9b881a3..94c1e4a 100644
|
|
|
00db10 |
--- a/sysdeps/generic/math_private.h
|
|
|
00db10 |
+++ b/sysdeps/generic/math_private.h
|
|
|
00db10 |
@@ -20,6 +20,7 @@
|
|
|
00db10 |
#include <stdint.h>
|
|
|
00db10 |
#include <sys/types.h>
|
|
|
00db10 |
#include <fenv.h>
|
|
|
00db10 |
+#include <get-rounding-mode.h>
|
|
|
00db10 |
|
|
|
00db10 |
/* The original fdlibm code used statements like:
|
|
|
00db10 |
n0 = ((*(int*)&one)>>29)^1; * index of high word *
|
|
|
00db10 |
@@ -551,12 +552,26 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
|
|
|
00db10 |
# define libc_feresetround_noexl libc_fesetenvl
|
|
|
00db10 |
#endif
|
|
|
00db10 |
|
|
|
00db10 |
+#ifndef HAVE_RM_CTX
|
|
|
00db10 |
+# define HAVE_RM_CTX 0
|
|
|
00db10 |
+#endif
|
|
|
00db10 |
+
|
|
|
00db10 |
#if HAVE_RM_CTX
|
|
|
00db10 |
/* Set/Restore Rounding Modes only when necessary. If defined, these functions
|
|
|
00db10 |
set/restore floating point state only if the state needed within the lexical
|
|
|
00db10 |
block is different from the current state. This saves a lot of time when
|
|
|
00db10 |
the floating point unit is much slower than the fixed point units. */
|
|
|
00db10 |
|
|
|
00db10 |
+# ifndef libc_feholdsetround_noex_ctx
|
|
|
00db10 |
+# define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx
|
|
|
00db10 |
+# endif
|
|
|
00db10 |
+# ifndef libc_feholdsetround_noexf_ctx
|
|
|
00db10 |
+# define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx
|
|
|
00db10 |
+# endif
|
|
|
00db10 |
+# ifndef libc_feholdsetround_noexl_ctx
|
|
|
00db10 |
+# define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx
|
|
|
00db10 |
+# endif
|
|
|
00db10 |
+
|
|
|
00db10 |
# ifndef libc_feresetround_noex_ctx
|
|
|
00db10 |
# define libc_feresetround_noex_ctx libc_fesetenv_ctx
|
|
|
00db10 |
# endif
|
|
|
00db10 |
@@ -567,24 +582,80 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
|
|
|
00db10 |
# define libc_feresetround_noexl_ctx libc_fesetenvl_ctx
|
|
|
00db10 |
# endif
|
|
|
00db10 |
|
|
|
00db10 |
-# ifndef libc_feholdsetround_53bit_ctx
|
|
|
00db10 |
-# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx
|
|
|
00db10 |
-# endif
|
|
|
00db10 |
+#else
|
|
|
00db10 |
|
|
|
00db10 |
-# ifndef libc_feresetround_53bit_ctx
|
|
|
00db10 |
-# define libc_feresetround_53bit_ctx libc_feresetround_ctx
|
|
|
00db10 |
-# endif
|
|
|
00db10 |
+/* Default implementation using standard fenv functions.
|
|
|
00db10 |
+ Avoid unnecessary rounding mode changes by first checking the
|
|
|
00db10 |
+ current rounding mode. Note the use of __glibc_unlikely is
|
|
|
00db10 |
+ important for performance. */
|
|
|
00db10 |
|
|
|
00db10 |
-# define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \
|
|
|
00db10 |
- struct rm_ctx ctx __attribute__((cleanup(CLEANUPFUNC ## _ctx))); \
|
|
|
00db10 |
- ROUNDFUNC ## _ctx (&ctx, (RM))
|
|
|
00db10 |
-#else
|
|
|
00db10 |
-# define SET_RESTORE_ROUND_GENERIC(RM, ROUNDFUNC, CLEANUPFUNC) \
|
|
|
00db10 |
- fenv_t __libc_save_rm __attribute__((cleanup(CLEANUPFUNC))); \
|
|
|
00db10 |
- ROUNDFUNC (&__libc_save_rm, (RM))
|
|
|
00db10 |
+static __always_inline void
|
|
|
00db10 |
+libc_feholdsetround_ctx (struct rm_ctx *ctx, int round)
|
|
|
00db10 |
+{
|
|
|
00db10 |
+ ctx->updated_status = false;
|
|
|
00db10 |
+
|
|
|
00db10 |
+ /* Save the environment and switch modes only if ROUND is not already in effect. */
|
|
|
00db10 |
+ if (__glibc_unlikely (round != get_rounding_mode ()))
|
|
|
00db10 |
+ {
|
|
|
00db10 |
+ ctx->updated_status = true;
|
|
|
00db10 |
+ fegetenv (&ctx->env);
|
|
|
00db10 |
+ fesetround (round);
|
|
|
00db10 |
+ }
|
|
|
00db10 |
+}
|
|
|
00db10 |
+
|
|
|
00db10 |
+static __always_inline void
|
|
|
00db10 |
+libc_feresetround_ctx (struct rm_ctx *ctx)
|
|
|
00db10 |
+{
|
|
|
00db10 |
+ /* Undo the change only if one was made; feupdateenv keeps exceptions raised meanwhile. */
|
|
|
00db10 |
+ if (__glibc_unlikely (ctx->updated_status))
|
|
|
00db10 |
+ feupdateenv (&ctx->env);
|
|
|
00db10 |
+}
|
|
|
00db10 |
+
|
|
|
00db10 |
+static __always_inline void
|
|
|
00db10 |
+libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round)
|
|
|
00db10 |
+{
|
|
|
00db10 |
+ /* Saved unconditionally: the flags must be put back even if the mode is unchanged. */
|
|
|
00db10 |
+ fegetenv (&ctx->env);
|
|
|
00db10 |
+
|
|
|
00db10 |
+ /* Call fesetround only if ROUND is not already the current mode. */
|
|
|
00db10 |
+ if (__glibc_unlikely (round != get_rounding_mode ()))
|
|
|
00db10 |
+ fesetround (round);
|
|
|
00db10 |
+}
|
|
|
00db10 |
+
|
|
|
00db10 |
+static __always_inline void
|
|
|
00db10 |
+libc_feresetround_noex_ctx (struct rm_ctx *ctx)
|
|
|
00db10 |
+{
|
|
|
00db10 |
+ /* Reinstate the saved flags and rounding mode, dropping exceptions raised since. */
|
|
|
00db10 |
+ fesetenv (&ctx->env);
|
|
|
00db10 |
+}
|
|
|
00db10 |
+
|
|
|
00db10 |
+# define libc_feholdsetroundf_ctx libc_feholdsetround_ctx
|
|
|
00db10 |
+# define libc_feholdsetroundl_ctx libc_feholdsetround_ctx
|
|
|
00db10 |
+# define libc_feresetroundf_ctx libc_feresetround_ctx
|
|
|
00db10 |
+# define libc_feresetroundl_ctx libc_feresetround_ctx
|
|
|
00db10 |
+
|
|
|
00db10 |
+# define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ctx
|
|
|
00db10 |
+# define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ctx
|
|
|
00db10 |
+# define libc_feresetround_noexf_ctx libc_feresetround_noex_ctx
|
|
|
00db10 |
+# define libc_feresetround_noexl_ctx libc_feresetround_noex_ctx
|
|
|
00db10 |
+
|
|
|
00db10 |
+#endif
|
|
|
00db10 |
+
|
|
|
00db10 |
+#ifndef libc_feholdsetround_53bit_ctx
|
|
|
00db10 |
+# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx
|
|
|
00db10 |
+#endif
|
|
|
00db10 |
+#ifndef libc_feresetround_53bit_ctx
|
|
|
00db10 |
+# define libc_feresetround_53bit_ctx libc_feresetround_ctx
|
|
|
00db10 |
#endif
|
|
|
00db10 |
|
|
|
00db10 |
-/* Save and restore the rounding mode within a lexical block. */
|
|
|
00db10 |
+#define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \
|
|
|
00db10 |
+ struct rm_ctx ctx __attribute__((cleanup (CLEANUPFUNC ## _ctx))); \
|
|
|
00db10 |
+ ROUNDFUNC ## _ctx (&ctx, (RM))
|
|
|
00db10 |
+
|
|
|
00db10 |
+/* Set the rounding mode within a lexical block. Restore the rounding mode to
|
|
|
00db10 |
+ the value at the start of the block. The exception mode must be preserved.
|
|
|
00db10 |
+ Exceptions raised within the block must be set in the exception flags.
|
|
|
00db10 |
+ Non-stop mode may be enabled inside the block. */
|
|
|
00db10 |
|
|
|
00db10 |
#define SET_RESTORE_ROUND(RM) \
|
|
|
00db10 |
SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround)
|
|
|
00db10 |
@@ -593,15 +664,21 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
|
|
|
00db10 |
#define SET_RESTORE_ROUNDL(RM) \
|
|
|
00db10 |
SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl)
|
|
|
00db10 |
|
|
|
00db10 |
-/* Save and restore the rounding mode within a lexical block, and also
|
|
|
00db10 |
- the set of exceptions raised within the block may be discarded. */
|
|
|
00db10 |
+/* Set the rounding mode within a lexical block. Restore the rounding mode to
|
|
|
00db10 |
+ the value at the start of the block. The exception mode must be preserved.
|
|
|
00db10 |
+ Exceptions raised within the block must be discarded, and exception flags
|
|
|
00db10 |
+ are restored to the value at the start of the block.
|
|
|
00db10 |
+ Non-stop mode may be enabled inside the block. */
|
|
|
00db10 |
|
|
|
00db10 |
#define SET_RESTORE_ROUND_NOEX(RM) \
|
|
|
00db10 |
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround_noex)
|
|
|
00db10 |
+ SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noex, \
|
|
|
00db10 |
+ libc_feresetround_noex)
|
|
|
00db10 |
#define SET_RESTORE_ROUND_NOEXF(RM) \
|
|
|
00db10 |
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetround_noexf)
|
|
|
00db10 |
+ SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noexf, \
|
|
|
00db10 |
+ libc_feresetround_noexf)
|
|
|
00db10 |
#define SET_RESTORE_ROUND_NOEXL(RM) \
|
|
|
00db10 |
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetround_noexl)
|
|
|
00db10 |
+ SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noexl, \
|
|
|
00db10 |
+ libc_feresetround_noexl)
|
|
|
00db10 |
|
|
|
00db10 |
/* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */
|
|
|
00db10 |
#define SET_RESTORE_ROUND_53BIT(RM) \
|