|
|
ce426f |
commit 656b84c2ef525e3b69802c9057c5897e327b0332
|
|
|
ce426f |
Author: Wilco Dijkstra <wdijkstr@arm.com>
|
|
|
ce426f |
Date: Thu Aug 7 16:29:55 2014 +0000
|
|
|
ce426f |
|
|
|
ce426f |
This patch adds a new function, libc_feholdsetround_noex_aarch64_ctx, enabling
|
|
|
ce426f |
further optimization. libc_feholdsetround_aarch64_ctx now only needs to
|
|
|
ce426f |
read the FPCR in the typical case, avoiding a redundant FPSR read.
|
|
|
ce426f |
Performance results show a good improvement (5-10% on sin()) on cores with
|
|
|
ce426f |
expensive FPCR/FPSR instructions.
|
|
|
ce426f |
|
|
|
ce426f |
diff --git a/ports/sysdeps/aarch64/fpu/math_private.h b/ports/sysdeps/aarch64/fpu/math_private.h
|
|
|
ce426f |
index 023c9d0..b13c030 100644
|
|
|
ce426f |
--- a/ports/sysdeps/aarch64/fpu/math_private.h
|
|
|
ce426f |
+++ b/ports/sysdeps/aarch64/fpu/math_private.h
|
|
|
ce426f |
@@ -228,12 +228,9 @@ static __always_inline void
|
|
|
ce426f |
libc_feholdsetround_aarch64_ctx (struct rm_ctx *ctx, int r)
|
|
|
ce426f |
{
|
|
|
ce426f |
fpu_control_t fpcr;
|
|
|
ce426f |
- fpu_fpsr_t fpsr;
|
|
|
ce426f |
int round;
|
|
|
ce426f |
|
|
|
ce426f |
_FPU_GETCW (fpcr);
|
|
|
ce426f |
- _FPU_GETFPSR (fpsr);
|
|
|
ce426f |
- ctx->env.__fpsr = fpsr;
|
|
|
ce426f |
|
|
|
ce426f |
/* Check whether rounding modes are different. */
|
|
|
ce426f |
round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
|
|
|
ce426f |
@@ -264,6 +261,33 @@ libc_feresetround_aarch64_ctx (struct rm_ctx *ctx)
|
|
|
ce426f |
#define libc_feresetroundl_ctx libc_feresetround_aarch64_ctx
|
|
|
ce426f |
|
|
|
ce426f |
static __always_inline void
|
|
|
ce426f |
+libc_feholdsetround_noex_aarch64_ctx (struct rm_ctx *ctx, int r)
|
|
|
ce426f |
+{
|
|
|
ce426f |
+ fpu_control_t fpcr;
|
|
|
ce426f |
+ fpu_fpsr_t fpsr;
|
|
|
ce426f |
+ int round;
|
|
|
ce426f |
+
|
|
|
ce426f |
+ _FPU_GETCW (fpcr);
|
|
|
ce426f |
+ _FPU_GETFPSR (fpsr);
|
|
|
ce426f |
+ ctx->env.__fpsr = fpsr;
|
|
|
ce426f |
+
|
|
|
ce426f |
+ /* Check whether rounding modes are different. */
|
|
|
ce426f |
+ round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
|
|
|
ce426f |
+ ctx->updated_status = round != 0;
|
|
|
ce426f |
+
|
|
|
ce426f |
+ /* Set the rounding mode if changed. */
|
|
|
ce426f |
+ if (__glibc_unlikely (round != 0))
|
|
|
ce426f |
+ {
|
|
|
ce426f |
+ ctx->env.__fpcr = fpcr;
|
|
|
ce426f |
+ _FPU_SETCW (fpcr ^ round);
|
|
|
ce426f |
+ }
|
|
|
ce426f |
+}
|
|
|
ce426f |
+
|
|
|
ce426f |
+#define libc_feholdsetround_noex_ctx libc_feholdsetround_noex_aarch64_ctx
|
|
|
ce426f |
+#define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_aarch64_ctx
|
|
|
ce426f |
+#define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_aarch64_ctx
|
|
|
ce426f |
+
|
|
|
ce426f |
+static __always_inline void
|
|
|
ce426f |
libc_feresetround_noex_aarch64_ctx (struct rm_ctx *ctx)
|
|
|
ce426f |
{
|
|
|
ce426f |
/* Restore the rounding mode if updated. */
|