|
|
ce426f |
From 987322bc0b170570a7bd539480252453fcc7a6f5 Mon Sep 17 00:00:00 2001
|
|
|
ce426f |
From: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
|
|
|
ce426f |
Date: Fri, 29 Mar 2013 18:15:28 -0500
|
|
|
ce426f |
Subject: [PATCH 23/42] PowerPC: remove branch prediction from rint
|
|
|
ce426f |
implementation
|
|
|
ce426f |
|
|
|
ce426f |
The branch prediction hints actually hurt performance in this case.
|
|
|
ce426f |
The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52'
|
|
|
ce426f |
is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a
|
|
|
ce426f |
general floating-point function, the expected input is not bounded, so
|
|
|
ce426f |
it is better to let the hardware handle the branches.
|
|
|
ce426f |
|
|
|
ce426f |
(backported from commit 60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f)
|
|
|
ce426f |
|
|
|
ce426f |
This backport does not include the benchmark tests from the original
|
|
|
ce426f |
commit.
|
|
|
ce426f |
---
|
|
|
ce426f |
sysdeps/powerpc/powerpc32/fpu/s_rint.S | 6 +++---
|
|
|
ce426f |
sysdeps/powerpc/powerpc32/fpu/s_rintf.S | 6 +++---
|
|
|
ce426f |
sysdeps/powerpc/powerpc64/fpu/s_rint.S | 6 +++---
|
|
|
ce426f |
sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 6 +++---
|
|
|
ce426f |
4 files changed, 12 insertions(+), 12 deletions(-)
|
|
|
ce426f |
|
|
|
ce426f |
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S
|
|
|
ce426f |
index 0ab9e6c..c28e7f6 100644
|
|
|
ce426f |
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S
|
|
|
ce426f |
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S
|
|
|
ce426f |
@@ -45,14 +45,14 @@ ENTRY (__rint)
|
|
|
ce426f |
fsub fp12,fp13,fp13 /* generate 0.0 */
|
|
|
ce426f |
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
|
|
ce426f |
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
|
|
ce426f |
- bnllr- cr7
|
|
|
ce426f |
- bng- cr6,.L4
|
|
|
ce426f |
+ bnllr cr7
|
|
|
ce426f |
+ bng cr6,.L4
|
|
|
ce426f |
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
|
|
ce426f |
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
|
|
ce426f |
fabs fp1,fp1 /* if (x == 0.0) */
|
|
|
ce426f |
blr /* x = 0.0; */
|
|
|
ce426f |
.L4:
|
|
|
ce426f |
- bnllr- cr6 /* if (x < 0.0) */
|
|
|
ce426f |
+ bnllr cr6 /* if (x < 0.0) */
|
|
|
ce426f |
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
|
|
ce426f |
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
|
|
ce426f |
fnabs fp1,fp1 /* if (x == 0.0) */
|
|
|
ce426f |
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
|
|
|
ce426f |
index ddb47db..69aed9c 100644
|
|
|
ce426f |
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
|
|
|
ce426f |
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
|
|
|
ce426f |
@@ -41,14 +41,14 @@ ENTRY (__rintf)
|
|
|
ce426f |
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
|
|
ce426f |
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
|
|
ce426f |
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
|
|
ce426f |
- bnllr- cr7
|
|
|
ce426f |
- bng- cr6,.L4
|
|
|
ce426f |
+ bnllr cr7
|
|
|
ce426f |
+ bng cr6,.L4
|
|
|
ce426f |
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
|
|
ce426f |
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
|
|
ce426f |
fabs fp1,fp1 /* if (x == 0.0) */
|
|
|
ce426f |
blr /* x = 0.0; */
|
|
|
ce426f |
.L4:
|
|
|
ce426f |
- bnllr- cr6 /* if (x < 0.0) */
|
|
|
ce426f |
+ bnllr cr6 /* if (x < 0.0) */
|
|
|
ce426f |
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
|
|
ce426f |
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
|
|
ce426f |
fnabs fp1,fp1 /* if (x == 0.0) */
|
|
|
ce426f |
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S
|
|
|
ce426f |
index db62405..560905a 100644
|
|
|
ce426f |
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S
|
|
|
ce426f |
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S
|
|
|
ce426f |
@@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0)
|
|
|
ce426f |
fsub fp12,fp13,fp13 /* generate 0.0 */
|
|
|
ce426f |
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
|
|
ce426f |
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
|
|
ce426f |
- bnllr- cr7
|
|
|
ce426f |
- bng- cr6,.L4
|
|
|
ce426f |
+ bnllr cr7
|
|
|
ce426f |
+ bng cr6,.L4
|
|
|
ce426f |
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
|
|
ce426f |
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
|
|
ce426f |
fabs fp1,fp1 /* if (x == 0.0) */
|
|
|
ce426f |
blr /* x = 0.0; */
|
|
|
ce426f |
.L4:
|
|
|
ce426f |
- bnllr- cr6 /* if (x < 0.0) */
|
|
|
ce426f |
+ bnllr cr6 /* if (x < 0.0) */
|
|
|
ce426f |
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
|
|
ce426f |
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
|
|
ce426f |
fnabs fp1,fp1 /* if (x == 0.0) */
|
|
|
ce426f |
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
|
|
|
ce426f |
index 248649d..c120d91 100644
|
|
|
ce426f |
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
|
|
|
ce426f |
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
|
|
|
ce426f |
@@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0)
|
|
|
ce426f |
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
|
|
ce426f |
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
|
|
ce426f |
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
|
|
ce426f |
- bnllr- cr7
|
|
|
ce426f |
- bng- cr6,.L4
|
|
|
ce426f |
+ bnllr cr7
|
|
|
ce426f |
+ bng cr6,.L4
|
|
|
ce426f |
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
|
|
ce426f |
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
|
|
ce426f |
fabs fp1,fp1 /* if (x == 0.0) */
|
|
|
ce426f |
blr /* x = 0.0; */
|
|
|
ce426f |
.L4:
|
|
|
ce426f |
- bnllr- cr6 /* if (x < 0.0) */
|
|
|
ce426f |
+ bnllr cr6 /* if (x < 0.0) */
|
|
|
ce426f |
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
|
|
ce426f |
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
|
|
ce426f |
fnabs fp1,fp1 /* if (x == 0.0) */
|
|
|
ce426f |
--
|
|
|
ce426f |
1.7.11.7
|
|
|
ce426f |
|