Blob Blame History Raw
From 987322bc0b170570a7bd539480252453fcc7a6f5 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date: Fri, 29 Mar 2013 18:15:28 -0500
Subject: [PATCH 23/42] PowerPC: remove branch prediction from rint
 implementation

The branch prediction hints actually hurt performance in this case.
The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52'
is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a
general floating point function, the expected input is not bounded, so
it is better to let the hardware handle the branches.

(backported from commit 60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f)

This backport does not include the benchmark tests from the original
commit.
---
 sysdeps/powerpc/powerpc32/fpu/s_rint.S  | 6 +++---
 sysdeps/powerpc/powerpc32/fpu/s_rintf.S | 6 +++---
 sysdeps/powerpc/powerpc64/fpu/s_rint.S  | 6 +++---
 sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 6 +++---
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
index 0ab9e6c..c28e7f6 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
@@ -45,14 +45,14 @@ ENTRY (__rint)
 	fsub	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO52)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
index ddb47db..69aed9c 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
@@ -41,14 +41,14 @@ ENTRY (__rintf)
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
index db62405..560905a 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
@@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0)
 	fsub	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO52)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index 248649d..c120d91 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0)
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
-- 
1.7.11.7