| From 987322bc0b170570a7bd539480252453fcc7a6f5 Mon Sep 17 00:00:00 2001 |
| From: Adhemerval Zanella <azanella@linux.vnet.ibm.com> |
| Date: Fri, 29 Mar 2013 18:15:28 -0500 |
| Subject: [PATCH 23/42] PowerPC: remove branch prediction from rint |
| implementation |
| |
| The branch prediction hints actually hurt performance in this case. |
| The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52' |
| is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a |
| general floating point function, the expected input is not bounded, so |
| it is better to let the hardware handle the branches. |
| |
| (backported from commit 60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f) |
| |
| This backport does not include the benchmark tests from the original |
| commit. |
| |
| sysdeps/powerpc/powerpc32/fpu/s_rint.S | 6 +++--- |
| sysdeps/powerpc/powerpc32/fpu/s_rintf.S | 6 +++--- |
| sysdeps/powerpc/powerpc64/fpu/s_rint.S | 6 +++--- |
| sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 6 +++--- |
| 4 files changed, 12 insertions(+), 12 deletions(-) |
| |
| diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S |
| index 0ab9e6c..c28e7f6 100644 |
| |
| |
| @@ -45,14 +45,14 @@ ENTRY (__rint) |
| fsub fp12,fp13,fp13 /* generate 0.0 */ |
| fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ |
| fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ |
| - bnllr- cr7 |
| - bng- cr6,.L4 |
| + bnllr cr7 |
| + bng cr6,.L4 |
| fadd fp1,fp1,fp13 /* x+= TWO52; */ |
| fsub fp1,fp1,fp13 /* x-= TWO52; */ |
| fabs fp1,fp1 /* if (x == 0.0) */ |
| blr /* x = 0.0; */ |
| .L4: |
| - bnllr- cr6 /* if (x < 0.0) */ |
| + bnllr cr6 /* if (x < 0.0) */ |
| fsub fp1,fp1,fp13 /* x-= TWO52; */ |
| fadd fp1,fp1,fp13 /* x+= TWO52; */ |
| fnabs fp1,fp1 /* if (x == 0.0) */ |
| diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S |
| index ddb47db..69aed9c 100644 |
| |
| |
| @@ -41,14 +41,14 @@ ENTRY (__rintf) |
| fsubs fp12,fp13,fp13 /* generate 0.0 */ |
| fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ |
| fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ |
| - bnllr- cr7 |
| - bng- cr6,.L4 |
| + bnllr cr7 |
| + bng cr6,.L4 |
| fadds fp1,fp1,fp13 /* x+= TWO23; */ |
| fsubs fp1,fp1,fp13 /* x-= TWO23; */ |
| fabs fp1,fp1 /* if (x == 0.0) */ |
| blr /* x = 0.0; */ |
| .L4: |
| - bnllr- cr6 /* if (x < 0.0) */ |
| + bnllr cr6 /* if (x < 0.0) */ |
| fsubs fp1,fp1,fp13 /* x-= TWO23; */ |
| fadds fp1,fp1,fp13 /* x+= TWO23; */ |
| fnabs fp1,fp1 /* if (x == 0.0) */ |
| diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S |
| index db62405..560905a 100644 |
| |
| |
| @@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0) |
| fsub fp12,fp13,fp13 /* generate 0.0 */ |
| fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ |
| fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ |
| - bnllr- cr7 |
| - bng- cr6,.L4 |
| + bnllr cr7 |
| + bng cr6,.L4 |
| fadd fp1,fp1,fp13 /* x+= TWO52; */ |
| fsub fp1,fp1,fp13 /* x-= TWO52; */ |
| fabs fp1,fp1 /* if (x == 0.0) */ |
| blr /* x = 0.0; */ |
| .L4: |
| - bnllr- cr6 /* if (x < 0.0) */ |
| + bnllr cr6 /* if (x < 0.0) */ |
| fsub fp1,fp1,fp13 /* x-= TWO52; */ |
| fadd fp1,fp1,fp13 /* x+= TWO52; */ |
| fnabs fp1,fp1 /* if (x == 0.0) */ |
| diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S |
| index 248649d..c120d91 100644 |
| |
| |
| @@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0) |
| fsubs fp12,fp13,fp13 /* generate 0.0 */ |
| fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ |
| fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ |
| - bnllr- cr7 |
| - bng- cr6,.L4 |
| + bnllr cr7 |
| + bng cr6,.L4 |
| fadds fp1,fp1,fp13 /* x+= TWO23; */ |
| fsubs fp1,fp1,fp13 /* x-= TWO23; */ |
| fabs fp1,fp1 /* if (x == 0.0) */ |
| blr /* x = 0.0; */ |
| .L4: |
| - bnllr- cr6 /* if (x < 0.0) */ |
| + bnllr cr6 /* if (x < 0.0) */ |
| fsubs fp1,fp1,fp13 /* x-= TWO23; */ |
| fadds fp1,fp1,fp13 /* x+= TWO23; */ |
| fnabs fp1,fp1 /* if (x == 0.0) */ |
| -- |
| 1.7.11.7 |
| |