|
|
00db10 |
From 561857f53a543684862c2b6d2308bc13affa2a18 Mon Sep 17 00:00:00 2001
|
|
|
00db10 |
From: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
|
|
|
00db10 |
Date: Tue, 18 Aug 2015 22:40:56 +0530
|
|
|
00db10 |
Subject: [PATCH] powerpc: Handle worstcase behavior in strstr() for POWER7
|
|
|
00db10 |
|
|
|
00db10 |
Instead of checking the needle length, a constant number 'n' of comparisons
|
|
|
00db10 |
is counted before falling back to the default implementation. This patch is tested
|
|
|
00db10 |
on powerpc64 and powerpc64le.
|
|
|
00db10 |
|
|
|
00db10 |
2015-08-25 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
|
|
|
00db10 |
|
|
|
00db10 |
* sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.
|
|
|
00db10 |
|
|
|
00db10 |
(cherry picked from commit fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f)
|
|
|
00db10 |
---
|
|
|
00db10 |
ChangeLog | 4 ++++
|
|
|
00db10 |
sysdeps/powerpc/powerpc64/power7/strstr.S | 22 +++++++++++++++-------
|
|
|
00db10 |
2 files changed, 19 insertions(+), 7 deletions(-)
|
|
|
00db10 |
|
|
|
00db10 |
diff --git a/ChangeLog b/ChangeLog
|
|
|
00db10 |
index cf95a84..5cbd6d6 100644
|
|
|
00db10 |
diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S
|
|
|
00db10 |
index bfb0c49..fb3c810 100644
|
|
|
00db10 |
--- a/sysdeps/powerpc/powerpc64/power7/strstr.S
|
|
|
00db10 |
+++ b/sysdeps/powerpc/powerpc64/power7/strstr.S
|
|
|
00db10 |
@@ -23,6 +23,8 @@
|
|
|
00db10 |
/* The performance gain is obtained using aligned memory access, load
|
|
|
00db10 |
* doubleword and usage of cmpb instruction for quicker comparison. */
|
|
|
00db10 |
|
|
|
00db10 |
+#define ITERATIONS 64
|
|
|
00db10 |
+
|
|
|
00db10 |
#ifndef STRLEN
|
|
|
00db10 |
/* For builds with no IFUNC support, local calls should be made to internal
|
|
|
00db10 |
GLIBC symbol (created by libc_hidden_builtin_def). */
|
|
|
00db10 |
@@ -62,6 +64,8 @@ EALIGN (strstr, 4, 0)
|
|
|
00db10 |
cfi_offset(r30, -16)
|
|
|
00db10 |
std r29, -24(r1) /* Save callers register r29. */
|
|
|
00db10 |
cfi_offset(r29, -24)
|
|
|
00db10 |
+ std r28, -32(r1) /* Save callers register r28. */
|
|
|
00db10 |
+ cfi_offset(r28, -32)
|
|
|
00db10 |
std r0, 16(r1) /* Store the link register. */
|
|
|
00db10 |
cfi_offset(lr, 16)
|
|
|
00db10 |
stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */
|
|
|
00db10 |
@@ -69,7 +73,6 @@ EALIGN (strstr, 4, 0)
|
|
|
00db10 |
|
|
|
00db10 |
dcbt 0, r3
|
|
|
00db10 |
dcbt 0, r4
|
|
|
00db10 |
-
|
|
|
00db10 |
cmpdi cr7, r3, 0
|
|
|
00db10 |
beq cr7, L(retnull)
|
|
|
00db10 |
cmpdi cr7, r4, 0
|
|
|
00db10 |
@@ -84,10 +87,6 @@ EALIGN (strstr, 4, 0)
|
|
|
00db10 |
cmpdi cr7, r3, 0 /* If search str is null. */
|
|
|
00db10 |
beq cr7, L(ret_r3)
|
|
|
00db10 |
|
|
|
00db10 |
- /* Call __strstr_ppc if needle len > 2048 */
|
|
|
00db10 |
- cmpdi cr7, r3, 2048
|
|
|
00db10 |
- bgt cr7, L(default)
|
|
|
00db10 |
-
|
|
|
00db10 |
mr r31, r3
|
|
|
00db10 |
mr r4, r3
|
|
|
00db10 |
mr r3, r29
|
|
|
00db10 |
@@ -105,7 +104,8 @@ EALIGN (strstr, 4, 0)
|
|
|
00db10 |
/* If first char of search str is not present. */
|
|
|
00db10 |
cmpdi cr7, r3, 0
|
|
|
00db10 |
ble cr7, L(end)
|
|
|
00db10 |
-
|
|
|
00db10 |
+ /* Reg r28 is used to count the number of iterations. */
|
|
|
00db10 |
+ li r28, 0
|
|
|
00db10 |
rldicl r8, r3, 0, 52 /* Page cross check. */
|
|
|
00db10 |
cmpldi cr7, r8, 4096-16
|
|
|
00db10 |
bgt cr7, L(bytebybyte)
|
|
|
00db10 |
@@ -324,6 +324,10 @@ L(return4):
|
|
|
00db10 |
.align 4
|
|
|
00db10 |
L(begin):
|
|
|
00db10 |
mr r3, r8
|
|
|
00db10 |
+	/* When the iteration count reaches ITERATIONS, fall back to default. */
|
|
|
00db10 |
+ addi r28, r28, 1
|
|
|
00db10 |
+ cmpdi cr7, r28, ITERATIONS
|
|
|
00db10 |
+ beq cr7, L(default)
|
|
|
00db10 |
lbz r4, 0(r30)
|
|
|
00db10 |
bl STRCHR
|
|
|
00db10 |
nop
|
|
|
00db10 |
@@ -423,6 +427,10 @@ L(nextbyte):
|
|
|
00db10 |
cmpdi cr7, r9, -1
|
|
|
00db10 |
beq cr7, L(end)
|
|
|
00db10 |
addi r3, r4, 1
|
|
|
00db10 |
+	/* When the iteration count reaches ITERATIONS, fall back to default. */
|
|
|
00db10 |
+ addi r28, r28, 1
|
|
|
00db10 |
+ cmpdi cr7, r28, ITERATIONS
|
|
|
00db10 |
+ beq cr7, L(default)
|
|
|
00db10 |
lbz r4, 0(r30)
|
|
|
00db10 |
bl STRCHR
|
|
|
00db10 |
nop
|
|
|
00db10 |
@@ -490,7 +498,6 @@ L(retnull):
|
|
|
00db10 |
|
|
|
00db10 |
.align 4
|
|
|
00db10 |
L(default):
|
|
|
00db10 |
- mr r3, r29
|
|
|
00db10 |
mr r4, r30
|
|
|
00db10 |
bl __strstr_ppc
|
|
|
00db10 |
nop
|
|
|
00db10 |
@@ -500,6 +507,7 @@ L(end):
|
|
|
00db10 |
addi r1, r1, FRAMESIZE /* Restore stack pointer. */
|
|
|
00db10 |
cfi_adjust_cfa_offset(-FRAMESIZE)
|
|
|
00db10 |
ld r0, 16(r1) /* Restore the saved link register. */
|
|
|
00db10 |
+ ld r28, -32(r1) /* Restore callers save register r28. */
|
|
|
00db10 |
ld r29, -24(r1) /* Restore callers save register r29. */
|
|
|
00db10 |
ld r30, -16(r1) /* Restore callers save register r30. */
|
|
|
00db10 |
ld r31, -8(r1) /* Restore callers save register r31. */
|
|
|
00db10 |
--
|
|
|
00db10 |
2.1.0
|
|
|
00db10 |
|