00db10
From 561857f53a543684862c2b6d2308bc13affa2a18 Mon Sep 17 00:00:00 2001
00db10
From: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
00db10
Date: Tue, 18 Aug 2015 22:40:56 +0530
00db10
Subject: [PATCH] powerpc: Handle worstcase behavior in strstr() for POWER7
00db10
00db10
Instead of checking the needle length, count the search iterations and
00db10
fall back to the default implementation once a fixed limit (ITERATIONS)
00db10
is reached.  This patch is tested on powerpc64 and powerpc64le.
00db10
00db10
2015-08-25  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
00db10
00db10
	* sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.
00db10
00db10
(cherry picked from commit fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f)
00db10
---
00db10
 ChangeLog                                 |  4 ++++
00db10
 sysdeps/powerpc/powerpc64/power7/strstr.S | 22 +++++++++++++++-------
00db10
 2 files changed, 19 insertions(+), 7 deletions(-)
00db10
00db10
diff --git a/ChangeLog b/ChangeLog
00db10
index cf95a84..5cbd6d6 100644
00db10
diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S
00db10
index bfb0c49..fb3c810 100644
00db10
--- a/sysdeps/powerpc/powerpc64/power7/strstr.S
00db10
+++ b/sysdeps/powerpc/powerpc64/power7/strstr.S
00db10
@@ -23,6 +23,8 @@
00db10
 /* The performance gain is obtained using aligned memory access, load
00db10
  * doubleword and usage of cmpb instruction for quicker comparison.  */
00db10
 
00db10
+#define ITERATIONS	64
00db10
+
00db10
 #ifndef STRLEN
00db10
 /* For builds with no IFUNC support, local calls should be made to internal
00db10
    GLIBC symbol (created by libc_hidden_builtin_def).  */
00db10
@@ -62,6 +64,8 @@ EALIGN (strstr, 4, 0)
00db10
 	cfi_offset(r30, -16)
00db10
 	std	r29, -24(r1)		/* Save callers register r29.  */
00db10
 	cfi_offset(r29, -24)
00db10
+	std	r28, -32(r1)		/* Save callers register r28.  */
00db10
+	cfi_offset(r28, -32)
00db10
 	std	r0, 16(r1)		/* Store the link register.  */
00db10
 	cfi_offset(lr, 16)
00db10
 	stdu	r1, -FRAMESIZE(r1)	/* Create the stack frame.  */
00db10
@@ -69,7 +73,6 @@ EALIGN (strstr, 4, 0)
00db10
 
00db10
 	dcbt	0, r3
00db10
 	dcbt	0, r4
00db10
-
00db10
 	cmpdi	cr7, r3, 0
00db10
 	beq	cr7, L(retnull)
00db10
 	cmpdi	cr7, r4, 0
00db10
@@ -84,10 +87,6 @@ EALIGN (strstr, 4, 0)
00db10
 	cmpdi	cr7, r3, 0	/* If search str is null.  */
00db10
 	beq	cr7, L(ret_r3)
00db10
 
00db10
-	/* Call __strstr_ppc if needle len > 2048 */
00db10
-	cmpdi	cr7, r3, 2048
00db10
-	bgt	cr7, L(default)
00db10
-
00db10
 	mr	r31, r3
00db10
 	mr	r4, r3
00db10
 	mr	r3, r29
00db10
@@ -105,7 +104,8 @@ EALIGN (strstr, 4, 0)
00db10
 	/* If first char of search str is not present.  */
00db10
 	cmpdi	cr7, r3, 0
00db10
 	ble	cr7, L(end)
00db10
-
00db10
+	/* Reg r28 is used to count the number of iterations.  */
00db10
+	li	r28, 0
00db10
 	rldicl	r8, r3, 0, 52	/* Page cross check.  */
00db10
 	cmpldi	cr7, r8, 4096-16
00db10
 	bgt	cr7, L(bytebybyte)
00db10
@@ -324,6 +324,10 @@ L(return4):
00db10
 	.align	4
00db10
 L(begin):
00db10
 	mr	r3, r8
00db10
+	/* When the iteration count reaches ITERATIONS, fall back to default.  */
00db10
+	addi	r28, r28, 1
00db10
+	cmpdi	cr7, r28, ITERATIONS
00db10
+	beq	cr7, L(default)
00db10
 	lbz	r4, 0(r30)
00db10
 	bl	STRCHR
00db10
 	nop
00db10
@@ -423,6 +427,10 @@ L(nextbyte):
00db10
 	cmpdi	cr7, r9, -1
00db10
 	beq	cr7, L(end)
00db10
 	addi	r3, r4, 1
00db10
+	/* When the iteration count reaches ITERATIONS, fall back to default.  */
00db10
+	addi	r28, r28, 1
00db10
+	cmpdi	cr7, r28, ITERATIONS
00db10
+	beq	cr7, L(default)
00db10
 	lbz	r4, 0(r30)
00db10
 	bl	STRCHR
00db10
 	nop
00db10
@@ -490,7 +498,6 @@ L(retnull):
00db10
 
00db10
 	.align	4
00db10
 L(default):
00db10
-	mr	r3, r29
00db10
 	mr	r4, r30
00db10
 	bl	__strstr_ppc
00db10
 	nop
00db10
@@ -500,6 +507,7 @@ L(end):
00db10
 	addi	r1, r1, FRAMESIZE	/* Restore stack pointer.  */
00db10
 	cfi_adjust_cfa_offset(-FRAMESIZE)
00db10
 	ld	r0, 16(r1)	/* Restore the saved link register.  */
00db10
+	ld	r28, -32(r1)	/* Restore callers save register r28.  */
00db10
 	ld	r29, -24(r1)	/* Restore callers save register r29.  */
00db10
 	ld	r30, -16(r1)	/* Restore callers save register r30.  */
00db10
 	ld	r31, -8(r1)	/* Restore callers save register r31.  */
00db10
-- 
00db10
2.1.0
00db10