ce426f
From 561857f53a543684862c2b6d2308bc13affa2a18 Mon Sep 17 00:00:00 2001
ce426f
From: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
ce426f
Date: Tue, 18 Aug 2015 22:40:56 +0530
ce426f
Subject: [PATCH] powerpc: Handle worstcase behavior in strstr() for POWER7
ce426f
ce426f
Instead of checking the needle length, count the search iterations and
ce426f
fall back to the default implementation once a fixed limit is reached.
ce426f
This patch was tested on powerpc64 and powerpc64le.
ce426f
ce426f
2015-08-25  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
ce426f
ce426f
	* sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.
ce426f
ce426f
(cherry picked from commit fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f)
ce426f
---
ce426f
 ChangeLog                                 |  4 ++++
ce426f
 sysdeps/powerpc/powerpc64/power7/strstr.S | 22 +++++++++++++++-------
ce426f
 2 files changed, 19 insertions(+), 7 deletions(-)
ce426f
ce426f
diff --git a/ChangeLog b/ChangeLog
ce426f
index cf95a84..5cbd6d6 100644
ce426f
diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S
ce426f
index bfb0c49..fb3c810 100644
ce426f
--- a/sysdeps/powerpc/powerpc64/power7/strstr.S
ce426f
+++ b/sysdeps/powerpc/powerpc64/power7/strstr.S
ce426f
@@ -23,6 +23,8 @@
ce426f
 /* The performance gain is obtained using aligned memory access, load
ce426f
  * doubleword and usage of cmpb instruction for quicker comparison.  */
ce426f
 
ce426f
+#define ITERATIONS	64
ce426f
+
ce426f
 #ifndef STRLEN
ce426f
 /* For builds with no IFUNC support, local calls should be made to internal
ce426f
    GLIBC symbol (created by libc_hidden_builtin_def).  */
ce426f
@@ -62,6 +64,8 @@ EALIGN (strstr, 4, 0)
ce426f
 	cfi_offset(r30, -16)
ce426f
 	std	r29, -24(r1)		/* Save callers register r29.  */
ce426f
 	cfi_offset(r29, -24)
ce426f
+	std	r28, -32(r1)		/* Save callers register r28.  */
ce426f
+	cfi_offset(r28, -32)
ce426f
 	std	r0, 16(r1)		/* Store the link register.  */
ce426f
 	cfi_offset(lr, 16)
ce426f
 	stdu	r1, -FRAMESIZE(r1)	/* Create the stack frame.  */
ce426f
@@ -69,7 +73,6 @@ EALIGN (strstr, 4, 0)
ce426f
 
ce426f
 	dcbt	0, r3
ce426f
 	dcbt	0, r4
ce426f
-
ce426f
 	cmpdi	cr7, r3, 0
ce426f
 	beq	cr7, L(retnull)
ce426f
 	cmpdi	cr7, r4, 0
ce426f
@@ -84,10 +87,6 @@ EALIGN (strstr, 4, 0)
ce426f
 	cmpdi	cr7, r3, 0	/* If search str is null.  */
ce426f
 	beq	cr7, L(ret_r3)
ce426f
 
ce426f
-	/* Call __strstr_ppc if needle len > 2048 */
ce426f
-	cmpdi	cr7, r3, 2048
ce426f
-	bgt	cr7, L(default)
ce426f
-
ce426f
 	mr	r31, r3
ce426f
 	mr	r4, r3
ce426f
 	mr	r3, r29
ce426f
@@ -105,7 +104,8 @@ EALIGN (strstr, 4, 0)
ce426f
 	/* If first char of search str is not present.  */
ce426f
 	cmpdi	cr7, r3, 0
ce426f
 	ble	cr7, L(end)
ce426f
-
ce426f
+	/* Reg r28 is used to count the number of iterations. */
ce426f
+	li	r28, 0
ce426f
 	rldicl	r8, r3, 0, 52	/* Page cross check.  */
ce426f
 	cmpldi	cr7, r8, 4096-16
ce426f
 	bgt	cr7, L(bytebybyte)
ce426f
@@ -324,6 +324,10 @@ L(return4):
ce426f
 	.align	4
ce426f
 L(begin):
ce426f
 	mr	r3, r8
ce426f
+	/* When the iteration count reaches ITERATIONS, fall back to default.  */
ce426f
+	addi	r28, r28, 1
ce426f
+	cmpdi	cr7, r28, ITERATIONS
ce426f
+	beq	cr7, L(default)
ce426f
 	lbz	r4, 0(r30)
ce426f
 	bl	STRCHR
ce426f
 	nop
ce426f
@@ -423,6 +427,10 @@ L(nextbyte):
ce426f
 	cmpdi	cr7, r9, -1
ce426f
 	beq	cr7, L(end)
ce426f
 	addi	r3, r4, 1
ce426f
+	/* When the iteration count reaches ITERATIONS, fall back to default.  */
ce426f
+	addi	r28, r28, 1
ce426f
+	cmpdi	cr7, r28, ITERATIONS
ce426f
+	beq	cr7, L(default)
ce426f
 	lbz	r4, 0(r30)
ce426f
 	bl	STRCHR
ce426f
 	nop
ce426f
@@ -490,7 +498,6 @@ L(retnull):
ce426f
 
ce426f
 	.align	4
ce426f
 L(default):
ce426f
-	mr	r3, r29
ce426f
 	mr	r4, r30
ce426f
 	bl	__strstr_ppc
ce426f
 	nop
ce426f
@@ -500,6 +507,7 @@ L(end):
ce426f
 	addi	r1, r1, FRAMESIZE	/* Restore stack pointer.  */
ce426f
 	cfi_adjust_cfa_offset(-FRAMESIZE)
ce426f
 	ld	r0, 16(r1)	/* Restore the saved link register.  */
ce426f
+	ld	r28, -32(r1)	/* Restore callers save register r28.  */
ce426f
 	ld	r29, -24(r1)	/* Restore callers save register r29.  */
ce426f
 	ld	r30, -16(r1)	/* Restore callers save register r30.  */
ce426f
 	ld	r31, -8(r1)	/* Restore callers save register r31.  */
ce426f
-- 
ce426f
2.1.0
ce426f