8ae002
From 561857f53a543684862c2b6d2308bc13affa2a18 Mon Sep 17 00:00:00 2001
8ae002
From: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
8ae002
Date: Tue, 18 Aug 2015 22:40:56 +0530
8ae002
Subject: [PATCH] powerpc: Handle worstcase behavior in strstr() for POWER7
8ae002
8ae002
Instead of checking the needle length, the number of search iterations is
8ae002
counted, and once it reaches a constant limit the code falls back to the
8ae002
default implementation.  This patch was tested on powerpc64 and powerpc64le.
8ae002
8ae002
2015-08-25  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
8ae002
8ae002
	* sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.
8ae002
8ae002
(cherry picked from commit fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f)
8ae002
---
8ae002
 ChangeLog                                 |  4 ++++
8ae002
 sysdeps/powerpc/powerpc64/power7/strstr.S | 22 +++++++++++++++-------
8ae002
 2 files changed, 19 insertions(+), 7 deletions(-)
8ae002
8ae002
diff --git a/ChangeLog b/ChangeLog
8ae002
index cf95a84..5cbd6d6 100644
8ae002
diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S
8ae002
index bfb0c49..fb3c810 100644
8ae002
--- a/sysdeps/powerpc/powerpc64/power7/strstr.S
8ae002
+++ b/sysdeps/powerpc/powerpc64/power7/strstr.S
8ae002
@@ -23,6 +23,8 @@
8ae002
 /* The performance gain is obtained using aligned memory access, load
8ae002
  * doubleword and usage of cmpb instruction for quicker comparison.  */
8ae002
 
8ae002
+#define ITERATIONS	64
8ae002
+
8ae002
 #ifndef STRLEN
8ae002
 /* For builds with no IFUNC support, local calls should be made to internal
8ae002
    GLIBC symbol (created by libc_hidden_builtin_def).  */
8ae002
@@ -62,6 +64,8 @@ EALIGN (strstr, 4, 0)
8ae002
 	cfi_offset(r30, -16)
8ae002
 	std	r29, -24(r1)		/* Save callers register r29.  */
8ae002
 	cfi_offset(r29, -24)
8ae002
+	std	r28, -32(r1)		/* Save callers register r28.  */
8ae002
+	cfi_offset(r28, -32)
8ae002
 	std	r0, 16(r1)		/* Store the link register.  */
8ae002
 	cfi_offset(lr, 16)
8ae002
 	stdu	r1, -FRAMESIZE(r1)	/* Create the stack frame.  */
8ae002
@@ -69,7 +73,6 @@ EALIGN (strstr, 4, 0)
8ae002
 
8ae002
 	dcbt	0, r3
8ae002
 	dcbt	0, r4
8ae002
-
8ae002
 	cmpdi	cr7, r3, 0
8ae002
 	beq	cr7, L(retnull)
8ae002
 	cmpdi	cr7, r4, 0
8ae002
@@ -84,10 +87,6 @@ EALIGN (strstr, 4, 0)
8ae002
 	cmpdi	cr7, r3, 0	/* If search str is null.  */
8ae002
 	beq	cr7, L(ret_r3)
8ae002
 
8ae002
-	/* Call __strstr_ppc if needle len > 2048 */
8ae002
-	cmpdi	cr7, r3, 2048
8ae002
-	bgt	cr7, L(default)
8ae002
-
8ae002
 	mr	r31, r3
8ae002
 	mr	r4, r3
8ae002
 	mr	r3, r29
8ae002
@@ -105,7 +104,8 @@ EALIGN (strstr, 4, 0)
8ae002
 	/* If first char of search str is not present.  */
8ae002
 	cmpdi	cr7, r3, 0
8ae002
 	ble	cr7, L(end)
8ae002
-
8ae002
+	/* Reg r28 is used to count the number of iterations. */
8ae002
+	li	r28, 0
8ae002
 	rldicl	r8, r3, 0, 52	/* Page cross check.  */
8ae002
 	cmpldi	cr7, r8, 4096-16
8ae002
 	bgt	cr7, L(bytebybyte)
8ae002
@@ -324,6 +324,10 @@ L(return4):
8ae002
 	.align	4
8ae002
 L(begin):
8ae002
 	mr	r3, r8
8ae002
+	/* When the iteration count reaches ITERATIONS, fall back to default.  */
8ae002
+	addi	r28, r28, 1
8ae002
+	cmpdi	cr7, r28, ITERATIONS
8ae002
+	beq	cr7, L(default)
8ae002
 	lbz	r4, 0(r30)
8ae002
 	bl	STRCHR
8ae002
 	nop
8ae002
@@ -423,6 +427,10 @@ L(nextbyte):
8ae002
 	cmpdi	cr7, r9, -1
8ae002
 	beq	cr7, L(end)
8ae002
 	addi	r3, r4, 1
8ae002
+	/* When the iteration count reaches ITERATIONS, fall back to default.  */
8ae002
+	addi	r28, r28, 1
8ae002
+	cmpdi	cr7, r28, ITERATIONS
8ae002
+	beq	cr7, L(default)
8ae002
 	lbz	r4, 0(r30)
8ae002
 	bl	STRCHR
8ae002
 	nop
8ae002
@@ -490,7 +498,6 @@ L(retnull):
8ae002
 
8ae002
 	.align	4
8ae002
 L(default):
8ae002
-	mr	r3, r29
8ae002
 	mr	r4, r30
8ae002
 	bl	__strstr_ppc
8ae002
 	nop
8ae002
@@ -500,6 +507,7 @@ L(end):
8ae002
 	addi	r1, r1, FRAMESIZE	/* Restore stack pointer.  */
8ae002
 	cfi_adjust_cfa_offset(-FRAMESIZE)
8ae002
 	ld	r0, 16(r1)	/* Restore the saved link register.  */
8ae002
+	ld	r28, -32(r1)	/* Restore callers save register r28.  */
8ae002
 	ld	r29, -24(r1)	/* Restore callers save register r29.  */
8ae002
 	ld	r30, -16(r1)	/* Restore callers save register r30.  */
8ae002
 	ld	r31, -8(r1)	/* Restore callers save register r31.  */
8ae002
-- 
8ae002
2.1.0
8ae002