commit 87868c2418fb74357757e3b739ce5b76b17a8929
Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date:   Wed Jun 25 11:54:31 2014 -0500

    PowerPC: Align power7 memcpy using VSX to quadword
    
    This patch changes power7 memcpy to use VSX instructions only when
    memory is aligned to quadword.  It is to avoid unaligned kernel traps
    on non-cacheable memory (for instance, memory-mapped I/O).

diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 52c2a6b..e540fea 100644
--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-	andi.   11,3,7	      /* Check alignment of DST.  */
-	clrlwi  10,4,29	      /* Check alignment of SRC.  */
+	andi.   11,3,15	      /* Check alignment of DST.  */
+	clrlwi  10,4,28	      /* Check alignment of SRC.  */
 	cmplw   cr6,10,11     /* SRC and DST alignments match?  */
 	mr	12,4
 	mr	31,5
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
index bbfd381..58d9b12 100644
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-#ifdef __LITTLE_ENDIAN__
-/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
-   or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
-   loop is only used for quadword aligned copies.  */
+/* Align copies using VSX instructions to quadword. It is to avoid alignment
+   traps when memcpy is used on non-cacheable memory (for instance, memory
+   mapped I/O).  */
 	andi.	10,3,15
 	clrldi	11,4,60
-#else
-	andi.	10,3,7		/* Check alignment of DST.  */
-	clrldi	11,4,61		/* Check alignment of SRC.  */
-#endif
 	cmpld	cr6,10,11	/* SRC and DST alignments match?  */
 
 	mr	dst,3
@@ -53,13 +48,9 @@ EALIGN (memcpy, 5, 0)
 	beq	L(aligned_copy)
 
 	mtocrf	0x01,0
-#ifdef __LITTLE_ENDIAN__
 	clrldi	0,0,60
-#else
-	clrldi	0,0,61
-#endif
 
-/* Get the DST and SRC aligned to 8 bytes (16 for little-endian).  */
+/* Get the DST and SRC aligned to 16 bytes.  */
 1:
 	bf	31,2f
 	lbz	6,0(src)
@@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0)
 	stw	6,0(dst)
 	addi	dst,dst,4
 8:
-#ifdef __LITTLE_ENDIAN__
 	bf	28,16f
 	ld	6,0(src)
 	addi	src,src,8
 	std	6,0(dst)
 	addi	dst,dst,8
 16:
-#endif
 	subf	cnt,0,cnt
 
 /* Main aligned copy loop. Copies 128 bytes at a time. */
@@ -298,9 +287,6 @@ L(copy_LE_8):
 	.align	4
 L(copy_GE_32_unaligned):
 	clrldi	0,0,60	      /* Number of bytes until the 1st dst quadword.  */
-#ifndef __LITTLE_ENDIAN__
-	andi.	10,3,15	      /* Check alignment of DST (against quadwords).  */
-#endif
 	srdi	9,cnt,4	      /* Number of full quadwords remaining.  */
 
 	beq	L(copy_GE_32_unaligned_cont)