commit 87868c2418fb74357757e3b739ce5b76b17a8929
Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date:   Wed Jun 25 11:54:31 2014 -0500

    PowerPC: Align power7 memcpy using VSX to quadword

    This patch changes power7 memcpy to use VSX instructions only when
    memory is aligned to quadword.  This avoids unaligned kernel traps
    on non-cacheable memory (for instance, memory-mapped I/O).
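
For readers who do not follow the assembly below, the new dispatch condition
can be sketched in C.  This is an illustration only, not glibc code, and the
function name is hypothetical:

#include <stddef.h>
#include <stdint.h>

/* Sketch of the patched dispatch condition.  The VSX quadword loop is
   entered only when SRC and DST sit at the same offset within a 16-byte
   quadword; before this patch the masks were 0x7 (doubleword), so
   lxvd2x/stxvd2x could be issued on quadword-misaligned addresses and
   trap on non-cacheable mappings.  */
static int
can_use_vsx_loop (const void *dst, const void *src, size_t n)
{
  uintptr_t d = (uintptr_t) dst & 15;   /* DST offset in its quadword */
  uintptr_t s = (uintptr_t) src & 15;   /* SRC offset in its quadword */
  return n >= 32 && d == s;             /* < 32 bytes uses the short path */
}
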
diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 52c2a6b..e540fea 100644
--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-	andi.   11,3,7	      /* Check alignment of DST.  */
-	clrlwi  10,4,29	      /* Check alignment of SRC.  */
+	andi.   11,3,15	      /* Check alignment of DST.  */
+	clrlwi  10,4,28	      /* Check alignment of SRC.  */
 	cmplw   cr6,10,11     /* SRC and DST alignments match?  */
 	mr	12,4
 	mr	31,5
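
In this 32-bit hunk, `clrlwi 10,4,28` clears the 28 high bits of r4, keeping
the low four, and `andi. 11,3,15` masks r3 the same way while setting CR0.
A C rendering of the new check (my reading of the instructions, not
generated code):

#include <stdint.h>

/* The new 32-bit alignment test, instruction by instruction.  */
static int
alignments_match32 (uint32_t r3 /* DST */, uint32_t r4 /* SRC */)
{
  uint32_t r11 = r3 & 0xF;	/* andi.  11,3,15 (also sets CR0) */
  uint32_t r10 = r4 & 0xF;	/* clrlwi 10,4,28: clear 28 high bits */
  return r10 == r11;		/* cmplw  cr6,10,11 */
}

The old masks (7, and clearing 29 bits) only proved doubleword alignment,
which is not enough for a 16-byte VSX access.
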
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
index bbfd381..58d9b12 100644
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-#ifdef __LITTLE_ENDIAN__
-/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
-   or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
-   loop is only used for quadword aligned copies.  */
+/* Align copies using VSX instructions to quadword. It is to avoid alignment
+   traps when memcpy is used on non-cacheable memory (for instance, memory
+   mapped I/O).  */
 	andi.	10,3,15
 	clrldi	11,4,60
-#else
-	andi.	10,3,7		/* Check alignment of DST.  */
-	clrldi	11,4,61		/* Check alignment of SRC.  */
-#endif
 	cmpld	cr6,10,11	/* SRC and DST alignments match?  */
 
 	mr	dst,3
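
The 64-bit file previously guarded the quadword check behind
`__LITTLE_ENDIAN__` and let big-endian builds test only doubleword
alignment; after this hunk both endiannesses use the quadword mask.  The
64-bit form, again as a hedged C rendering rather than real glibc code:

#include <stdint.h>

/* 64-bit analogue: clrldi 11,4,60 clears the 60 high bits of r4.  */
static int
alignments_match64 (uint64_t r3 /* DST */, uint64_t r4 /* SRC */)
{
  uint64_t r10 = r3 & 0xF;	/* andi.  10,3,15 (also sets CR0) */
  uint64_t r11 = r4 & 0xF;	/* clrldi 11,4,60 */
  return r10 == r11;		/* cmpld  cr6,10,11 */
}
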
@@ -53,13 +48,9 @@ EALIGN (memcpy, 5, 0)
 	beq	L(aligned_copy)
 
 	mtocrf	0x01,0
-#ifdef __LITTLE_ENDIAN__
 	clrldi	0,0,60
-#else
-	clrldi	0,0,61
-#endif
 
-/* Get the DST and SRC aligned to 8 bytes (16 for little-endian).  */
+/* Get the DST and SRC aligned to 16 bytes.  */
1:
 	bf	31,2f
 	lbz	6,0(src)
@@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0)
 	stw	6,0(dst)
 	addi	dst,dst,4
8:
-#ifdef __LITTLE_ENDIAN__
 	bf	28,16f
 	ld	6,0(src)
 	addi	src,src,8
 	std	6,0(dst)
 	addi	dst,dst,8
16:
-#endif
 	subf	cnt,0,cnt
 
/* Main aligned copy loop. Copies 128 bytes at a time. */
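
The prologue in these two hunks peels off up to 15 bytes so both pointers
reach a 16-byte boundary.  Assuming r0 holds the negated destination pointer
(set up earlier in the function, outside these hunks), the `bf 31/30/29/28`
ladder copies 1, 2, 4 and 8 bytes as the corresponding low bits of r0 are
set; the 8-byte step used to be little-endian-only because big-endian
stopped at 8-byte alignment.  A C sketch under those assumptions:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Sketch of the 16-byte alignment prologue; not glibc code.  */
static void
align_prologue (unsigned char **dst, const unsigned char **src, size_t *cnt)
{
  size_t head = (0 - (uintptr_t) *dst) & 15;	/* clrldi 0,0,60 */
  for (size_t step = 1; step <= 8; step <<= 1)	/* CR bits 31,30,29,28 */
    if (head & step)
      {
	memcpy (*dst, *src, step);	/* lbz/lhz/lwz/ld + matching store */
	*dst += step;
	*src += step;
      }
  *cnt -= head;				/* subf cnt,0,cnt */
}
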
@@ -298,9 +287,6 @@ L(copy_LE_8):
 	.align	4
 L(copy_GE_32_unaligned):
 	clrldi	0,0,60	      /* Number of bytes until the 1st dst quadword.  */
-#ifndef __LITTLE_ENDIAN__
-	andi.	10,3,15	      /* Check alignment of DST (against quadwords).  */
-#endif
 	srdi	9,cnt,4	      /* Number of full quadwords remaining.  */
 
 	beq	L(copy_GE_32_unaligned_cont)
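
In the unaligned path the same arithmetic splits the copy into a head, a run
of full quadwords, and a tail; with the big-endian-only `andi.` gone, the
quadword bookkeeping is shared by both endiannesses.  Roughly, as a sketch
of the two surviving instructions (hypothetical helper, not glibc code):

#include <stddef.h>
#include <stdint.h>

/* Head/body split in L(copy_GE_32_unaligned), as I read the hunk.  */
static void
split_unaligned (uintptr_t dst, size_t cnt, size_t *head, size_t *quads)
{
  *head = (0 - dst) & 15;	/* clrldi 0,0,60: bytes to 1st dst quadword */
  *quads = cnt >> 4;		/* srdi 9,cnt,4: full quadwords remaining */
}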