commit 87868c2418fb74357757e3b739ce5b76b17a8929
Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date:   Wed Jun 25 11:54:31 2014 -0500

    PowerPC: Align power7 memcpy using VSX to quadword
    
    This patch changes power7 memcpy to use VSX instructions only when
    memory is aligned to a quadword.  This avoids unaligned kernel traps
    on non-cacheable memory (for instance, memory-mapped I/O).

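The rule the patch enforces can be sketched in C.  The function below is
illustrative only, not glibc code (the name sketch_memcpy and the plain
16-byte inner copy are placeholders): the quadword path is entered only
when source and destination are both 16-byte aligned, so a VSX-style
lxvd2x/stxvd2x access could never be issued on an unaligned address.

#include <stddef.h>
#include <stdint.h>

void *
sketch_memcpy (void *dst, const void *src, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  /* Quadword alignment check: the C analogue of the low-four-bit tests
     the patch applies to r3 (DST) and r4 (SRC).  */
  if ((((uintptr_t) d | (uintptr_t) s) & 0xf) == 0)
    while (len >= 16)
      {
        /* Stand-in for a 16-byte vector load/store pair; every access
           in this loop lands on a quadword boundary.  */
        __builtin_memcpy (d, s, 16);
        d += 16;
        s += 16;
        len -= 16;
      }

  /* Byte path: the tail, and the whole copy when the pointers are not
     both quadword aligned.  */
  while (len-- != 0)
    *d++ = *s++;

  return dst;
}

The same gate is visible in both hunks below: the 32-bit prologue widens
its DST/SRC masks from 7 to 15, and the 64-bit prologue makes its
16-byte alignment code unconditional.
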
diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 52c2a6b..e540fea 100644
--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-	andi.   11,3,7	      /* Check alignment of DST.  */
-	clrlwi  10,4,29	      /* Check alignment of SRC.  */
+	andi.   11,3,15	      /* Check alignment of DST.  */
+	clrlwi  10,4,28	      /* Check alignment of SRC.  */
 	cmplw   cr6,10,11     /* SRC and DST alignments match?  */
 	mr	12,4
 	mr	31,5
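In the 32-bit prologue above, the alignment test is widened from
doubleword to quadword: "andi. 11,3,15" now keeps the low four bits of
the DST pointer (r3) instead of the low three, "clrlwi 10,4,28" does the
same for SRC (r4), and the unchanged "cmplw cr6,10,11" records whether
the two offsets match, which feeds the later choice between the
matching-alignment and unaligned copy paths.  A rough C rendering, for
illustration only (the helper name is hypothetical):

#include <stdint.h>

/* Illustrative only: the condition the patched powerpc32 prologue
   computes while choosing a copy strategy.  */
static inline int
quadword_offsets_match (const void *dst, const void *src)
{
  unsigned int dst_off = (uintptr_t) dst & 0xf;  /* andi.  11,3,15   */
  unsigned int src_off = (uintptr_t) src & 0xf;  /* clrlwi 10,4,28   */
  return dst_off == src_off;                     /* cmplw  cr6,10,11 */
}
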
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
index bbfd381..58d9b12 100644
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-#ifdef __LITTLE_ENDIAN__
-/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
-   or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
-   loop is only used for quadword aligned copies.  */
+/* Align copies using VSX instructions to quadword. It is to avoid alignment
+   traps when memcpy is used on non-cacheable memory (for instance, memory
+   mapped I/O).  */
 	andi.	10,3,15
 	clrldi	11,4,60
-#else
-	andi.	10,3,7		/* Check alignment of DST.  */
-	clrldi	11,4,61		/* Check alignment of SRC.  */
-#endif
 	cmpld	cr6,10,11	/* SRC and DST alignments match?  */
 
 	mr	dst,3
@@ -53,13 +48,9 @@ EALIGN (memcpy, 5, 0)
 	beq	L(aligned_copy)
 
 	mtocrf	0x01,0
-#ifdef __LITTLE_ENDIAN__
 	clrldi	0,0,60
-#else
-	clrldi	0,0,61
-#endif
 
-/* Get the DST and SRC aligned to 8 bytes (16 for little-endian).  */
+/* Get the DST and SRC aligned to 16 bytes.  */
 1:
 	bf	31,2f
 	lbz	6,0(src)
@@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0)
 	stw	6,0(dst)
 	addi	dst,dst,4
 8:
-#ifdef __LITTLE_ENDIAN__
 	bf	28,16f
 	ld	6,0(src)
 	addi	src,src,8
 	std	6,0(dst)
 	addi	dst,dst,8
 16:
-#endif
 	subf	cnt,0,cnt
 
 /* Main aligned copy loop. Copies 128 bytes at a time. */
@@ -298,9 +287,6 @@ L(copy_LE_8):
 	.align	4
 L(copy_GE_32_unaligned):
 	clrldi	0,0,60	      /* Number of bytes until the 1st dst quadword.  */
-#ifndef __LITTLE_ENDIAN__
-	andi.	10,3,15	      /* Check alignment of DST (against quadwords).  */
-#endif
 	srdi	9,cnt,4	      /* Number of full quadwords remaining.  */
 
 	beq	L(copy_GE_32_unaligned_cont)
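The 64-bit changes make the quadword prologue unconditional: the
__LITTLE_ENDIAN__ special case disappears, DST and SRC are checked
against their low four bits on both endiannesses, the 8-byte
"bf 28 / ld / std" step of the head-alignment ladder now runs for
big-endian as well, and the big-endian-only DST check in
L(copy_GE_32_unaligned) goes away because the prologue already performs
it.  Below is a hedged C sketch of that head-alignment ladder; the
helper name is made up, this is not the glibc source, and it assumes
(as the assembly already has at this point) that the copy is at least
32 bytes long.

#include <stddef.h>
#include <stdint.h>

/* Illustrative sketch: consume 1, 2, 4 and 8 bytes as needed so that
   the 128-byte main loop starts on a quadword boundary.  The assembly
   keeps the byte count to the next 16-byte boundary in r0 and tests its
   low four bits ("bf 31/30/29/28"); here the same bits select the copy
   sizes.  */
static size_t
align_head_to_quadword (unsigned char **dst, const unsigned char **src,
                        size_t len)
{
  size_t head = (16 - ((uintptr_t) *dst & 0xf)) & 0xf;
  size_t step;

  for (step = 1; step <= 8; step <<= 1)
    if (head & step)
      {
        /* Mirrors the byte, halfword, word and doubleword load/store
           pairs in the assembly.  */
        __builtin_memcpy (*dst, *src, step);
        *dst += step;
        *src += step;
      }

  return len - head;  /* "subf cnt,0,cnt"  */
}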