|
|
00db10 |
commit 87868c2418fb74357757e3b739ce5b76b17a8929
|
|
|
00db10 |
Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
|
|
|
00db10 |
Date: Wed Jun 25 11:54:31 2014 -0500
|
|
|
00db10 |
|
|
|
00db10 |
PowerPC: Align power7 memcpy using VSX to quadword
|
|
|
00db10 |
|
|
|
00db10 |
This patch changes power7 memcpy to use VSX instructions only when
|
|
|
00db10 |
memory is aligned to quadword. It is to avoid unaligned kernel traps
|
|
|
00db10 |
on non-cacheable memory (for instance, memory-mapped I/O).
|
|
|
00db10 |
|
|
|
00db10 |
diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
|
|
|
00db10 |
index 52c2a6b..e540fea 100644
|
|
|
00db10 |
--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
|
|
|
00db10 |
+++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
|
|
|
00db10 |
@@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0)
|
|
|
00db10 |
ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move
|
|
|
00db10 |
code. */
|
|
|
00db10 |
|
|
|
00db10 |
- andi. 11,3,7 /* Check alignment of DST. */
|
|
|
00db10 |
- clrlwi 10,4,29 /* Check alignment of SRC. */
|
|
|
00db10 |
+ andi. 11,3,15 /* Check alignment of DST. */
|
|
|
00db10 |
+ clrlwi 10,4,28 /* Check alignment of SRC. */
|
|
|
00db10 |
cmplw cr6,10,11 /* SRC and DST alignments match? */
|
|
|
00db10 |
mr 12,4
|
|
|
00db10 |
mr 31,5
|
|
|
00db10 |
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
|
|
|
00db10 |
index bbfd381..58d9b12 100644
|
|
|
00db10 |
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
|
|
|
00db10 |
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
|
|
|
00db10 |
@@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0)
|
|
|
00db10 |
ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move
|
|
|
00db10 |
code. */
|
|
|
00db10 |
|
|
|
00db10 |
-#ifdef __LITTLE_ENDIAN__
|
|
|
00db10 |
-/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
|
|
|
00db10 |
- or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
|
|
|
00db10 |
- loop is only used for quadword aligned copies. */
|
|
|
00db10 |
+/* Align copies using VSX instructions to quadword. It is to avoid alignment
|
|
|
00db10 |
+ traps when memcpy is used on non-cacheable memory (for instance, memory
|
|
|
00db10 |
+ mapped I/O). */
|
|
|
00db10 |
andi. 10,3,15
|
|
|
00db10 |
clrldi 11,4,60
|
|
|
00db10 |
-#else
|
|
|
00db10 |
- andi. 10,3,7 /* Check alignment of DST. */
|
|
|
00db10 |
- clrldi 11,4,61 /* Check alignment of SRC. */
|
|
|
00db10 |
-#endif
|
|
|
00db10 |
cmpld cr6,10,11 /* SRC and DST alignments match? */
|
|
|
00db10 |
|
|
|
00db10 |
mr dst,3
|
|
|
00db10 |
@@ -53,13 +48,9 @@ EALIGN (memcpy, 5, 0)
|
|
|
00db10 |
beq L(aligned_copy)
|
|
|
00db10 |
|
|
|
00db10 |
mtocrf 0x01,0
|
|
|
00db10 |
-#ifdef __LITTLE_ENDIAN__
|
|
|
00db10 |
clrldi 0,0,60
|
|
|
00db10 |
-#else
|
|
|
00db10 |
- clrldi 0,0,61
|
|
|
00db10 |
-#endif
|
|
|
00db10 |
|
|
|
00db10 |
-/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */
|
|
|
00db10 |
+/* Get the DST and SRC aligned to 16 bytes. */
|
|
|
00db10 |
1:
|
|
|
00db10 |
bf 31,2f
|
|
|
00db10 |
lbz 6,0(src)
|
|
|
00db10 |
@@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0)
|
|
|
00db10 |
stw 6,0(dst)
|
|
|
00db10 |
addi dst,dst,4
|
|
|
00db10 |
8:
|
|
|
00db10 |
-#ifdef __LITTLE_ENDIAN__
|
|
|
00db10 |
bf 28,16f
|
|
|
00db10 |
ld 6,0(src)
|
|
|
00db10 |
addi src,src,8
|
|
|
00db10 |
std 6,0(dst)
|
|
|
00db10 |
addi dst,dst,8
|
|
|
00db10 |
16:
|
|
|
00db10 |
-#endif
|
|
|
00db10 |
subf cnt,0,cnt
|
|
|
00db10 |
|
|
|
00db10 |
/* Main aligned copy loop. Copies 128 bytes at a time. */
|
|
|
00db10 |
@@ -298,9 +287,6 @@ L(copy_LE_8):
|
|
|
00db10 |
.align 4
|
|
|
00db10 |
L(copy_GE_32_unaligned):
|
|
|
00db10 |
clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */
|
|
|
00db10 |
-#ifndef __LITTLE_ENDIAN__
|
|
|
00db10 |
- andi. 10,3,15 /* Check alignment of DST (against quadwords). */
|
|
|
00db10 |
-#endif
|
|
|
00db10 |
srdi 9,cnt,4 /* Number of full quadwords remaining. */
|
|
|
00db10 |
|
|
|
00db10 |
beq L(copy_GE_32_unaligned_cont)
|