Backport of the following patch as a prerequistite for 96d6fd6c4060d739abb1822e7ad633af749532b2: commit 69f13dbf06c6195de0ada8632271d58ca3cf55da Author: Adhemerval Zanella Date: Thu Sep 26 09:29:19 2013 -0500 PowerPC: strcpy/stpcpy optimization for PPC64/POWER7 This patch intends to unify both strcpy and stpcpy implementationsi for PPC64 and PPC64/POWER7. The idead default powerpc64 implementation is to provide both doubleword and word aligned memory access. For PPC64/POWER7 is also provide doubleword and word memory access, remove the branch hints, use the cmpb instruction for compare doubleword/words, and add an optimization for inputs of same alignment. ChangeLog: 2013-10-04 Adhemerval Zanella * sysdeps/powerpc/powerpc64/strcpy.S (strcpy): Add word load/store to provide a boost for large inputs with word alignment. * sysdeps/powerpc/powerpc64/stpcpy.S (__stpcpy): Rewrite implementation based on optimized PPC64 strcpy. * sysdeps/powerpc/powerpc64/power7/strcpy.S: New file: optimized strcpy for PPC64/POWER7 based on both doubleword and word load/store. * sysdeps/powerpc/powerpc64/power7/stpcpy.S: New file: optimized stpcpy for PPC64/POWER7 based on PPC64/POWER7 strcpy. diff --git a/sysdeps/powerpc/powerpc64/power7/stpcpy.S b/sysdeps/powerpc/powerpc64/power7/stpcpy.S new file mode 100644 index 0000000..727dd06 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/stpcpy.S @@ -0,0 +1,24 @@ +/* Optimized stpcpy implementation for PowerPC64/POWER7. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define USE_AS_STPCPY +#include + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/sysdeps/powerpc/powerpc64/power7/strcpy.S b/sysdeps/powerpc/powerpc64/power7/strcpy.S new file mode 100644 index 0000000..5c341a1 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strcpy.S @@ -0,0 +1,274 @@ +/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* Implements the function + + char * [r3] strcpy (char *dest [r3], const char *src [r4]) + + or + + char * [r3] strcpy (char *dest [r3], const char *src [r4]) + + if USE_AS_STPCPY is defined. It tries to use aligned memory accesses + when possible using the following algorithm: + + if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0)) + goto aligned_doubleword_copy; + if (((((uintptr_t)dst & 0x3UL) == 0) && ((uintptr_t)src & 0x3UL) == 0)) + goto aligned_word_copy; + if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL)) + goto same_alignment; + goto unaligned; + + The aligned comparison are made using cmpb instructions. */ + +#ifdef USE_AS_STPCPY +# define FUNC_NAME __stpcpy +#else +# define FUNC_NAME strcpy +#endif + + .machine power7 +EALIGN (FUNC_NAME, 4, 0) + CALL_MCOUNT 2 + +#define rTMP r0 +#ifdef USE_AS_STPCPY +#define rRTN r3 /* pointer to previous word/doubleword in dest */ +#else +#define rRTN r12 /* pointer to previous word/doubleword in dest */ +#endif +#define rSRC r4 /* pointer to previous word/doubleword in src */ +#define rMASK r5 /* mask 0xffffffff | 0xffffffffffffffff */ +#define rWORD r6 /* current word from src */ +#define rALT r7 /* alternate word from src */ +#define rRTNAL r8 /* alignment of return pointer */ +#define rSRCAL r9 /* alignment of source pointer */ +#define rALCNT r10 /* bytes to read to reach 8 bytes alignment */ +#define rSUBAL r11 /* doubleword minus unaligned displacement */ + +#ifndef USE_AS_STPCPY +/* Save the dst pointer to use as return value. */ + mr rRTN, r3 +#endif + or rTMP, rSRC, rRTN + clrldi. rTMP, rTMP, 61 + bne L(check_word_alignment) + b L(aligned_doubleword_copy) + +L(same_alignment): +/* Src and dst with same alignment: align both to doubleword. */ + mr rALCNT, rRTN + lbz rWORD, 0(rSRC) + subfic rSUBAL, rRTNAL, 8 + addi rRTN, rRTN, 1 + addi rSRC, rSRC, 1 + cmpdi cr7, rWORD, 0 + stb rWORD, 0(rALCNT) + beq cr7, L(s2) + + add rALCNT, rALCNT, rSUBAL + subf rALCNT, rRTN, rALCNT + addi rALCNT, rALCNT, 1 + mtctr rALCNT + b L(s1) + + .align 4 +L(s0): + addi rSRC, rSRC, 1 + lbz rWORD, -1(rSRC) + cmpdi cr7, rWORD, 0 + stb rWORD, -1(rALCNT) + beqlr cr7 + mr rRTN, rALCNT +L(s1): + addi rALCNT, rRTN,1 + bdnz L(s0) + b L(aligned_doubleword_copy) + .align 4 +L(s2): + mr rRTN, rALCNT + blr + +/* For doubleword aligned memory, operate using doubleword load and stores. */ + .align 4 +L(aligned_doubleword_copy): + li rMASK, 0 + addi rRTN, rRTN, -8 + ld rWORD, 0(rSRC) + b L(g2) + + .align 4 +L(g0): ldu rALT, 8(rSRC) + stdu rWORD, 8(rRTN) + cmpb rTMP, rALT, rMASK + cmpdi rTMP, 0 + bne L(g1) + ldu rWORD, 8(rSRC) + stdu rALT, 8(rRTN) +L(g2): cmpb rTMP, rWORD, rMASK + cmpdi rTMP, 0 /* If rTMP is 0, no null's have been found. */ + beq L(g0) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g1): +#ifdef __LITTLE_ENDIAN__ + extrdi. rTMP, rALT, 8, 56 + stbu rALT, 8(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 48 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 40 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 32 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 24 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 16 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 8 + stbu rTMP, 1(rRTN) + beqlr- + extrdi rTMP, rALT, 8, 0 + stbu rTMP, 1(rRTN) +#else + extrdi. rTMP, rALT, 8, 0 + stbu rTMP, 8(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 8 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 16 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 24 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 32 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 40 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 48 + stbu rTMP, 1(rRTN) + beqlr + stbu rALT, 1(rRTN) +#endif + blr + +L(check_word_alignment): + clrldi. rTMP, rTMP, 62 + beq L(aligned_word_copy) + rldicl rRTNAL, rRTN, 0, 61 + rldicl rSRCAL, rSRC, 0, 61 + cmpld cr7, rSRCAL, rRTNAL + beq cr7, L(same_alignment) + b L(unaligned) + +/* For word aligned memory, operate using word load and stores. */ + .align 4 +L(aligned_word_copy): + li rMASK, 0 + addi rRTN, rRTN, -4 + lwz rWORD, 0(rSRC) + b L(g5) + + .align 4 +L(g3): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rRTN) + cmpb rTMP, rALT, rMASK + cmpwi rTMP, 0 + bne L(g4) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rRTN) +L(g5): cmpb rTMP, rWORD, rMASK + cmpwi rTMP, 0 /* If rTMP is 0, no null in word. */ + beq L(g3) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g4): +#ifdef __LITTLE_ENDIAN__ + rlwinm. rTMP, rALT, 0, 24, 31 + stbu rALT, 4(rRTN) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm rTMP, rALT, 8, 24, 31 + stbu rTMP, 1(rRTN) +#else + rlwinm. rTMP, rALT, 8, 24, 31 + stbu rTMP, 4(rRTN) + beqlr + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rRTN) + beqlr + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rRTN) + beqlr + stbu rALT, 1(rRTN) +#endif + blr + +/* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 +L(unaligned): + lbz rWORD, 0(rSRC) + addi rRTN, rRTN, -1 + cmpdi rWORD, 0 + beq L(u2) + + .align 5 +L(u0): lbzu rALT, 1(rSRC) + stbu rWORD, 1(rRTN) + cmpdi rALT, 0 + beq L(u1) + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rRTN) + cmpdi rWORD, 0 + beq L(u2) + lbzu rALT, 1(rSRC) + stbu rWORD, 1(rRTN) + cmpdi rALT, 0 + beq L(u1) + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rRTN) + cmpdi rWORD, 0 + bne L(u0) +L(u2): stbu rWORD, 1(rRTN) + blr +L(u1): stbu rALT, 1(rRTN) + blr +END (FUNC_NAME) + +#ifndef USE_AS_STPCPY +libc_hidden_builtin_def (strcpy) +#endif diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S index d795b61..09aa3be 100644 --- a/sysdeps/powerpc/powerpc64/stpcpy.S +++ b/sysdeps/powerpc/powerpc64/stpcpy.S @@ -1,5 +1,5 @@ /* Optimized stpcpy implementation for PowerPC64. - Copyright (C) 1997, 1999, 2000, 2002, 2004 Free Software Foundation, Inc. + Copyright (C) 1997-2013 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,123 +16,9 @@ License along with the GNU C Library; if not, see . */ -#include -#include -#include +#define USE_AS_STPCPY +#include -/* See strlen.s for comments on how the end-of-string testing works. */ - -/* char * [r3] stpcpy (char *dest [r3], const char *src [r4]) */ - -EALIGN (BP_SYM (__stpcpy), 4, 0) - CALL_MCOUNT 2 - -#define rTMP r0 -#define rRTN r3 -#if __BOUNDED_POINTERS__ -# define rDEST r4 /* pointer to previous word in dest */ -# define rSRC r5 /* pointer to previous word in src */ -# define rLOW r11 -# define rHIGH r12 -#else -# define rDEST r3 /* pointer to previous word in dest */ -# define rSRC r4 /* pointer to previous word in src */ -#endif -#define rWORD r6 /* current word from src */ -#define rFEFE r7 /* 0xfefefeff */ -#define r7F7F r8 /* 0x7f7f7f7f */ -#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */ -#define rALT r10 /* alternate word from src */ - - CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH) - CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH) - STORE_RETURN_BOUNDS (rLOW, rHIGH) - - or rTMP, rSRC, rDEST - clrldi. rTMP, rTMP, 62 - addi rDEST, rDEST, -4 - bne L(unaligned) - - lis rFEFE, -0x101 - lis r7F7F, 0x7f7f - lwz rWORD, 0(rSRC) - addi rFEFE, rFEFE, -0x101 - addi r7F7F, r7F7F, 0x7f7f - b L(g2) - -L(g0): lwzu rALT, 4(rSRC) - stwu rWORD, 4(rDEST) - add rTMP, rFEFE, rALT - nor rNEG, r7F7F, rALT - and. rTMP, rTMP, rNEG - bne- L(g1) - lwzu rWORD, 4(rSRC) - stwu rALT, 4(rDEST) -L(g2): add rTMP, rFEFE, rWORD - nor rNEG, r7F7F, rWORD - and. rTMP, rTMP, rNEG - beq+ L(g0) - - mr rALT, rWORD -/* We've hit the end of the string. Do the rest byte-by-byte. */ -L(g1): -#ifdef __LITTLE_ENDIAN__ - rlwinm. rTMP, rALT, 0, 24, 31 - stbu rALT, 4(rDEST) - beqlr- - rlwinm. rTMP, rALT, 24, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - rlwinm. rTMP, rALT, 16, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - rlwinm rTMP, rALT, 8, 24, 31 - stbu rTMP, 1(rDEST) - blr -#else - rlwinm. rTMP, rALT, 8, 24, 31 - stbu rTMP, 4(rDEST) - beqlr- - rlwinm. rTMP, rALT, 16, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - rlwinm. rTMP, rALT, 24, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - stbu rALT, 1(rDEST) - CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) - STORE_RETURN_VALUE (rDEST) - blr -#endif - -/* Oh well. In this case, we just do a byte-by-byte copy. */ - .align 4 - nop -L(unaligned): - lbz rWORD, 0(rSRC) - addi rDEST, rDEST, 3 - cmpwi rWORD, 0 - beq- L(u2) - -L(u0): lbzu rALT, 1(rSRC) - stbu rWORD, 1(rDEST) - cmpwi rALT, 0 - beq- L(u1) - nop /* Let 601 load start of loop. */ - lbzu rWORD, 1(rSRC) - stbu rALT, 1(rDEST) - cmpwi rWORD, 0 - bne+ L(u0) -L(u2): stbu rWORD, 1(rDEST) - CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) - STORE_RETURN_VALUE (rDEST) - blr -L(u1): stbu rALT, 1(rDEST) - CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) - STORE_RETURN_VALUE (rDEST) - blr -END (BP_SYM (__stpcpy)) - -weak_alias (BP_SYM (__stpcpy), BP_SYM (stpcpy)) +weak_alias (__stpcpy, stpcpy) libc_hidden_def (__stpcpy) libc_hidden_builtin_def (stpcpy) diff --git a/sysdeps/powerpc/powerpc64/strcpy.S b/sysdeps/powerpc/powerpc64/strcpy.S index 9434c27..793325d 100644 --- a/sysdeps/powerpc/powerpc64/strcpy.S +++ b/sysdeps/powerpc/powerpc64/strcpy.S @@ -1,5 +1,5 @@ /* Optimized strcpy implementation for PowerPC64. - Copyright (C) 1997, 1999, 2000, 2002, 2003, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-2013 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,52 +17,43 @@ . */ #include -#include -#include /* See strlen.s for comments on how the end-of-string testing works. */ /* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */ -EALIGN (BP_SYM (strcpy), 4, 0) +#ifdef USE_AS_STPCPY +# define FUNC_NAME __stpcpy +#else +# define FUNC_NAME strcpy +#endif + +EALIGN (FUNC_NAME, 4, 0) CALL_MCOUNT 2 #define rTMP r0 -#define rRTN r3 /* incoming DEST arg preserved as result */ -/* Note. The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. - These artifacts are left in the code as a reminder in case we need - bounded pointer support in the future. */ -#if __BOUNDED_POINTERS__ -# define rDEST r4 /* pointer to previous word in dest */ -# define rSRC r5 /* pointer to previous word in src */ -# define rLOW r11 -# define rHIGH r12 +#ifdef USE_AS_STPCPY +#define rRTN r3 /* pointer to previous word/doubleword in dest */ #else -# define rSRC r4 /* pointer to previous word in src */ -# define rDEST r5 /* pointer to previous word in dest */ +#define rRTN r12 /* pointer to previous word/doubleword in dest */ #endif +#define rSRC r4 /* pointer to previous word/doubleword in src */ #define rWORD r6 /* current word from src */ -#define rFEFE r7 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ -#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */ -#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rFEFE r7 /* constant 0xfefefeff | 0xfefefefefefefeff */ +#define r7F7F r8 /* constant 0x7f7f7f7f | 0x7f7f7f7f7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | r7F7F) */ #define rALT r10 /* alternate word from src */ - CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH) - CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH) - STORE_RETURN_BOUNDS (rLOW, rHIGH) - - dcbt 0,rSRC +#ifndef USE_AS_STPCPY +/* Save the dst pointer to use as return value. */ + mr rRTN, r3 +#endif or rTMP, rSRC, rRTN clrldi. rTMP, rTMP, 61 -#if __BOUNDED_POINTERS__ - addi rDEST, rDEST, -8 -#else - addi rDEST, rRTN, -8 -#endif - dcbtst 0,rRTN - bne L(unaligned) + bne L(check_word_alignment) + +/* For doubleword aligned memory, operate using doubleword load and stores. */ + addi rRTN, rRTN, -8 lis rFEFE, -0x101 lis r7F7F, 0x7f7f @@ -75,13 +66,13 @@ EALIGN (BP_SYM (strcpy), 4, 0) b L(g2) L(g0): ldu rALT, 8(rSRC) - stdu rWORD, 8(rDEST) + stdu rWORD, 8(rRTN) add rTMP, rFEFE, rALT nor rNEG, r7F7F, rALT and. rTMP, rTMP, rNEG bne- L(g1) ldu rWORD, 8(rSRC) - stdu rALT, 8(rDEST) + stdu rALT, 8(rRTN) L(g2): add rTMP, rFEFE, rWORD nor rNEG, r7F7F, rWORD and. rTMP, rTMP, rNEG @@ -92,80 +83,134 @@ L(g2): add rTMP, rFEFE, rWORD L(g1): #ifdef __LITTLE_ENDIAN__ extrdi. rTMP, rALT, 8, 56 - stb rALT, 8(rDEST) + stbu rALT, 8(rRTN) beqlr- extrdi. rTMP, rALT, 8, 48 - stb rTMP, 9(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 40 - stb rTMP, 10(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 32 - stb rTMP, 11(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 24 - stb rTMP, 12(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 16 - stb rTMP, 13(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 8 - stb rTMP, 14(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi rTMP, rALT, 8, 0 - stb rTMP, 15(rDEST) - blr + stbu rTMP, 1(rRTN) #else extrdi. rTMP, rALT, 8, 0 - stb rTMP, 8(rDEST) + stbu rTMP, 8(rRTN) beqlr- extrdi. rTMP, rALT, 8, 8 - stb rTMP, 9(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 16 - stb rTMP, 10(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 24 - stb rTMP, 11(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 32 - stb rTMP, 12(rDEST) - beqlr- + stbu rTMP, 1(rRTN) + beqlr extrdi. rTMP, rALT, 8, 40 - stb rTMP, 13(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 48 - stb rTMP, 14(rDEST) + stbu rTMP, 1(rRTN) beqlr- - stb rALT, 15(rDEST) - /* GKM FIXME: check high bound. */ + stbu rALT, 1(rRTN) +#endif blr + +L(check_word_alignment): + clrldi. rTMP, rTMP, 62 + bne L(unaligned) + +/* For word aligned memory, operate using word load and stores. */ + addi rRTN, rRTN, -4 + + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + lwz rWORD, 0(rSRC) + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g5) + +L(g3): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rRTN) + add rTMP, rFEFE, rALT + nor rNEG, r7F7F, rALT + and. rTMP, rTMP, rNEG + bne- L(g4) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rRTN) +L(g5): add rTMP, rFEFE, rWORD + nor rNEG, r7F7F, rWORD + and. rTMP, rTMP, rNEG + beq+ L(g3) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g4): +#ifdef __LITTLE_ENDIAN__ + rlwinm. rTMP, rALT, 0, 24, 31 + stbu rALT, 4(rRTN) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm rTMP, rALT, 8, 24, 31 + stbu rTMP, 1(rRTN) +#else + rlwinm. rTMP, rALT, 8, 24, 31 + stbu rTMP, 4(rRTN) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + stbu rALT, 1(rRTN) #endif + blr /* Oh well. In this case, we just do a byte-by-byte copy. */ .align 4 nop L(unaligned): lbz rWORD, 0(rSRC) - addi rDEST, rRTN, -1 + addi rRTN, rRTN, -1 cmpwi rWORD, 0 beq- L(u2) L(u0): lbzu rALT, 1(rSRC) - stbu rWORD, 1(rDEST) + stbu rWORD, 1(rRTN) cmpwi rALT, 0 beq- L(u1) nop /* Let 601 load start of loop. */ lbzu rWORD, 1(rSRC) - stbu rALT, 1(rDEST) + stbu rALT, 1(rRTN) cmpwi rWORD, 0 bne+ L(u0) -L(u2): stb rWORD, 1(rDEST) - /* GKM FIXME: check high bound. */ +L(u2): stbu rWORD, 1(rRTN) blr -L(u1): stb rALT, 1(rDEST) - /* GKM FIXME: check high bound. */ +L(u1): stbu rALT, 1(rRTN) blr +END (FUNC_NAME) -END (BP_SYM (strcpy)) +#ifndef USE_AS_STPCPY libc_hidden_builtin_def (strcpy) +#endif