From f34499b3a506359f84fcb63a125e44d4a8e4ee68 Mon Sep 17 00:00:00 2001 From: Stefan Liebler Date: Thu, 8 Oct 2015 13:23:50 +0200 Subject: [PATCH 29/30] S390: Optimize wmemcmp. upstream-commit-id: f21216015b7395c535abc01b9585a1ae3ceaa132 https://www.sourceware.org/ml/libc-alpha/2015-07/msg00086.html This patch provides optimized version of wmemcmp with the z13 vector instructions. ChangeLog: * sysdeps/s390/multiarch/wmemcmp-c.c: New File. * sysdeps/s390/multiarch/wmemcmp-vx.S: Likewise. * sysdeps/s390/multiarch/wmemcmp.c: Likewise. * sysdeps/s390/multiarch/Makefile (sysdep_routines): Add wmemcmp functions. * sysdeps/s390/multiarch/ifunc-impl-list-common.c (__libc_ifunc_impl_list_common): Add ifunc test for wmemcmp. * benchtests/bench-wmemcmp.c: New File. * benchtests/Makefile (wcsmbs-bench): Add wmemcmp. --- benchtests/Makefile | 2 +- benchtests/bench-wmemcmp.c | 20 +++++ sysdeps/s390/multiarch/Makefile | 3 +- sysdeps/s390/multiarch/ifunc-impl-list.c | 2 + sysdeps/s390/multiarch/wmemcmp-c.c | 26 ++++++ sysdeps/s390/multiarch/wmemcmp-vx.S | 149 +++++++++++++++++++++++++++++++ sysdeps/s390/multiarch/wmemcmp.c | 27 ++++++ 7 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 benchtests/bench-wmemcmp.c create mode 100644 sysdeps/s390/multiarch/wmemcmp-c.c create mode 100644 sysdeps/s390/multiarch/wmemcmp-vx.S create mode 100644 sysdeps/s390/multiarch/wmemcmp.c diff --git a/benchtests/Makefile b/benchtests/Makefile index b5edfdd..911b6df 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -40,7 +40,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \ strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok wcsmbs-bench := wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat \ wcsncmp wcsncmp wcschr wcschrnul wcsrchr wcsspn wcspbrk wcscspn \ - wmemchr wmemset + wmemchr wmemset wmemcmp string-bench-all := $(string-bench) ${wcsmbs-bench} stdlib-bench := strtod diff --git a/benchtests/bench-wmemcmp.c b/benchtests/bench-wmemcmp.c new file mode 100644 index 0000000..8b33f89 --- /dev/null +++ b/benchtests/bench-wmemcmp.c @@ -0,0 +1,20 @@ +/* Measure wmemcmp functions. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define WIDE 1 +#include "bench-memcmp.c" diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile index eac88e0..929a545 100644 --- a/sysdeps/s390/multiarch/Makefile +++ b/sysdeps/s390/multiarch/Makefile @@ -38,5 +38,6 @@ sysdep_routines += wcslen wcslen-vx wcslen-c \ wcspbrk wcspbrk-vx wcspbrk-c \ wcscspn wcscspn-vx wcscspn-c \ wmemchr wmemchr-vx wmemchr-c \ - wmemset wmemset-vx wmemset-c + wmemset wmemset-vx wmemset-c \ + wmemcmp wmemcmp-vx wmemcmp-c endif diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index 44d534b..5ea258b 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -135,6 +135,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_VX_IMPL (wmemset); + IFUNC_VX_IMPL (wmemcmp); + #endif /* HAVE_S390_VX_ASM_SUPPORT */ return i; diff --git a/sysdeps/s390/multiarch/wmemcmp-c.c b/sysdeps/s390/multiarch/wmemcmp-c.c new file mode 100644 index 0000000..b43e5d4 --- /dev/null +++ b/sysdeps/s390/multiarch/wmemcmp-c.c @@ -0,0 +1,26 @@ +/* Default wmemcmp implementation for S/390. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc +# define WMEMCMP __wmemcmp_c + +# include +extern __typeof (wmemcmp) __wmemcmp_c; + +# include +#endif diff --git a/sysdeps/s390/multiarch/wmemcmp-vx.S b/sysdeps/s390/multiarch/wmemcmp-vx.S new file mode 100644 index 0000000..b509bef --- /dev/null +++ b/sysdeps/s390/multiarch/wmemcmp-vx.S @@ -0,0 +1,149 @@ +/* Vector Optimized 32/64 bit S/390 version of wmemcmp. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n) + Compare at most n characters of two wchar_t-arrays. + + Register usage: + -r0=tmp + -r1=number of blocks + -r2=s1 + -r3=s2 + -r4=n + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__wmemcmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + clgije %r4,0,.Lend_equal /* Nothing to do if n == 0. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + lghi %r5,0 /* current_len = 0. */ + + clgijh %r4,16,.Lgt16 + +.Lremaining: + aghi %r4,-1 /* vstl needs highest index. */ + vll %v16,%r4,0(%r2) + vll %v17,%r4,0(%r3) + vfenef %v18,%v16,%v17 /* Compare not equal. */ + vlgvb %r1,%v18,7 /* Load unequal index or 16 if not found. */ + clrj %r1,%r4,12,.Lfound2 /* r1 <= r4 -> unequal within loaded + bytes. */ + +.Lend_equal: + lghi %r2,0 + br %r14 + +.Lfound: + /* vfenezf found an unequal element or zero. + This instruction compares unsigned words, but wchar_t is signed. + Thus we have to compare the found element again. */ + vlgvb %r1,%v18,7 /* Extract not equal byte-index. */ +.Lfound2: + srl %r1,2 /* And convert it to character-index. */ + vlgvf %r0,%v16,0(%r1) /* Load character-values. */ + vlgvf %r1,%v17,0(%r1) + cr %r0,%r1 + je .Lend_equal + lghi %r2,1 + lghi %r1,-1 + locgrl %r2,%r1 + br %r14 + +.Lgt16: + clgijh %r4,64,.Lpreloop64 + +.Lpreloop16: + srlg %r1,%r4,4 /* Split into 16byte blocks */ +.Lloop16: + vl %v16,0(%r5,%r2) + vl %v17,0(%r5,%r3) + aghi %r5,16 + vfenefs %v18,%v16,%v17 /* Compare not equal. */ + jno .Lfound + brctg %r1,.Lloop16 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,15 /* Get remaining bytes */ + locgre %r2,%r4 + ber %r14 + la %r2,0(%r5,%r2) + la %r3,0(%r5,%r3) + j .Lremaining + +.Lpreloop64: + srlg %r1,%r4,6 /* Split into 64byte blocks */ +.Lloop64: + vl %v16,0(%r5,%r2) + vl %v17,0(%r5,%r3) + vfenefs %v18,%v16,%v17 /* Compare not equal. */ + jno .Lfound + + vl %v16,16(%r5,%r2) + vl %v17,16(%r5,%r3) + vfenefs %v18,%v16,%v17 + jno .Lfound + + vl %v16,32(%r5,%r2) + vl %v17,32(%r5,%r3) + vfenefs %v18,%v16,%v17 + jno .Lfound + + vl %v16,48(%r5,%r2) + vl %v17,48(%r5,%r3) + aghi %r5,64 + vfenefs %v18,%v16,%v17 + jno .Lfound + + brctg %r1,.Lloop64 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,63 /* Get remaining bytes */ + locgre %r2,%r4 + ber %r14 + clgijh %r4,16,.Lpreloop16 + la %r2,0(%r5,%r2) + la %r3,0(%r5,%r3) + j .Lremaining +END(__wmemcmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc */ diff --git a/sysdeps/s390/multiarch/wmemcmp.c b/sysdeps/s390/multiarch/wmemcmp.c new file mode 100644 index 0000000..24a57e9 --- /dev/null +++ b/sysdeps/s390/multiarch/wmemcmp.c @@ -0,0 +1,27 @@ +/* Multiple versions of wmemcmp. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc +# include +# include + +s390_vx_libc_ifunc2 (__wmemcmp, wmemcmp) + +#else +# include +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc) */ -- 2.3.0