|
|
00db10 |
This patch provides a compatibility kludge for legacy applications
|
|
|
00db10 |
which do not provide proper stack alignment. It cannot be upstreamed.
|
|
|
00db10 |
|
|
|
00db10 |
The kludge is active for both i386 and x86-64, but it is actually only
|
|
|
00db10 |
needed on i386.
|
|
|
00db10 |
|
|
|
00db10 |
In Fedora, the same effect was achieved by disabling multi-arch
|
|
|
00db10 |
altogether:
|
|
|
00db10 |
|
|
|
00db10 |
https://bugzilla.redhat.com/show_bug.cgi?id=1471427
|
|
|
00db10 |
|
|
|
00db10 |
Further discussion is on this internal bug:
|
|
|
00db10 |
|
|
|
00db10 |
https://bugzilla.redhat.com/show_bug.cgi?id=1478332
|
|
|
00db10 |
|
|
|
00db10 |
|
|
|
00db10 |
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-impl-list.c 2012-12-24 20:02:13.000000000 -0700
|
|
|
00db10 |
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-impl-list.c 2015-04-20 09:04:11.826569951 -0600
|
|
|
00db10 |
@@ -212,6 +212,8 @@
|
|
|
00db10 |
|
|
|
00db10 |
/* Support sysdeps/x86_64/multiarch/strstr-c.c. */
|
|
|
00db10 |
IFUNC_IMPL (i, name, strstr,
|
|
|
00db10 |
+ IFUNC_IMPL_ADD (array, i, strstr, use_unaligned_strstr (),
|
|
|
00db10 |
+ __strstr_sse2_unaligned)
|
|
|
00db10 |
IFUNC_IMPL_ADD (array, i, strstr, HAS_SSE4_2, __strstr_sse42)
|
|
|
00db10 |
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
|
|
|
00db10 |
|
|
|
00db10 |
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/init-arch.h 2012-12-24 20:02:13.000000000 -0700
|
|
|
00db10 |
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/init-arch.h 2015-04-20 12:15:17.984742925 -0600
|
|
|
00db10 |
@@ -63,6 +63,34 @@
|
|
|
00db10 |
#else /* __ASSEMBLER__ */
|
|
|
00db10 |
|
|
|
00db10 |
# include <sys/param.h>
|
|
|
00db10 |
+# include <sys/types.h>
|
|
|
00db10 |
+# include <sysdep.h>
|
|
|
00db10 |
+# include <stdbool.h>
|
|
|
00db10 |
+
|
|
|
00db10 |
+/* Ugly hack to make it possible to select a strstr and strcasestr
|
|
|
00db10 |
+ implementation that avoids using the stack for 16-byte aligned
|
|
|
00db10 |
+ SSE temporaries. Doing so makes it possible to call the functions
|
|
|
00db10 |
+ with a stack that's not 16-byte aligned as can happen, for example,
|
|
|
00db10 |
+ as a result of compiling the functions' callers with the GCC
|
|
|
00db10 |
+ -mpreferred-stack-boundary=2 or =3 option, or with the ICC
|
|
|
00db10 |
+ -falign-stack=assume-4-byte option. See rhbz 1150282 for details.
|
|
|
00db10 |
+
|
|
|
00db10 |
+ The ifunc selector uses the unaligned version if this
|
|
|
00db10 |
+ file exists and is accessible. */
|
|
|
00db10 |
+# define ENABLE_STRSTR_UNALIGNED_PATHNAME \
|
|
|
00db10 |
+ "/etc/sysconfig/64bit_strstr_via_64bit_strstr_sse2_unaligned"
|
|
|
00db10 |
+
|
|
|
00db10 |
+static bool __attribute__ ((unused))
|
|
|
00db10 |
+use_unaligned_strstr (void)
|
|
|
00db10 |
+{
|
|
|
00db10 |
+ struct stat unaligned_strstr_etc_sysconfig_file;
|
|
|
00db10 |
+
|
|
|
00db10 |
+ /* TLS may not have been set up yet, so avoid using stat since it tries to
|
|
|
00db10 |
+ set errno. */
|
|
|
00db10 |
+ return INTERNAL_SYSCALL (stat, , 2,
|
|
|
00db10 |
+ ENABLE_STRSTR_UNALIGNED_PATHNAME,
|
|
|
00db10 |
+ &unaligned_strstr_etc_sysconfig_file) == 0;
|
|
|
00db10 |
+}
|
|
|
00db10 |
|
|
|
00db10 |
enum
|
|
|
00db10 |
{
|
|
|
00db10 |
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile 2012-12-24 20:02:13.000000000 -0700
|
|
|
00db10 |
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile 2015-04-20 09:04:11.854569626 -0600
|
|
|
00db10 |
@@ -17,7 +17,7 @@
|
|
|
00db10 |
strcat-sse2-unaligned strncat-sse2-unaligned \
|
|
|
00db10 |
strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
|
|
|
00db10 |
strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \
|
|
|
00db10 |
- memcmp-ssse3
|
|
|
00db10 |
+ memcmp-ssse3 strstr-sse2-unaligned
|
|
|
00db10 |
ifeq (yes,$(config-cflags-sse4))
|
|
|
00db10 |
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
|
|
|
00db10 |
CFLAGS-varshift.c += -msse4
|
|
|
00db10 |
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-c.c 2012-12-24 20:02:13.000000000 -0700
|
|
|
00db10 |
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-c.c 2015-04-20 09:04:11.861569545 -0600
|
|
|
00db10 |
@@ -12,7 +12,8 @@
|
|
|
00db10 |
|
|
|
00db10 |
#if 1
|
|
|
00db10 |
libc_ifunc (__strcasestr,
|
|
|
00db10 |
- HAS_SSE4_2 ? __strcasestr_sse42 : __strcasestr_sse2);
|
|
|
00db10 |
+ HAS_SSE4_2 && !use_unaligned_strstr () ? __strcasestr_sse42 :
|
|
|
00db10 |
+ __strcasestr_sse2);
|
|
|
00db10 |
#else
|
|
|
00db10 |
libc_ifunc (__strcasestr,
|
|
|
00db10 |
0 ? __strcasestr_sse42 : __strcasestr_sse2);
|
|
|
00db10 |
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-c.c 2012-12-24 20:02:13.000000000 -0700
|
|
|
00db10 |
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-c.c 2015-04-20 09:04:11.866569487 -0600
|
|
|
00db10 |
@@ -34,6 +34,7 @@
|
|
|
00db10 |
#include "string/strstr.c"
|
|
|
00db10 |
|
|
|
00db10 |
extern __typeof (__redirect_strstr) __strstr_sse42 attribute_hidden;
|
|
|
00db10 |
+extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden;
|
|
|
00db10 |
extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
|
|
|
00db10 |
|
|
|
00db10 |
#include "init-arch.h"
|
|
|
00db10 |
@@ -41,7 +42,9 @@
|
|
|
00db10 |
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
|
|
|
00db10 |
ifunc symbol properly. */
|
|
|
00db10 |
extern __typeof (__redirect_strstr) __libc_strstr;
|
|
|
00db10 |
-libc_ifunc (__libc_strstr, HAS_SSE4_2 ? __strstr_sse42 : __strstr_sse2)
|
|
|
00db10 |
+libc_ifunc (__libc_strstr, HAS_SSE4_2 ? (use_unaligned_strstr () ?
|
|
|
00db10 |
+ __strstr_sse2_unaligned :
|
|
|
00db10 |
+ __strstr_sse42) : __strstr_sse2)
|
|
|
00db10 |
|
|
|
00db10 |
#undef strstr
|
|
|
00db10 |
strong_alias (__libc_strstr, strstr)
|
|
|
00db10 |
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S 1969-12-31 17:00:00.000000000 -0700
|
|
|
00db10 |
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S 2015-04-20 09:04:11.866569487 -0600
|
|
|
00db10 |
@@ -0,0 +1,374 @@
|
|
|
00db10 |
+/* strstr with unaligned loads
|
|
|
00db10 |
+ Copyright (C) 2009-2015 Free Software Foundation, Inc.
|
|
|
00db10 |
+ This file is part of the GNU C Library.
|
|
|
00db10 |
+
|
|
|
00db10 |
+ The GNU C Library is free software; you can redistribute it and/or
|
|
|
00db10 |
+ modify it under the terms of the GNU Lesser General Public
|
|
|
00db10 |
+ License as published by the Free Software Foundation; either
|
|
|
00db10 |
+ version 2.1 of the License, or (at your option) any later version.
|
|
|
00db10 |
+
|
|
|
00db10 |
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
|
00db10 |
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
00db10 |
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
00db10 |
+ Lesser General Public License for more details.
|
|
|
00db10 |
+
|
|
|
00db10 |
+ You should have received a copy of the GNU Lesser General Public
|
|
|
00db10 |
+ License along with the GNU C Library; if not, see
|
|
|
00db10 |
+ <http://www.gnu.org/licenses/>. */
|
|
|
00db10 |
+
|
|
|
00db10 |
+#include <sysdep.h>
|
|
|
00db10 |
+
|
|
|
00db10 |
+ENTRY(__strstr_sse2_unaligned)
|
|
|
00db10 |
+ movzbl (%rsi), %eax
|
|
|
00db10 |
+ testb %al, %al
|
|
|
00db10 |
+ je L(empty)
|
|
|
00db10 |
+ movzbl 1(%rsi), %edx
|
|
|
00db10 |
+ testb %dl, %dl
|
|
|
00db10 |
+ je L(strchr)
|
|
|
00db10 |
+ movd %eax, %xmm1
|
|
|
00db10 |
+ movd %edx, %xmm2
|
|
|
00db10 |
+ movq %rdi, %rax
|
|
|
00db10 |
+ andl $4095, %eax
|
|
|
00db10 |
+ punpcklbw %xmm1, %xmm1
|
|
|
00db10 |
+ cmpq $4031, %rax
|
|
|
00db10 |
+ punpcklbw %xmm2, %xmm2
|
|
|
00db10 |
+ punpcklwd %xmm1, %xmm1
|
|
|
00db10 |
+ punpcklwd %xmm2, %xmm2
|
|
|
00db10 |
+ pshufd $0, %xmm1, %xmm1
|
|
|
00db10 |
+ pshufd $0, %xmm2, %xmm2
|
|
|
00db10 |
+ ja L(cross_page)
|
|
|
00db10 |
+ movdqu (%rdi), %xmm3
|
|
|
00db10 |
+ pxor %xmm5, %xmm5
|
|
|
00db10 |
+ movdqu 1(%rdi), %xmm4
|
|
|
00db10 |
+ movdqa %xmm3, %xmm6
|
|
|
00db10 |
+ pcmpeqb %xmm1, %xmm3
|
|
|
00db10 |
+ pcmpeqb %xmm2, %xmm4
|
|
|
00db10 |
+ movdqu 16(%rdi), %xmm0
|
|
|
00db10 |
+ pcmpeqb %xmm5, %xmm6
|
|
|
00db10 |
+ pminub %xmm4, %xmm3
|
|
|
00db10 |
+ movdqa %xmm3, %xmm4
|
|
|
00db10 |
+ movdqu 17(%rdi), %xmm3
|
|
|
00db10 |
+ pcmpeqb %xmm0, %xmm5
|
|
|
00db10 |
+ pcmpeqb %xmm2, %xmm3
|
|
|
00db10 |
+ por %xmm6, %xmm4
|
|
|
00db10 |
+ pcmpeqb %xmm1, %xmm0
|
|
|
00db10 |
+ pminub %xmm3, %xmm0
|
|
|
00db10 |
+ por %xmm5, %xmm0
|
|
|
00db10 |
+ pmovmskb %xmm4, %r8d
|
|
|
00db10 |
+ pmovmskb %xmm0, %eax
|
|
|
00db10 |
+ salq $16, %rax
|
|
|
00db10 |
+ orq %rax, %r8
|
|
|
00db10 |
+ je L(next_32_bytes)
|
|
|
00db10 |
+L(next_pair_index):
|
|
|
00db10 |
+ bsf %r8, %rax
|
|
|
00db10 |
+ addq %rdi, %rax
|
|
|
00db10 |
+ cmpb $0, (%rax)
|
|
|
00db10 |
+ je L(zero1)
|
|
|
00db10 |
+ movzbl 2(%rsi), %edx
|
|
|
00db10 |
+ testb %dl, %dl
|
|
|
00db10 |
+ je L(found1)
|
|
|
00db10 |
+ cmpb 2(%rax), %dl
|
|
|
00db10 |
+ jne L(next_pair)
|
|
|
00db10 |
+ xorl %edx, %edx
|
|
|
00db10 |
+ jmp L(pair_loop_start)
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(strchr):
|
|
|
00db10 |
+ movzbl %al, %esi
|
|
|
00db10 |
+ jmp __strchr_sse2
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(pair_loop):
|
|
|
00db10 |
+ addq $1, %rdx
|
|
|
00db10 |
+ cmpb 2(%rax,%rdx), %cl
|
|
|
00db10 |
+ jne L(next_pair)
|
|
|
00db10 |
+L(pair_loop_start):
|
|
|
00db10 |
+ movzbl 3(%rsi,%rdx), %ecx
|
|
|
00db10 |
+ testb %cl, %cl
|
|
|
00db10 |
+ jne L(pair_loop)
|
|
|
00db10 |
+L(found1):
|
|
|
00db10 |
+ ret
|
|
|
00db10 |
+L(zero1):
|
|
|
00db10 |
+ xorl %eax, %eax
|
|
|
00db10 |
+ ret
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(next_pair):
|
|
|
00db10 |
+ leaq -1(%r8), %rax
|
|
|
00db10 |
+ andq %rax, %r8
|
|
|
00db10 |
+ jne L(next_pair_index)
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(next_32_bytes):
|
|
|
00db10 |
+ movdqu 32(%rdi), %xmm3
|
|
|
00db10 |
+ pxor %xmm5, %xmm5
|
|
|
00db10 |
+ movdqu 33(%rdi), %xmm4
|
|
|
00db10 |
+ movdqa %xmm3, %xmm6
|
|
|
00db10 |
+ pcmpeqb %xmm1, %xmm3
|
|
|
00db10 |
+ pcmpeqb %xmm2, %xmm4
|
|
|
00db10 |
+ movdqu 48(%rdi), %xmm0
|
|
|
00db10 |
+ pcmpeqb %xmm5, %xmm6
|
|
|
00db10 |
+ pminub %xmm4, %xmm3
|
|
|
00db10 |
+ movdqa %xmm3, %xmm4
|
|
|
00db10 |
+ movdqu 49(%rdi), %xmm3
|
|
|
00db10 |
+ pcmpeqb %xmm0, %xmm5
|
|
|
00db10 |
+ pcmpeqb %xmm2, %xmm3
|
|
|
00db10 |
+ por %xmm6, %xmm4
|
|
|
00db10 |
+ pcmpeqb %xmm1, %xmm0
|
|
|
00db10 |
+ pminub %xmm3, %xmm0
|
|
|
00db10 |
+ por %xmm5, %xmm0
|
|
|
00db10 |
+ pmovmskb %xmm4, %eax
|
|
|
00db10 |
+ salq $32, %rax
|
|
|
00db10 |
+ pmovmskb %xmm0, %r8d
|
|
|
00db10 |
+ salq $48, %r8
|
|
|
00db10 |
+ orq %rax, %r8
|
|
|
00db10 |
+ je L(loop_header)
|
|
|
00db10 |
+L(next_pair2_index):
|
|
|
00db10 |
+ bsfq %r8, %rax
|
|
|
00db10 |
+ addq %rdi, %rax
|
|
|
00db10 |
+ cmpb $0, (%rax)
|
|
|
00db10 |
+ je L(zero2)
|
|
|
00db10 |
+ movzbl 2(%rsi), %edx
|
|
|
00db10 |
+ testb %dl, %dl
|
|
|
00db10 |
+ je L(found2)
|
|
|
00db10 |
+ cmpb 2(%rax), %dl
|
|
|
00db10 |
+ jne L(next_pair2)
|
|
|
00db10 |
+ xorl %edx, %edx
|
|
|
00db10 |
+ jmp L(pair_loop2_start)
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(pair_loop2):
|
|
|
00db10 |
+ addq $1, %rdx
|
|
|
00db10 |
+ cmpb 2(%rax,%rdx), %cl
|
|
|
00db10 |
+ jne L(next_pair2)
|
|
|
00db10 |
+L(pair_loop2_start):
|
|
|
00db10 |
+ movzbl 3(%rsi,%rdx), %ecx
|
|
|
00db10 |
+ testb %cl, %cl
|
|
|
00db10 |
+ jne L(pair_loop2)
|
|
|
00db10 |
+L(found2):
|
|
|
00db10 |
+ ret
|
|
|
00db10 |
+ L(zero2):
|
|
|
00db10 |
+ xorl %eax, %eax
|
|
|
00db10 |
+ ret
|
|
|
00db10 |
+L(empty):
|
|
|
00db10 |
+ mov %rdi, %rax
|
|
|
00db10 |
+ ret
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(next_pair2):
|
|
|
00db10 |
+ leaq -1(%r8), %rax
|
|
|
00db10 |
+ andq %rax, %r8
|
|
|
00db10 |
+ jne L(next_pair2_index)
|
|
|
00db10 |
+L(loop_header):
|
|
|
00db10 |
+ movq $-512, %r11
|
|
|
00db10 |
+ movq %rdi, %r9
|
|
|
00db10 |
+
|
|
|
00db10 |
+ pxor %xmm7, %xmm7
|
|
|
00db10 |
+ andq $-64, %rdi
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(loop):
|
|
|
00db10 |
+ movdqa 64(%rdi), %xmm3
|
|
|
00db10 |
+ movdqu 63(%rdi), %xmm6
|
|
|
00db10 |
+ movdqa %xmm3, %xmm0
|
|
|
00db10 |
+ pxor %xmm2, %xmm3
|
|
|
00db10 |
+ pxor %xmm1, %xmm6
|
|
|
00db10 |
+ movdqa 80(%rdi), %xmm10
|
|
|
00db10 |
+ por %xmm3, %xmm6
|
|
|
00db10 |
+ pminub %xmm10, %xmm0
|
|
|
00db10 |
+ movdqu 79(%rdi), %xmm3
|
|
|
00db10 |
+ pxor %xmm2, %xmm10
|
|
|
00db10 |
+ pxor %xmm1, %xmm3
|
|
|
00db10 |
+ movdqa 96(%rdi), %xmm9
|
|
|
00db10 |
+ por %xmm10, %xmm3
|
|
|
00db10 |
+ pminub %xmm9, %xmm0
|
|
|
00db10 |
+ pxor %xmm2, %xmm9
|
|
|
00db10 |
+ movdqa 112(%rdi), %xmm8
|
|
|
00db10 |
+ addq $64, %rdi
|
|
|
00db10 |
+ pminub %xmm6, %xmm3
|
|
|
00db10 |
+ movdqu 31(%rdi), %xmm4
|
|
|
00db10 |
+ pminub %xmm8, %xmm0
|
|
|
00db10 |
+ pxor %xmm2, %xmm8
|
|
|
00db10 |
+ pxor %xmm1, %xmm4
|
|
|
00db10 |
+ por %xmm9, %xmm4
|
|
|
00db10 |
+ pminub %xmm4, %xmm3
|
|
|
00db10 |
+ movdqu 47(%rdi), %xmm5
|
|
|
00db10 |
+ pxor %xmm1, %xmm5
|
|
|
00db10 |
+ por %xmm8, %xmm5
|
|
|
00db10 |
+ pminub %xmm5, %xmm3
|
|
|
00db10 |
+ pminub %xmm3, %xmm0
|
|
|
00db10 |
+ pcmpeqb %xmm7, %xmm0
|
|
|
00db10 |
+ pmovmskb %xmm0, %eax
|
|
|
00db10 |
+ testl %eax, %eax
|
|
|
00db10 |
+ je L(loop)
|
|
|
00db10 |
+ pminub (%rdi), %xmm6
|
|
|
00db10 |
+ pminub 32(%rdi),%xmm4
|
|
|
00db10 |
+ pminub 48(%rdi),%xmm5
|
|
|
00db10 |
+ pcmpeqb %xmm7, %xmm6
|
|
|
00db10 |
+ pcmpeqb %xmm7, %xmm5
|
|
|
00db10 |
+ pmovmskb %xmm6, %edx
|
|
|
00db10 |
+ movdqa 16(%rdi), %xmm8
|
|
|
00db10 |
+ pcmpeqb %xmm7, %xmm4
|
|
|
00db10 |
+ movdqu 15(%rdi), %xmm0
|
|
|
00db10 |
+ pmovmskb %xmm5, %r8d
|
|
|
00db10 |
+ movdqa %xmm8, %xmm3
|
|
|
00db10 |
+ pmovmskb %xmm4, %ecx
|
|
|
00db10 |
+ pcmpeqb %xmm1,%xmm0
|
|
|
00db10 |
+ pcmpeqb %xmm2,%xmm3
|
|
|
00db10 |
+ salq $32, %rcx
|
|
|
00db10 |
+ pcmpeqb %xmm7,%xmm8
|
|
|
00db10 |
+ salq $48, %r8
|
|
|
00db10 |
+ pminub %xmm0,%xmm3
|
|
|
00db10 |
+ orq %rcx, %rdx
|
|
|
00db10 |
+ por %xmm3,%xmm8
|
|
|
00db10 |
+ orq %rdx, %r8
|
|
|
00db10 |
+ pmovmskb %xmm8, %eax
|
|
|
00db10 |
+ salq $16, %rax
|
|
|
00db10 |
+ orq %rax, %r8
|
|
|
00db10 |
+ je L(loop)
|
|
|
00db10 |
+L(next_pair_index3):
|
|
|
00db10 |
+ bsfq %r8, %rcx
|
|
|
00db10 |
+ addq %rdi, %rcx
|
|
|
00db10 |
+ cmpb $0, (%rcx)
|
|
|
00db10 |
+ je L(zero)
|
|
|
00db10 |
+ xorl %eax, %eax
|
|
|
00db10 |
+ movzbl 2(%rsi), %edx
|
|
|
00db10 |
+ testb %dl, %dl
|
|
|
00db10 |
+ je L(success3)
|
|
|
00db10 |
+ cmpb 1(%rcx), %dl
|
|
|
00db10 |
+ jne L(next_pair3)
|
|
|
00db10 |
+ jmp L(pair_loop_start3)
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(pair_loop3):
|
|
|
00db10 |
+ addq $1, %rax
|
|
|
00db10 |
+ cmpb 1(%rcx,%rax), %dl
|
|
|
00db10 |
+ jne L(next_pair3)
|
|
|
00db10 |
+L(pair_loop_start3):
|
|
|
00db10 |
+ movzbl 3(%rsi,%rax), %edx
|
|
|
00db10 |
+ testb %dl, %dl
|
|
|
00db10 |
+ jne L(pair_loop3)
|
|
|
00db10 |
+L(success3):
|
|
|
00db10 |
+ lea -1(%rcx), %rax
|
|
|
00db10 |
+ ret
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(next_pair3):
|
|
|
00db10 |
+ addq %rax, %r11
|
|
|
00db10 |
+ movq %rdi, %rax
|
|
|
00db10 |
+ subq %r9, %rax
|
|
|
00db10 |
+ cmpq %r11, %rax
|
|
|
00db10 |
+ jl L(switch_strstr)
|
|
|
00db10 |
+ leaq -1(%r8), %rax
|
|
|
00db10 |
+ andq %rax, %r8
|
|
|
00db10 |
+ jne L(next_pair_index3)
|
|
|
00db10 |
+ jmp L(loop)
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(switch_strstr):
|
|
|
00db10 |
+ movq %rdi, %rdi
|
|
|
00db10 |
+ jmp __strstr_sse2
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(cross_page):
|
|
|
00db10 |
+
|
|
|
00db10 |
+ movq %rdi, %rax
|
|
|
00db10 |
+ pxor %xmm0, %xmm0
|
|
|
00db10 |
+ andq $-64, %rax
|
|
|
00db10 |
+ movdqa (%rax), %xmm3
|
|
|
00db10 |
+ movdqu -1(%rax), %xmm4
|
|
|
00db10 |
+ movdqa %xmm3, %xmm8
|
|
|
00db10 |
+ movdqa 16(%rax), %xmm5
|
|
|
00db10 |
+ pcmpeqb %xmm1, %xmm4
|
|
|
00db10 |
+ pcmpeqb %xmm0, %xmm8
|
|
|
00db10 |
+ pcmpeqb %xmm2, %xmm3
|
|
|
00db10 |
+ movdqa %xmm5, %xmm7
|
|
|
00db10 |
+ pminub %xmm4, %xmm3
|
|
|
00db10 |
+ movdqu 15(%rax), %xmm4
|
|
|
00db10 |
+ pcmpeqb %xmm0, %xmm7
|
|
|
00db10 |
+ por %xmm3, %xmm8
|
|
|
00db10 |
+ movdqa %xmm5, %xmm3
|
|
|
00db10 |
+ movdqa 32(%rax), %xmm5
|
|
|
00db10 |
+ pcmpeqb %xmm1, %xmm4
|
|
|
00db10 |
+ pcmpeqb %xmm2, %xmm3
|
|
|
00db10 |
+ movdqa %xmm5, %xmm6
|
|
|
00db10 |
+ pmovmskb %xmm8, %ecx
|
|
|
00db10 |
+ pminub %xmm4, %xmm3
|
|
|
00db10 |
+ movdqu 31(%rax), %xmm4
|
|
|
00db10 |
+ por %xmm3, %xmm7
|
|
|
00db10 |
+ movdqa %xmm5, %xmm3
|
|
|
00db10 |
+ pcmpeqb %xmm0, %xmm6
|
|
|
00db10 |
+ movdqa 48(%rax), %xmm5
|
|
|
00db10 |
+ pcmpeqb %xmm1, %xmm4
|
|
|
00db10 |
+ pmovmskb %xmm7, %r8d
|
|
|
00db10 |
+ pcmpeqb %xmm2, %xmm3
|
|
|
00db10 |
+ pcmpeqb %xmm5, %xmm0
|
|
|
00db10 |
+ pminub %xmm4, %xmm3
|
|
|
00db10 |
+ movdqu 47(%rax), %xmm4
|
|
|
00db10 |
+ por %xmm3, %xmm6
|
|
|
00db10 |
+ movdqa %xmm5, %xmm3
|
|
|
00db10 |
+ salq $16, %r8
|
|
|
00db10 |
+ pcmpeqb %xmm1, %xmm4
|
|
|
00db10 |
+ pcmpeqb %xmm2, %xmm3
|
|
|
00db10 |
+ pmovmskb %xmm6, %r10d
|
|
|
00db10 |
+ pminub %xmm4, %xmm3
|
|
|
00db10 |
+ por %xmm3, %xmm0
|
|
|
00db10 |
+ salq $32, %r10
|
|
|
00db10 |
+ orq %r10, %r8
|
|
|
00db10 |
+ orq %rcx, %r8
|
|
|
00db10 |
+ movl %edi, %ecx
|
|
|
00db10 |
+ pmovmskb %xmm0, %edx
|
|
|
00db10 |
+ subl %eax, %ecx
|
|
|
00db10 |
+ salq $48, %rdx
|
|
|
00db10 |
+ orq %rdx, %r8
|
|
|
00db10 |
+ shrq %cl, %r8
|
|
|
00db10 |
+ je L(loop_header)
|
|
|
00db10 |
+L(next_pair_index4):
|
|
|
00db10 |
+ bsfq %r8, %rax
|
|
|
00db10 |
+ addq %rdi, %rax
|
|
|
00db10 |
+ cmpb $0, (%rax)
|
|
|
00db10 |
+ je L(zero)
|
|
|
00db10 |
+
|
|
|
00db10 |
+ cmpq %rax,%rdi
|
|
|
00db10 |
+ je L(next_pair4)
|
|
|
00db10 |
+
|
|
|
00db10 |
+ movzbl 2(%rsi), %edx
|
|
|
00db10 |
+ testb %dl, %dl
|
|
|
00db10 |
+ je L(found3)
|
|
|
00db10 |
+ cmpb 1(%rax), %dl
|
|
|
00db10 |
+ jne L(next_pair4)
|
|
|
00db10 |
+ xorl %edx, %edx
|
|
|
00db10 |
+ jmp L(pair_loop_start4)
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(pair_loop4):
|
|
|
00db10 |
+ addq $1, %rdx
|
|
|
00db10 |
+ cmpb 1(%rax,%rdx), %cl
|
|
|
00db10 |
+ jne L(next_pair4)
|
|
|
00db10 |
+L(pair_loop_start4):
|
|
|
00db10 |
+ movzbl 3(%rsi,%rdx), %ecx
|
|
|
00db10 |
+ testb %cl, %cl
|
|
|
00db10 |
+ jne L(pair_loop4)
|
|
|
00db10 |
+L(found3):
|
|
|
00db10 |
+ subq $1, %rax
|
|
|
00db10 |
+ ret
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(next_pair4):
|
|
|
00db10 |
+ leaq -1(%r8), %rax
|
|
|
00db10 |
+ andq %rax, %r8
|
|
|
00db10 |
+ jne L(next_pair_index4)
|
|
|
00db10 |
+ jmp L(loop_header)
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(found):
|
|
|
00db10 |
+ rep
|
|
|
00db10 |
+ ret
|
|
|
00db10 |
+
|
|
|
00db10 |
+ .p2align 4
|
|
|
00db10 |
+L(zero):
|
|
|
00db10 |
+ xorl %eax, %eax
|
|
|
00db10 |
+ ret
|
|
|
00db10 |
+
|
|
|
00db10 |
+
|
|
|
00db10 |
+END(__strstr_sse2_unaligned)
|
|
|
00db10 |
--- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr-c.c 2012-12-24 20:02:13.000000000 -0700
|
|
|
00db10 |
+++ glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr-c.c 2015-04-20 09:04:11.876569371 -0600
|
|
|
00db10 |
@@ -1,8 +1,6 @@
|
|
|
00db10 |
/* Multiple versions of strstr
|
|
|
00db10 |
All versions must be listed in ifunc-impl-list.c. */
|
|
|
00db10 |
|
|
|
00db10 |
-#include "init-arch.h"
|
|
|
00db10 |
-
|
|
|
00db10 |
#define STRSTR __strstr_ia32
|
|
|
00db10 |
#if defined SHARED && defined DO_VERSIONING && !defined NO_HIDDEN
|
|
|
00db10 |
#undef libc_hidden_builtin_def
|
|
|
00db10 |
@@ -17,13 +15,17 @@
|
|
|
00db10 |
|
|
|
00db10 |
#include "string/strstr.c"
|
|
|
00db10 |
|
|
|
00db10 |
+#include "init-arch.h"
|
|
|
00db10 |
+
|
|
|
00db10 |
extern __typeof (__redirect_strstr) __strstr_sse42 attribute_hidden;
|
|
|
00db10 |
extern __typeof (__redirect_strstr) __strstr_ia32 attribute_hidden;
|
|
|
00db10 |
|
|
|
00db10 |
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
|
|
|
00db10 |
ifunc symbol properly. */
|
|
|
00db10 |
extern __typeof (__redirect_strstr) __libc_strstr;
|
|
|
00db10 |
-libc_ifunc (__libc_strstr, HAS_SSE4_2 ? __strstr_sse42 : __strstr_ia32)
|
|
|
00db10 |
+libc_ifunc (__libc_strstr,
|
|
|
00db10 |
+ HAS_SSE4_2 && !use_unaligned_strstr () ?
|
|
|
00db10 |
+ __strstr_sse42 : __strstr_ia32)
|
|
|
00db10 |
|
|
|
00db10 |
#undef strstr
|
|
|
00db10 |
strong_alias (__libc_strstr, strstr)
|