isaacpittman-hitachi / rpms / openssl

Forked from rpms/openssl 2 years ago
Clone

Blame SOURCES/openssl-1.1.1-intel-cet.patch

c95581
diff -up openssl-1.1.1e/crypto/aes/asm/aesni-x86_64.pl.intel-cet openssl-1.1.1e/crypto/aes/asm/aesni-x86_64.pl
c95581
--- openssl-1.1.1e/crypto/aes/asm/aesni-x86_64.pl.intel-cet	2020-03-17 15:31:17.000000000 +0100
c95581
+++ openssl-1.1.1e/crypto/aes/asm/aesni-x86_64.pl	2020-03-19 17:07:02.626522694 +0100
c95581
@@ -275,6 +275,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 ${PREFIX}_encrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	movups	($inp),$inout0		# load input
c95581
 	mov	240($key),$rounds	# key->rounds
c95581
 ___
c95581
@@ -293,6 +294,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 ${PREFIX}_decrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	movups	($inp),$inout0		# load input
c95581
 	mov	240($key),$rounds	# key->rounds
c95581
 ___
c95581
@@ -613,6 +615,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 aesni_ecb_encrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 ___
c95581
 $code.=<<___ if ($win64);
c95581
 	lea	-0x58(%rsp),%rsp
c95581
@@ -985,6 +988,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 aesni_ccm64_encrypt_blocks:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 ___
c95581
 $code.=<<___ if ($win64);
c95581
 	lea	-0x58(%rsp),%rsp
c95581
@@ -1077,6 +1081,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 aesni_ccm64_decrypt_blocks:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 ___
c95581
 $code.=<<___ if ($win64);
c95581
 	lea	-0x58(%rsp),%rsp
c95581
@@ -1203,6 +1208,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 aesni_ctr32_encrypt_blocks:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	cmp	\$1,$len
c95581
 	jne	.Lctr32_bulk
c95581
 
c95581
@@ -1775,6 +1781,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 aesni_xts_encrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	lea	(%rsp),%r11			# frame pointer
c95581
 .cfi_def_cfa_register	%r11
c95581
 	push	%rbp
c95581
@@ -2258,6 +2265,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 aesni_xts_decrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	lea	(%rsp),%r11			# frame pointer
c95581
 .cfi_def_cfa_register	%r11
c95581
 	push	%rbp
c95581
@@ -2783,6 +2791,7 @@ $code.=<<___;
c95581
 .align	32
c95581
 aesni_ocb_encrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	lea	(%rsp),%rax
c95581
 	push	%rbx
c95581
 .cfi_push	%rbx
c95581
@@ -3249,6 +3258,7 @@ __ocb_encrypt1:
c95581
 .align	32
c95581
 aesni_ocb_decrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	lea	(%rsp),%rax
c95581
 	push	%rbx
c95581
 .cfi_push	%rbx
c95581
@@ -3737,6 +3747,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 ${PREFIX}_cbc_encrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	test	$len,$len		# check length
c95581
 	jz	.Lcbc_ret
c95581
 
c95581
diff -up openssl-1.1.1e/crypto/aes/asm/vpaes-x86_64.pl.intel-cet openssl-1.1.1e/crypto/aes/asm/vpaes-x86_64.pl
c95581
--- openssl-1.1.1e/crypto/aes/asm/vpaes-x86_64.pl.intel-cet	2020-03-17 15:31:17.000000000 +0100
c95581
+++ openssl-1.1.1e/crypto/aes/asm/vpaes-x86_64.pl	2020-03-19 17:00:15.974621757 +0100
c95581
@@ -696,6 +696,7 @@ _vpaes_schedule_mangle:
c95581
 .align	16
c95581
 ${PREFIX}_set_encrypt_key:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 ___
c95581
 $code.=<<___ if ($win64);
c95581
 	lea	-0xb8(%rsp),%rsp
c95581
@@ -746,6 +747,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 ${PREFIX}_set_decrypt_key:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 ___
c95581
 $code.=<<___ if ($win64);
c95581
 	lea	-0xb8(%rsp),%rsp
c95581
@@ -801,6 +803,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 ${PREFIX}_encrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 ___
c95581
 $code.=<<___ if ($win64);
c95581
 	lea	-0xb8(%rsp),%rsp
c95581
@@ -846,6 +849,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 ${PREFIX}_decrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 ___
c95581
 $code.=<<___ if ($win64);
c95581
 	lea	-0xb8(%rsp),%rsp
c95581
@@ -897,6 +901,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 ${PREFIX}_cbc_encrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	xchg	$key,$len
c95581
 ___
c95581
 ($len,$key)=($key,$len);
c95581
diff -up openssl-1.1.1e/crypto/async/arch/async_posix.c.intel-cet openssl-1.1.1e/crypto/async/arch/async_posix.c
c95581
--- openssl-1.1.1e/crypto/async/arch/async_posix.c.intel-cet	2020-03-17 15:31:17.000000000 +0100
c95581
+++ openssl-1.1.1e/crypto/async/arch/async_posix.c	2020-03-19 17:00:15.974621757 +0100
c95581
@@ -34,7 +34,9 @@ void async_local_cleanup(void)
c95581
 
c95581
 int async_fibre_makecontext(async_fibre *fibre)
c95581
 {
c95581
+#ifndef USE_SWAPCONTEXT
c95581
     fibre->env_init = 0;
c95581
+#endif
c95581
     if (getcontext(&fibre->fibre) == 0) {
c95581
         fibre->fibre.uc_stack.ss_sp = OPENSSL_malloc(STACKSIZE);
c95581
         if (fibre->fibre.uc_stack.ss_sp != NULL) {
c95581
diff -up openssl-1.1.1e/crypto/async/arch/async_posix.h.intel-cet openssl-1.1.1e/crypto/async/arch/async_posix.h
c95581
--- openssl-1.1.1e/crypto/async/arch/async_posix.h.intel-cet	2020-03-19 17:00:15.435631166 +0100
c95581
+++ openssl-1.1.1e/crypto/async/arch/async_posix.h	2020-03-19 17:00:15.975621739 +0100
c95581
@@ -25,17 +25,33 @@
c95581
 #  define ASYNC_POSIX
c95581
 #  define ASYNC_ARCH
c95581
 
c95581
+#  ifdef __CET__
c95581
+/*
c95581
+ * When Intel CET is enabled, makecontext will create a different
c95581
+ * shadow stack for each context.  async_fibre_swapcontext cannot
c95581
+ * use _longjmp.  It must call swapcontext to swap shadow stack as
c95581
+ * well as normal stack.
c95581
+ */
c95581
+#   define USE_SWAPCONTEXT
c95581
+#  endif
c95581
 #  include <ucontext.h>
c95581
-#  include <setjmp.h>
c95581
+#  ifndef USE_SWAPCONTEXT
c95581
+#   include <setjmp.h>
c95581
+#  endif
c95581
 
c95581
 typedef struct async_fibre_st {
c95581
     ucontext_t fibre;
c95581
+#  ifndef USE_SWAPCONTEXT
c95581
     jmp_buf env;
c95581
     int env_init;
c95581
+#  endif
c95581
 } async_fibre;
c95581
 
c95581
 static ossl_inline int async_fibre_swapcontext(async_fibre *o, async_fibre *n, int r)
c95581
 {
c95581
+#  ifdef USE_SWAPCONTEXT
c95581
+    swapcontext(&o->fibre, &n->fibre);
c95581
+#  else
c95581
     o->env_init = 1;
c95581
 
c95581
     if (!r || !_setjmp(o->env)) {
c95581
@@ -44,6 +60,7 @@ static ossl_inline int async_fibre_swapc
c95581
         else
c95581
             setcontext(&n->fibre);
c95581
     }
c95581
+#  endif
c95581
 
c95581
     return 1;
c95581
 }
c95581
diff -up openssl-1.1.1e/crypto/camellia/asm/cmll-x86_64.pl.intel-cet openssl-1.1.1e/crypto/camellia/asm/cmll-x86_64.pl
c95581
--- openssl-1.1.1e/crypto/camellia/asm/cmll-x86_64.pl.intel-cet	2020-03-17 15:31:17.000000000 +0100
c95581
+++ openssl-1.1.1e/crypto/camellia/asm/cmll-x86_64.pl	2020-03-19 17:00:15.975621739 +0100
c95581
@@ -685,6 +685,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 Camellia_cbc_encrypt:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	cmp	\$0,%rdx
c95581
 	je	.Lcbc_abort
c95581
 	push	%rbx
c95581
diff -up openssl-1.1.1e/crypto/modes/asm/ghash-x86_64.pl.intel-cet openssl-1.1.1e/crypto/modes/asm/ghash-x86_64.pl
c95581
--- openssl-1.1.1e/crypto/modes/asm/ghash-x86_64.pl.intel-cet	2020-03-17 15:31:17.000000000 +0100
c95581
+++ openssl-1.1.1e/crypto/modes/asm/ghash-x86_64.pl	2020-03-19 17:00:15.975621739 +0100
c95581
@@ -239,6 +239,7 @@ $code=<<___;
c95581
 .align	16
c95581
 gcm_gmult_4bit:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	push	%rbx
c95581
 .cfi_push	%rbx
c95581
 	push	%rbp		# %rbp and others are pushed exclusively in
c95581
@@ -286,6 +287,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 gcm_ghash_4bit:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	push	%rbx
c95581
 .cfi_push	%rbx
c95581
 	push	%rbp
c95581
@@ -612,6 +614,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 gcm_gmult_clmul:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 .L_gmult_clmul:
c95581
 	movdqu		($Xip),$Xi
c95581
 	movdqa		.Lbswap_mask(%rip),$T3
c95581
@@ -663,6 +666,7 @@ $code.=<<___;
c95581
 .align	32
c95581
 gcm_ghash_clmul:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 .L_ghash_clmul:
c95581
 ___
c95581
 $code.=<<___ if ($win64);
c95581
@@ -1166,6 +1170,7 @@ $code.=<<___;
c95581
 .align	32
c95581
 gcm_gmult_avx:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	jmp	.L_gmult_clmul
c95581
 .cfi_endproc
c95581
 .size	gcm_gmult_avx,.-gcm_gmult_avx
c95581
@@ -1177,6 +1182,7 @@ $code.=<<___;
c95581
 .align	32
c95581
 gcm_ghash_avx:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 ___
c95581
 if ($avx) {
c95581
 my ($Xip,$Htbl,$inp,$len)=@_4args;
c95581
diff -up openssl-1.1.1e/crypto/perlasm/cbc.pl.intel-cet openssl-1.1.1e/crypto/perlasm/cbc.pl
c95581
--- openssl-1.1.1e/crypto/perlasm/cbc.pl.intel-cet	2020-03-17 15:31:17.000000000 +0100
c95581
+++ openssl-1.1.1e/crypto/perlasm/cbc.pl	2020-03-19 17:00:15.976621722 +0100
c95581
@@ -165,21 +165,28 @@ sub cbc
c95581
 	&jmp_ptr($count);
c95581
 
c95581
 &set_label("ej7");
c95581
+	&endbranch();	# own statement: without ';' the next line's '&' parses as bitwise AND
c95581
 	&movb(&HB("edx"),	&BP(6,$in,"",0));
c95581
 	&shl("edx",8);
c95581
 &set_label("ej6");
c95581
+	&endbranch();
c95581
 	&movb(&HB("edx"),	&BP(5,$in,"",0));
c95581
 &set_label("ej5");
c95581
+	&endbranch();
c95581
 	&movb(&LB("edx"),	&BP(4,$in,"",0));
c95581
 &set_label("ej4");
c95581
+	&endbranch();
c95581
 	&mov("ecx",		&DWP(0,$in,"",0));
c95581
 	&jmp(&label("ejend"));
c95581
 &set_label("ej3");
c95581
+	&endbranch();
c95581
 	&movb(&HB("ecx"),	&BP(2,$in,"",0));
c95581
 	&shl("ecx",8);
c95581
 &set_label("ej2");
c95581
+	&endbranch();
c95581
 	&movb(&HB("ecx"),	&BP(1,$in,"",0));
c95581
 &set_label("ej1");
c95581
+	&endbranch();
c95581
 	&movb(&LB("ecx"),	&BP(0,$in,"",0));
c95581
 &set_label("ejend");
c95581
 
c95581
diff -up openssl-1.1.1e/crypto/perlasm/x86_64-xlate.pl.intel-cet openssl-1.1.1e/crypto/perlasm/x86_64-xlate.pl
c95581
--- openssl-1.1.1e/crypto/perlasm/x86_64-xlate.pl.intel-cet	2020-03-17 15:31:17.000000000 +0100
c95581
+++ openssl-1.1.1e/crypto/perlasm/x86_64-xlate.pl	2020-03-19 17:00:15.984621582 +0100
c95581
@@ -101,6 +101,33 @@ elsif (!$gas)
c95581
     $decor="\$L\$";
c95581
 }
c95581
 
c95581
+my $cet_property;
c95581
+if ($flavour =~ /elf/) {
c95581
+	# Always generate .note.gnu.property section for ELF outputs to
c95581
+	# mark Intel CET support since all input files must be marked
c95581
+	# with Intel CET support in order for linker to mark output with
c95581
+	# Intel CET support.
c95581
+	my $p2align=3; $p2align=2 if ($flavour eq "elf32");
c95581
+	$cet_property = <<_____;
c95581
+	.section ".note.gnu.property", "a"
c95581
+	.p2align $p2align
c95581
+	.long 1f - 0f
c95581
+	.long 4f - 1f
c95581
+	.long 5
c95581
+0:
c95581
+	.asciz "GNU"
c95581
+1:
c95581
+	.p2align $p2align
c95581
+	.long 0xc0000002
c95581
+	.long 3f - 2f
c95581
+2:
c95581
+	.long 3
c95581
+3:
c95581
+	.p2align $p2align
c95581
+4:
c95581
+_____
c95581
+}
c95581
+
c95581
 my $current_segment;
c95581
 my $current_function;
c95581
 my %globals;
c95581
@@ -1213,6 +1240,7 @@ while(defined(my $line=<>)) {
c95581
     print $line,"\n";
c95581
 }
c95581
 
c95581
+print "$cet_property"			if ($cet_property);
c95581
 print "\n$current_segment\tENDS\n"	if ($current_segment && $masm);
c95581
 print "END\n"				if ($masm);
c95581
 
c95581
diff -up openssl-1.1.1e/crypto/perlasm/x86gas.pl.intel-cet openssl-1.1.1e/crypto/perlasm/x86gas.pl
c95581
--- openssl-1.1.1e/crypto/perlasm/x86gas.pl.intel-cet	2020-03-17 15:31:17.000000000 +0100
c95581
+++ openssl-1.1.1e/crypto/perlasm/x86gas.pl	2020-03-19 17:00:15.985621565 +0100
c95581
@@ -124,6 +124,7 @@ sub ::function_begin_B
c95581
     push(@out,".align\t$align\n");
c95581
     push(@out,"$func:\n");
c95581
     push(@out,"$begin:\n")		if ($global);
c95581
+    &::endbranch();
c95581
     $::stack=4;
c95581
 }
c95581
 
c95581
@@ -172,6 +173,26 @@ sub ::file_end
c95581
 	else		{ push (@out,"$tmp\n"); }
c95581
     }
c95581
     push(@out,$initseg) if ($initseg);
c95581
+    if ($::elf) {
c95581
+	push(@out,"
c95581
+	.section \".note.gnu.property\", \"a\"
c95581
+	.p2align 2
c95581
+	.long 1f - 0f
c95581
+	.long 4f - 1f
c95581
+	.long 5
c95581
+0:
c95581
+	.asciz \"GNU\"
c95581
+1:
c95581
+	.p2align 2
c95581
+	.long 0xc0000002
c95581
+	.long 3f - 2f
c95581
+2:
c95581
+	.long 3
c95581
+3:
c95581
+	.p2align 2
c95581
+4:
c95581
+");
c95581
+    }
c95581
 }
c95581
 
c95581
 sub ::data_byte	{   push(@out,".byte\t".join(',',@_)."\n");   }
c95581
diff -up openssl-1.1.1e/crypto/poly1305/asm/poly1305-x86_64.pl.intel-cet openssl-1.1.1e/crypto/poly1305/asm/poly1305-x86_64.pl
c95581
--- openssl-1.1.1e/crypto/poly1305/asm/poly1305-x86_64.pl.intel-cet	2020-03-19 17:00:38.185234015 +0100
c95581
+++ openssl-1.1.1e/crypto/poly1305/asm/poly1305-x86_64.pl	2020-03-19 17:05:46.575850341 +0100
c95581
@@ -2806,6 +2806,7 @@ $code.=<<___;
c95581
 .align	32
c95581
 poly1305_blocks_vpmadd52:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	shr	\$4,$len
c95581
 	jz	.Lno_data_vpmadd52		# too short
c95581
 
c95581
@@ -3739,6 +3740,7 @@ $code.=<<___;
c95581
 .align	32
c95581
 poly1305_emit_base2_44:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	mov	0($ctx),%r8	# load hash value
c95581
 	mov	8($ctx),%r9
c95581
 	mov	16($ctx),%r10
c95581
diff -up openssl-1.1.1e/crypto/rc4/asm/rc4-x86_64.pl.intel-cet openssl-1.1.1e/crypto/rc4/asm/rc4-x86_64.pl
c95581
--- openssl-1.1.1e/crypto/rc4/asm/rc4-x86_64.pl.intel-cet	2020-03-19 17:00:38.190233928 +0100
c95581
+++ openssl-1.1.1e/crypto/rc4/asm/rc4-x86_64.pl	2020-03-19 17:05:02.598618064 +0100
c95581
@@ -140,6 +140,7 @@ $code=<<___;
c95581
 .align	16
c95581
 RC4:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	or	$len,$len
c95581
 	jne	.Lentry
c95581
 	ret
c95581
@@ -455,6 +456,7 @@ $code.=<<___;
c95581
 .align	16
c95581
 RC4_set_key:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	lea	8($dat),$dat
c95581
 	lea	($inp,$len),$inp
c95581
 	neg	$len
c95581
@@ -529,6 +531,7 @@ RC4_set_key:
c95581
 .align	16
c95581
 RC4_options:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	lea	.Lopts(%rip),%rax
c95581
 	mov	OPENSSL_ia32cap_P(%rip),%edx
c95581
 	bt	\$20,%edx
c95581
diff -up openssl-1.1.1e/crypto/x86_64cpuid.pl.intel-cet openssl-1.1.1e/crypto/x86_64cpuid.pl
c95581
--- openssl-1.1.1e/crypto/x86_64cpuid.pl.intel-cet	2020-03-17 15:31:17.000000000 +0100
c95581
+++ openssl-1.1.1e/crypto/x86_64cpuid.pl	2020-03-19 17:03:58.172742775 +0100
c95581
@@ -40,6 +40,7 @@ print<<___;
c95581
 .align	16
c95581
 OPENSSL_atomic_add:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	movl	($arg1),%eax
c95581
 .Lspin:	leaq	($arg2,%rax),%r8
c95581
 	.byte	0xf0		# lock
c95581
@@ -56,6 +57,7 @@ OPENSSL_atomic_add:
c95581
 .align	16
c95581
 OPENSSL_rdtsc:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	rdtsc
c95581
 	shl	\$32,%rdx
c95581
 	or	%rdx,%rax
c95581
@@ -68,6 +70,7 @@ OPENSSL_rdtsc:
c95581
 .align	16
c95581
 OPENSSL_ia32_cpuid:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	mov	%rbx,%r8		# save %rbx
c95581
 .cfi_register	%rbx,%r8
c95581
 
c95581
@@ -237,6 +240,7 @@ OPENSSL_ia32_cpuid:
c95581
 .align  16
c95581
 OPENSSL_cleanse:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	xor	%rax,%rax
c95581
 	cmp	\$15,$arg2
c95581
 	jae	.Lot
c95581
@@ -274,6 +278,7 @@ OPENSSL_cleanse:
c95581
 .align  16
c95581
 CRYPTO_memcmp:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	xor	%rax,%rax
c95581
 	xor	%r10,%r10
c95581
 	cmp	\$0,$arg3
c95581
@@ -312,6 +317,7 @@ print<<___ if (!$win64);
c95581
 .align	16
c95581
 OPENSSL_wipe_cpu:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	pxor	%xmm0,%xmm0
c95581
 	pxor	%xmm1,%xmm1
c95581
 	pxor	%xmm2,%xmm2
c95581
@@ -346,6 +352,8 @@ print<<___ if ($win64);
c95581
 .type	OPENSSL_wipe_cpu,\@abi-omnipotent
c95581
 .align	16
c95581
 OPENSSL_wipe_cpu:
c95581
+.cfi_startproc
c95581
+	endbranch
c95581
 	pxor	%xmm0,%xmm0
c95581
 	pxor	%xmm1,%xmm1
c95581
 	pxor	%xmm2,%xmm2
c95581
@@ -376,6 +384,7 @@ print<<___;
c95581
 .align	16
c95581
 OPENSSL_instrument_bus:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	mov	$arg1,$out	# tribute to Win64
c95581
 	mov	$arg2,$cnt
c95581
 	mov	$arg2,$max
c95581
@@ -410,6 +419,7 @@ OPENSSL_instrument_bus:
c95581
 .align	16
c95581
 OPENSSL_instrument_bus2:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	mov	$arg1,$out	# tribute to Win64
c95581
 	mov	$arg2,$cnt
c95581
 	mov	$arg3,$max
c95581
@@ -465,6 +475,7 @@ print<<___;
c95581
 .align	16
c95581
 OPENSSL_ia32_${rdop}_bytes:
c95581
 .cfi_startproc
c95581
+	endbranch
c95581
 	xor	%rax, %rax	# return value
c95581
 	cmp	\$0,$arg2
c95581
 	je	.Ldone_${rdop}_bytes