076f82
commit e805606193e1a39956ca5ef73cb44a8796730686
076f82
Author: Noah Goldstein <goldstein.w.n@gmail.com>
076f82
Date:   Mon Jun 6 21:11:28 2022 -0700
076f82
076f82
    x86: Add COND_VZEROUPPER that can replace vzeroupper if no `ret`
076f82
    
076f82
    The RTM vzeroupper mitigation has no way to replace an inline
076f82
    vzeroupper that is not directly before a return.
076f82
    
076f82
    This can be useful when hoisting a vzeroupper to save code size,
076f82
    for example:
076f82
    
076f82
    ```
076f82
    L(foo):
076f82
            cmpl    %eax, %edx
076f82
            jz      L(bar)
076f82
            tzcntl  %eax, %eax
076f82
            addq    %rdi, %rax
076f82
            VZEROUPPER_RETURN
076f82
    
076f82
    L(bar):
076f82
            xorl    %eax, %eax
076f82
            VZEROUPPER_RETURN
076f82
    ```
076f82
    
076f82
    Can become:
076f82
    
076f82
    ```
076f82
    L(foo):
076f82
            COND_VZEROUPPER
076f82
            cmpl    %eax, %edx
076f82
            jz      L(bar)
076f82
            tzcntl  %eax, %eax
076f82
            addq    %rdi, %rax
076f82
            ret
076f82
    
076f82
    L(bar):
076f82
            xorl    %eax, %eax
076f82
            ret
076f82
    ```
076f82
    
076f82
    This code does not change any existing functionality.
076f82
    
076f82
    There is no difference in the objdump of libc.so before and after this
076f82
    patch.
076f82
    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
076f82
    
076f82
    (cherry picked from commit dd5c483b2598f411428df4d8864c15c4b8a3cd68)
076f82
076f82
diff --git a/sysdeps/x86_64/multiarch/avx-rtm-vecs.h b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h
076f82
index 3f531dd47fceefe9..6ca9f5e6bae7ba72 100644
076f82
--- a/sysdeps/x86_64/multiarch/avx-rtm-vecs.h
076f82
+++ b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h
076f82
@@ -20,6 +20,7 @@
076f82
 #ifndef _AVX_RTM_VECS_H
076f82
 #define _AVX_RTM_VECS_H			1
076f82
 
076f82
+#define COND_VZEROUPPER			COND_VZEROUPPER_XTEST /* RTM form.  */
076f82
 #define ZERO_UPPER_VEC_REGISTERS_RETURN	\
076f82
 	ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
076f82
 
076f82
diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
076f82
index 7bebdeb21095eda0..93e44be22e2275f1 100644
076f82
--- a/sysdeps/x86_64/sysdep.h
076f82
+++ b/sysdeps/x86_64/sysdep.h
076f82
@@ -106,6 +106,24 @@ lose:									      \
076f82
 	vzeroupper;						\
076f82
 	ret
076f82
 
076f82
+/* Can replace a vzeroupper that is not directly before a return, e.g.
076f82
+   one hoisted out of several return paths to save vzerouppers and code
076f82
+   size.  xtest sets ZF outside a transaction: then vzeroupper runs, else
076f82
+   vzeroall (zeroes entire vector registers).  Clobbers flags.  */
076f82
+#define COND_VZEROUPPER_XTEST							\
076f82
+    xtest;							\
076f82
+    jz 1f;							\
076f82
+    vzeroall;							\
076f82
+    jmp 2f;							\
076f82
+1:							\
076f82
+    vzeroupper;							\
076f82
+2:
076f82
+
076f82
+/* Plain vzeroupper unless already defined (RTM: COND_VZEROUPPER_XTEST).  */
076f82
+#ifndef COND_VZEROUPPER
076f82
+# define COND_VZEROUPPER vzeroupper
076f82
+#endif
076f82
+
076f82
 /* Zero upper vector registers and return.  */
076f82
 #ifndef ZERO_UPPER_VEC_REGISTERS_RETURN
076f82
 # define ZERO_UPPER_VEC_REGISTERS_RETURN \