Blob Blame History Raw
commit 3ea8d4327003c3cefe8e82c59be8e92dcfe1a60f
Author: Carl Love <cel@us.ibm.com>
Date:   Fri Jan 14 23:04:44 2022 +0000

    Assorted changes to protect from side effects from the feature checking code.
    
    Patch contributed by Will Schmidt <will_schmidt@vnet.ibm.com>
    
    This problem was initially reported by Tulio, who also assisted me in
    identifying the underlying issue here.
    
    This was discovered on a Power10, and occurs since the ISA 3.1 support
    check uses the brh instruction via a hardcoded ".long 0x7f1401b6" asm stanza.
    That encoding writes to r20, and since the stanza does not contain a clobber
    the compiler did not know to save or restore that register upon entry or exit.
    The junk value remaining in r20 subsequently caused a segfault.
    
    This patch adds clobber masks to the instruction stanzas, as well as
    updates the associated comments to clarify which registers are being
    used.
        As part of this change I've also
        - updated the .long for the cnttzw instruction to write to r20, and
          zeroed the reserved bits from that instruction so it is properly
          decoded by the disassembler.
        - updated the .long for the dadd instruction to write to f0.
    
        I've inspected the current codegen with these changes in place, and
        confirm that r20 is now saved and restored on entry and exit from the
        machine_get_hwcaps() function.
    
    bugzilla 447995   Valgrind segfault on power10 due to hwcap checking code

diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index 0b60ecc0f..089acee64 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -1246,7 +1246,7 @@ Bool VG_(machine_get_hwcaps)( void )
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_isa_3_0 = False;
      } else {
-        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
+        __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
      }
 
      // ISA 3.1 not supported on 32-bit systems
@@ -1358,7 +1358,7 @@ Bool VG_(machine_get_hwcaps)( void )
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_V = False;
      } else {
-        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
+        __asm__ __volatile__(".long 0x10000484"); /* vor v0,v0,v0 */
      }
 
      /* General-Purpose optional (fsqrt, fsqrts) */
@@ -1366,7 +1366,7 @@ Bool VG_(machine_get_hwcaps)( void )
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FX = False;
      } else {
-        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
+        __asm__ __volatile__(".long 0xFC00002C"); /* fsqrt f0,f0 */
      }
 
      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
@@ -1374,7 +1374,7 @@ Bool VG_(machine_get_hwcaps)( void )
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_GX = False;
      } else {
-        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
+        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte f0,f0 */
      }
 
      /* VSX support implies Power ISA 2.06 */
@@ -1382,7 +1382,7 @@ Bool VG_(machine_get_hwcaps)( void )
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_VX = False;
      } else {
-        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
+        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp vs0,vs0 */
      }
 
      /* Check for Decimal Floating Point (DFP) support. */
@@ -1390,7 +1390,7 @@ Bool VG_(machine_get_hwcaps)( void )
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_DFP = False;
      } else {
-        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
+        __asm__ __volatile__(".long 0xec0e8005"); /* dadd f0,f14,f16 */
      }
 
      /* Check for ISA 2.07 support. */
@@ -1398,7 +1398,7 @@ Bool VG_(machine_get_hwcaps)( void )
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_isa_2_07 = False;
      } else {
-        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
+        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd f0,r0 */
      }
 
      /* Check for ISA 3.0 support. */
@@ -1406,7 +1406,7 @@ Bool VG_(machine_get_hwcaps)( void )
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_isa_3_0 = False;
      } else {
-        __asm__ __volatile__(".long  0x7d205434"); /* cnttzw RT, RB */
+        __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
      }
 
      /* Check for ISA 3.1 support. */
@@ -1414,7 +1414,7 @@ Bool VG_(machine_get_hwcaps)( void )
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_isa_3_1 = False;
      } else {
-        __asm__ __volatile__(".long 0x7f1401b6"); /* brh  RA, RS */
+        __asm__ __volatile__(".long 0x7f1401b6":::"r20"); /* brh r20,r24 */
      }
 
      /* Check if Host supports scv instruction */
@@ -1424,9 +1424,9 @@ Bool VG_(machine_get_hwcaps)( void )
      } else {
         /* Set r0 to 13 for the system time call.  Don't want to make a random
            system call.  */
-        __asm__ __volatile__(".long 0x7c000278"); /* clear r0 */
-        __asm__ __volatile__(".long 0x6009000d"); /* set r0 to 13 */
-        __asm__ __volatile__(".long 0x44000001"); /* scv */
+        __asm__ __volatile__(".long 0x7c000278"); /* clear r0 with xor r0,r0,r0 */
+        __asm__ __volatile__(".long 0x6000000d"); /* set r0 to 13 with ori r0,r0,13 */
+        __asm__ __volatile__(".long 0x44000001"); /* scv 0 */
      }
 
      /* determine dcbz/dcbzl sizes while we still have the signal