Mark Wielaard 5fd4dd
commit 3ea8d4327003c3cefe8e82c59be8e92dcfe1a60f
Mark Wielaard 5fd4dd
Author: Carl Love <cel@us.ibm.com>
Mark Wielaard 5fd4dd
Date:   Fri Jan 14 23:04:44 2022 +0000
Mark Wielaard 2a6d55
Mark Wielaard 5fd4dd
    Assorted changes to protect from side effects of the feature checking code.
Mark Wielaard 5fd4dd
    
Mark Wielaard 5fd4dd
    Patch contributed by Will Schmidt <will_schmidt@vnet.ibm.com>
Mark Wielaard 2a6d55
    
Mark Wielaard 2a6d55
    This problem was initially reported by Tulio, who assisted me in
Mark Wielaard 2a6d55
    identifying the underlying issue here.
Mark Wielaard 2a6d55
    
Mark Wielaard 2a6d55
    This was discovered on a Power10, and occurs since the ISA 3.1 support
Mark Wielaard 2a6d55
    check uses the brh instruction via a hardcoded ".long 0x7f1401b6" asm stanza.
Mark Wielaard 2a6d55
    That encoding writes to r20, and since the stanza does not contain a clobber
Mark Wielaard 2a6d55
    the compiler did not know to save or restore that register upon entry or exit.
Mark Wielaard 2a6d55
    The junk value remaining in r20 subsequently caused a segfault.
Mark Wielaard 2a6d55
    
Mark Wielaard 2a6d55
    This patch adds clobber masks to the instruction stanzas, and
Mark Wielaard 2a6d55
    updates the associated comments to clarify which registers are being
Mark Wielaard 2a6d55
    used.
Mark Wielaard 5fd4dd
        As part of this change I've also
Mark Wielaard 5fd4dd
        - updated the .long for the cnttzw instruction to write to r20, and
Mark Wielaard 5fd4dd
          zeroed the reserved bits from that instruction so it is properly
Mark Wielaard 5fd4dd
          decoded by the disassembler.
Mark Wielaard 5fd4dd
        - updated the .long for the dadd instruction to write to f0.
Mark Wielaard 5fd4dd
    
Mark Wielaard 5fd4dd
        I've inspected the current codegen with these changes in place, and
Mark Wielaard 5fd4dd
        confirm that r20 is now saved and restored on entry and exit from the
Mark Wielaard 5fd4dd
        machine_get_hwcaps() function.
Mark Wielaard 2a6d55
    
Mark Wielaard 5fd4dd
    bugzilla 447995   Valgrind segfault on power10 due to hwcap checking code
Mark Wielaard 2a6d55
Mark Wielaard 2a6d55
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
Mark Wielaard 5fd4dd
index 0b60ecc0f..089acee64 100644
Mark Wielaard 2a6d55
--- a/coregrind/m_machine.c
Mark Wielaard 2a6d55
+++ b/coregrind/m_machine.c
Mark Wielaard 5fd4dd
@@ -1246,7 +1246,7 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
Mark Wielaard 2a6d55
         have_isa_3_0 = False;
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
Mark Wielaard 97e828
+        __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      // ISA 3.1 not supported on 32-bit systems
Mark Wielaard 5fd4dd
@@ -1358,7 +1358,7 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
Mark Wielaard 2a6d55
         have_V = False;
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0x10000484"); /* vor v0,v0,v0 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      /* General-Purpose optional (fsqrt, fsqrts) */
Mark Wielaard 5fd4dd
@@ -1366,7 +1366,7 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
Mark Wielaard 2a6d55
         have_FX = False;
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0xFC00002C"); /* fsqrt f0,f0 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
Mark Wielaard 5fd4dd
@@ -1374,7 +1374,7 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
Mark Wielaard 2a6d55
         have_GX = False;
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte f0,f0 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      /* VSX support implies Power ISA 2.06 */
Mark Wielaard 5fd4dd
@@ -1382,7 +1382,7 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
Mark Wielaard 2a6d55
         have_VX = False;
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp vs0,vs0 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      /* Check for Decimal Floating Point (DFP) support. */
Mark Wielaard 5fd4dd
@@ -1390,7 +1390,7 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
Mark Wielaard 2a6d55
         have_DFP = False;
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0xec0e8005"); /* dadd f0,f14,f16 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      /* Check for ISA 2.07 support. */
Mark Wielaard 5fd4dd
@@ -1398,7 +1398,7 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
Mark Wielaard 2a6d55
         have_isa_2_07 = False;
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd f0,r0 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      /* Check for ISA 3.0 support. */
Mark Wielaard 5fd4dd
@@ -1406,7 +1406,7 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
Mark Wielaard 2a6d55
         have_isa_3_0 = False;
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long  0x7d205434"); /* cnttzw RT, RB */
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      /* Check for ISA 3.1 support. */
Mark Wielaard 5fd4dd
@@ -1414,7 +1414,7 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
Mark Wielaard 2a6d55
         have_isa_3_1 = False;
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0x7f1401b6"); /* brh  RA, RS */
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0x7f1401b6":::"r20"); /* brh r20,r24 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      /* Check if Host supports scv instruction */
Mark Wielaard 5fd4dd
@@ -1424,9 +1424,9 @@ Bool VG_(machine_get_hwcaps)( void )
Mark Wielaard 2a6d55
      } else {
Mark Wielaard 2a6d55
         /* Set r0 to 13 for the system time call.  Don't want to make a random
Mark Wielaard 2a6d55
            system call.  */
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0x7c000278"); /* clear r0 */
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0x6009000d"); /* set r0 to 13 */
Mark Wielaard 2a6d55
-        __asm__ __volatile__(".long 0x44000001"); /* scv */
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0x7c000278"); /* clear r0 with xor r0,r0,r0 */
Mark Wielaard 5fd4dd
+        __asm__ __volatile__(".long 0x6000000d"); /* set r0 to 13 with ori r0,r0,13 */
Mark Wielaard 2a6d55
+        __asm__ __volatile__(".long 0x44000001"); /* scv 0 */
Mark Wielaard 2a6d55
      }
Mark Wielaard 2a6d55
 
Mark Wielaard 2a6d55
      /* determine dcbz/dcbzl sizes while we still have the signal