Blame SOURCES/valgrind-3.15.0-avx-rdrand-f16c.patch

baee74
commit 791fe5ecf909d573bcbf353b677b9404f9da0ed4
baee74
Author: Mark Wielaard <mark@klomp.org>
baee74
Date:   Mon May 27 22:19:27 2019 +0200
baee74
baee74
    Expose rdrand and f16c through cpuid also if the host only has avx.
baee74
    
baee74
    The amd64 CPUID dirtyhelpers are mostly static since they emulate some
baee74
    existing CPU "family". The avx2 ("i7-4910MQ") CPUID variant however
baee74
    can "dynamically" enable rdrand and/or f16c if the host supports them.
baee74
    Do the same for the avx_and_cx16 ("i5-2300") CPUID variant.
baee74
    
baee74
    https://bugs.kde.org/show_bug.cgi?id=408009
baee74
baee74
diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h
baee74
index 4f34b41..a5de527 100644
baee74
--- a/VEX/priv/guest_amd64_defs.h
baee74
+++ b/VEX/priv/guest_amd64_defs.h
baee74
@@ -165,7 +165,9 @@ extern void  amd64g_dirtyhelper_storeF80le ( Addr/*addr*/, ULong/*data*/ );
baee74
 extern void  amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st );
baee74
 extern void  amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st );
baee74
 extern void  amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st );
baee74
-extern void  amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st );
baee74
+extern void  amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st,
baee74
+                                                     ULong hasF16C,
baee74
+                                                     ULong hasRDRAND );
baee74
 extern void  amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st,
baee74
                                              ULong hasF16C, ULong hasRDRAND );
baee74
 
baee74
diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c
baee74
index e4cf7e2..182bae0 100644
baee74
--- a/VEX/priv/guest_amd64_helpers.c
baee74
+++ b/VEX/priv/guest_amd64_helpers.c
baee74
@@ -3141,8 +3141,11 @@ void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
baee74
    address sizes   : 36 bits physical, 48 bits virtual
baee74
    power management:
baee74
 */
baee74
-void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
baee74
+void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st,
baee74
+                                             ULong hasF16C, ULong hasRDRAND )
baee74
 {
baee74
+   vassert((hasF16C >> 1) == 0ULL);
baee74
+   vassert((hasRDRAND >> 1) == 0ULL);
baee74
 #  define SET_ABCD(_a,_b,_c,_d)                \
baee74
       do { st->guest_RAX = (ULong)(_a);        \
baee74
            st->guest_RBX = (ULong)(_b);        \
baee74
@@ -3157,9 +3160,14 @@ void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
baee74
       case 0x00000000:
baee74
          SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
baee74
          break;
baee74
-      case 0x00000001:
baee74
-         SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff);
baee74
+      case 0x00000001: {
baee74
+         // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
baee74
+         // but patch in support for them as directed by the caller.
baee74
+         UInt ecx_extra
baee74
+            = (hasF16C ? (1U << 29) : 0) | (hasRDRAND ? (1U << 30) : 0);
baee74
+         SET_ABCD(0x000206a7, 0x00100800, (0x1f9ae3bf | ecx_extra), 0xbfebfbff);
baee74
          break;
baee74
+      }
baee74
       case 0x00000002:
baee74
          SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
baee74
          break;
baee74
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
baee74
index 56e992c..96dee38 100644
baee74
--- a/VEX/priv/guest_amd64_toIR.c
baee74
+++ b/VEX/priv/guest_amd64_toIR.c
baee74
@@ -22007,7 +22007,8 @@ Long dis_ESC_0F (
baee74
 
baee74
       vassert(fName); vassert(fAddr);
baee74
       IRExpr** args = NULL;
baee74
-      if (fAddr == &amd64g_dirtyhelper_CPUID_avx2) {
baee74
+      if (fAddr == &amd64g_dirtyhelper_CPUID_avx2
baee74
+          || fAddr == &amd64g_dirtyhelper_CPUID_avx_and_cx16) {
baee74
          Bool hasF16C   = (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C) != 0;
baee74
          Bool hasRDRAND = (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND) != 0;
baee74
          args = mkIRExprVec_3(IRExpr_GSPTR(),
baee74
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
baee74
index 3536e57..56a28d1 100644
baee74
--- a/coregrind/m_machine.c
baee74
+++ b/coregrind/m_machine.c
baee74
@@ -1076,10 +1076,10 @@ Bool VG_(machine_get_hwcaps)( void )
baee74
         have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
baee74
      }
baee74
 
baee74
-     /* Sanity check for RDRAND and F16C.  These don't actually *need* AVX2, but
baee74
-        it's convenient to restrict them to the AVX2 case since the simulated
baee74
-        CPUID we'll offer them on has AVX2 as a base. */
baee74
-     if (!have_avx2) {
baee74
+     /* Sanity check for RDRAND and F16C.  These don't actually *need* AVX, but
baee74
+        it's convenient to restrict them to the AVX case since the simulated
baee74
+        CPUID we'll offer them on has AVX as a base. */
baee74
+     if (!have_avx) {
baee74
         have_f16c   = False;
baee74
         have_rdrand = False;
baee74
      }