Blame SOURCES/valgrind-3.15.0-avx-rdrand-f16c.patch

2a1fab
commit 791fe5ecf909d573bcbf353b677b9404f9da0ed4
2a1fab
Author: Mark Wielaard <mark@klomp.org>
2a1fab
Date:   Mon May 27 22:19:27 2019 +0200
2a1fab
2a1fab
    Expose rdrand and f16c through cpuid also if the host only has avx.
2a1fab
    
2a1fab
    The amd64 CPUID dirtyhelpers are mostly static since they emulate some
2a1fab
    existing CPU "family". The avx2 ("i7-4910MQ") CPUID variant however
2a1fab
    can "dynamically" enable rdrand and/or f16c if the host supports them.
2a1fab
    Do the same for the avx_and_cx16 ("i5-2300") CPUID variant.
2a1fab
    
2a1fab
    https://bugs.kde.org/show_bug.cgi?id=408009
2a1fab
2a1fab
diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h
2a1fab
index 4f34b41..a5de527 100644
2a1fab
--- a/VEX/priv/guest_amd64_defs.h
2a1fab
+++ b/VEX/priv/guest_amd64_defs.h
2a1fab
@@ -165,7 +165,9 @@ extern void  amd64g_dirtyhelper_storeF80le ( Addr/*addr*/, ULong/*data*/ );
2a1fab
 extern void  amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st );
2a1fab
 extern void  amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st );
2a1fab
 extern void  amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st );
2a1fab
-extern void  amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st );
2a1fab
+extern void  amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st,
2a1fab
+                                                     ULong hasF16C,
2a1fab
+                                                     ULong hasRDRAND );
2a1fab
 extern void  amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st,
2a1fab
                                              ULong hasF16C, ULong hasRDRAND );
2a1fab
 
2a1fab
diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c
2a1fab
index e4cf7e2..182bae0 100644
2a1fab
--- a/VEX/priv/guest_amd64_helpers.c
2a1fab
+++ b/VEX/priv/guest_amd64_helpers.c
2a1fab
@@ -3141,8 +3141,11 @@ void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
2a1fab
    address sizes   : 36 bits physical, 48 bits virtual
2a1fab
    power management:
2a1fab
 */
2a1fab
-void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
2a1fab
+void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st,
2a1fab
+                                             ULong hasF16C, ULong hasRDRAND )
2a1fab
 {
2a1fab
+   vassert((hasF16C >> 1) == 0ULL);
2a1fab
+   vassert((hasRDRAND >> 1) == 0ULL);
2a1fab
 #  define SET_ABCD(_a,_b,_c,_d)                \
2a1fab
       do { st->guest_RAX = (ULong)(_a);        \
2a1fab
            st->guest_RBX = (ULong)(_b);        \
2a1fab
@@ -3157,9 +3160,14 @@ void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
2a1fab
       case 0x00000000:
2a1fab
          SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
2a1fab
          break;
2a1fab
-      case 0x00000001:
2a1fab
-         SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff);
2a1fab
+      case 0x00000001: {
2a1fab
+         // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
2a1fab
+         // but patch in support for them as directed by the caller.
2a1fab
+         UInt ecx_extra
2a1fab
+            = (hasF16C ? (1U << 29) : 0) | (hasRDRAND ? (1U << 30) : 0);
2a1fab
+         SET_ABCD(0x000206a7, 0x00100800, (0x1f9ae3bf | ecx_extra), 0xbfebfbff);
2a1fab
          break;
2a1fab
+      }
2a1fab
       case 0x00000002:
2a1fab
          SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
2a1fab
          break;
2a1fab
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
2a1fab
index 56e992c..96dee38 100644
2a1fab
--- a/VEX/priv/guest_amd64_toIR.c
2a1fab
+++ b/VEX/priv/guest_amd64_toIR.c
2a1fab
@@ -22007,7 +22007,8 @@ Long dis_ESC_0F (
2a1fab
 
2a1fab
       vassert(fName); vassert(fAddr);
2a1fab
       IRExpr** args = NULL;
2a1fab
-      if (fAddr == &amd64g_dirtyhelper_CPUID_avx2) {
2a1fab
+      if (fAddr == &amd64g_dirtyhelper_CPUID_avx2
2a1fab
+          || fAddr == &amd64g_dirtyhelper_CPUID_avx_and_cx16) {
2a1fab
          Bool hasF16C   = (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C) != 0;
2a1fab
          Bool hasRDRAND = (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND) != 0;
2a1fab
          args = mkIRExprVec_3(IRExpr_GSPTR(),
2a1fab
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
2a1fab
index 3536e57..56a28d1 100644
2a1fab
--- a/coregrind/m_machine.c
2a1fab
+++ b/coregrind/m_machine.c
2a1fab
@@ -1076,10 +1076,10 @@ Bool VG_(machine_get_hwcaps)( void )
2a1fab
         have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
2a1fab
      }
2a1fab
 
2a1fab
-     /* Sanity check for RDRAND and F16C.  These don't actually *need* AVX2, but
2a1fab
-        it's convenient to restrict them to the AVX2 case since the simulated
2a1fab
-        CPUID we'll offer them on has AVX2 as a base. */
2a1fab
-     if (!have_avx2) {
2a1fab
+     /* Sanity check for RDRAND and F16C.  These don't actually *need* AVX, but
2a1fab
+        it's convenient to restrict them to the AVX case since the simulated
2a1fab
+        CPUID we'll offer them on has AVX as a base. */
2a1fab
+     if (!have_avx) {
2a1fab
         have_f16c   = False;
2a1fab
         have_rdrand = False;
2a1fab
      }