|
Mark Wielaard |
d3173f |
commit a4b7b67db47021c424c18a5729f250016d34df27
|
|
Mark Wielaard |
d3173f |
Author: mjw <mjw@8f6e269a-dfd6-0310-a8e1-e2731360e62c>
|
|
Mark Wielaard |
d3173f |
Date: Tue Aug 27 10:19:03 2013 +0000
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
Support mmxext (integer sse) subset on i386 (athlon).
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
Some processors like the AMD Athlon "Classic" support mmxext,
|
|
Mark Wielaard |
d3173f |
an sse1 subset. This subset is not properly detected by VEX.
|
|
Mark Wielaard |
d3173f |
The subset uses the same encoding as the sse1 instructions.
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
The subset is described at:
|
|
Mark Wielaard |
d3173f |
http://support.amd.com/us/Embedded_TechDocs/22466.pdf
|
|
Mark Wielaard |
d3173f |
https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
This introduces a new VEX_HWCAPS_X86_MMXEXT that sits between
|
|
Mark Wielaard |
d3173f |
the baseline (0) and VEX_HWCAPS_X86_SSE1. There is also a new
|
|
Mark Wielaard |
d3173f |
x86g_dirtyhelper_CPUID_mmxext to mimic an Athlon "Classic"
|
|
Mark Wielaard |
d3173f |
(Model 2, K75 "Pluto/Orion").
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
Groups all mmxext instructions together in one block.
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
git-svn-id: svn://svn.valgrind.org/vex/trunk@2745 8f6e269a-dfd6-0310-a8e1-e2731360e62c
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
diff --git a/VEX/priv/guest_x86_defs.h b/VEX/priv/guest_x86_defs.h
|
|
Mark Wielaard |
d3173f |
index 389e6bb..1a16a0b 100644
|
|
Mark Wielaard |
d3173f |
--- a/VEX/priv/guest_x86_defs.h
|
|
Mark Wielaard |
d3173f |
+++ b/VEX/priv/guest_x86_defs.h
|
|
Mark Wielaard |
d3173f |
@@ -144,6 +144,7 @@ extern ULong x86g_dirtyhelper_loadF80le ( UInt );
|
|
Mark Wielaard |
d3173f |
extern void x86g_dirtyhelper_storeF80le ( UInt, ULong );
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
extern void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* );
|
|
Mark Wielaard |
d3173f |
+extern void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* );
|
|
Mark Wielaard |
d3173f |
extern void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* );
|
|
Mark Wielaard |
d3173f |
extern void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* );
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
diff --git a/VEX/priv/guest_x86_helpers.c b/VEX/priv/guest_x86_helpers.c
|
|
Mark Wielaard |
d3173f |
index 9c26794..e87e89f 100644
|
|
Mark Wielaard |
d3173f |
--- a/VEX/priv/guest_x86_helpers.c
|
|
Mark Wielaard |
d3173f |
+++ b/VEX/priv/guest_x86_helpers.c
|
|
Mark Wielaard |
d3173f |
@@ -2207,6 +2207,63 @@ void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
/* CALLED FROM GENERATED CODE */
|
|
Mark Wielaard |
d3173f |
/* DIRTY HELPER (modifies guest state) */
|
|
Mark Wielaard |
d3173f |
+/* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
|
|
Mark Wielaard |
d3173f |
+/* But without 3DNow support (weird, but we really don't support it). */
|
|
Mark Wielaard |
d3173f |
+void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
|
|
Mark Wielaard |
d3173f |
+{
|
|
Mark Wielaard |
d3173f |
+ switch (st->guest_EAX) {
|
|
Mark Wielaard |
d3173f |
+ /* vendor ID */
|
|
Mark Wielaard |
d3173f |
+ case 0:
|
|
Mark Wielaard |
d3173f |
+ st->guest_EAX = 0x1;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EBX = 0x68747541;
|
|
Mark Wielaard |
d3173f |
+ st->guest_ECX = 0x444d4163;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EDX = 0x69746e65;
|
|
Mark Wielaard |
d3173f |
+ break;
|
|
Mark Wielaard |
d3173f |
+ /* feature bits */
|
|
Mark Wielaard |
d3173f |
+ case 1:
|
|
Mark Wielaard |
d3173f |
+ st->guest_EAX = 0x621;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EBX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ st->guest_ECX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EDX = 0x183f9ff;
|
|
Mark Wielaard |
d3173f |
+ break;
|
|
Mark Wielaard |
d3173f |
+ /* Highest Extended Function Supported (0x80000004 brand string) */
|
|
Mark Wielaard |
d3173f |
+ case 0x80000000:
|
|
Mark Wielaard |
d3173f |
+ st->guest_EAX = 0x80000004;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EBX = 0x68747541;
|
|
Mark Wielaard |
d3173f |
+ st->guest_ECX = 0x444d4163;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EDX = 0x69746e65;
|
|
Mark Wielaard |
d3173f |
+ break;
|
|
Mark Wielaard |
d3173f |
+ /* Extended Processor Info and Feature Bits */
|
|
Mark Wielaard |
d3173f |
+ case 0x80000001:
|
|
Mark Wielaard |
d3173f |
+ st->guest_EAX = 0x721;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EBX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ st->guest_ECX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
|
|
Mark Wielaard |
d3173f |
+ break;
|
|
Mark Wielaard |
d3173f |
+ /* Processor Brand String "AMD Athlon(tm) Processor" */
|
|
Mark Wielaard |
d3173f |
+ case 0x80000002:
|
|
Mark Wielaard |
d3173f |
+ st->guest_EAX = 0x20444d41;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EBX = 0x6c687441;
|
|
Mark Wielaard |
d3173f |
+ st->guest_ECX = 0x74286e6f;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EDX = 0x5020296d;
|
|
Mark Wielaard |
d3173f |
+ break;
|
|
Mark Wielaard |
d3173f |
+ case 0x80000003:
|
|
Mark Wielaard |
d3173f |
+ st->guest_EAX = 0x65636f72;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EBX = 0x726f7373;
|
|
Mark Wielaard |
d3173f |
+ st->guest_ECX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EDX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ break;
|
|
Mark Wielaard |
d3173f |
+ default:
|
|
Mark Wielaard |
d3173f |
+ st->guest_EAX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EBX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ st->guest_ECX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ st->guest_EDX = 0x0;
|
|
Mark Wielaard |
d3173f |
+ break;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+}
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+/* CALLED FROM GENERATED CODE */
|
|
Mark Wielaard |
d3173f |
+/* DIRTY HELPER (modifies guest state) */
|
|
Mark Wielaard |
d3173f |
/* Claim to be the following SSE1-capable CPU:
|
|
Mark Wielaard |
d3173f |
vendor_id : GenuineIntel
|
|
Mark Wielaard |
d3173f |
cpu family : 6
|
|
Mark Wielaard |
d3173f |
diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c
|
|
Mark Wielaard |
d3173f |
index 90499b0..e98f19c 100644
|
|
Mark Wielaard |
d3173f |
--- a/VEX/priv/guest_x86_toIR.c
|
|
Mark Wielaard |
d3173f |
+++ b/VEX/priv/guest_x86_toIR.c
|
|
Mark Wielaard |
d3173f |
@@ -8318,7 +8318,18 @@ DisResult disInstr_X86_WRK (
|
|
Mark Wielaard |
d3173f |
guest subarchitecture. */
|
|
Mark Wielaard |
d3173f |
if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
|
|
Mark Wielaard |
d3173f |
goto after_sse_decoders;
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* With mmxext only some extended MMX instructions are recognized.
|
|
Mark Wielaard |
d3173f |
+ The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
|
|
Mark Wielaard |
d3173f |
+ PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
|
|
Mark Wielaard |
d3173f |
+ PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ http://support.amd.com/us/Embedded_TechDocs/22466.pdf
|
|
Mark Wielaard |
d3173f |
+ https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
|
|
Mark Wielaard |
d3173f |
+ goto mmxext;
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
/* Otherwise we must be doing sse1 or sse2, so we can at least try
|
|
Mark Wielaard |
d3173f |
for SSE1 here. */
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
@@ -8627,6 +8638,11 @@ DisResult disInstr_X86_WRK (
|
|
Mark Wielaard |
d3173f |
goto decode_success;
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* mmxext sse1 subset starts here. mmxext only arches will parse
|
|
Mark Wielaard |
d3173f |
+ only this subset of the sse1 instructions. */
|
|
Mark Wielaard |
d3173f |
+ mmxext:
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
/* ***--- this is an MMX class insn introduced in SSE1 ---*** */
|
|
Mark Wielaard |
d3173f |
/* 0F F7 = MASKMOVQ -- 8x8 masked store */
|
|
Mark Wielaard |
d3173f |
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
|
|
Mark Wielaard |
d3173f |
@@ -8637,203 +8653,6 @@ DisResult disInstr_X86_WRK (
|
|
Mark Wielaard |
d3173f |
goto decode_success;
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
- /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
|
|
Mark Wielaard |
d3173f |
- delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
|
|
Mark Wielaard |
d3173f |
- vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
|
|
Mark Wielaard |
d3173f |
- delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
|
|
Mark Wielaard |
d3173f |
- vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
|
|
Mark Wielaard |
d3173f |
- /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
|
|
Mark Wielaard |
d3173f |
- modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
- if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
- putXMMReg( gregOfRM(modrm),
|
|
Mark Wielaard |
d3173f |
- getXMMReg( eregOfRM(modrm) ));
|
|
Mark Wielaard |
d3173f |
- DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
- nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
- delta += 2+1;
|
|
Mark Wielaard |
d3173f |
- } else {
|
|
Mark Wielaard |
d3173f |
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
- if (insn[1] == 0x28/*movaps*/)
|
|
Mark Wielaard |
d3173f |
- gen_SEGV_if_not_16_aligned( addr );
|
|
Mark Wielaard |
d3173f |
- putXMMReg( gregOfRM(modrm),
|
|
Mark Wielaard |
d3173f |
- loadLE(Ity_V128, mkexpr(addr)) );
|
|
Mark Wielaard |
d3173f |
- DIP("mov[ua]ps %s,%s\n", dis_buf,
|
|
Mark Wielaard |
d3173f |
- nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
- delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
|
|
Mark Wielaard |
d3173f |
- /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F
|
|
Mark Wielaard |
d3173f |
- && (insn[1] == 0x29 || insn[1] == 0x11)) {
|
|
Mark Wielaard |
d3173f |
- modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
- if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
- /* fall through; awaiting test case */
|
|
Mark Wielaard |
d3173f |
- } else {
|
|
Mark Wielaard |
d3173f |
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
- if (insn[1] == 0x29/*movaps*/)
|
|
Mark Wielaard |
d3173f |
- gen_SEGV_if_not_16_aligned( addr );
|
|
Mark Wielaard |
d3173f |
- storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
|
|
Mark Wielaard |
d3173f |
- DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
- dis_buf );
|
|
Mark Wielaard |
d3173f |
- delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
|
|
Mark Wielaard |
d3173f |
- /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
|
|
Mark Wielaard |
d3173f |
- modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
- if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
- delta += 2+1;
|
|
Mark Wielaard |
d3173f |
- putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
|
|
Mark Wielaard |
d3173f |
- getXMMRegLane64( eregOfRM(modrm), 0 ) );
|
|
Mark Wielaard |
d3173f |
- DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
- nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
- } else {
|
|
Mark Wielaard |
d3173f |
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
- delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
- putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
|
|
Mark Wielaard |
d3173f |
- loadLE(Ity_I64, mkexpr(addr)) );
|
|
Mark Wielaard |
d3173f |
- DIP("movhps %s,%s\n", dis_buf,
|
|
Mark Wielaard |
d3173f |
- nameXMMReg( gregOfRM(modrm) ));
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
|
|
Mark Wielaard |
d3173f |
- if (!epartIsReg(insn[2])) {
|
|
Mark Wielaard |
d3173f |
- delta += 2;
|
|
Mark Wielaard |
d3173f |
- addr = disAMode ( &alen, sorb, delta, dis_buf );
|
|
Mark Wielaard |
d3173f |
- delta += alen;
|
|
Mark Wielaard |
d3173f |
- storeLE( mkexpr(addr),
|
|
Mark Wielaard |
d3173f |
- getXMMRegLane64( gregOfRM(insn[2]),
|
|
Mark Wielaard |
d3173f |
- 1/*upper lane*/ ) );
|
|
Mark Wielaard |
d3173f |
- DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
|
|
Mark Wielaard |
d3173f |
- dis_buf);
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- /* else fall through */
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
|
|
Mark Wielaard |
d3173f |
- /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
|
|
Mark Wielaard |
d3173f |
- modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
- if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
- delta += 2+1;
|
|
Mark Wielaard |
d3173f |
- putXMMRegLane64( gregOfRM(modrm),
|
|
Mark Wielaard |
d3173f |
- 0/*lower lane*/,
|
|
Mark Wielaard |
d3173f |
- getXMMRegLane64( eregOfRM(modrm), 1 ));
|
|
Mark Wielaard |
d3173f |
- DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
- nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
- } else {
|
|
Mark Wielaard |
d3173f |
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
- delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
- putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
|
|
Mark Wielaard |
d3173f |
- loadLE(Ity_I64, mkexpr(addr)) );
|
|
Mark Wielaard |
d3173f |
- DIP("movlps %s, %s\n",
|
|
Mark Wielaard |
d3173f |
- dis_buf, nameXMMReg( gregOfRM(modrm) ));
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
|
|
Mark Wielaard |
d3173f |
- if (!epartIsReg(insn[2])) {
|
|
Mark Wielaard |
d3173f |
- delta += 2;
|
|
Mark Wielaard |
d3173f |
- addr = disAMode ( &alen, sorb, delta, dis_buf );
|
|
Mark Wielaard |
d3173f |
- delta += alen;
|
|
Mark Wielaard |
d3173f |
- storeLE( mkexpr(addr),
|
|
Mark Wielaard |
d3173f |
- getXMMRegLane64( gregOfRM(insn[2]),
|
|
Mark Wielaard |
d3173f |
- 0/*lower lane*/ ) );
|
|
Mark Wielaard |
d3173f |
- DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
|
|
Mark Wielaard |
d3173f |
- dis_buf);
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- /* else fall through */
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
|
|
Mark Wielaard |
d3173f |
- to 4 lowest bits of ireg(G) */
|
|
Mark Wielaard |
d3173f |
- if (insn[0] == 0x0F && insn[1] == 0x50) {
|
|
Mark Wielaard |
d3173f |
- modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
- Int src;
|
|
Mark Wielaard |
d3173f |
- t0 = newTemp(Ity_I32);
|
|
Mark Wielaard |
d3173f |
- t1 = newTemp(Ity_I32);
|
|
Mark Wielaard |
d3173f |
- t2 = newTemp(Ity_I32);
|
|
Mark Wielaard |
d3173f |
- t3 = newTemp(Ity_I32);
|
|
Mark Wielaard |
d3173f |
- delta += 2+1;
|
|
Mark Wielaard |
d3173f |
- src = eregOfRM(modrm);
|
|
Mark Wielaard |
d3173f |
- assign( t0, binop( Iop_And32,
|
|
Mark Wielaard |
d3173f |
- binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
|
|
Mark Wielaard |
d3173f |
- mkU32(1) ));
|
|
Mark Wielaard |
d3173f |
- assign( t1, binop( Iop_And32,
|
|
Mark Wielaard |
d3173f |
- binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
|
|
Mark Wielaard |
d3173f |
- mkU32(2) ));
|
|
Mark Wielaard |
d3173f |
- assign( t2, binop( Iop_And32,
|
|
Mark Wielaard |
d3173f |
- binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
|
|
Mark Wielaard |
d3173f |
- mkU32(4) ));
|
|
Mark Wielaard |
d3173f |
- assign( t3, binop( Iop_And32,
|
|
Mark Wielaard |
d3173f |
- binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
|
|
Mark Wielaard |
d3173f |
- mkU32(8) ));
|
|
Mark Wielaard |
d3173f |
- putIReg(4, gregOfRM(modrm),
|
|
Mark Wielaard |
d3173f |
- binop(Iop_Or32,
|
|
Mark Wielaard |
d3173f |
- binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
|
|
Mark Wielaard |
d3173f |
- binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
|
|
Mark Wielaard |
d3173f |
- )
|
|
Mark Wielaard |
d3173f |
- );
|
|
Mark Wielaard |
d3173f |
- DIP("movmskps %s,%s\n", nameXMMReg(src),
|
|
Mark Wielaard |
d3173f |
- nameIReg(4, gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- /* else fall through */
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
|
|
Mark Wielaard |
d3173f |
- /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
|
|
Mark Wielaard |
d3173f |
- if (insn[0] == 0x0F && insn[1] == 0x2B) {
|
|
Mark Wielaard |
d3173f |
- modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
- if (!epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
- gen_SEGV_if_not_16_aligned( addr );
|
|
Mark Wielaard |
d3173f |
- storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
|
|
Mark Wielaard |
d3173f |
- DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
|
|
Mark Wielaard |
d3173f |
- dis_buf,
|
|
Mark Wielaard |
d3173f |
- nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
- delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- /* else fall through */
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
/* ***--- this is an MMX class insn introduced in SSE1 ---*** */
|
|
Mark Wielaard |
d3173f |
/* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
|
|
Mark Wielaard |
d3173f |
Intel manual does not say anything about the usual business of
|
|
Mark Wielaard |
d3173f |
@@ -8854,70 +8673,6 @@ DisResult disInstr_X86_WRK (
|
|
Mark Wielaard |
d3173f |
/* else fall through */
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
- /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
|
|
Mark Wielaard |
d3173f |
- (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
|
|
Mark Wielaard |
d3173f |
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
|
|
Mark Wielaard |
d3173f |
- vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
- modrm = getIByte(delta+3);
|
|
Mark Wielaard |
d3173f |
- if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
- putXMMRegLane32( gregOfRM(modrm), 0,
|
|
Mark Wielaard |
d3173f |
- getXMMRegLane32( eregOfRM(modrm), 0 ));
|
|
Mark Wielaard |
d3173f |
- DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
- nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
- delta += 3+1;
|
|
Mark Wielaard |
d3173f |
- } else {
|
|
Mark Wielaard |
d3173f |
- addr = disAMode ( &alen, sorb, delta+3, dis_buf );
|
|
Mark Wielaard |
d3173f |
- /* zero bits 127:64 */
|
|
Mark Wielaard |
d3173f |
- putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
|
|
Mark Wielaard |
d3173f |
- /* zero bits 63:32 */
|
|
Mark Wielaard |
d3173f |
- putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
|
|
Mark Wielaard |
d3173f |
- /* write bits 31:0 */
|
|
Mark Wielaard |
d3173f |
- putXMMRegLane32( gregOfRM(modrm), 0,
|
|
Mark Wielaard |
d3173f |
- loadLE(Ity_I32, mkexpr(addr)) );
|
|
Mark Wielaard |
d3173f |
- DIP("movss %s,%s\n", dis_buf,
|
|
Mark Wielaard |
d3173f |
- nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
- delta += 3+alen;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
|
|
Mark Wielaard |
d3173f |
- or lo 1/4 xmm). */
|
|
Mark Wielaard |
d3173f |
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
|
|
Mark Wielaard |
d3173f |
- vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
- modrm = getIByte(delta+3);
|
|
Mark Wielaard |
d3173f |
- if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
- /* fall through, we don't yet have a test case */
|
|
Mark Wielaard |
d3173f |
- } else {
|
|
Mark Wielaard |
d3173f |
- addr = disAMode ( &alen, sorb, delta+3, dis_buf );
|
|
Mark Wielaard |
d3173f |
- storeLE( mkexpr(addr),
|
|
Mark Wielaard |
d3173f |
- getXMMRegLane32(gregOfRM(modrm), 0) );
|
|
Mark Wielaard |
d3173f |
- DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
- dis_buf);
|
|
Mark Wielaard |
d3173f |
- delta += 3+alen;
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
|
|
Mark Wielaard |
d3173f |
- delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
|
|
Mark Wielaard |
d3173f |
- vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
- /* 0F 56 = ORPS -- G = G and E */
|
|
Mark Wielaard |
d3173f |
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
|
|
Mark Wielaard |
d3173f |
- delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
/* ***--- this is an MMX class insn introduced in SSE1 ---*** */
|
|
Mark Wielaard |
d3173f |
/* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
|
|
Mark Wielaard |
d3173f |
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
|
|
Mark Wielaard |
d3173f |
@@ -9173,6 +8928,284 @@ DisResult disInstr_X86_WRK (
|
|
Mark Wielaard |
d3173f |
goto decode_success;
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
+ /* 0F AE /7 = SFENCE -- flush pending operations to memory */
|
|
Mark Wielaard |
d3173f |
+ if (insn[0] == 0x0F && insn[1] == 0xAE
|
|
Mark Wielaard |
d3173f |
+ && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
|
|
Mark Wielaard |
d3173f |
+ vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
+ delta += 3;
|
|
Mark Wielaard |
d3173f |
+ /* Insert a memory fence. It's sometimes important that these
|
|
Mark Wielaard |
d3173f |
+ are carried through to the generated code. */
|
|
Mark Wielaard |
d3173f |
+ stmt( IRStmt_MBE(Imbe_Fence) );
|
|
Mark Wielaard |
d3173f |
+ DIP("sfence\n");
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. */
|
|
Mark Wielaard |
d3173f |
+ if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
|
|
Mark Wielaard |
d3173f |
+ goto after_sse_decoders;
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
|
|
Mark Wielaard |
d3173f |
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
|
|
Mark Wielaard |
d3173f |
+ vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
|
|
Mark Wielaard |
d3173f |
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
|
|
Mark Wielaard |
d3173f |
+ vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
|
|
Mark Wielaard |
d3173f |
+ /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
|
|
Mark Wielaard |
d3173f |
+ modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
+ if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
+ putXMMReg( gregOfRM(modrm),
|
|
Mark Wielaard |
d3173f |
+ getXMMReg( eregOfRM(modrm) ));
|
|
Mark Wielaard |
d3173f |
+ DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
+ nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
+ delta += 2+1;
|
|
Mark Wielaard |
d3173f |
+ } else {
|
|
Mark Wielaard |
d3173f |
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
+ if (insn[1] == 0x28/*movaps*/)
|
|
Mark Wielaard |
d3173f |
+ gen_SEGV_if_not_16_aligned( addr );
|
|
Mark Wielaard |
d3173f |
+ putXMMReg( gregOfRM(modrm),
|
|
Mark Wielaard |
d3173f |
+ loadLE(Ity_V128, mkexpr(addr)) );
|
|
Mark Wielaard |
d3173f |
+ DIP("mov[ua]ps %s,%s\n", dis_buf,
|
|
Mark Wielaard |
d3173f |
+ nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
+ delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
|
|
Mark Wielaard |
d3173f |
+ /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F
|
|
Mark Wielaard |
d3173f |
+ && (insn[1] == 0x29 || insn[1] == 0x11)) {
|
|
Mark Wielaard |
d3173f |
+ modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
+ if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
+ /* fall through; awaiting test case */
|
|
Mark Wielaard |
d3173f |
+ } else {
|
|
Mark Wielaard |
d3173f |
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
+ if (insn[1] == 0x29/*movaps*/)
|
|
Mark Wielaard |
d3173f |
+ gen_SEGV_if_not_16_aligned( addr );
|
|
Mark Wielaard |
d3173f |
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
|
|
Mark Wielaard |
d3173f |
+ DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
+ dis_buf );
|
|
Mark Wielaard |
d3173f |
+ delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
|
|
Mark Wielaard |
d3173f |
+ /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
|
|
Mark Wielaard |
d3173f |
+ modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
+ if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
+ delta += 2+1;
|
|
Mark Wielaard |
d3173f |
+ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
|
|
Mark Wielaard |
d3173f |
+ getXMMRegLane64( eregOfRM(modrm), 0 ) );
|
|
Mark Wielaard |
d3173f |
+ DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
+ nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
+ } else {
|
|
Mark Wielaard |
d3173f |
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
+ delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
+ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
|
|
Mark Wielaard |
d3173f |
+ loadLE(Ity_I64, mkexpr(addr)) );
|
|
Mark Wielaard |
d3173f |
+ DIP("movhps %s,%s\n", dis_buf,
|
|
Mark Wielaard |
d3173f |
+ nameXMMReg( gregOfRM(modrm) ));
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
|
|
Mark Wielaard |
d3173f |
+ if (!epartIsReg(insn[2])) {
|
|
Mark Wielaard |
d3173f |
+ delta += 2;
|
|
Mark Wielaard |
d3173f |
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
|
|
Mark Wielaard |
d3173f |
+ delta += alen;
|
|
Mark Wielaard |
d3173f |
+ storeLE( mkexpr(addr),
|
|
Mark Wielaard |
d3173f |
+ getXMMRegLane64( gregOfRM(insn[2]),
|
|
Mark Wielaard |
d3173f |
+ 1/*upper lane*/ ) );
|
|
Mark Wielaard |
d3173f |
+ DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
|
|
Mark Wielaard |
d3173f |
+ dis_buf);
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ /* else fall through */
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
|
|
Mark Wielaard |
d3173f |
+ /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
|
|
Mark Wielaard |
d3173f |
+ modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
+ if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
+ delta += 2+1;
|
|
Mark Wielaard |
d3173f |
+ putXMMRegLane64( gregOfRM(modrm),
|
|
Mark Wielaard |
d3173f |
+ 0/*lower lane*/,
|
|
Mark Wielaard |
d3173f |
+ getXMMRegLane64( eregOfRM(modrm), 1 ));
|
|
Mark Wielaard |
d3173f |
+ DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
+ nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
+ } else {
|
|
Mark Wielaard |
d3173f |
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
+ delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
+ putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
|
|
Mark Wielaard |
d3173f |
+ loadLE(Ity_I64, mkexpr(addr)) );
|
|
Mark Wielaard |
d3173f |
+ DIP("movlps %s, %s\n",
|
|
Mark Wielaard |
d3173f |
+ dis_buf, nameXMMReg( gregOfRM(modrm) ));
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
|
|
Mark Wielaard |
d3173f |
+ if (!epartIsReg(insn[2])) {
|
|
Mark Wielaard |
d3173f |
+ delta += 2;
|
|
Mark Wielaard |
d3173f |
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
|
|
Mark Wielaard |
d3173f |
+ delta += alen;
|
|
Mark Wielaard |
d3173f |
+ storeLE( mkexpr(addr),
|
|
Mark Wielaard |
d3173f |
+ getXMMRegLane64( gregOfRM(insn[2]),
|
|
Mark Wielaard |
d3173f |
+ 0/*lower lane*/ ) );
|
|
Mark Wielaard |
d3173f |
+ DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
|
|
Mark Wielaard |
d3173f |
+ dis_buf);
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ /* else fall through */
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
|
|
Mark Wielaard |
d3173f |
+ to 4 lowest bits of ireg(G) */
|
|
Mark Wielaard |
d3173f |
+ if (insn[0] == 0x0F && insn[1] == 0x50) {
|
|
Mark Wielaard |
d3173f |
+ modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
+ Int src;
|
|
Mark Wielaard |
d3173f |
+ t0 = newTemp(Ity_I32);
|
|
Mark Wielaard |
d3173f |
+ t1 = newTemp(Ity_I32);
|
|
Mark Wielaard |
d3173f |
+ t2 = newTemp(Ity_I32);
|
|
Mark Wielaard |
d3173f |
+ t3 = newTemp(Ity_I32);
|
|
Mark Wielaard |
d3173f |
+ delta += 2+1;
|
|
Mark Wielaard |
d3173f |
+ src = eregOfRM(modrm);
|
|
Mark Wielaard |
d3173f |
+ assign( t0, binop( Iop_And32,
|
|
Mark Wielaard |
d3173f |
+ binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
|
|
Mark Wielaard |
d3173f |
+ mkU32(1) ));
|
|
Mark Wielaard |
d3173f |
+ assign( t1, binop( Iop_And32,
|
|
Mark Wielaard |
d3173f |
+ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
|
|
Mark Wielaard |
d3173f |
+ mkU32(2) ));
|
|
Mark Wielaard |
d3173f |
+ assign( t2, binop( Iop_And32,
|
|
Mark Wielaard |
d3173f |
+ binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
|
|
Mark Wielaard |
d3173f |
+ mkU32(4) ));
|
|
Mark Wielaard |
d3173f |
+ assign( t3, binop( Iop_And32,
|
|
Mark Wielaard |
d3173f |
+ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
|
|
Mark Wielaard |
d3173f |
+ mkU32(8) ));
|
|
Mark Wielaard |
d3173f |
+ putIReg(4, gregOfRM(modrm),
|
|
Mark Wielaard |
d3173f |
+ binop(Iop_Or32,
|
|
Mark Wielaard |
d3173f |
+ binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
|
|
Mark Wielaard |
d3173f |
+ binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
|
|
Mark Wielaard |
d3173f |
+ )
|
|
Mark Wielaard |
d3173f |
+ );
|
|
Mark Wielaard |
d3173f |
+ DIP("movmskps %s,%s\n", nameXMMReg(src),
|
|
Mark Wielaard |
d3173f |
+ nameIReg(4, gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ /* else fall through */
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
|
|
Mark Wielaard |
d3173f |
+ /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
|
|
Mark Wielaard |
d3173f |
+ if (insn[0] == 0x0F && insn[1] == 0x2B) {
|
|
Mark Wielaard |
d3173f |
+ modrm = getIByte(delta+2);
|
|
Mark Wielaard |
d3173f |
+ if (!epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
|
|
Mark Wielaard |
d3173f |
+ gen_SEGV_if_not_16_aligned( addr );
|
|
Mark Wielaard |
d3173f |
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
|
|
Mark Wielaard |
d3173f |
+ DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
|
|
Mark Wielaard |
d3173f |
+ dis_buf,
|
|
Mark Wielaard |
d3173f |
+ nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
+ delta += 2+alen;
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ /* else fall through */
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
|
|
Mark Wielaard |
d3173f |
+ (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
|
|
Mark Wielaard |
d3173f |
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
|
|
Mark Wielaard |
d3173f |
+ vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
+ modrm = getIByte(delta+3);
|
|
Mark Wielaard |
d3173f |
+ if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
+ putXMMRegLane32( gregOfRM(modrm), 0,
|
|
Mark Wielaard |
d3173f |
+ getXMMRegLane32( eregOfRM(modrm), 0 ));
|
|
Mark Wielaard |
d3173f |
+ DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
+ nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
+ delta += 3+1;
|
|
Mark Wielaard |
d3173f |
+ } else {
|
|
Mark Wielaard |
d3173f |
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
|
|
Mark Wielaard |
d3173f |
+ /* zero bits 127:64 */
|
|
Mark Wielaard |
d3173f |
+ putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
|
|
Mark Wielaard |
d3173f |
+ /* zero bits 63:32 */
|
|
Mark Wielaard |
d3173f |
+ putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
|
|
Mark Wielaard |
d3173f |
+ /* write bits 31:0 */
|
|
Mark Wielaard |
d3173f |
+ putXMMRegLane32( gregOfRM(modrm), 0,
|
|
Mark Wielaard |
d3173f |
+ loadLE(Ity_I32, mkexpr(addr)) );
|
|
Mark Wielaard |
d3173f |
+ DIP("movss %s,%s\n", dis_buf,
|
|
Mark Wielaard |
d3173f |
+ nameXMMReg(gregOfRM(modrm)));
|
|
Mark Wielaard |
d3173f |
+ delta += 3+alen;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
|
|
Mark Wielaard |
d3173f |
+ or lo 1/4 xmm). */
|
|
Mark Wielaard |
d3173f |
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
|
|
Mark Wielaard |
d3173f |
+ vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
+ modrm = getIByte(delta+3);
|
|
Mark Wielaard |
d3173f |
+ if (epartIsReg(modrm)) {
|
|
Mark Wielaard |
d3173f |
+ /* fall through, we don't yet have a test case */
|
|
Mark Wielaard |
d3173f |
+ } else {
|
|
Mark Wielaard |
d3173f |
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
|
|
Mark Wielaard |
d3173f |
+ storeLE( mkexpr(addr),
|
|
Mark Wielaard |
d3173f |
+ getXMMRegLane32(gregOfRM(modrm), 0) );
|
|
Mark Wielaard |
d3173f |
+ DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
|
|
Mark Wielaard |
d3173f |
+ dis_buf);
|
|
Mark Wielaard |
d3173f |
+ delta += 3+alen;
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
|
|
Mark Wielaard |
d3173f |
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
|
|
Mark Wielaard |
d3173f |
+ vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* 0F 56 = ORPS -- G = G or E */
|
|
Mark Wielaard |
d3173f |
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
|
|
Mark Wielaard |
d3173f |
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
|
|
Mark Wielaard |
d3173f |
+ goto decode_success;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
/* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
|
|
Mark Wielaard |
d3173f |
if (insn[0] == 0x0F && insn[1] == 0x53) {
|
|
Mark Wielaard |
d3173f |
vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
@@ -9205,18 +9238,6 @@ DisResult disInstr_X86_WRK (
|
|
Mark Wielaard |
d3173f |
goto decode_success;
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
- /* 0F AE /7 = SFENCE -- flush pending operations to memory */
|
|
Mark Wielaard |
d3173f |
- if (insn[0] == 0x0F && insn[1] == 0xAE
|
|
Mark Wielaard |
d3173f |
- && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
|
|
Mark Wielaard |
d3173f |
- vassert(sz == 4);
|
|
Mark Wielaard |
d3173f |
- delta += 3;
|
|
Mark Wielaard |
d3173f |
- /* Insert a memory fence. It's sometimes important that these
|
|
Mark Wielaard |
d3173f |
- are carried through to the generated code. */
|
|
Mark Wielaard |
d3173f |
- stmt( IRStmt_MBE(Imbe_Fence) );
|
|
Mark Wielaard |
d3173f |
- DIP("sfence\n");
|
|
Mark Wielaard |
d3173f |
- goto decode_success;
|
|
Mark Wielaard |
d3173f |
- }
|
|
Mark Wielaard |
d3173f |
-
|
|
Mark Wielaard |
d3173f |
/* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
|
|
Mark Wielaard |
d3173f |
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
|
|
Mark Wielaard |
d3173f |
Int select;
|
|
Mark Wielaard |
d3173f |
@@ -14674,6 +14695,11 @@ DisResult disInstr_X86_WRK (
|
|
Mark Wielaard |
d3173f |
fAddr = &x86g_dirtyhelper_CPUID_sse1;
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
else
|
|
Mark Wielaard |
d3173f |
+ if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) {
|
|
Mark Wielaard |
d3173f |
+ fName = "x86g_dirtyhelper_CPUID_mmxext";
|
|
Mark Wielaard |
d3173f |
+ fAddr = &x86g_dirtyhelper_CPUID_mmxext;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+ else
|
|
Mark Wielaard |
d3173f |
if (archinfo->hwcaps == 0/*no SSE*/) {
|
|
Mark Wielaard |
d3173f |
fName = "x86g_dirtyhelper_CPUID_sse0";
|
|
Mark Wielaard |
d3173f |
fAddr = &x86g_dirtyhelper_CPUID_sse0;
|
|
Mark Wielaard |
d3173f |
diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c
|
|
Mark Wielaard |
d3173f |
index 21a05a9..693eaa2 100644
|
|
Mark Wielaard |
d3173f |
--- a/VEX/priv/host_x86_defs.c
|
|
Mark Wielaard |
d3173f |
+++ b/VEX/priv/host_x86_defs.c
|
|
Mark Wielaard |
d3173f |
@@ -727,7 +727,8 @@ X86Instr* X86Instr_MFence ( UInt hwcaps ) {
|
|
Mark Wielaard |
d3173f |
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
|
|
Mark Wielaard |
d3173f |
i->tag = Xin_MFence;
|
|
Mark Wielaard |
d3173f |
i->Xin.MFence.hwcaps = hwcaps;
|
|
Mark Wielaard |
d3173f |
- vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
|
|
Mark Wielaard |
d3173f |
+ vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
|
|
Mark Wielaard |
d3173f |
+ |VEX_HWCAPS_X86_SSE1
|
|
Mark Wielaard |
d3173f |
|VEX_HWCAPS_X86_SSE2
|
|
Mark Wielaard |
d3173f |
|VEX_HWCAPS_X86_SSE3
|
|
Mark Wielaard |
d3173f |
|VEX_HWCAPS_X86_LZCNT)));
|
|
Mark Wielaard |
d3173f |
@@ -2695,7 +2696,7 @@ Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
|
|
Mark Wielaard |
d3173f |
*p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
|
|
Mark Wielaard |
d3173f |
goto done;
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
- if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
|
|
Mark Wielaard |
d3173f |
+ if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
|
|
Mark Wielaard |
d3173f |
/* sfence */
|
|
Mark Wielaard |
d3173f |
*p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
|
|
Mark Wielaard |
d3173f |
/* lock addl $0,0(%esp) */
|
|
Mark Wielaard |
d3173f |
diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h
|
|
Mark Wielaard |
d3173f |
index f810ab4..e03becf 100644
|
|
Mark Wielaard |
d3173f |
--- a/VEX/priv/host_x86_defs.h
|
|
Mark Wielaard |
d3173f |
+++ b/VEX/priv/host_x86_defs.h
|
|
Mark Wielaard |
d3173f |
@@ -360,7 +360,7 @@ typedef
|
|
Mark Wielaard |
d3173f |
Xin_Store, /* store 16/8 bit value in memory */
|
|
Mark Wielaard |
d3173f |
Xin_Set32, /* convert condition code to 32-bit value */
|
|
Mark Wielaard |
d3173f |
Xin_Bsfr32, /* 32-bit bsf/bsr */
|
|
Mark Wielaard |
d3173f |
- Xin_MFence, /* mem fence (not just sse2, but sse0 and 1 too) */
|
|
Mark Wielaard |
d3173f |
+ Xin_MFence, /* mem fence (not just sse2, but sse0 and 1/mmxext too) */
|
|
Mark Wielaard |
d3173f |
Xin_ACAS, /* 8/16/32-bit lock;cmpxchg */
|
|
Mark Wielaard |
d3173f |
Xin_DACAS, /* lock;cmpxchg8b (doubleword ACAS, 2 x 32-bit only) */
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
@@ -508,13 +508,13 @@ typedef
|
|
Mark Wielaard |
d3173f |
HReg src;
|
|
Mark Wielaard |
d3173f |
HReg dst;
|
|
Mark Wielaard |
d3173f |
} Bsfr32;
|
|
Mark Wielaard |
d3173f |
- /* Mem fence (not just sse2, but sse0 and 1 too). In short,
|
|
Mark Wielaard |
d3173f |
- an insn which flushes all preceding loads and stores as
|
|
Mark Wielaard |
d3173f |
- much as possible before continuing. On SSE2 we emit a
|
|
Mark Wielaard |
d3173f |
- real "mfence", on SSE1 "sfence ; lock addl $0,0(%esp)" and
|
|
Mark Wielaard |
d3173f |
- on SSE0 "lock addl $0,0(%esp)". This insn therefore
|
|
Mark Wielaard |
d3173f |
- carries the host's hwcaps so the assembler knows what to
|
|
Mark Wielaard |
d3173f |
- emit. */
|
|
Mark Wielaard |
d3173f |
+ /* Mem fence (not just sse2, but sse0 and sse1/mmxext too).
|
|
Mark Wielaard |
d3173f |
+ In short, an insn which flushes all preceding loads and
|
|
Mark Wielaard |
d3173f |
+ stores as much as possible before continuing. On SSE2
|
|
Mark Wielaard |
d3173f |
+ we emit a real "mfence", on SSE1 or the MMXEXT subset
|
|
Mark Wielaard |
d3173f |
+ "sfence ; lock addl $0,0(%esp)" and on SSE0
|
|
Mark Wielaard |
d3173f |
+ "lock addl $0,0(%esp)". This insn therefore carries the
|
|
Mark Wielaard |
d3173f |
+ host's hwcaps so the assembler knows what to emit. */
|
|
Mark Wielaard |
d3173f |
struct {
|
|
Mark Wielaard |
d3173f |
UInt hwcaps;
|
|
Mark Wielaard |
d3173f |
} MFence;
|
|
Mark Wielaard |
d3173f |
diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c
|
|
Mark Wielaard |
d3173f |
index 086aefc..90bc563 100644
|
|
Mark Wielaard |
d3173f |
--- a/VEX/priv/host_x86_isel.c
|
|
Mark Wielaard |
d3173f |
+++ b/VEX/priv/host_x86_isel.c
|
|
Mark Wielaard |
d3173f |
@@ -3251,7 +3251,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
|
|
Mark Wielaard |
d3173f |
{
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
# define REQUIRE_SSE1 \
|
|
Mark Wielaard |
d3173f |
- do { if (env->hwcaps == 0/*baseline, no sse*/) \
|
|
Mark Wielaard |
d3173f |
+ do { if (env->hwcaps == 0/*baseline, no sse*/ \
|
|
Mark Wielaard |
d3173f |
+ || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
|
|
Mark Wielaard |
d3173f |
goto vec_fail; \
|
|
Mark Wielaard |
d3173f |
} while (0)
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
@@ -4388,7 +4389,8 @@ HInstrArray* iselSB_X86 ( IRSB* bb,
|
|
Mark Wielaard |
d3173f |
/* sanity ... */
|
|
Mark Wielaard |
d3173f |
vassert(arch_host == VexArchX86);
|
|
Mark Wielaard |
d3173f |
vassert(0 == (hwcaps_host
|
|
Mark Wielaard |
d3173f |
- & ~(VEX_HWCAPS_X86_SSE1
|
|
Mark Wielaard |
d3173f |
+ & ~(VEX_HWCAPS_X86_MMXEXT
|
|
Mark Wielaard |
d3173f |
+ | VEX_HWCAPS_X86_SSE1
|
|
Mark Wielaard |
d3173f |
| VEX_HWCAPS_X86_SSE2
|
|
Mark Wielaard |
d3173f |
| VEX_HWCAPS_X86_SSE3
|
|
Mark Wielaard |
d3173f |
| VEX_HWCAPS_X86_LZCNT)));
|
|
Mark Wielaard |
d3173f |
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
|
|
Mark Wielaard |
d3173f |
index e425950..5bb762f 100644
|
|
Mark Wielaard |
d3173f |
--- a/VEX/priv/main_main.c
|
|
Mark Wielaard |
d3173f |
+++ b/VEX/priv/main_main.c
|
|
Mark Wielaard |
d3173f |
@@ -1086,23 +1086,25 @@
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
static HChar* show_hwcaps_x86 ( UInt hwcaps )
|
|
Mark Wielaard |
d3173f |
{
|
|
Mark Wielaard |
d3173f |
- /* Monotonic, SSE3 > SSE2 > SSE1 > baseline. */
|
|
Mark Wielaard |
d3173f |
+ /* Monotonic, LZCNT > SSE3 > SSE2 > SSE1 > MMXEXT > baseline. */
|
|
Mark Wielaard |
d3173f |
switch (hwcaps) {
|
|
Mark Wielaard |
d3173f |
case 0:
|
|
Mark Wielaard |
d3173f |
return "x86-sse0";
|
|
Mark Wielaard |
d3173f |
- case VEX_HWCAPS_X86_SSE1:
|
|
Mark Wielaard |
d3173f |
- return "x86-sse1";
|
|
Mark Wielaard |
d3173f |
- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2:
|
|
Mark Wielaard |
d3173f |
- return "x86-sse1-sse2";
|
|
Mark Wielaard |
d3173f |
- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
|
|
Mark Wielaard |
d3173f |
+ case VEX_HWCAPS_X86_MMXEXT:
|
|
Mark Wielaard |
d3173f |
+ return "x86-mmxext";
|
|
Mark Wielaard |
d3173f |
+ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1:
|
|
Mark Wielaard |
d3173f |
+ return "x86-mmxext-sse1";
|
|
Mark Wielaard |
d3173f |
+ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2:
|
|
Mark Wielaard |
d3173f |
+ return "x86-mmxext-sse1-sse2";
|
|
Mark Wielaard |
d3173f |
+ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
|
|
Mark Wielaard |
d3173f |
| VEX_HWCAPS_X86_LZCNT:
|
|
Mark Wielaard |
d3173f |
- return "x86-sse1-sse2-lzcnt";
|
|
Mark Wielaard |
d3173f |
- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
|
|
Mark Wielaard |
d3173f |
+ return "x86-mmxext-sse1-sse2-lzcnt";
|
|
Mark Wielaard |
d3173f |
+ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
|
|
Mark Wielaard |
d3173f |
| VEX_HWCAPS_X86_SSE3:
|
|
Mark Wielaard |
d3173f |
- return "x86-sse1-sse2-sse3";
|
|
Mark Wielaard |
d3173f |
- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
|
|
Mark Wielaard |
d3173f |
+ return "x86-mmxext-sse1-sse2-sse3";
|
|
Mark Wielaard |
d3173f |
+ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
|
|
Mark Wielaard |
d3173f |
| VEX_HWCAPS_X86_SSE3 | VEX_HWCAPS_X86_LZCNT:
|
|
Mark Wielaard |
d3173f |
- return "x86-sse1-sse2-sse3-lzcnt";
|
|
Mark Wielaard |
d3173f |
+ return "x86-mmxext-sse1-sse2-sse3-lzcnt";
|
|
Mark Wielaard |
d3173f |
default:
|
|
Mark Wielaard |
d3173f |
return NULL;
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h
|
|
Mark Wielaard |
d3173f |
index 4b36727..c8b5892 100644
|
|
Mark Wielaard |
d3173f |
--- a/VEX/pub/libvex.h
|
|
Mark Wielaard |
d3173f |
+++ b/VEX/pub/libvex.h
|
|
Mark Wielaard |
d3173f |
@@ -71,11 +71,12 @@ typedef
|
|
Mark Wielaard |
d3173f |
combinations. */
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE), with
|
|
Mark Wielaard |
d3173f |
- cmpxchg8b. */
|
|
Mark Wielaard |
d3173f |
-#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */
|
|
Mark Wielaard |
d3173f |
-#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */
|
|
Mark Wielaard |
d3173f |
-#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */
|
|
Mark Wielaard |
d3173f |
-#define VEX_HWCAPS_X86_LZCNT (1<<4) /* SSE4a LZCNT insn */
|
|
Mark Wielaard |
d3173f |
+ cmpxchg8b. MMXEXT is a special AMD only subset of SSE1 (Integer SSE). */
|
|
Mark Wielaard |
d3173f |
+#define VEX_HWCAPS_X86_MMXEXT (1<<1) /* A subset of SSE1 on early AMD */
|
|
Mark Wielaard |
d3173f |
+#define VEX_HWCAPS_X86_SSE1 (1<<2) /* SSE1 support (Pentium III) */
|
|
Mark Wielaard |
d3173f |
+#define VEX_HWCAPS_X86_SSE2 (1<<3) /* SSE2 support (Pentium 4) */
|
|
Mark Wielaard |
d3173f |
+#define VEX_HWCAPS_X86_SSE3 (1<<4) /* SSE3 support (>= Prescott) */
|
|
Mark Wielaard |
d3173f |
+#define VEX_HWCAPS_X86_LZCNT (1<<5) /* SSE4a LZCNT insn */
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
/* amd64: baseline capability is SSE2, with cmpxchg8b but not
|
|
Mark Wielaard |
d3173f |
cmpxchg16b. */
|
|
Mark Wielaard |
d3173f |
commit 4c6f0638553e69b7f70c17a64a8f60114d6f6230
|
|
Mark Wielaard |
d3173f |
Author: mjw <mjw@a5019735-40e9-0310-863c-91ae7b9d1cf9>
|
|
Mark Wielaard |
d3173f |
Date: Tue Aug 27 10:23:23 2013 +0000
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
Support mmxext (integer sse) subset on i386 (athlon). Bug #323713
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
Some processors like the AMD Athlon "Classic" support mmxext,
|
|
Mark Wielaard |
d3173f |
a sse1 subset. This subset is not properly detected by VEX.
|
|
Mark Wielaard |
d3173f |
The subset uses the same encoding as the sse1 instructions.
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
The subset is described at:
|
|
Mark Wielaard |
d3173f |
http://support.amd.com/us/Embedded_TechDocs/22466.pdf
|
|
Mark Wielaard |
d3173f |
https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
Detects mmxext subset from cpuid information (and enables it
|
|
Mark Wielaard |
d3173f |
when full sse1 is found). Also fixes the prereq of
|
|
Mark Wielaard |
d3173f |
none/tests/x86/insn_mmxext.vgtest so that it also runs when
|
|
Mark Wielaard |
d3173f |
full sse1 (and not just the mmxext subset) is found.
|
|
Mark Wielaard |
d3173f |
It already passed on such configurations. With the VEX patch
|
|
Mark Wielaard |
d3173f |
(r2745) it also passes with just the mmxext subset.
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13515 a5019735-40e9-0310-863c-91ae7b9d1cf9
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
|
|
Mark Wielaard |
d3173f |
index 353c05b..2fd5f07 100644
|
|
Mark Wielaard |
d3173f |
--- a/coregrind/m_machine.c
|
|
Mark Wielaard |
d3173f |
+++ b/coregrind/m_machine.c
|
|
Mark Wielaard |
d3173f |
@@ -685,7 +685,7 @@
|
|
Mark Wielaard |
d3173f |
LibVEX_default_VexArchInfo(&vai);
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
#if defined(VGA_x86)
|
|
Mark Wielaard |
d3173f |
- { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
|
|
Mark Wielaard |
d3173f |
+ { Bool have_sse1, have_sse2, have_cx8, have_lzcnt, have_mmxext;
|
|
Mark Wielaard |
d3173f |
UInt eax, ebx, ecx, edx, max_extended;
|
|
Mark Wielaard |
d3173f |
UChar vstr[13];
|
|
Mark Wielaard |
d3173f |
vstr[0] = 0;
|
|
Mark Wielaard |
d3173f |
@@ -722,17 +722,27 @@
|
|
Mark Wielaard |
d3173f |
if (!have_cx8)
|
|
Mark Wielaard |
d3173f |
return False;
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
- /* Figure out if this is an AMD that can do LZCNT. */
|
|
Mark Wielaard |
d3173f |
+ /* Figure out if this is an AMD that can do mmxext and/or LZCNT. */
|
|
Mark Wielaard |
d3173f |
+ have_mmxext = False;
|
|
Mark Wielaard |
d3173f |
have_lzcnt = False;
|
|
Mark Wielaard |
d3173f |
if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
|
|
Mark Wielaard |
d3173f |
&& max_extended >= 0x80000001) {
|
|
Mark Wielaard |
d3173f |
VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
|
|
Mark Wielaard |
d3173f |
have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ /* Some older AMD processors support a sse1 subset (Integer SSE). */
|
|
Mark Wielaard |
d3173f |
+ have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
- if (have_sse2 && have_sse1) {
|
|
Mark Wielaard |
d3173f |
+ /* Intel processors don't define the mmxext extension, but since it
|
|
Mark Wielaard |
d3173f |
+ is just a sse1 subset always define it when we have sse1. */
|
|
Mark Wielaard |
d3173f |
+ if (have_sse1)
|
|
Mark Wielaard |
d3173f |
+ have_mmxext = True;
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
+ if (have_sse2 && have_sse1 && have_mmxext) {
|
|
Mark Wielaard |
d3173f |
va = VexArchX86;
|
|
Mark Wielaard |
d3173f |
- vai.hwcaps = VEX_HWCAPS_X86_SSE1;
|
|
Mark Wielaard |
d3173f |
+ vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
|
|
Mark Wielaard |
d3173f |
+ vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
|
|
Mark Wielaard |
d3173f |
vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
|
|
Mark Wielaard |
d3173f |
if (have_lzcnt)
|
|
Mark Wielaard |
d3173f |
vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
|
|
Mark Wielaard |
d3173f |
@@ -740,13 +750,21 @@
|
|
Mark Wielaard |
d3173f |
return True;
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
- if (have_sse1) {
|
|
Mark Wielaard |
d3173f |
+ if (have_sse1 && have_mmxext) {
|
|
Mark Wielaard |
d3173f |
va = VexArchX86;
|
|
Mark Wielaard |
d3173f |
- vai.hwcaps = VEX_HWCAPS_X86_SSE1;
|
|
Mark Wielaard |
d3173f |
+ vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
|
|
Mark Wielaard |
d3173f |
+ vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
|
|
Mark Wielaard |
d3173f |
VG_(machine_x86_have_mxcsr) = 1;
|
|
Mark Wielaard |
d3173f |
return True;
|
|
Mark Wielaard |
d3173f |
}
|
|
Mark Wielaard |
d3173f |
|
|
Mark Wielaard |
d3173f |
+ if (have_mmxext) {
|
|
Mark Wielaard |
d3173f |
+ va = VexArchX86;
|
|
Mark Wielaard |
d3173f |
+ vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
|
|
Mark Wielaard |
d3173f |
+ VG_(machine_x86_have_mxcsr) = 0;
|
|
Mark Wielaard |
d3173f |
+ return True;
|
|
Mark Wielaard |
d3173f |
+ }
|
|
Mark Wielaard |
d3173f |
+
|
|
Mark Wielaard |
d3173f |
va = VexArchX86;
|
|
Mark Wielaard |
d3173f |
vai.hwcaps = 0; /*baseline - no sse at all*/
|
|
Mark Wielaard |
d3173f |
VG_(machine_x86_have_mxcsr) = 0;
|
|
Mark Wielaard |
d3173f |
diff --git a/none/tests/x86/insn_mmxext.vgtest b/none/tests/x86/insn_mmxext.vgtest
|
|
Mark Wielaard |
d3173f |
index ad48b6e..e3627d6 100644
|
|
Mark Wielaard |
d3173f |
--- a/none/tests/x86/insn_mmxext.vgtest
|
|
Mark Wielaard |
d3173f |
+++ b/none/tests/x86/insn_mmxext.vgtest
|
|
Mark Wielaard |
d3173f |
@@ -1,3 +1,4 @@
|
|
Mark Wielaard |
d3173f |
prog: ../../../none/tests/x86/insn_mmxext
|
|
Mark Wielaard |
d3173f |
-prereq: ../../../tests/x86_amd64_features x86-mmxext
|
|
Mark Wielaard |
d3173f |
+# mmxext is an old AMD subset of sse1, so either will do.
|
|
Mark Wielaard |
d3173f |
+prereq: ../../../tests/x86_amd64_features x86-mmxext || ../../../tests/x86_amd64_features x86-sse
|
|
Mark Wielaard |
d3173f |
vgopts: -q
|