e2f3ee
From d6f29071cce7c162df253a8fbfd6de691be5fff9 Mon Sep 17 00:00:00 2001
e2f3ee
From: David Edmondson <david.edmondson@oracle.com>
e2f3ee
Date: Mon, 5 Jul 2021 11:46:30 +0100
e2f3ee
Subject: [PATCH 6/7] target/i386: Observe XSAVE state area offsets
e2f3ee
e2f3ee
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
e2f3ee
RH-MergeRequest: 113: non-av 8.5z: Fix XSAVE on newer CPUs
e2f3ee
RH-Commit: [6/7] 3741a121957cd10e4d160da22c056ff81b6bc62f
e2f3ee
RH-Bugzilla: 2065239
e2f3ee
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
e2f3ee
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
e2f3ee
RH-Acked-by: Bandan Das <None>
e2f3ee
e2f3ee
Rather than relying on the X86XSaveArea structure definition directly,
e2f3ee
the routines that manipulate the XSAVE state area should observe the
e2f3ee
offsets declared in the x86_ext_save_areas array.
e2f3ee
e2f3ee
Currently the offsets declared in the array are derived from the
e2f3ee
structure definition, resulting in no functional change.
e2f3ee
e2f3ee
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
e2f3ee
Message-Id: <20210705104632.2902400-7-david.edmondson@oracle.com>
e2f3ee
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
e2f3ee
(cherry picked from commit 3568987f78faff90829ea6c885bbdd5b083dc86c)
e2f3ee
---
e2f3ee
 target/i386/xsave_helper.c | 262 ++++++++++++++++++++++++++++---------
e2f3ee
 1 file changed, 200 insertions(+), 62 deletions(-)
e2f3ee
e2f3ee
diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
e2f3ee
index b16c6ac0fe..ac61a96344 100644
e2f3ee
--- a/target/i386/xsave_helper.c
e2f3ee
+++ b/target/i386/xsave_helper.c
e2f3ee
@@ -9,13 +9,20 @@
e2f3ee
 void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
e2f3ee
 {
e2f3ee
     CPUX86State *env = &cpu->env;
e2f3ee
-    X86XSaveArea *xsave = buf;
e2f3ee
-    uint16_t cwd, swd, twd;
e2f3ee
+    const ExtSaveArea *e, *f;
e2f3ee
     int i;
e2f3ee
 
e2f3ee
-    assert(buflen >= sizeof(*xsave));
e2f3ee
+    X86LegacyXSaveArea *legacy;
e2f3ee
+    X86XSaveHeader *header;
e2f3ee
+    uint16_t cwd, swd, twd;
e2f3ee
+
e2f3ee
+    memset(buf, 0, buflen);
e2f3ee
+
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_FP_BIT];
e2f3ee
+
e2f3ee
+    legacy = buf + e->offset;
e2f3ee
+    header = buf + e->offset + sizeof(*legacy);
e2f3ee
 
e2f3ee
-    memset(xsave, 0, buflen);
e2f3ee
     twd = 0;
e2f3ee
     swd = env->fpus & ~(7 << 11);
e2f3ee
     swd |= (env->fpstt & 7) << 11;
e2f3ee
@@ -23,91 +30,222 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
e2f3ee
     for (i = 0; i < 8; ++i) {
e2f3ee
         twd |= (!env->fptags[i]) << i;
e2f3ee
     }
e2f3ee
-    xsave->legacy.fcw = cwd;
e2f3ee
-    xsave->legacy.fsw = swd;
e2f3ee
-    xsave->legacy.ftw = twd;
e2f3ee
-    xsave->legacy.fpop = env->fpop;
e2f3ee
-    xsave->legacy.fpip = env->fpip;
e2f3ee
-    xsave->legacy.fpdp = env->fpdp;
e2f3ee
-    memcpy(&xsave->legacy.fpregs, env->fpregs,
e2f3ee
-            sizeof env->fpregs);
e2f3ee
-    xsave->legacy.mxcsr = env->mxcsr;
e2f3ee
-    xsave->header.xstate_bv = env->xstate_bv;
e2f3ee
-    memcpy(&xsave->bndreg_state.bnd_regs, env->bnd_regs,
e2f3ee
-            sizeof env->bnd_regs);
e2f3ee
-    xsave->bndcsr_state.bndcsr = env->bndcs_regs;
e2f3ee
-    memcpy(&xsave->opmask_state.opmask_regs, env->opmask_regs,
e2f3ee
-            sizeof env->opmask_regs);
e2f3ee
+    legacy->fcw = cwd;
e2f3ee
+    legacy->fsw = swd;
e2f3ee
+    legacy->ftw = twd;
e2f3ee
+    legacy->fpop = env->fpop;
e2f3ee
+    legacy->fpip = env->fpip;
e2f3ee
+    legacy->fpdp = env->fpdp;
e2f3ee
+    memcpy(&legacy->fpregs, env->fpregs,
e2f3ee
+           sizeof(env->fpregs));
e2f3ee
+    legacy->mxcsr = env->mxcsr;
e2f3ee
 
e2f3ee
     for (i = 0; i < CPU_NB_REGS; i++) {
e2f3ee
-        uint8_t *xmm = xsave->legacy.xmm_regs[i];
e2f3ee
-        uint8_t *ymmh = xsave->avx_state.ymmh[i];
e2f3ee
-        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
e2f3ee
+        uint8_t *xmm = legacy->xmm_regs[i];
e2f3ee
+
e2f3ee
         stq_p(xmm,     env->xmm_regs[i].ZMM_Q(0));
e2f3ee
-        stq_p(xmm+8,   env->xmm_regs[i].ZMM_Q(1));
e2f3ee
-        stq_p(ymmh,    env->xmm_regs[i].ZMM_Q(2));
e2f3ee
-        stq_p(ymmh+8,  env->xmm_regs[i].ZMM_Q(3));
e2f3ee
-        stq_p(zmmh,    env->xmm_regs[i].ZMM_Q(4));
e2f3ee
-        stq_p(zmmh+8,  env->xmm_regs[i].ZMM_Q(5));
e2f3ee
-        stq_p(zmmh+16, env->xmm_regs[i].ZMM_Q(6));
e2f3ee
-        stq_p(zmmh+24, env->xmm_regs[i].ZMM_Q(7));
e2f3ee
+        stq_p(xmm + 8, env->xmm_regs[i].ZMM_Q(1));
e2f3ee
+    }
e2f3ee
+
e2f3ee
+    header->xstate_bv = env->xstate_bv;
e2f3ee
+
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_YMM_BIT];
e2f3ee
+    if (e->size && e->offset) {
e2f3ee
+        XSaveAVX *avx;
e2f3ee
+
e2f3ee
+        avx = buf + e->offset;
e2f3ee
+
e2f3ee
+        for (i = 0; i < CPU_NB_REGS; i++) {
e2f3ee
+            uint8_t *ymmh = avx->ymmh[i];
e2f3ee
+
e2f3ee
+            stq_p(ymmh,     env->xmm_regs[i].ZMM_Q(2));
e2f3ee
+            stq_p(ymmh + 8, env->xmm_regs[i].ZMM_Q(3));
e2f3ee
+        }
e2f3ee
+    }
e2f3ee
+
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_BNDREGS_BIT];
e2f3ee
+    if (e->size && e->offset) {
e2f3ee
+        XSaveBNDREG *bndreg;
e2f3ee
+        XSaveBNDCSR *bndcsr;
e2f3ee
+
e2f3ee
+        f = &x86_ext_save_areas[XSTATE_BNDCSR_BIT];
e2f3ee
+        assert(f->size);
e2f3ee
+        assert(f->offset);
e2f3ee
+
e2f3ee
+        bndreg = buf + e->offset;
e2f3ee
+        bndcsr = buf + f->offset;
e2f3ee
+
e2f3ee
+        memcpy(&bndreg->bnd_regs, env->bnd_regs,
e2f3ee
+               sizeof(env->bnd_regs));
e2f3ee
+        bndcsr->bndcsr = env->bndcs_regs;
e2f3ee
     }
e2f3ee
 
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_OPMASK_BIT];
e2f3ee
+    if (e->size && e->offset) {
e2f3ee
+        XSaveOpmask *opmask;
e2f3ee
+        XSaveZMM_Hi256 *zmm_hi256;
e2f3ee
+#ifdef TARGET_X86_64
e2f3ee
+        XSaveHi16_ZMM *hi16_zmm;
e2f3ee
+#endif
e2f3ee
+
e2f3ee
+        f = &x86_ext_save_areas[XSTATE_ZMM_Hi256_BIT];
e2f3ee
+        assert(f->size);
e2f3ee
+        assert(f->offset);
e2f3ee
+
e2f3ee
+        opmask = buf + e->offset;
e2f3ee
+        zmm_hi256 = buf + f->offset;
e2f3ee
+
e2f3ee
+        memcpy(&opmask->opmask_regs, env->opmask_regs,
e2f3ee
+               sizeof(env->opmask_regs));
e2f3ee
+
e2f3ee
+        for (i = 0; i < CPU_NB_REGS; i++) {
e2f3ee
+            uint8_t *zmmh = zmm_hi256->zmm_hi256[i];
e2f3ee
+
e2f3ee
+            stq_p(zmmh,      env->xmm_regs[i].ZMM_Q(4));
e2f3ee
+            stq_p(zmmh + 8,  env->xmm_regs[i].ZMM_Q(5));
e2f3ee
+            stq_p(zmmh + 16, env->xmm_regs[i].ZMM_Q(6));
e2f3ee
+            stq_p(zmmh + 24, env->xmm_regs[i].ZMM_Q(7));
e2f3ee
+        }
e2f3ee
+
e2f3ee
 #ifdef TARGET_X86_64
e2f3ee
-    memcpy(&xsave->hi16_zmm_state.hi16_zmm, &env->xmm_regs[16],
e2f3ee
-            16 * sizeof env->xmm_regs[16]);
e2f3ee
-    memcpy(&xsave->pkru_state, &env->pkru, sizeof env->pkru);
e2f3ee
+        f = &x86_ext_save_areas[XSTATE_Hi16_ZMM_BIT];
e2f3ee
+        assert(f->size);
e2f3ee
+        assert(f->offset);
e2f3ee
+
e2f3ee
+        hi16_zmm = buf + f->offset;
e2f3ee
+
e2f3ee
+        memcpy(&hi16_zmm->hi16_zmm, &env->xmm_regs[16],
e2f3ee
+               16 * sizeof(env->xmm_regs[16]));
e2f3ee
+#endif
e2f3ee
+    }
e2f3ee
+
e2f3ee
+#ifdef TARGET_X86_64
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_PKRU_BIT];
e2f3ee
+    if (e->size && e->offset) {
e2f3ee
+        XSavePKRU *pkru = buf + e->offset;
e2f3ee
+
e2f3ee
+        memcpy(pkru, &env->pkru, sizeof(env->pkru));
e2f3ee
+    }
e2f3ee
 #endif
e2f3ee
 }
e2f3ee
 
e2f3ee
 void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen)
e2f3ee
 {
e2f3ee
     CPUX86State *env = &cpu->env;
e2f3ee
-    const X86XSaveArea *xsave = buf;
e2f3ee
+    const ExtSaveArea *e, *f, *g;
e2f3ee
     int i;
e2f3ee
+
e2f3ee
+    const X86LegacyXSaveArea *legacy;
e2f3ee
+    const X86XSaveHeader *header;
e2f3ee
     uint16_t cwd, swd, twd;
e2f3ee
 
e2f3ee
-    assert(buflen >= sizeof(*xsave));
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_FP_BIT];
e2f3ee
 
e2f3ee
-    cwd = xsave->legacy.fcw;
e2f3ee
-    swd = xsave->legacy.fsw;
e2f3ee
-    twd = xsave->legacy.ftw;
e2f3ee
-    env->fpop = xsave->legacy.fpop;
e2f3ee
+    legacy = buf + e->offset;
e2f3ee
+    header = buf + e->offset + sizeof(*legacy);
e2f3ee
+
e2f3ee
+    cwd = legacy->fcw;
e2f3ee
+    swd = legacy->fsw;
e2f3ee
+    twd = legacy->ftw;
e2f3ee
+    env->fpop = legacy->fpop;
e2f3ee
     env->fpstt = (swd >> 11) & 7;
e2f3ee
     env->fpus = swd;
e2f3ee
     env->fpuc = cwd;
e2f3ee
     for (i = 0; i < 8; ++i) {
e2f3ee
         env->fptags[i] = !((twd >> i) & 1);
e2f3ee
     }
e2f3ee
-    env->fpip = xsave->legacy.fpip;
e2f3ee
-    env->fpdp = xsave->legacy.fpdp;
e2f3ee
-    env->mxcsr = xsave->legacy.mxcsr;
e2f3ee
-    memcpy(env->fpregs, &xsave->legacy.fpregs,
e2f3ee
-            sizeof env->fpregs);
e2f3ee
-    env->xstate_bv = xsave->header.xstate_bv;
e2f3ee
-    memcpy(env->bnd_regs, &xsave->bndreg_state.bnd_regs,
e2f3ee
-            sizeof env->bnd_regs);
e2f3ee
-    env->bndcs_regs = xsave->bndcsr_state.bndcsr;
e2f3ee
-    memcpy(env->opmask_regs, &xsave->opmask_state.opmask_regs,
e2f3ee
-            sizeof env->opmask_regs);
e2f3ee
+    env->fpip = legacy->fpip;
e2f3ee
+    env->fpdp = legacy->fpdp;
e2f3ee
+    env->mxcsr = legacy->mxcsr;
e2f3ee
+    memcpy(env->fpregs, &legacy->fpregs,
e2f3ee
+           sizeof(env->fpregs));
e2f3ee
 
e2f3ee
     for (i = 0; i < CPU_NB_REGS; i++) {
e2f3ee
-        const uint8_t *xmm = xsave->legacy.xmm_regs[i];
e2f3ee
-        const uint8_t *ymmh = xsave->avx_state.ymmh[i];
e2f3ee
-        const uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
e2f3ee
+        const uint8_t *xmm = legacy->xmm_regs[i];
e2f3ee
+
e2f3ee
         env->xmm_regs[i].ZMM_Q(0) = ldq_p(xmm);
e2f3ee
-        env->xmm_regs[i].ZMM_Q(1) = ldq_p(xmm+8);
e2f3ee
-        env->xmm_regs[i].ZMM_Q(2) = ldq_p(ymmh);
e2f3ee
-        env->xmm_regs[i].ZMM_Q(3) = ldq_p(ymmh+8);
e2f3ee
-        env->xmm_regs[i].ZMM_Q(4) = ldq_p(zmmh);
e2f3ee
-        env->xmm_regs[i].ZMM_Q(5) = ldq_p(zmmh+8);
e2f3ee
-        env->xmm_regs[i].ZMM_Q(6) = ldq_p(zmmh+16);
e2f3ee
-        env->xmm_regs[i].ZMM_Q(7) = ldq_p(zmmh+24);
e2f3ee
+        env->xmm_regs[i].ZMM_Q(1) = ldq_p(xmm + 8);
e2f3ee
+    }
e2f3ee
+
e2f3ee
+    env->xstate_bv = header->xstate_bv;
e2f3ee
+
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_YMM_BIT];
e2f3ee
+    if (e->size && e->offset) {
e2f3ee
+        const XSaveAVX *avx;
e2f3ee
+
e2f3ee
+        avx = buf + e->offset;
e2f3ee
+        for (i = 0; i < CPU_NB_REGS; i++) {
e2f3ee
+            const uint8_t *ymmh = avx->ymmh[i];
e2f3ee
+
e2f3ee
+            env->xmm_regs[i].ZMM_Q(2) = ldq_p(ymmh);
e2f3ee
+            env->xmm_regs[i].ZMM_Q(3) = ldq_p(ymmh + 8);
e2f3ee
+        }
e2f3ee
+    }
e2f3ee
+
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_BNDREGS_BIT];
e2f3ee
+    if (e->size && e->offset) {
e2f3ee
+        const XSaveBNDREG *bndreg;
e2f3ee
+        const XSaveBNDCSR *bndcsr;
e2f3ee
+
e2f3ee
+        f = &x86_ext_save_areas[XSTATE_BNDCSR_BIT];
e2f3ee
+        assert(f->size);
e2f3ee
+        assert(f->offset);
e2f3ee
+
e2f3ee
+        bndreg = buf + e->offset;
e2f3ee
+        bndcsr = buf + f->offset;
e2f3ee
+
e2f3ee
+        memcpy(env->bnd_regs, &bndreg->bnd_regs,
e2f3ee
+               sizeof(env->bnd_regs));
e2f3ee
+        env->bndcs_regs = bndcsr->bndcsr;
e2f3ee
     }
e2f3ee
 
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_OPMASK_BIT];
e2f3ee
+    if (e->size && e->offset) {
e2f3ee
+        const XSaveOpmask *opmask;
e2f3ee
+        const XSaveZMM_Hi256 *zmm_hi256;
e2f3ee
 #ifdef TARGET_X86_64
e2f3ee
-    memcpy(&env->xmm_regs[16], &xsave->hi16_zmm_state.hi16_zmm,
e2f3ee
-           16 * sizeof env->xmm_regs[16]);
e2f3ee
-    memcpy(&env->pkru, &xsave->pkru_state, sizeof env->pkru);
e2f3ee
+        const XSaveHi16_ZMM *hi16_zmm;
e2f3ee
+#endif
e2f3ee
+
e2f3ee
+        f = &x86_ext_save_areas[XSTATE_ZMM_Hi256_BIT];
e2f3ee
+        assert(f->size);
e2f3ee
+        assert(f->offset);
e2f3ee
+
e2f3ee
+        g = &x86_ext_save_areas[XSTATE_Hi16_ZMM_BIT];
e2f3ee
+        assert(g->size);
e2f3ee
+        assert(g->offset);
e2f3ee
+
e2f3ee
+        opmask = buf + e->offset;
e2f3ee
+        zmm_hi256 = buf + f->offset;
e2f3ee
+#ifdef TARGET_X86_64
e2f3ee
+        hi16_zmm = buf + g->offset;
e2f3ee
+#endif
e2f3ee
+
e2f3ee
+        memcpy(env->opmask_regs, &opmask->opmask_regs,
e2f3ee
+               sizeof(env->opmask_regs));
e2f3ee
+
e2f3ee
+        for (i = 0; i < CPU_NB_REGS; i++) {
e2f3ee
+            const uint8_t *zmmh = zmm_hi256->zmm_hi256[i];
e2f3ee
+
e2f3ee
+            env->xmm_regs[i].ZMM_Q(4) = ldq_p(zmmh);
e2f3ee
+            env->xmm_regs[i].ZMM_Q(5) = ldq_p(zmmh + 8);
e2f3ee
+            env->xmm_regs[i].ZMM_Q(6) = ldq_p(zmmh + 16);
e2f3ee
+            env->xmm_regs[i].ZMM_Q(7) = ldq_p(zmmh + 24);
e2f3ee
+        }
e2f3ee
+
e2f3ee
+#ifdef TARGET_X86_64
e2f3ee
+        memcpy(&env->xmm_regs[16], &hi16_zmm->hi16_zmm,
e2f3ee
+               16 * sizeof(env->xmm_regs[16]));
e2f3ee
+#endif
e2f3ee
+    }
e2f3ee
+
e2f3ee
+#ifdef TARGET_X86_64
e2f3ee
+    e = &x86_ext_save_areas[XSTATE_PKRU_BIT];
e2f3ee
+    if (e->size && e->offset) {
e2f3ee
+        const XSavePKRU *pkru;
e2f3ee
+
e2f3ee
+        pkru = buf + e->offset;
e2f3ee
+        memcpy(&env->pkru, pkru, sizeof(env->pkru));
e2f3ee
+    }
e2f3ee
 #endif
e2f3ee
 }
e2f3ee
-- 
e2f3ee
2.27.0
e2f3ee