Pablo Greco e6a3ae
From 0a1fd178d9b7c054d229b60540b7d12d87eb8070 Mon Sep 17 00:00:00 2001
Pablo Greco e6a3ae
From: Paolo Bonzini <pbonzini@redhat.com>
Pablo Greco e6a3ae
Date: Mon, 22 Jul 2019 18:22:15 +0100
Pablo Greco e6a3ae
Subject: [PATCH 34/39] target/i386: kvm: Add support for save and restore
Pablo Greco e6a3ae
 nested state
Pablo Greco e6a3ae
Pablo Greco e6a3ae
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
Pablo Greco e6a3ae
Message-id: <20190722182220.19374-14-pbonzini@redhat.com>
Pablo Greco e6a3ae
Patchwork-id: 89629
Pablo Greco e6a3ae
O-Subject: [RHEL-8.1.0 PATCH qemu-kvm v3 13/18] target/i386: kvm: Add support for save and restore nested state
Pablo Greco e6a3ae
Bugzilla: 1689269
Pablo Greco e6a3ae
RH-Acked-by: Peter Xu <zhexu@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
From: Liran Alon <liran.alon@oracle.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Kernel commit 8fcc4b5923af ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE")
Pablo Greco e6a3ae
introduced new IOCTLs to extract and restore vCPU state related to
Pablo Greco e6a3ae
Intel VMX & AMD SVM.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Utilize these IOCTLs to add support for migration of VMs which are
Pablo Greco e6a3ae
running nested hypervisors.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
Pablo Greco e6a3ae
Reviewed-by: Maran Wilson <maran.wilson@oracle.com>
Pablo Greco e6a3ae
Tested-by: Maran Wilson <maran.wilson@oracle.com>
Pablo Greco e6a3ae
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Pablo Greco e6a3ae
Message-Id: <20190619162140.133674-9-liran.alon@oracle.com>
Pablo Greco e6a3ae
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Pablo Greco e6a3ae
(cherry picked from commit ebbfef2f34cfc749c045a4569dedb4f748ec024a)
Pablo Greco e6a3ae
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
Pablo Greco e6a3ae
---
Pablo Greco e6a3ae
 accel/kvm/kvm-all.c   |   8 ++
Pablo Greco e6a3ae
 include/sysemu/kvm.h  |   1 +
Pablo Greco e6a3ae
 target/i386/cpu.h     |   3 +
Pablo Greco e6a3ae
 target/i386/kvm.c     |  80 ++++++++++++++++++++
Pablo Greco e6a3ae
 target/i386/machine.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++
Pablo Greco e6a3ae
 5 files changed, 290 insertions(+)
Pablo Greco e6a3ae
Pablo Greco e6a3ae
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
Pablo Greco e6a3ae
index a939b26..2130fcb 100644
Pablo Greco e6a3ae
--- a/accel/kvm/kvm-all.c
Pablo Greco e6a3ae
+++ b/accel/kvm/kvm-all.c
Pablo Greco e6a3ae
@@ -87,6 +87,7 @@ struct KVMState
Pablo Greco e6a3ae
 #ifdef KVM_CAP_SET_GUEST_DEBUG
Pablo Greco e6a3ae
     struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
+    int max_nested_state_len;
Pablo Greco e6a3ae
     int many_ioeventfds;
Pablo Greco e6a3ae
     int intx_set_mask;
Pablo Greco e6a3ae
     bool sync_mmu;
Pablo Greco e6a3ae
@@ -1646,6 +1647,8 @@ static int kvm_init(MachineState *ms)
Pablo Greco e6a3ae
     s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+    s->max_nested_state_len = kvm_check_extension(s, KVM_CAP_NESTED_STATE);
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
 #ifdef KVM_CAP_IRQ_ROUTING
Pablo Greco e6a3ae
     kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
@@ -2207,6 +2210,11 @@ int kvm_has_debugregs(void)
Pablo Greco e6a3ae
     return kvm_state->debugregs;
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+int kvm_max_nested_state_length(void)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    return kvm_state->max_nested_state_len;
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
 int kvm_has_many_ioeventfds(void)
Pablo Greco e6a3ae
 {
Pablo Greco e6a3ae
     if (!kvm_enabled()) {
Pablo Greco e6a3ae
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
Pablo Greco e6a3ae
index a5a6dff..3cf04cf 100644
Pablo Greco e6a3ae
--- a/include/sysemu/kvm.h
Pablo Greco e6a3ae
+++ b/include/sysemu/kvm.h
Pablo Greco e6a3ae
@@ -211,6 +211,7 @@ bool kvm_has_sync_mmu(void);
Pablo Greco e6a3ae
 int kvm_has_vcpu_events(void);
Pablo Greco e6a3ae
 int kvm_has_robust_singlestep(void);
Pablo Greco e6a3ae
 int kvm_has_debugregs(void);
Pablo Greco e6a3ae
+int kvm_max_nested_state_length(void);
Pablo Greco e6a3ae
 int kvm_has_pit_state2(void);
Pablo Greco e6a3ae
 int kvm_has_many_ioeventfds(void);
Pablo Greco e6a3ae
 int kvm_has_gsi_routing(void);
Pablo Greco e6a3ae
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
Pablo Greco e6a3ae
index f595fc3..86f3d98 100644
Pablo Greco e6a3ae
--- a/target/i386/cpu.h
Pablo Greco e6a3ae
+++ b/target/i386/cpu.h
Pablo Greco e6a3ae
@@ -1335,6 +1335,9 @@ typedef struct CPUX86State {
Pablo Greco e6a3ae
     int64_t tsc_khz;
Pablo Greco e6a3ae
     int64_t user_tsc_khz; /* for sanity check only */
Pablo Greco e6a3ae
     void *kvm_xsave_buf;
Pablo Greco e6a3ae
+#if defined(CONFIG_KVM)
Pablo Greco e6a3ae
+    struct kvm_nested_state *nested_state;
Pablo Greco e6a3ae
+#endif
Pablo Greco e6a3ae
 #if defined(CONFIG_HVF)
Pablo Greco e6a3ae
     HVFX86EmulatorState *hvf_emul;
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
Pablo Greco e6a3ae
index 8a6da90..ddceb7d 100644
Pablo Greco e6a3ae
--- a/target/i386/kvm.c
Pablo Greco e6a3ae
+++ b/target/i386/kvm.c
Pablo Greco e6a3ae
@@ -789,6 +789,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
Pablo Greco e6a3ae
     struct kvm_cpuid_entry2 *c;
Pablo Greco e6a3ae
     uint32_t signature[3];
Pablo Greco e6a3ae
     int kvm_base = KVM_CPUID_SIGNATURE;
Pablo Greco e6a3ae
+    int max_nested_state_len;
Pablo Greco e6a3ae
     int r;
Pablo Greco e6a3ae
     Error *local_err = NULL;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
@@ -1180,6 +1181,24 @@ int kvm_arch_init_vcpu(CPUState *cs)
Pablo Greco e6a3ae
     if (has_xsave) {
Pablo Greco e6a3ae
         env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    max_nested_state_len = kvm_max_nested_state_length();
Pablo Greco e6a3ae
+    if (max_nested_state_len > 0) {
Pablo Greco e6a3ae
+        assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));
Pablo Greco e6a3ae
+        env->nested_state = g_malloc0(max_nested_state_len);
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+        env->nested_state->size = max_nested_state_len;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+        if (IS_INTEL_CPU(env)) {
Pablo Greco e6a3ae
+            struct kvm_vmx_nested_state_hdr *vmx_hdr =
Pablo Greco e6a3ae
+                &env->nested_state->hdr.vmx;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+            env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
Pablo Greco e6a3ae
+            vmx_hdr->vmxon_pa = -1ull;
Pablo Greco e6a3ae
+            vmx_hdr->vmcs12_pa = -1ull;
Pablo Greco e6a3ae
+        }
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE);
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) {
Pablo Greco e6a3ae
@@ -1199,12 +1218,18 @@ int kvm_arch_init_vcpu(CPUState *cs)
Pablo Greco e6a3ae
 int kvm_arch_destroy_vcpu(CPUState *cs)
Pablo Greco e6a3ae
 {
Pablo Greco e6a3ae
     X86CPU *cpu = X86_CPU(cs);
Pablo Greco e6a3ae
+    CPUX86State *env = &cpu->env;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     if (cpu->kvm_msr_buf) {
Pablo Greco e6a3ae
         g_free(cpu->kvm_msr_buf);
Pablo Greco e6a3ae
         cpu->kvm_msr_buf = NULL;
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+    if (env->nested_state) {
Pablo Greco e6a3ae
+        g_free(env->nested_state);
Pablo Greco e6a3ae
+        env->nested_state = NULL;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     return 0;
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
@@ -2875,6 +2900,52 @@ static int kvm_get_debugregs(X86CPU *cpu)
Pablo Greco e6a3ae
     return 0;
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+static int kvm_put_nested_state(X86CPU *cpu)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    CPUX86State *env = &cpu->env;
Pablo Greco e6a3ae
+    int max_nested_state_len = kvm_max_nested_state_length();
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    if (max_nested_state_len <= 0) {
Pablo Greco e6a3ae
+        return 0;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    assert(env->nested_state->size <= max_nested_state_len);
Pablo Greco e6a3ae
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state);
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static int kvm_get_nested_state(X86CPU *cpu)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    CPUX86State *env = &cpu->env;
Pablo Greco e6a3ae
+    int max_nested_state_len = kvm_max_nested_state_length();
Pablo Greco e6a3ae
+    int ret;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    if (max_nested_state_len <= 0) {
Pablo Greco e6a3ae
+        return 0;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    /*
Pablo Greco e6a3ae
+     * It is possible that migration restored a smaller size into
Pablo Greco e6a3ae
+     * nested_state->size than what our kernel supports.
Pablo Greco e6a3ae
+     * We preserve the migration origin's nested_state->size for the
Pablo Greco e6a3ae
+     * call to KVM_SET_NESTED_STATE but want our next call to
Pablo Greco e6a3ae
+     * KVM_GET_NESTED_STATE to use the max size our kernel supports.
Pablo Greco e6a3ae
+     */
Pablo Greco e6a3ae
+    env->nested_state->size = max_nested_state_len;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, env->nested_state);
Pablo Greco e6a3ae
+    if (ret < 0) {
Pablo Greco e6a3ae
+        return ret;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    if (env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE) {
Pablo Greco e6a3ae
+        env->hflags |= HF_GUEST_MASK;
Pablo Greco e6a3ae
+    } else {
Pablo Greco e6a3ae
+        env->hflags &= ~HF_GUEST_MASK;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    return ret;
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
 int kvm_arch_put_registers(CPUState *cpu, int level)
Pablo Greco e6a3ae
 {
Pablo Greco e6a3ae
     X86CPU *x86_cpu = X86_CPU(cpu);
Pablo Greco e6a3ae
@@ -2882,6 +2953,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+    ret = kvm_put_nested_state(x86_cpu);
Pablo Greco e6a3ae
+    if (ret < 0) {
Pablo Greco e6a3ae
+        return ret;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     if (level >= KVM_PUT_RESET_STATE) {
Pablo Greco e6a3ae
         ret = kvm_put_msr_feature_control(x86_cpu);
Pablo Greco e6a3ae
         if (ret < 0) {
Pablo Greco e6a3ae
@@ -2997,6 +3073,10 @@ int kvm_arch_get_registers(CPUState *cs)
Pablo Greco e6a3ae
     if (ret < 0) {
Pablo Greco e6a3ae
         goto out;
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
+    ret = kvm_get_nested_state(cpu);
Pablo Greco e6a3ae
+    if (ret < 0) {
Pablo Greco e6a3ae
+        goto out;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
     ret = 0;
Pablo Greco e6a3ae
  out:
Pablo Greco e6a3ae
     cpu_sync_bndcs_hflags(&cpu->env);
Pablo Greco e6a3ae
diff --git a/target/i386/machine.c b/target/i386/machine.c
Pablo Greco e6a3ae
index 561d4a5..a2ddbba 100644
Pablo Greco e6a3ae
--- a/target/i386/machine.c
Pablo Greco e6a3ae
+++ b/target/i386/machine.c
Pablo Greco e6a3ae
@@ -230,6 +230,15 @@ static int cpu_pre_save(void *opaque)
Pablo Greco e6a3ae
         env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+#ifdef CONFIG_KVM
Pablo Greco e6a3ae
+    /* Verify we have nested virtualization state from kernel if required */
Pablo Greco e6a3ae
+    if (kvm_enabled() && cpu_has_vmx(env) && !env->nested_state) {
Pablo Greco e6a3ae
+        error_report("Guest enabled nested virtualization but kernel "
Pablo Greco e6a3ae
+                "does not support saving of nested state");
Pablo Greco e6a3ae
+        return -EINVAL;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+#endif
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     return 0;
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
@@ -277,6 +286,16 @@ static int cpu_post_load(void *opaque, int version_id)
Pablo Greco e6a3ae
     env->hflags &= ~HF_CPL_MASK;
Pablo Greco e6a3ae
     env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+#ifdef CONFIG_KVM
Pablo Greco e6a3ae
+    if ((env->hflags & HF_GUEST_MASK) &&
Pablo Greco e6a3ae
+        (!env->nested_state ||
Pablo Greco e6a3ae
+        !(env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE))) {
Pablo Greco e6a3ae
+        error_report("vCPU set in guest-mode inconsistent with "
Pablo Greco e6a3ae
+                     "migrated kernel nested state");
Pablo Greco e6a3ae
+        return -EINVAL;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+#endif
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     env->fpstt = (env->fpus_vmstate >> 11) & 7;
Pablo Greco e6a3ae
     env->fpus = env->fpus_vmstate & ~0x3800;
Pablo Greco e6a3ae
     env->fptag_vmstate ^= 0xff;
Pablo Greco e6a3ae
@@ -819,6 +838,182 @@ static const VMStateDescription vmstate_tsc_khz = {
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 };
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+#ifdef CONFIG_KVM
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static bool vmx_vmcs12_needed(void *opaque)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    struct kvm_nested_state *nested_state = opaque;
Pablo Greco e6a3ae
+    return (nested_state->size >
Pablo Greco e6a3ae
+            offsetof(struct kvm_nested_state, data.vmx[0].vmcs12));
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static const VMStateDescription vmstate_vmx_vmcs12 = {
Pablo Greco e6a3ae
+    .name = "cpu/kvm_nested_state/vmx/vmcs12",
Pablo Greco e6a3ae
+    .version_id = 1,
Pablo Greco e6a3ae
+    .minimum_version_id = 1,
Pablo Greco e6a3ae
+    .needed = vmx_vmcs12_needed,
Pablo Greco e6a3ae
+    .fields = (VMStateField[]) {
Pablo Greco e6a3ae
+        VMSTATE_UINT8_ARRAY(data.vmx[0].vmcs12,
Pablo Greco e6a3ae
+                            struct kvm_nested_state,
Pablo Greco e6a3ae
+                            KVM_STATE_NESTED_VMX_VMCS_SIZE),
Pablo Greco e6a3ae
+        VMSTATE_END_OF_LIST()
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+};
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static bool vmx_shadow_vmcs12_needed(void *opaque)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    struct kvm_nested_state *nested_state = opaque;
Pablo Greco e6a3ae
+    return (nested_state->size >
Pablo Greco e6a3ae
+            offsetof(struct kvm_nested_state, data.vmx[0].shadow_vmcs12));
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static const VMStateDescription vmstate_vmx_shadow_vmcs12 = {
Pablo Greco e6a3ae
+    .name = "cpu/kvm_nested_state/vmx/shadow_vmcs12",
Pablo Greco e6a3ae
+    .version_id = 1,
Pablo Greco e6a3ae
+    .minimum_version_id = 1,
Pablo Greco e6a3ae
+    .needed = vmx_shadow_vmcs12_needed,
Pablo Greco e6a3ae
+    .fields = (VMStateField[]) {
Pablo Greco e6a3ae
+        VMSTATE_UINT8_ARRAY(data.vmx[0].shadow_vmcs12,
Pablo Greco e6a3ae
+                            struct kvm_nested_state,
Pablo Greco e6a3ae
+                            KVM_STATE_NESTED_VMX_VMCS_SIZE),
Pablo Greco e6a3ae
+        VMSTATE_END_OF_LIST()
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+};
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static bool vmx_nested_state_needed(void *opaque)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    struct kvm_nested_state *nested_state = opaque;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    return ((nested_state->format == KVM_STATE_NESTED_FORMAT_VMX) &&
Pablo Greco e6a3ae
+            ((nested_state->hdr.vmx.vmxon_pa != -1ull) ||
Pablo Greco e6a3ae
+             (nested_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)));
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static const VMStateDescription vmstate_vmx_nested_state = {
Pablo Greco e6a3ae
+    .name = "cpu/kvm_nested_state/vmx",
Pablo Greco e6a3ae
+    .version_id = 1,
Pablo Greco e6a3ae
+    .minimum_version_id = 1,
Pablo Greco e6a3ae
+    .needed = vmx_nested_state_needed,
Pablo Greco e6a3ae
+    .fields = (VMStateField[]) {
Pablo Greco e6a3ae
+        VMSTATE_U64(hdr.vmx.vmxon_pa, struct kvm_nested_state),
Pablo Greco e6a3ae
+        VMSTATE_U64(hdr.vmx.vmcs12_pa, struct kvm_nested_state),
Pablo Greco e6a3ae
+        VMSTATE_U16(hdr.vmx.smm.flags, struct kvm_nested_state),
Pablo Greco e6a3ae
+        VMSTATE_END_OF_LIST()
Pablo Greco e6a3ae
+    },
Pablo Greco e6a3ae
+    .subsections = (const VMStateDescription*[]) {
Pablo Greco e6a3ae
+        &vmstate_vmx_vmcs12,
Pablo Greco e6a3ae
+        &vmstate_vmx_shadow_vmcs12,
Pablo Greco e6a3ae
+        NULL,
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+};
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static bool svm_nested_state_needed(void *opaque)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    struct kvm_nested_state *nested_state = opaque;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    return (nested_state->format == KVM_STATE_NESTED_FORMAT_SVM);
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static const VMStateDescription vmstate_svm_nested_state = {
Pablo Greco e6a3ae
+    .name = "cpu/kvm_nested_state/svm",
Pablo Greco e6a3ae
+    .version_id = 1,
Pablo Greco e6a3ae
+    .minimum_version_id = 1,
Pablo Greco e6a3ae
+    .needed = svm_nested_state_needed,
Pablo Greco e6a3ae
+    .fields = (VMStateField[]) {
Pablo Greco e6a3ae
+        VMSTATE_END_OF_LIST()
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+};
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static bool nested_state_needed(void *opaque)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    X86CPU *cpu = opaque;
Pablo Greco e6a3ae
+    CPUX86State *env = &cpu->env;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    return (env->nested_state &&
Pablo Greco e6a3ae
+            (vmx_nested_state_needed(env->nested_state) ||
Pablo Greco e6a3ae
+             svm_nested_state_needed(env->nested_state)));
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static int nested_state_post_load(void *opaque, int version_id)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    X86CPU *cpu = opaque;
Pablo Greco e6a3ae
+    CPUX86State *env = &cpu->env;
Pablo Greco e6a3ae
+    struct kvm_nested_state *nested_state = env->nested_state;
Pablo Greco e6a3ae
+    int min_nested_state_len = offsetof(struct kvm_nested_state, data);
Pablo Greco e6a3ae
+    int max_nested_state_len = kvm_max_nested_state_length();
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    /*
Pablo Greco e6a3ae
+     * If our kernel doesn't support setting nested state
Pablo Greco e6a3ae
+     * and we have received nested state from migration stream,
Pablo Greco e6a3ae
+     * we need to fail migration
Pablo Greco e6a3ae
+     */
Pablo Greco e6a3ae
+    if (max_nested_state_len <= 0) {
Pablo Greco e6a3ae
+        error_report("Received nested state when kernel cannot restore it");
Pablo Greco e6a3ae
+        return -EINVAL;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    /*
Pablo Greco e6a3ae
+     * Verify that the size of the received nested_state struct
Pablo Greco e6a3ae
+     * at least covers the required header and is not larger
Pablo Greco e6a3ae
+     * than the max size that our kernel supports
Pablo Greco e6a3ae
+     */
Pablo Greco e6a3ae
+    if (nested_state->size < min_nested_state_len) {
Pablo Greco e6a3ae
+        error_report("Received nested state size less than min: "
Pablo Greco e6a3ae
+                     "len=%d, min=%d",
Pablo Greco e6a3ae
+                     nested_state->size, min_nested_state_len);
Pablo Greco e6a3ae
+        return -EINVAL;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+    if (nested_state->size > max_nested_state_len) {
Pablo Greco e6a3ae
+        error_report("Recieved unsupported nested state size: "
Pablo Greco e6a3ae
+                     "nested_state->size=%d, max=%d",
Pablo Greco e6a3ae
+                     nested_state->size, max_nested_state_len);
Pablo Greco e6a3ae
+        return -EINVAL;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    /* Verify format is valid */
Pablo Greco e6a3ae
+    if ((nested_state->format != KVM_STATE_NESTED_FORMAT_VMX) &&
Pablo Greco e6a3ae
+        (nested_state->format != KVM_STATE_NESTED_FORMAT_SVM)) {
Pablo Greco e6a3ae
+        error_report("Received invalid nested state format: %d",
Pablo Greco e6a3ae
+                     nested_state->format);
Pablo Greco e6a3ae
+        return -EINVAL;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    return 0;
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static const VMStateDescription vmstate_kvm_nested_state = {
Pablo Greco e6a3ae
+    .name = "cpu/kvm_nested_state",
Pablo Greco e6a3ae
+    .version_id = 1,
Pablo Greco e6a3ae
+    .minimum_version_id = 1,
Pablo Greco e6a3ae
+    .fields = (VMStateField[]) {
Pablo Greco e6a3ae
+        VMSTATE_U16(flags, struct kvm_nested_state),
Pablo Greco e6a3ae
+        VMSTATE_U16(format, struct kvm_nested_state),
Pablo Greco e6a3ae
+        VMSTATE_U32(size, struct kvm_nested_state),
Pablo Greco e6a3ae
+        VMSTATE_END_OF_LIST()
Pablo Greco e6a3ae
+    },
Pablo Greco e6a3ae
+    .subsections = (const VMStateDescription*[]) {
Pablo Greco e6a3ae
+        &vmstate_vmx_nested_state,
Pablo Greco e6a3ae
+        &vmstate_svm_nested_state,
Pablo Greco e6a3ae
+        NULL
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+};
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static const VMStateDescription vmstate_nested_state = {
Pablo Greco e6a3ae
+    .name = "cpu/nested_state",
Pablo Greco e6a3ae
+    .version_id = 1,
Pablo Greco e6a3ae
+    .minimum_version_id = 1,
Pablo Greco e6a3ae
+    .needed = nested_state_needed,
Pablo Greco e6a3ae
+    .post_load = nested_state_post_load,
Pablo Greco e6a3ae
+    .fields = (VMStateField[]) {
Pablo Greco e6a3ae
+        VMSTATE_STRUCT_POINTER(env.nested_state, X86CPU,
Pablo Greco e6a3ae
+                vmstate_kvm_nested_state,
Pablo Greco e6a3ae
+                struct kvm_nested_state),
Pablo Greco e6a3ae
+        VMSTATE_END_OF_LIST()
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+};
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+#endif
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
 static bool mcg_ext_ctl_needed(void *opaque)
Pablo Greco e6a3ae
 {
Pablo Greco e6a3ae
     X86CPU *cpu = opaque;
Pablo Greco e6a3ae
@@ -1080,6 +1275,9 @@ VMStateDescription vmstate_x86_cpu = {
Pablo Greco e6a3ae
 #ifndef TARGET_X86_64
Pablo Greco e6a3ae
         &vmstate_efer32,
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
+#ifdef CONFIG_KVM
Pablo Greco e6a3ae
+        &vmstate_nested_state,
Pablo Greco e6a3ae
+#endif
Pablo Greco e6a3ae
         NULL
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 };
Pablo Greco e6a3ae
-- 
Pablo Greco e6a3ae
1.8.3.1
Pablo Greco e6a3ae