yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone

Blame SOURCES/kvm-target-i386-kvm-Add-support-for-save-and-restore-nes.patch

4ec855
From 0a1fd178d9b7c054d229b60540b7d12d87eb8070 Mon Sep 17 00:00:00 2001
4ec855
From: Paolo Bonzini <pbonzini@redhat.com>
4ec855
Date: Mon, 22 Jul 2019 18:22:15 +0100
4ec855
Subject: [PATCH 34/39] target/i386: kvm: Add support for save and restore
4ec855
 nested state
4ec855
4ec855
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
4ec855
Message-id: <20190722182220.19374-14-pbonzini@redhat.com>
4ec855
Patchwork-id: 89629
4ec855
O-Subject: [RHEL-8.1.0 PATCH qemu-kvm v3 13/18] target/i386: kvm: Add support for save and restore nested state
4ec855
Bugzilla: 1689269
4ec855
RH-Acked-by: Peter Xu <zhexu@redhat.com>
4ec855
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
4ec855
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
4ec855
4ec855
From: Liran Alon <liran.alon@oracle.com>
4ec855
4ec855
Kernel commit 8fcc4b5923af ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE")
4ec855
introduced new IOCTLs to extract and restore vCPU state related to
4ec855
Intel VMX & AMD SVM.
4ec855
4ec855
Utilize these IOCTLs to add support for migration of VMs which are
4ec855
running nested hypervisors.
4ec855
4ec855
Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
4ec855
Reviewed-by: Maran Wilson <maran.wilson@oracle.com>
4ec855
Tested-by: Maran Wilson <maran.wilson@oracle.com>
4ec855
Signed-off-by: Liran Alon <liran.alon@oracle.com>
4ec855
Message-Id: <20190619162140.133674-9-liran.alon@oracle.com>
4ec855
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
4ec855
(cherry picked from commit ebbfef2f34cfc749c045a4569dedb4f748ec024a)
4ec855
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
4ec855
---
4ec855
 accel/kvm/kvm-all.c   |   8 ++
4ec855
 include/sysemu/kvm.h  |   1 +
4ec855
 target/i386/cpu.h     |   3 +
4ec855
 target/i386/kvm.c     |  80 ++++++++++++++++++++
4ec855
 target/i386/machine.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++
4ec855
 5 files changed, 290 insertions(+)
4ec855
4ec855
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
4ec855
index a939b26..2130fcb 100644
4ec855
--- a/accel/kvm/kvm-all.c
4ec855
+++ b/accel/kvm/kvm-all.c
4ec855
@@ -87,6 +87,7 @@ struct KVMState
4ec855
 #ifdef KVM_CAP_SET_GUEST_DEBUG
4ec855
     struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
4ec855
 #endif
4ec855
+    int max_nested_state_len;
4ec855
     int many_ioeventfds;
4ec855
     int intx_set_mask;
4ec855
     bool sync_mmu;
4ec855
@@ -1646,6 +1647,8 @@ static int kvm_init(MachineState *ms)
4ec855
     s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
4ec855
 #endif
4ec855
 
4ec855
+    s->max_nested_state_len = kvm_check_extension(s, KVM_CAP_NESTED_STATE);
4ec855
+
4ec855
 #ifdef KVM_CAP_IRQ_ROUTING
4ec855
     kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
4ec855
 #endif
4ec855
@@ -2207,6 +2210,11 @@ int kvm_has_debugregs(void)
4ec855
     return kvm_state->debugregs;
4ec855
 }
4ec855
 
4ec855
+int kvm_max_nested_state_length(void)
4ec855
+{
4ec855
+    return kvm_state->max_nested_state_len;
4ec855
+}
4ec855
+
4ec855
 int kvm_has_many_ioeventfds(void)
4ec855
 {
4ec855
     if (!kvm_enabled()) {
4ec855
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
4ec855
index a5a6dff..3cf04cf 100644
4ec855
--- a/include/sysemu/kvm.h
4ec855
+++ b/include/sysemu/kvm.h
4ec855
@@ -211,6 +211,7 @@ bool kvm_has_sync_mmu(void);
4ec855
 int kvm_has_vcpu_events(void);
4ec855
 int kvm_has_robust_singlestep(void);
4ec855
 int kvm_has_debugregs(void);
4ec855
+int kvm_max_nested_state_length(void);
4ec855
 int kvm_has_pit_state2(void);
4ec855
 int kvm_has_many_ioeventfds(void);
4ec855
 int kvm_has_gsi_routing(void);
4ec855
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
4ec855
index f595fc3..86f3d98 100644
4ec855
--- a/target/i386/cpu.h
4ec855
+++ b/target/i386/cpu.h
4ec855
@@ -1335,6 +1335,9 @@ typedef struct CPUX86State {
4ec855
     int64_t tsc_khz;
4ec855
     int64_t user_tsc_khz; /* for sanity check only */
4ec855
     void *kvm_xsave_buf;
4ec855
+#if defined(CONFIG_KVM)
4ec855
+    struct kvm_nested_state *nested_state;
4ec855
+#endif
4ec855
 #if defined(CONFIG_HVF)
4ec855
     HVFX86EmulatorState *hvf_emul;
4ec855
 #endif
4ec855
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
4ec855
index 8a6da90..ddceb7d 100644
4ec855
--- a/target/i386/kvm.c
4ec855
+++ b/target/i386/kvm.c
4ec855
@@ -789,6 +789,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
4ec855
     struct kvm_cpuid_entry2 *c;
4ec855
     uint32_t signature[3];
4ec855
     int kvm_base = KVM_CPUID_SIGNATURE;
4ec855
+    int max_nested_state_len;
4ec855
     int r;
4ec855
     Error *local_err = NULL;
4ec855
 
4ec855
@@ -1180,6 +1181,24 @@ int kvm_arch_init_vcpu(CPUState *cs)
4ec855
     if (has_xsave) {
4ec855
         env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
4ec855
     }
4ec855
+
4ec855
+    max_nested_state_len = kvm_max_nested_state_length();
4ec855
+    if (max_nested_state_len > 0) {
4ec855
+        assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));
4ec855
+        env->nested_state = g_malloc0(max_nested_state_len);
4ec855
+
4ec855
+        env->nested_state->size = max_nested_state_len;
4ec855
+
4ec855
+        if (IS_INTEL_CPU(env)) {
4ec855
+            struct kvm_vmx_nested_state_hdr *vmx_hdr =
4ec855
+                &env->nested_state->hdr.vmx;
4ec855
+
4ec855
+            env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
4ec855
+            vmx_hdr->vmxon_pa = -1ull;
4ec855
+            vmx_hdr->vmcs12_pa = -1ull;
4ec855
+        }
4ec855
+    }
4ec855
+
4ec855
     cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE);
4ec855
 
4ec855
     if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) {
4ec855
@@ -1199,12 +1218,18 @@ int kvm_arch_init_vcpu(CPUState *cs)
4ec855
 int kvm_arch_destroy_vcpu(CPUState *cs)
4ec855
 {
4ec855
     X86CPU *cpu = X86_CPU(cs);
4ec855
+    CPUX86State *env = &cpu->env;
4ec855
 
4ec855
     if (cpu->kvm_msr_buf) {
4ec855
         g_free(cpu->kvm_msr_buf);
4ec855
         cpu->kvm_msr_buf = NULL;
4ec855
     }
4ec855
 
4ec855
+    if (env->nested_state) {
4ec855
+        g_free(env->nested_state);
4ec855
+        env->nested_state = NULL;
4ec855
+    }
4ec855
+
4ec855
     return 0;
4ec855
 }
4ec855
 
4ec855
@@ -2875,6 +2900,52 @@ static int kvm_get_debugregs(X86CPU *cpu)
4ec855
     return 0;
4ec855
 }
4ec855
 
4ec855
+static int kvm_put_nested_state(X86CPU *cpu)
4ec855
+{
4ec855
+    CPUX86State *env = &cpu->env;
4ec855
+    int max_nested_state_len = kvm_max_nested_state_length();
4ec855
+
4ec855
+    if (max_nested_state_len <= 0) {
4ec855
+        return 0;
4ec855
+    }
4ec855
+
4ec855
+    assert(env->nested_state->size <= max_nested_state_len);
4ec855
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state);
4ec855
+}
4ec855
+
4ec855
+static int kvm_get_nested_state(X86CPU *cpu)
4ec855
+{
4ec855
+    CPUX86State *env = &cpu->env;
4ec855
+    int max_nested_state_len = kvm_max_nested_state_length();
4ec855
+    int ret;
4ec855
+
4ec855
+    if (max_nested_state_len <= 0) {
4ec855
+        return 0;
4ec855
+    }
4ec855
+
4ec855
+    /*
4ec855
+     * It is possible that migration restored a smaller size into
4ec855
+     * nested_state->size than what our kernel supports.
4ec855
+     * We preserve the migration origin's nested_state->size for the
4ec855
+     * call to KVM_SET_NESTED_STATE, but want our next call to
4ec855
+     * KVM_GET_NESTED_STATE to use the max size our kernel supports.
4ec855
+     */
4ec855
+    env->nested_state->size = max_nested_state_len;
4ec855
+
4ec855
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, env->nested_state);
4ec855
+    if (ret < 0) {
4ec855
+        return ret;
4ec855
+    }
4ec855
+
4ec855
+    if (env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE) {
4ec855
+        env->hflags |= HF_GUEST_MASK;
4ec855
+    } else {
4ec855
+        env->hflags &= ~HF_GUEST_MASK;
4ec855
+    }
4ec855
+
4ec855
+    return ret;
4ec855
+}
4ec855
+
4ec855
 int kvm_arch_put_registers(CPUState *cpu, int level)
4ec855
 {
4ec855
     X86CPU *x86_cpu = X86_CPU(cpu);
4ec855
@@ -2882,6 +2953,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
4ec855
 
4ec855
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
4ec855
 
4ec855
+    ret = kvm_put_nested_state(x86_cpu);
4ec855
+    if (ret < 0) {
4ec855
+        return ret;
4ec855
+    }
4ec855
+
4ec855
     if (level >= KVM_PUT_RESET_STATE) {
4ec855
         ret = kvm_put_msr_feature_control(x86_cpu);
4ec855
         if (ret < 0) {
4ec855
@@ -2997,6 +3073,10 @@ int kvm_arch_get_registers(CPUState *cs)
4ec855
     if (ret < 0) {
4ec855
         goto out;
4ec855
     }
4ec855
+    ret = kvm_get_nested_state(cpu);
4ec855
+    if (ret < 0) {
4ec855
+        goto out;
4ec855
+    }
4ec855
     ret = 0;
4ec855
  out:
4ec855
     cpu_sync_bndcs_hflags(&cpu->env);
4ec855
diff --git a/target/i386/machine.c b/target/i386/machine.c
4ec855
index 561d4a5..a2ddbba 100644
4ec855
--- a/target/i386/machine.c
4ec855
+++ b/target/i386/machine.c
4ec855
@@ -230,6 +230,15 @@ static int cpu_pre_save(void *opaque)
4ec855
         env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
4ec855
     }
4ec855
 
4ec855
+#ifdef CONFIG_KVM
4ec855
+    /* Verify we have nested virtualization state from kernel if required */
4ec855
+    if (kvm_enabled() && cpu_has_vmx(env) && !env->nested_state) {
4ec855
+        error_report("Guest enabled nested virtualization but kernel "
4ec855
+                "does not support saving of nested state");
4ec855
+        return -EINVAL;
4ec855
+    }
4ec855
+#endif
4ec855
+
4ec855
     return 0;
4ec855
 }
4ec855
 
4ec855
@@ -277,6 +286,16 @@ static int cpu_post_load(void *opaque, int version_id)
4ec855
     env->hflags &= ~HF_CPL_MASK;
4ec855
     env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
4ec855
 
4ec855
+#ifdef CONFIG_KVM
4ec855
+    if ((env->hflags & HF_GUEST_MASK) &&
4ec855
+        (!env->nested_state ||
4ec855
+        !(env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE))) {
4ec855
+        error_report("vCPU set in guest-mode inconsistent with "
4ec855
+                     "migrated kernel nested state");
4ec855
+        return -EINVAL;
4ec855
+    }
4ec855
+#endif
4ec855
+
4ec855
     env->fpstt = (env->fpus_vmstate >> 11) & 7;
4ec855
     env->fpus = env->fpus_vmstate & ~0x3800;
4ec855
     env->fptag_vmstate ^= 0xff;
4ec855
@@ -819,6 +838,182 @@ static const VMStateDescription vmstate_tsc_khz = {
4ec855
     }
4ec855
 };
4ec855
 
4ec855
+#ifdef CONFIG_KVM
4ec855
+
4ec855
+static bool vmx_vmcs12_needed(void *opaque)
4ec855
+{
4ec855
+    struct kvm_nested_state *nested_state = opaque;
4ec855
+    return (nested_state->size >
4ec855
+            offsetof(struct kvm_nested_state, data.vmx[0].vmcs12));
4ec855
+}
4ec855
+
4ec855
+static const VMStateDescription vmstate_vmx_vmcs12 = {
4ec855
+    .name = "cpu/kvm_nested_state/vmx/vmcs12",
4ec855
+    .version_id = 1,
4ec855
+    .minimum_version_id = 1,
4ec855
+    .needed = vmx_vmcs12_needed,
4ec855
+    .fields = (VMStateField[]) {
4ec855
+        VMSTATE_UINT8_ARRAY(data.vmx[0].vmcs12,
4ec855
+                            struct kvm_nested_state,
4ec855
+                            KVM_STATE_NESTED_VMX_VMCS_SIZE),
4ec855
+        VMSTATE_END_OF_LIST()
4ec855
+    }
4ec855
+};
4ec855
+
4ec855
+static bool vmx_shadow_vmcs12_needed(void *opaque)
4ec855
+{
4ec855
+    struct kvm_nested_state *nested_state = opaque;
4ec855
+    return (nested_state->size >
4ec855
+            offsetof(struct kvm_nested_state, data.vmx[0].shadow_vmcs12));
4ec855
+}
4ec855
+
4ec855
+static const VMStateDescription vmstate_vmx_shadow_vmcs12 = {
4ec855
+    .name = "cpu/kvm_nested_state/vmx/shadow_vmcs12",
4ec855
+    .version_id = 1,
4ec855
+    .minimum_version_id = 1,
4ec855
+    .needed = vmx_shadow_vmcs12_needed,
4ec855
+    .fields = (VMStateField[]) {
4ec855
+        VMSTATE_UINT8_ARRAY(data.vmx[0].shadow_vmcs12,
4ec855
+                            struct kvm_nested_state,
4ec855
+                            KVM_STATE_NESTED_VMX_VMCS_SIZE),
4ec855
+        VMSTATE_END_OF_LIST()
4ec855
+    }
4ec855
+};
4ec855
+
4ec855
+static bool vmx_nested_state_needed(void *opaque)
4ec855
+{
4ec855
+    struct kvm_nested_state *nested_state = opaque;
4ec855
+
4ec855
+    return ((nested_state->format == KVM_STATE_NESTED_FORMAT_VMX) &&
4ec855
+            ((nested_state->hdr.vmx.vmxon_pa != -1ull) ||
4ec855
+             (nested_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)));
4ec855
+}
4ec855
+
4ec855
+static const VMStateDescription vmstate_vmx_nested_state = {
4ec855
+    .name = "cpu/kvm_nested_state/vmx",
4ec855
+    .version_id = 1,
4ec855
+    .minimum_version_id = 1,
4ec855
+    .needed = vmx_nested_state_needed,
4ec855
+    .fields = (VMStateField[]) {
4ec855
+        VMSTATE_U64(hdr.vmx.vmxon_pa, struct kvm_nested_state),
4ec855
+        VMSTATE_U64(hdr.vmx.vmcs12_pa, struct kvm_nested_state),
4ec855
+        VMSTATE_U16(hdr.vmx.smm.flags, struct kvm_nested_state),
4ec855
+        VMSTATE_END_OF_LIST()
4ec855
+    },
4ec855
+    .subsections = (const VMStateDescription*[]) {
4ec855
+        &vmstate_vmx_vmcs12,
4ec855
+        &vmstate_vmx_shadow_vmcs12,
4ec855
+        NULL,
4ec855
+    }
4ec855
+};
4ec855
+
4ec855
+static bool svm_nested_state_needed(void *opaque)
4ec855
+{
4ec855
+    struct kvm_nested_state *nested_state = opaque;
4ec855
+
4ec855
+    return (nested_state->format == KVM_STATE_NESTED_FORMAT_SVM);
4ec855
+}
4ec855
+
4ec855
+static const VMStateDescription vmstate_svm_nested_state = {
4ec855
+    .name = "cpu/kvm_nested_state/svm",
4ec855
+    .version_id = 1,
4ec855
+    .minimum_version_id = 1,
4ec855
+    .needed = svm_nested_state_needed,
4ec855
+    .fields = (VMStateField[]) {
4ec855
+        VMSTATE_END_OF_LIST()
4ec855
+    }
4ec855
+};
4ec855
+
4ec855
+static bool nested_state_needed(void *opaque)
4ec855
+{
4ec855
+    X86CPU *cpu = opaque;
4ec855
+    CPUX86State *env = &cpu->env;
4ec855
+
4ec855
+    return (env->nested_state &&
4ec855
+            (vmx_nested_state_needed(env->nested_state) ||
4ec855
+             svm_nested_state_needed(env->nested_state)));
4ec855
+}
4ec855
+
4ec855
+static int nested_state_post_load(void *opaque, int version_id)
4ec855
+{
4ec855
+    X86CPU *cpu = opaque;
4ec855
+    CPUX86State *env = &cpu->env;
4ec855
+    struct kvm_nested_state *nested_state = env->nested_state;
4ec855
+    int min_nested_state_len = offsetof(struct kvm_nested_state, data);
4ec855
+    int max_nested_state_len = kvm_max_nested_state_length();
4ec855
+
4ec855
+    /*
4ec855
+     * If our kernel doesn't support setting nested state
4ec855
+     * and we have received nested state from the migration stream,
4ec855
+     * we need to fail the migration.
4ec855
+     */
4ec855
+    if (max_nested_state_len <= 0) {
4ec855
+        error_report("Received nested state when kernel cannot restore it");
4ec855
+        return -EINVAL;
4ec855
+    }
4ec855
+
4ec855
+    /*
4ec855
+     * Verify that the size of the received nested_state struct
4ec855
+     * at least covers the required header and is not larger
4ec855
+     * than the max size that our kernel supports.
4ec855
+     */
4ec855
+    if (nested_state->size < min_nested_state_len) {
4ec855
+        error_report("Received nested state size less than min: "
4ec855
+                     "len=%d, min=%d",
4ec855
+                     nested_state->size, min_nested_state_len);
4ec855
+        return -EINVAL;
4ec855
+    }
4ec855
+    if (nested_state->size > max_nested_state_len) {
4ec855
+        error_report("Recieved unsupported nested state size: "
4ec855
+                     "nested_state->size=%d, max=%d",
4ec855
+                     nested_state->size, max_nested_state_len);
4ec855
+        return -EINVAL;
4ec855
+    }
4ec855
+
4ec855
+    /* Verify format is valid */
4ec855
+    if ((nested_state->format != KVM_STATE_NESTED_FORMAT_VMX) &&
4ec855
+        (nested_state->format != KVM_STATE_NESTED_FORMAT_SVM)) {
4ec855
+        error_report("Received invalid nested state format: %d",
4ec855
+                     nested_state->format);
4ec855
+        return -EINVAL;
4ec855
+    }
4ec855
+
4ec855
+    return 0;
4ec855
+}
4ec855
+
4ec855
+static const VMStateDescription vmstate_kvm_nested_state = {
4ec855
+    .name = "cpu/kvm_nested_state",
4ec855
+    .version_id = 1,
4ec855
+    .minimum_version_id = 1,
4ec855
+    .fields = (VMStateField[]) {
4ec855
+        VMSTATE_U16(flags, struct kvm_nested_state),
4ec855
+        VMSTATE_U16(format, struct kvm_nested_state),
4ec855
+        VMSTATE_U32(size, struct kvm_nested_state),
4ec855
+        VMSTATE_END_OF_LIST()
4ec855
+    },
4ec855
+    .subsections = (const VMStateDescription*[]) {
4ec855
+        &vmstate_vmx_nested_state,
4ec855
+        &vmstate_svm_nested_state,
4ec855
+        NULL
4ec855
+    }
4ec855
+};
4ec855
+
4ec855
+static const VMStateDescription vmstate_nested_state = {
4ec855
+    .name = "cpu/nested_state",
4ec855
+    .version_id = 1,
4ec855
+    .minimum_version_id = 1,
4ec855
+    .needed = nested_state_needed,
4ec855
+    .post_load = nested_state_post_load,
4ec855
+    .fields = (VMStateField[]) {
4ec855
+        VMSTATE_STRUCT_POINTER(env.nested_state, X86CPU,
4ec855
+                vmstate_kvm_nested_state,
4ec855
+                struct kvm_nested_state),
4ec855
+        VMSTATE_END_OF_LIST()
4ec855
+    }
4ec855
+};
4ec855
+
4ec855
+#endif
4ec855
+
4ec855
 static bool mcg_ext_ctl_needed(void *opaque)
4ec855
 {
4ec855
     X86CPU *cpu = opaque;
4ec855
@@ -1080,6 +1275,9 @@ VMStateDescription vmstate_x86_cpu = {
4ec855
 #ifndef TARGET_X86_64
4ec855
         &vmstate_efer32,
4ec855
 #endif
4ec855
+#ifdef CONFIG_KVM
4ec855
+        &vmstate_nested_state,
4ec855
+#endif
4ec855
         NULL
4ec855
     }
4ec855
 };
4ec855
-- 
4ec855
1.8.3.1
4ec855