Blame SOURCES/kvm-target-i386-kvm-Demand-nested-migration-kernel-capab.patch

b38b0f
From 2427e21de274cf7b56ef79e4a7ba78a08def7a58 Mon Sep 17 00:00:00 2001
b38b0f
From: Paolo Bonzini <pbonzini@redhat.com>
b38b0f
Date: Mon, 22 Jul 2019 18:22:18 +0100
b38b0f
Subject: [PATCH 37/39] target/i386: kvm: Demand nested migration kernel
b38b0f
 capabilities only when vCPU may have enabled VMX
b38b0f
b38b0f
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
b38b0f
Message-id: <20190722182220.19374-17-pbonzini@redhat.com>
b38b0f
Patchwork-id: 89634
b38b0f
O-Subject: [RHEL-8.1.0 PATCH qemu-kvm v3 16/18] target/i386: kvm: Demand nested migration kernel capabilities only when vCPU may have enabled VMX
b38b0f
Bugzilla: 1689269
b38b0f
RH-Acked-by: Peter Xu <zhexu@redhat.com>
b38b0f
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
b38b0f
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
b38b0f
b38b0f
From: Liran Alon <liran.alon@oracle.com>
b38b0f
b38b0f
Previous to this change, a vCPU exposed with VMX running on a kernel
b38b0f
without KVM_CAP_NESTED_STATE or KVM_CAP_EXCEPTION_PAYLOAD resulted in
b38b0f
adding a migration blocker. This was because when the code was written
b38b0f
it was thought there is no way to reliably know if a vCPU is utilising
b38b0f
VMX or not at runtime. However, it turns out that this can be known to
b38b0f
some extent:
b38b0f
b38b0f
In order for a vCPU to enter VMX operation it must have CR4.VMXE set.
b38b0f
Since it was set, CR4.VMXE must remain set as long as the vCPU is in
b38b0f
VMX operation. This is because CR4.VMXE is one of the bits set
b38b0f
in MSR_IA32_VMX_CR4_FIXED1.
b38b0f
There is one exception to the above statement when vCPU enters SMM mode.
b38b0f
When a vCPU enters SMM mode, it temporarily exits VMX operation and
b38b0f
may also reset CR4.VMXE during execution in SMM mode.
b38b0f
When the vCPU exits SMM mode, vCPU state is restored to be in VMX operation
b38b0f
and CR4.VMXE is restored to its original state of being set.
b38b0f
Therefore, when the vCPU is not in SMM mode, we can infer whether
b38b0f
VMX is being used by examining CR4.VMXE. Otherwise, we cannot
b38b0f
know for certain but assume the worst: that the vCPU may utilise VMX.
b38b0f
b38b0f
Summarizing all the above, a vCPU may have enabled VMX in case
b38b0f
CR4.VMXE is set or vCPU is in SMM mode.
b38b0f
b38b0f
Therefore, remove migration blocker and check before migration
b38b0f
(cpu_pre_save()) if the vCPU may have enabled VMX. If true, only then
b38b0f
require relevant kernel capabilities.
b38b0f
b38b0f
While at it, demand KVM_CAP_EXCEPTION_PAYLOAD only when the vCPU is in
b38b0f
guest-mode and there is a pending/injected exception. Otherwise, this
b38b0f
kernel capability is not required for proper migration.
b38b0f
b38b0f
Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
b38b0f
Signed-off-by: Liran Alon <liran.alon@oracle.com>
b38b0f
Reviewed-by: Maran Wilson <maran.wilson@oracle.com>
b38b0f
Tested-by: Maran Wilson <maran.wilson@oracle.com>
b38b0f
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
b38b0f
(cherry picked from commit 79a197ab180e75838523c58973b1221ad7bf51eb)
b38b0f
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
b38b0f
---
b38b0f
 target/i386/cpu.h      | 22 ++++++++++++++++++++++
b38b0f
 target/i386/kvm.c      | 26 ++++++--------------------
b38b0f
 target/i386/kvm_i386.h |  1 +
b38b0f
 target/i386/machine.c  | 24 ++++++++++++++++++++----
b38b0f
 4 files changed, 49 insertions(+), 24 deletions(-)
b38b0f
b38b0f
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
b38b0f
index d120f62..273c90b 100644
b38b0f
--- a/target/i386/cpu.h
b38b0f
+++ b/target/i386/cpu.h
b38b0f
@@ -1848,6 +1848,28 @@ static inline bool cpu_has_vmx(CPUX86State *env)
b38b0f
     return env->features[FEAT_1_ECX] & CPUID_EXT_VMX;
b38b0f
 }
b38b0f
 
b38b0f
+/*
b38b0f
+ * In order for a vCPU to enter VMX operation it must have CR4.VMXE set.
b38b0f
+ * Since it was set, CR4.VMXE must remain set as long as vCPU is in
b38b0f
+ * VMX operation. This is because CR4.VMXE is one of the bits set
b38b0f
+ * in MSR_IA32_VMX_CR4_FIXED1.
b38b0f
+ *
b38b0f
+ * There is one exception to the above statement when vCPU enters SMM mode.
b38b0f
+ * When a vCPU enters SMM mode, it temporarily exits VMX operation and
b38b0f
+ * may also reset CR4.VMXE during execution in SMM mode.
b38b0f
+ * When vCPU exits SMM mode, vCPU state is restored to be in VMX operation
b38b0f
+ * and CR4.VMXE is restored to its original value of being set.
b38b0f
+ *
b38b0f
+ * Therefore, when vCPU is not in SMM mode, we can infer whether
b38b0f
+ * VMX is being used by examining CR4.VMXE. Otherwise, we cannot
b38b0f
+ * know for certain.
b38b0f
+ */
b38b0f
+static inline bool cpu_vmx_maybe_enabled(CPUX86State *env)
b38b0f
+{
b38b0f
+    return cpu_has_vmx(env) &&
b38b0f
+           ((env->cr[4] & CR4_VMXE_MASK) || (env->hflags & HF_SMM_MASK));
b38b0f
+}
b38b0f
+
b38b0f
 /* fpu_helper.c */
b38b0f
 void update_fp_status(CPUX86State *env);
b38b0f
 void update_mxcsr_status(CPUX86State *env);
b38b0f
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
b38b0f
index 0619aba..0bd286e 100644
b38b0f
--- a/target/i386/kvm.c
b38b0f
+++ b/target/i386/kvm.c
b38b0f
@@ -127,6 +127,11 @@ bool kvm_has_adjust_clock_stable(void)
b38b0f
     return (ret == KVM_CLOCK_TSC_STABLE);
b38b0f
 }
b38b0f
 
b38b0f
+bool kvm_has_exception_payload(void)
b38b0f
+{
b38b0f
+    return has_exception_payload;
b38b0f
+}
b38b0f
+
b38b0f
 bool kvm_allows_irq0_override(void)
b38b0f
 {
b38b0f
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
b38b0f
@@ -814,7 +819,6 @@ static int hyperv_handle_properties(CPUState *cs)
b38b0f
 }
b38b0f
 
b38b0f
 static Error *invtsc_mig_blocker;
b38b0f
-static Error *nested_virt_mig_blocker;
b38b0f
 
b38b0f
 #define KVM_MAX_CPUID_ENTRIES  100
b38b0f
 
b38b0f
@@ -1159,22 +1163,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
b38b0f
                                   !!(c->ecx & CPUID_EXT_SMX);
b38b0f
     }
b38b0f
 
b38b0f
-    if (cpu_has_vmx(env) && !nested_virt_mig_blocker &&
b38b0f
-        ((kvm_max_nested_state_length() <= 0) || !has_exception_payload)) {
b38b0f
-        error_setg(&nested_virt_mig_blocker,
b38b0f
-                   "Kernel do not provide required capabilities for "
b38b0f
-                   "nested virtualization migration. "
b38b0f
-                   "(CAP_NESTED_STATE=%d, CAP_EXCEPTION_PAYLOAD=%d)",
b38b0f
-                   kvm_max_nested_state_length() > 0,
b38b0f
-                   has_exception_payload);
b38b0f
-        r = migrate_add_blocker(nested_virt_mig_blocker, &local_err);
b38b0f
-        if (local_err) {
b38b0f
-            error_report_err(local_err);
b38b0f
-            error_free(nested_virt_mig_blocker);
b38b0f
-            return r;
b38b0f
-        }
b38b0f
-    }
b38b0f
-
b38b0f
     if (env->mcg_cap & MCG_LMCE_P) {
b38b0f
         has_msr_mcg_ext_ctl = has_msr_feature_control = true;
b38b0f
     }
b38b0f
@@ -1190,7 +1178,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
b38b0f
             if (local_err) {
b38b0f
                 error_report_err(local_err);
b38b0f
                 error_free(invtsc_mig_blocker);
b38b0f
-                goto fail2;
b38b0f
+                return r;
b38b0f
             }
b38b0f
             /* for savevm */
b38b0f
             vmstate_x86_cpu.unmigratable = 1;
b38b0f
@@ -1256,8 +1244,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
b38b0f
 
b38b0f
  fail:
b38b0f
     migrate_del_blocker(invtsc_mig_blocker);
b38b0f
- fail2:
b38b0f
-    migrate_del_blocker(nested_virt_mig_blocker);
b38b0f
 
b38b0f
     return r;
b38b0f
 }
b38b0f
diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h
b38b0f
index 1de9876..df9bbf3 100644
b38b0f
--- a/target/i386/kvm_i386.h
b38b0f
+++ b/target/i386/kvm_i386.h
b38b0f
@@ -41,6 +41,7 @@
b38b0f
 bool kvm_allows_irq0_override(void);
b38b0f
 bool kvm_has_smm(void);
b38b0f
 bool kvm_has_adjust_clock_stable(void);
b38b0f
+bool kvm_has_exception_payload(void);
b38b0f
 void kvm_synchronize_all_tsc(void);
b38b0f
 void kvm_arch_reset_vcpu(X86CPU *cs);
b38b0f
 void kvm_arch_do_init_vcpu(X86CPU *cs);
b38b0f
diff --git a/target/i386/machine.c b/target/i386/machine.c
b38b0f
index 5ffee8f..8d90d98 100644
b38b0f
--- a/target/i386/machine.c
b38b0f
+++ b/target/i386/machine.c
b38b0f
@@ -7,6 +7,7 @@
b38b0f
 #include "hw/i386/pc.h"
b38b0f
 #include "hw/isa/isa.h"
b38b0f
 #include "migration/cpu.h"
b38b0f
+#include "kvm_i386.h"
b38b0f
 
b38b0f
 #include "sysemu/kvm.h"
b38b0f
 
b38b0f
@@ -231,10 +232,25 @@ static int cpu_pre_save(void *opaque)
b38b0f
     }
b38b0f
 
b38b0f
 #ifdef CONFIG_KVM
b38b0f
-    /* Verify we have nested virtualization state from kernel if required */
b38b0f
-    if (kvm_enabled() && cpu_has_vmx(env) && !env->nested_state) {
b38b0f
-        error_report("Guest enabled nested virtualization but kernel "
b38b0f
-                "does not support saving of nested state");
b38b0f
+    /*
b38b0f
+     * In case vCPU may have enabled VMX, we need to make sure the kernel has
b38b0f
+     * required capabilities in order to perform migration correctly:
b38b0f
+     *
b38b0f
+     * 1) We must be able to extract vCPU nested-state from KVM.
b38b0f
+     *
b38b0f
+     * 2) In case vCPU is running in guest-mode and it has a pending exception,
b38b0f
+     * we must be able to determine if it's in a pending or injected state.
b38b0f
+     * Note that in case KVM doesn't have the required capability to do so,
b38b0f
+     * a pending/injected exception will always appear as an
b38b0f
+     * injected exception.
b38b0f
+     */
b38b0f
+    if (kvm_enabled() && cpu_vmx_maybe_enabled(env) &&
b38b0f
+        (!env->nested_state ||
b38b0f
+         (!kvm_has_exception_payload() && (env->hflags & HF_GUEST_MASK) &&
b38b0f
+          env->exception_injected))) {
b38b0f
+        error_report("Guest maybe enabled nested virtualization but kernel "
b38b0f
+                "does not support required capabilities to save vCPU "
b38b0f
+                "nested state");
b38b0f
         return -EINVAL;
b38b0f
     }
b38b0f
 #endif
b38b0f
-- 
b38b0f
1.8.3.1
b38b0f