Pablo Greco e6a3ae
From 05a54f3fc44598f917d72a1f2570c43ec042cdb8 Mon Sep 17 00:00:00 2001
Pablo Greco e6a3ae
From: Paolo Bonzini <pbonzini@redhat.com>
Pablo Greco e6a3ae
Date: Mon, 22 Jul 2019 18:22:16 +0100
Pablo Greco e6a3ae
Subject: [PATCH 35/39] target/i386: kvm: Add support for
Pablo Greco e6a3ae
 KVM_CAP_EXCEPTION_PAYLOAD
Pablo Greco e6a3ae
Pablo Greco e6a3ae
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
Pablo Greco e6a3ae
Message-id: <20190722182220.19374-15-pbonzini@redhat.com>
Pablo Greco e6a3ae
Patchwork-id: 89631
Pablo Greco e6a3ae
O-Subject: [RHEL-8.1.0 PATCH qemu-kvm v3 14/18] target/i386: kvm: Add support for KVM_CAP_EXCEPTION_PAYLOAD
Pablo Greco e6a3ae
Bugzilla: 1689269
Pablo Greco e6a3ae
RH-Acked-by: Peter Xu <zhexu@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
From: Liran Alon <liran.alon@oracle.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Kernel commit c4f55198c7c2 ("kvm: x86: Introduce KVM_CAP_EXCEPTION_PAYLOAD")
Pablo Greco e6a3ae
introduced a new KVM capability which allows userspace to correctly
Pablo Greco e6a3ae
distinguish between pending and injected exceptions.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
This distinction is important in case of nested virtualization scenarios
Pablo Greco e6a3ae
because a L2 pending exception can still be intercepted by the L1 hypervisor
Pablo Greco e6a3ae
while a L2 injected exception cannot.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Furthermore, when an exception is attempted to be injected by QEMU,
Pablo Greco e6a3ae
QEMU should specify the exception payload (CR2 in case of #PF or
Pablo Greco e6a3ae
DR6 in case of #DB) instead of having the payload already delivered in
Pablo Greco e6a3ae
the respective vCPU register. Because in case exception is injected to
Pablo Greco e6a3ae
L2 guest and is intercepted by L1 hypervisor, then payload needs to be
Pablo Greco e6a3ae
reported to L1 intercept (VMExit handler) while still preserving
Pablo Greco e6a3ae
respective vCPU register unchanged.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
This commit adds support for QEMU to properly utilise this new KVM
Pablo Greco e6a3ae
capability (KVM_CAP_EXCEPTION_PAYLOAD).
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
Pablo Greco e6a3ae
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Pablo Greco e6a3ae
Message-Id: <20190619162140.133674-10-liran.alon@oracle.com>
Pablo Greco e6a3ae
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Pablo Greco e6a3ae
(cherry picked from commit fd13f23b8c95311eff74426921557eee592b0ed3)
Pablo Greco e6a3ae
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
Pablo Greco e6a3ae
---
Pablo Greco e6a3ae
 target/i386/cpu.c        |   6 ++-
Pablo Greco e6a3ae
 target/i386/cpu.h        |   6 ++-
Pablo Greco e6a3ae
 target/i386/hvf/hvf.c    |  10 +++--
Pablo Greco e6a3ae
 target/i386/hvf/x86hvf.c |   4 +-
Pablo Greco e6a3ae
 target/i386/kvm.c        | 101 +++++++++++++++++++++++++++++++++++++++--------
Pablo Greco e6a3ae
 target/i386/machine.c    |  84 ++++++++++++++++++++++++++++++++++++++-
Pablo Greco e6a3ae
 6 files changed, 187 insertions(+), 24 deletions(-)
Pablo Greco e6a3ae
Pablo Greco e6a3ae
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
Pablo Greco e6a3ae
index bd0b784..f71b044 100644
Pablo Greco e6a3ae
--- a/target/i386/cpu.c
Pablo Greco e6a3ae
+++ b/target/i386/cpu.c
Pablo Greco e6a3ae
@@ -4645,7 +4645,11 @@ static void x86_cpu_reset(CPUState *s)
Pablo Greco e6a3ae
     memset(env->mtrr_fixed, 0, sizeof(env->mtrr_fixed));
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     env->interrupt_injected = -1;
Pablo Greco e6a3ae
-    env->exception_injected = -1;
Pablo Greco e6a3ae
+    env->exception_nr = -1;
Pablo Greco e6a3ae
+    env->exception_pending = 0;
Pablo Greco e6a3ae
+    env->exception_injected = 0;
Pablo Greco e6a3ae
+    env->exception_has_payload = false;
Pablo Greco e6a3ae
+    env->exception_payload = 0;
Pablo Greco e6a3ae
     env->nmi_injected = false;
Pablo Greco e6a3ae
 #if !defined(CONFIG_USER_ONLY)
Pablo Greco e6a3ae
     /* We hard-wire the BSP to the first CPU. */
Pablo Greco e6a3ae
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
Pablo Greco e6a3ae
index 86f3d98..d120f62 100644
Pablo Greco e6a3ae
--- a/target/i386/cpu.h
Pablo Greco e6a3ae
+++ b/target/i386/cpu.h
Pablo Greco e6a3ae
@@ -1325,10 +1325,14 @@ typedef struct CPUX86State {
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     /* For KVM */
Pablo Greco e6a3ae
     uint32_t mp_state;
Pablo Greco e6a3ae
-    int32_t exception_injected;
Pablo Greco e6a3ae
+    int32_t exception_nr;
Pablo Greco e6a3ae
     int32_t interrupt_injected;
Pablo Greco e6a3ae
     uint8_t soft_interrupt;
Pablo Greco e6a3ae
+    uint8_t exception_pending;
Pablo Greco e6a3ae
+    uint8_t exception_injected;
Pablo Greco e6a3ae
     uint8_t has_error_code;
Pablo Greco e6a3ae
+    uint8_t exception_has_payload;
Pablo Greco e6a3ae
+    uint64_t exception_payload;
Pablo Greco e6a3ae
     uint32_t ins_len;
Pablo Greco e6a3ae
     uint32_t sipi_vector;
Pablo Greco e6a3ae
     bool tsc_valid;
Pablo Greco e6a3ae
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
Pablo Greco e6a3ae
index c367539..acc0bb9 100644
Pablo Greco e6a3ae
--- a/target/i386/hvf/hvf.c
Pablo Greco e6a3ae
+++ b/target/i386/hvf/hvf.c
Pablo Greco e6a3ae
@@ -617,7 +617,9 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
Pablo Greco e6a3ae
     X86CPU *x86_cpu = X86_CPU(cpu);
Pablo Greco e6a3ae
     CPUX86State *env = &x86_cpu->env;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
-    env->exception_injected = -1;
Pablo Greco e6a3ae
+    env->exception_nr = -1;
Pablo Greco e6a3ae
+    env->exception_pending = 0;
Pablo Greco e6a3ae
+    env->exception_injected = 0;
Pablo Greco e6a3ae
     env->interrupt_injected = -1;
Pablo Greco e6a3ae
     env->nmi_injected = false;
Pablo Greco e6a3ae
     if (idtvec_info & VMCS_IDT_VEC_VALID) {
Pablo Greco e6a3ae
@@ -631,7 +633,8 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
Pablo Greco e6a3ae
             break;
Pablo Greco e6a3ae
         case VMCS_IDT_VEC_HWEXCEPTION:
Pablo Greco e6a3ae
         case VMCS_IDT_VEC_SWEXCEPTION:
Pablo Greco e6a3ae
-            env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
Pablo Greco e6a3ae
+            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
Pablo Greco e6a3ae
+            env->exception_injected = 1;
Pablo Greco e6a3ae
             break;
Pablo Greco e6a3ae
         case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
Pablo Greco e6a3ae
         default:
Pablo Greco e6a3ae
@@ -925,7 +928,8 @@ int hvf_vcpu_exec(CPUState *cpu)
Pablo Greco e6a3ae
             macvm_set_rip(cpu, rip + ins_len);
Pablo Greco e6a3ae
             break;
Pablo Greco e6a3ae
         case VMX_REASON_VMCALL:
Pablo Greco e6a3ae
-            env->exception_injected = EXCP0D_GPF;
Pablo Greco e6a3ae
+            env->exception_nr = EXCP0D_GPF;
Pablo Greco e6a3ae
+            env->exception_injected = 1;
Pablo Greco e6a3ae
             env->has_error_code = true;
Pablo Greco e6a3ae
             env->error_code = 0;
Pablo Greco e6a3ae
             break;
Pablo Greco e6a3ae
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
Pablo Greco e6a3ae
index 6c88939..f0e58a8 100644
Pablo Greco e6a3ae
--- a/target/i386/hvf/x86hvf.c
Pablo Greco e6a3ae
+++ b/target/i386/hvf/x86hvf.c
Pablo Greco e6a3ae
@@ -362,8 +362,8 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
Pablo Greco e6a3ae
     if (env->interrupt_injected != -1) {
Pablo Greco e6a3ae
         vector = env->interrupt_injected;
Pablo Greco e6a3ae
         intr_type = VMCS_INTR_T_SWINTR;
Pablo Greco e6a3ae
-    } else if (env->exception_injected != -1) {
Pablo Greco e6a3ae
-        vector = env->exception_injected;
Pablo Greco e6a3ae
+    } else if (env->exception_nr != -1) {
Pablo Greco e6a3ae
+        vector = env->exception_nr;
Pablo Greco e6a3ae
         if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {
Pablo Greco e6a3ae
             intr_type = VMCS_INTR_T_SWEXCEPTION;
Pablo Greco e6a3ae
         } else {
Pablo Greco e6a3ae
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
Pablo Greco e6a3ae
index ddceb7d..aa2d589 100644
Pablo Greco e6a3ae
--- a/target/i386/kvm.c
Pablo Greco e6a3ae
+++ b/target/i386/kvm.c
Pablo Greco e6a3ae
@@ -103,6 +103,7 @@ static uint32_t num_architectural_pmu_fixed_counters;
Pablo Greco e6a3ae
 static int has_xsave;
Pablo Greco e6a3ae
 static int has_xcrs;
Pablo Greco e6a3ae
 static int has_pit_state2;
Pablo Greco e6a3ae
+static int has_exception_payload;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
 static bool has_msr_mcg_ext_ctl;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
@@ -569,15 +570,56 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
Pablo Greco e6a3ae
     /* Hope we are lucky for AO MCE */
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+static void kvm_reset_exception(CPUX86State *env)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    env->exception_nr = -1;
Pablo Greco e6a3ae
+    env->exception_pending = 0;
Pablo Greco e6a3ae
+    env->exception_injected = 0;
Pablo Greco e6a3ae
+    env->exception_has_payload = false;
Pablo Greco e6a3ae
+    env->exception_payload = 0;
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static void kvm_queue_exception(CPUX86State *env,
Pablo Greco e6a3ae
+                                int32_t exception_nr,
Pablo Greco e6a3ae
+                                uint8_t exception_has_payload,
Pablo Greco e6a3ae
+                                uint64_t exception_payload)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    assert(env->exception_nr == -1);
Pablo Greco e6a3ae
+    assert(!env->exception_pending);
Pablo Greco e6a3ae
+    assert(!env->exception_injected);
Pablo Greco e6a3ae
+    assert(!env->exception_has_payload);
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    env->exception_nr = exception_nr;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    if (has_exception_payload) {
Pablo Greco e6a3ae
+        env->exception_pending = 1;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+        env->exception_has_payload = exception_has_payload;
Pablo Greco e6a3ae
+        env->exception_payload = exception_payload;
Pablo Greco e6a3ae
+    } else {
Pablo Greco e6a3ae
+        env->exception_injected = 1;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+        if (exception_nr == EXCP01_DB) {
Pablo Greco e6a3ae
+            assert(exception_has_payload);
Pablo Greco e6a3ae
+            env->dr[6] = exception_payload;
Pablo Greco e6a3ae
+        } else if (exception_nr == EXCP0E_PAGE) {
Pablo Greco e6a3ae
+            assert(exception_has_payload);
Pablo Greco e6a3ae
+            env->cr[2] = exception_payload;
Pablo Greco e6a3ae
+        } else {
Pablo Greco e6a3ae
+            assert(!exception_has_payload);
Pablo Greco e6a3ae
+        }
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
 static int kvm_inject_mce_oldstyle(X86CPU *cpu)
Pablo Greco e6a3ae
 {
Pablo Greco e6a3ae
     CPUX86State *env = &cpu->env;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
-    if (!kvm_has_vcpu_events() && env->exception_injected == EXCP12_MCHK) {
Pablo Greco e6a3ae
+    if (!kvm_has_vcpu_events() && env->exception_nr == EXCP12_MCHK) {
Pablo Greco e6a3ae
         unsigned int bank, bank_num = env->mcg_cap & 0xff;
Pablo Greco e6a3ae
         struct kvm_x86_mce mce;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
-        env->exception_injected = -1;
Pablo Greco e6a3ae
+        kvm_reset_exception(env);
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
         /*
Pablo Greco e6a3ae
          * There must be at least one bank in use if an MCE is pending.
Pablo Greco e6a3ae
@@ -1458,6 +1500,16 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
Pablo Greco e6a3ae
     has_pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+    has_exception_payload = kvm_check_extension(s, KVM_CAP_EXCEPTION_PAYLOAD);
Pablo Greco e6a3ae
+    if (has_exception_payload) {
Pablo Greco e6a3ae
+        ret = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_PAYLOAD, 0, true);
Pablo Greco e6a3ae
+        if (ret < 0) {
Pablo Greco e6a3ae
+            error_report("kvm: Failed to enable exception payload cap: %s",
Pablo Greco e6a3ae
+                         strerror(-ret));
Pablo Greco e6a3ae
+            return ret;
Pablo Greco e6a3ae
+        }
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     ret = kvm_get_supported_msrs(s);
Pablo Greco e6a3ae
     if (ret < 0) {
Pablo Greco e6a3ae
         return ret;
Pablo Greco e6a3ae
@@ -2717,8 +2769,16 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
Pablo Greco e6a3ae
         return 0;
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
-    events.exception.injected = (env->exception_injected >= 0);
Pablo Greco e6a3ae
-    events.exception.nr = env->exception_injected;
Pablo Greco e6a3ae
+    events.flags = 0;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    if (has_exception_payload) {
Pablo Greco e6a3ae
+        events.flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
Pablo Greco e6a3ae
+        events.exception.pending = env->exception_pending;
Pablo Greco e6a3ae
+        events.exception_has_payload = env->exception_has_payload;
Pablo Greco e6a3ae
+        events.exception_payload = env->exception_payload;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+    events.exception.nr = env->exception_nr;
Pablo Greco e6a3ae
+    events.exception.injected = env->exception_injected;
Pablo Greco e6a3ae
     events.exception.has_error_code = env->has_error_code;
Pablo Greco e6a3ae
     events.exception.error_code = env->error_code;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
@@ -2731,7 +2791,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
Pablo Greco e6a3ae
     events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     events.sipi_vector = env->sipi_vector;
Pablo Greco e6a3ae
-    events.flags = 0;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     if (has_msr_smbase) {
Pablo Greco e6a3ae
         events.smi.smm = !!(env->hflags & HF_SMM_MASK);
Pablo Greco e6a3ae
@@ -2781,8 +2840,19 @@ static int kvm_get_vcpu_events(X86CPU *cpu)
Pablo Greco e6a3ae
     if (ret < 0) {
Pablo Greco e6a3ae
        return ret;
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
-    env->exception_injected =
Pablo Greco e6a3ae
-       events.exception.injected ? events.exception.nr : -1;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    if (events.flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
Pablo Greco e6a3ae
+        env->exception_pending = events.exception.pending;
Pablo Greco e6a3ae
+        env->exception_has_payload = events.exception_has_payload;
Pablo Greco e6a3ae
+        env->exception_payload = events.exception_payload;
Pablo Greco e6a3ae
+    } else {
Pablo Greco e6a3ae
+        env->exception_pending = 0;
Pablo Greco e6a3ae
+        env->exception_has_payload = false;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+    env->exception_injected = events.exception.injected;
Pablo Greco e6a3ae
+    env->exception_nr =
Pablo Greco e6a3ae
+        (env->exception_pending || env->exception_injected) ?
Pablo Greco e6a3ae
+        events.exception.nr : -1;
Pablo Greco e6a3ae
     env->has_error_code = events.exception.has_error_code;
Pablo Greco e6a3ae
     env->error_code = events.exception.error_code;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
@@ -2834,12 +2904,12 @@ static int kvm_guest_debug_workarounds(X86CPU *cpu)
Pablo Greco e6a3ae
     unsigned long reinject_trap = 0;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     if (!kvm_has_vcpu_events()) {
Pablo Greco e6a3ae
-        if (env->exception_injected == EXCP01_DB) {
Pablo Greco e6a3ae
+        if (env->exception_nr == EXCP01_DB) {
Pablo Greco e6a3ae
             reinject_trap = KVM_GUESTDBG_INJECT_DB;
Pablo Greco e6a3ae
         } else if (env->exception_injected == EXCP03_INT3) {
Pablo Greco e6a3ae
             reinject_trap = KVM_GUESTDBG_INJECT_BP;
Pablo Greco e6a3ae
         }
Pablo Greco e6a3ae
-        env->exception_injected = -1;
Pablo Greco e6a3ae
+        kvm_reset_exception(env);
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     /*
Pablo Greco e6a3ae
@@ -3215,13 +3285,13 @@ int kvm_arch_process_async_events(CPUState *cs)
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
         kvm_cpu_synchronize_state(cs);
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
-        if (env->exception_injected == EXCP08_DBLE) {
Pablo Greco e6a3ae
+        if (env->exception_nr == EXCP08_DBLE) {
Pablo Greco e6a3ae
             /* this means triple fault */
Pablo Greco e6a3ae
             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
Pablo Greco e6a3ae
             cs->exit_request = 1;
Pablo Greco e6a3ae
             return 0;
Pablo Greco e6a3ae
         }
Pablo Greco e6a3ae
-        env->exception_injected = EXCP12_MCHK;
Pablo Greco e6a3ae
+        kvm_queue_exception(env, EXCP12_MCHK, 0, 0);
Pablo Greco e6a3ae
         env->has_error_code = 0;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
         cs->halted = 0;
Pablo Greco e6a3ae
@@ -3436,14 +3506,13 @@ static int kvm_handle_debug(X86CPU *cpu,
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
     if (ret == 0) {
Pablo Greco e6a3ae
         cpu_synchronize_state(cs);
Pablo Greco e6a3ae
-        assert(env->exception_injected == -1);
Pablo Greco e6a3ae
+        assert(env->exception_nr == -1);
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
         /* pass to guest */
Pablo Greco e6a3ae
-        env->exception_injected = arch_info->exception;
Pablo Greco e6a3ae
+        kvm_queue_exception(env, arch_info->exception,
Pablo Greco e6a3ae
+                            arch_info->exception == EXCP01_DB,
Pablo Greco e6a3ae
+                            arch_info->dr6);
Pablo Greco e6a3ae
         env->has_error_code = 0;
Pablo Greco e6a3ae
-        if (arch_info->exception == EXCP01_DB) {
Pablo Greco e6a3ae
-            env->dr[6] = arch_info->dr6;
Pablo Greco e6a3ae
-        }
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     return ret;
Pablo Greco e6a3ae
diff --git a/target/i386/machine.c b/target/i386/machine.c
Pablo Greco e6a3ae
index a2ddbba..5ffee8f 100644
Pablo Greco e6a3ae
--- a/target/i386/machine.c
Pablo Greco e6a3ae
+++ b/target/i386/machine.c
Pablo Greco e6a3ae
@@ -239,6 +239,41 @@ static int cpu_pre_save(void *opaque)
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+    /*
Pablo Greco e6a3ae
+     * When vCPU is running L2 and exception is still pending,
Pablo Greco e6a3ae
+     * it can potentially be intercepted by L1 hypervisor.
Pablo Greco e6a3ae
+     * In contrast to an injected exception which cannot be
Pablo Greco e6a3ae
+     * intercepted anymore.
Pablo Greco e6a3ae
+     *
Pablo Greco e6a3ae
+     * Furthermore, when a L2 exception is intercepted by L1
Pablo Greco e6a3ae
+     * hypervisor, its exception payload (CR2/DR6 on #PF/#DB)
Pablo Greco e6a3ae
+     * should not be set yet in the respective vCPU register.
Pablo Greco e6a3ae
+     * Thus, in case an exception is pending, it is
Pablo Greco e6a3ae
+     * important to save the exception payload separately.
Pablo Greco e6a3ae
+     *
Pablo Greco e6a3ae
+     * Therefore, if an exception is not in a pending state
Pablo Greco e6a3ae
+     * or vCPU is not in guest-mode, it is not important to
Pablo Greco e6a3ae
+     * distinguish between a pending and injected exception
Pablo Greco e6a3ae
+     * and we don't need to store the exception payload separately.
Pablo Greco e6a3ae
+     *
Pablo Greco e6a3ae
+     * In order to preserve better backwards-compatible migration,
Pablo Greco e6a3ae
+     * convert a pending exception to an injected exception in
Pablo Greco e6a3ae
+     * case it is not important to distinguish between them
Pablo Greco e6a3ae
+     * as described above.
Pablo Greco e6a3ae
+     */
Pablo Greco e6a3ae
+    if (env->exception_pending && !(env->hflags & HF_GUEST_MASK)) {
Pablo Greco e6a3ae
+        env->exception_pending = 0;
Pablo Greco e6a3ae
+        env->exception_injected = 1;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+        if (env->exception_has_payload) {
Pablo Greco e6a3ae
+            if (env->exception_nr == EXCP01_DB) {
Pablo Greco e6a3ae
+                env->dr[6] = env->exception_payload;
Pablo Greco e6a3ae
+            } else if (env->exception_nr == EXCP0E_PAGE) {
Pablo Greco e6a3ae
+                env->cr[2] = env->exception_payload;
Pablo Greco e6a3ae
+            }
Pablo Greco e6a3ae
+        }
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     return 0;
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
@@ -296,6 +331,23 @@ static int cpu_post_load(void *opaque, int version_id)
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+    /*
Pablo Greco e6a3ae
+     * There are cases that we can get valid exception_nr with both
Pablo Greco e6a3ae
+     * exception_pending and exception_injected being cleared.
Pablo Greco e6a3ae
+     * This can happen in one of the following scenarios:
Pablo Greco e6a3ae
+     * 1) Source is older QEMU without KVM_CAP_EXCEPTION_PAYLOAD support.
Pablo Greco e6a3ae
+     * 2) Source is running on kernel without KVM_CAP_EXCEPTION_PAYLOAD support.
Pablo Greco e6a3ae
+     * 3) "cpu/exception_info" subsection not sent because there is no exception
Pablo Greco e6a3ae
+     *    pending or guest wasn't running L2 (See comment in cpu_pre_save()).
Pablo Greco e6a3ae
+     *
Pablo Greco e6a3ae
+     * In those cases, we can just deduce that a valid exception_nr means
Pablo Greco e6a3ae
+     * we can treat the exception as already injected.
Pablo Greco e6a3ae
+     */
Pablo Greco e6a3ae
+    if ((env->exception_nr != -1) &&
Pablo Greco e6a3ae
+        !env->exception_pending && !env->exception_injected) {
Pablo Greco e6a3ae
+        env->exception_injected = 1;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     env->fpstt = (env->fpus_vmstate >> 11) & 7;
Pablo Greco e6a3ae
     env->fpus = env->fpus_vmstate & ~0x3800;
Pablo Greco e6a3ae
     env->fptag_vmstate ^= 0xff;
Pablo Greco e6a3ae
@@ -341,6 +393,35 @@ static bool steal_time_msr_needed(void *opaque)
Pablo Greco e6a3ae
     return cpu->env.steal_time_msr != 0;
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+static bool exception_info_needed(void *opaque)
Pablo Greco e6a3ae
+{
Pablo Greco e6a3ae
+    X86CPU *cpu = opaque;
Pablo Greco e6a3ae
+    CPUX86State *env = &cpu->env;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    /*
Pablo Greco e6a3ae
+     * It is important to save exception-info only in case
Pablo Greco e6a3ae
+     * we need to distinguish between a pending and injected
Pablo Greco e6a3ae
+     * exception. Which is only required in case there is a
Pablo Greco e6a3ae
+     * pending exception and vCPU is running L2.
Pablo Greco e6a3ae
+     * For more info, refer to comment in cpu_pre_save().
Pablo Greco e6a3ae
+     */
Pablo Greco e6a3ae
+    return env->exception_pending && (env->hflags & HF_GUEST_MASK);
Pablo Greco e6a3ae
+}
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+static const VMStateDescription vmstate_exception_info = {
Pablo Greco e6a3ae
+    .name = "cpu/exception_info",
Pablo Greco e6a3ae
+    .version_id = 1,
Pablo Greco e6a3ae
+    .minimum_version_id = 1,
Pablo Greco e6a3ae
+    .needed = exception_info_needed,
Pablo Greco e6a3ae
+    .fields = (VMStateField[]) {
Pablo Greco e6a3ae
+        VMSTATE_UINT8(env.exception_pending, X86CPU),
Pablo Greco e6a3ae
+        VMSTATE_UINT8(env.exception_injected, X86CPU),
Pablo Greco e6a3ae
+        VMSTATE_UINT8(env.exception_has_payload, X86CPU),
Pablo Greco e6a3ae
+        VMSTATE_UINT64(env.exception_payload, X86CPU),
Pablo Greco e6a3ae
+        VMSTATE_END_OF_LIST()
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+};
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
 static const VMStateDescription vmstate_steal_time_msr = {
Pablo Greco e6a3ae
     .name = "cpu/steal_time_msr",
Pablo Greco e6a3ae
     .version_id = 1,
Pablo Greco e6a3ae
@@ -1219,7 +1300,7 @@ VMStateDescription vmstate_x86_cpu = {
Pablo Greco e6a3ae
         VMSTATE_INT32(env.interrupt_injected, X86CPU),
Pablo Greco e6a3ae
         VMSTATE_UINT32(env.mp_state, X86CPU),
Pablo Greco e6a3ae
         VMSTATE_UINT64(env.tsc, X86CPU),
Pablo Greco e6a3ae
-        VMSTATE_INT32(env.exception_injected, X86CPU),
Pablo Greco e6a3ae
+        VMSTATE_INT32(env.exception_nr, X86CPU),
Pablo Greco e6a3ae
         VMSTATE_UINT8(env.soft_interrupt, X86CPU),
Pablo Greco e6a3ae
         VMSTATE_UINT8(env.nmi_injected, X86CPU),
Pablo Greco e6a3ae
         VMSTATE_UINT8(env.nmi_pending, X86CPU),
Pablo Greco e6a3ae
@@ -1243,6 +1324,7 @@ VMStateDescription vmstate_x86_cpu = {
Pablo Greco e6a3ae
         /* The above list is not sorted /wrt version numbers, watch out! */
Pablo Greco e6a3ae
     },
Pablo Greco e6a3ae
     .subsections = (const VMStateDescription*[]) {
Pablo Greco e6a3ae
+        &vmstate_exception_info,
Pablo Greco e6a3ae
         &vmstate_async_pf_msr,
Pablo Greco e6a3ae
         &vmstate_pv_eoi_msr,
Pablo Greco e6a3ae
         &vmstate_steal_time_msr,
Pablo Greco e6a3ae
-- 
Pablo Greco e6a3ae
1.8.3.1
Pablo Greco e6a3ae