Blame SOURCES/kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch

28c80a
From f9494422b9c4abe8f7cfea0ecee729bbc618de02 Mon Sep 17 00:00:00 2001
28c80a
From: David Hildenbrand <david@redhat.com>
28c80a
Date: Fri, 21 Dec 2018 15:39:56 +0100
28c80a
Subject: [PATCH 12/14] s390x/tod: Properly stop the KVM TOD while the guest is
28c80a
 not running
28c80a
28c80a
RH-Author: David Hildenbrand <david@redhat.com>
28c80a
Message-id: <20181221153957.28183-12-david@redhat.com>
28c80a
Patchwork-id: 83765
28c80a
O-Subject: [RHEL-7.6.z qemu-kvm-ma PATCH 11/12] s390x/tod: Properly stop the KVM TOD while the guest is not running
28c80a
Bugzilla: 1672920
28c80a
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
28c80a
RH-Acked-by: Thomas Huth <thuth@redhat.com>
28c80a
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
28c80a
28c80a
Just like on other architectures, we should stop the clock while the guest
28c80a
is not running. This is already properly done for TCG. Right now, doing an
28c80a
offline migration (stop, migrate, cont) can easily trigger stalls in the
28c80a
guest.
28c80a
28c80a
Even doing a
28c80a
    (hmp) stop
28c80a
    ... wait 2 minutes ...
28c80a
    (hmp) cont
28c80a
will already trigger stalls.
28c80a
28c80a
So whenever the guest stops, back up the KVM TOD. When continuing to run
28c80a
the guest, restore the KVM TOD.
28c80a
28c80a
One special case is starting a simple VM: Reading the TOD from KVM to
28c80a
stop it right away until the guest is actually started means that the
28c80a
time of any simple VM will already differ from the host time. We can
28c80a
simply leave the TOD running and the guest won't be able to recognize
28c80a
it.
28c80a
28c80a
For migration, we actually want to keep the TOD stopped until really
28c80a
starting the guest. To be able to catch most errors, we should however
28c80a
try to set the TOD in addition to simply storing it. So we can still
28c80a
catch basic migration problems.
28c80a
28c80a
If anything goes wrong while backing up/restoring the TOD, we have to
28c80a
ignore it (but print a warning). This is then basically a fallback to
28c80a
old behavior (TOD remains running).
28c80a
28c80a
I tested this very basically with an initrd:
28c80a
    1. Start a simple VM. Observed that the TOD is kept running. Old
28c80a
       behavior.
28c80a
    2. Ordinary live migration. Observed that the TOD is temporarily
28c80a
       stopped on the destination when setting the new value and
28c80a
       correctly started when finally starting the guest.
28c80a
    3. Offline live migration. (stop, migrate, cont). Observed that the
28c80a
       TOD will be stopped on the source with the "stop" command. On the
28c80a
       destination, the TOD is temporarily stopped when setting the new
28c80a
       value and correctly started when finally starting the guest via
28c80a
       "cont".
28c80a
    4. Simple stop/cont correctly stops/starts the TOD. (multiple stops
28c80a
       or conts in a row have no effect, so works as expected)
28c80a
28c80a
In the future, we might want to send the guest a special kind of time sync
28c80a
interrupt under some conditions, so it can synchronize its tod to the
28c80a
host tod. This is interesting for migration scenarios but also when we
28c80a
get time sync interrupts ourselves. This however will most probably have
28c80a
to be handled in KVM (e.g. when the tods differ too much) and is not
28c80a
desired e.g. when debugging the guest (single stepping should not
28c80a
result in permanent time syncs). I consider something like that an add-on
28c80a
on top of this basic "don't break the guest" handling.
28c80a
28c80a
Signed-off-by: David Hildenbrand <david@redhat.com>
28c80a
Message-Id: <20181130094957.4121-1-david@redhat.com>
28c80a
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
28c80a
Reviewed-by: Thomas Huth <thuth@redhat.com>
28c80a
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
28c80a
(cherry picked from commit 9bc9d3d1ae3bcd1caaad1946494726b52f58b291)
28c80a
Signed-off-by: David Hildenbrand <david@redhat.com>
28c80a
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
28c80a
---
28c80a
 hw/s390x/tod-kvm.c     | 102 ++++++++++++++++++++++++++++++++++++++++++++++++-
28c80a
 include/hw/s390x/tod.h |   8 +++-
28c80a
 2 files changed, 107 insertions(+), 3 deletions(-)
28c80a
28c80a
diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c
28c80a
index df564ab..2456bf7 100644
28c80a
--- a/hw/s390x/tod-kvm.c
28c80a
+++ b/hw/s390x/tod-kvm.c
28c80a
@@ -10,10 +10,11 @@
28c80a
 
28c80a
 #include "qemu/osdep.h"
28c80a
 #include "qapi/error.h"
28c80a
+#include "sysemu/sysemu.h"
28c80a
 #include "hw/s390x/tod.h"
28c80a
 #include "kvm_s390x.h"
28c80a
 
28c80a
-static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp)
28c80a
+static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp)
28c80a
 {
28c80a
     int r;
28c80a
 
28c80a
@@ -27,7 +28,17 @@ static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp)
28c80a
     }
28c80a
 }
28c80a
 
28c80a
-static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp)
28c80a
+static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp)
28c80a
+{
28c80a
+    if (td->stopped) {
28c80a
+        *tod = td->base;
28c80a
+        return;
28c80a
+    }
28c80a
+
28c80a
+    kvm_s390_get_tod_raw(tod, errp);
28c80a
+}
28c80a
+
28c80a
+static void kvm_s390_set_tod_raw(const S390TOD *tod, Error **errp)
28c80a
 {
28c80a
     int r;
28c80a
 
28c80a
@@ -41,18 +52,105 @@ static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp)
28c80a
     }
28c80a
 }
28c80a
 
28c80a
+static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp)
28c80a
+{
28c80a
+    Error *local_err = NULL;
28c80a
+
28c80a
+    /*
28c80a
+     * Somebody (e.g. migration) set the TOD. We'll store it into KVM to
28c80a
+     * properly detect errors now but take a look at the runstate to decide
28c80a
+     * whether really to keep the tod running. E.g. during migration, this
28c80a
+     * is the point where we want to stop the initially running TOD to fire
28c80a
+     * it back up when actually starting the migrated guest.
28c80a
+     */
28c80a
+    kvm_s390_set_tod_raw(tod, &local_err);
28c80a
+    if (local_err) {
28c80a
+        error_propagate(errp, local_err);
28c80a
+        return;
28c80a
+    }
28c80a
+
28c80a
+    if (runstate_is_running()) {
28c80a
+        td->stopped = false;
28c80a
+    } else {
28c80a
+        td->stopped = true;
28c80a
+        td->base = *tod;
28c80a
+    }
28c80a
+}
28c80a
+
28c80a
+static void kvm_s390_tod_vm_state_change(void *opaque, int running,
28c80a
+                                         RunState state)
28c80a
+{
28c80a
+    S390TODState *td = opaque;
28c80a
+    Error *local_err = NULL;
28c80a
+
28c80a
+    if (running && td->stopped) {
28c80a
+        /* Set the old TOD when running the VM - start the TOD clock. */
28c80a
+        kvm_s390_set_tod_raw(&td->base, &local_err);
28c80a
+        if (local_err) {
28c80a
+            warn_report_err(local_err);
28c80a
+        }
28c80a
+        /* Treat errors like the TOD was running all the time. */
28c80a
+        td->stopped = false;
28c80a
+    } else if (!running && !td->stopped) {
28c80a
+        /* Store the TOD when stopping the VM - stop the TOD clock. */
28c80a
+        kvm_s390_get_tod_raw(&td->base, &local_err);
28c80a
+        if (local_err) {
28c80a
+            /* Keep the TOD running in case we could not back it up. */
28c80a
+            warn_report_err(local_err);
28c80a
+        } else {
28c80a
+            td->stopped = true;
28c80a
+        }
28c80a
+    }
28c80a
+}
28c80a
+
28c80a
+static void kvm_s390_tod_realize(DeviceState *dev, Error **errp)
28c80a
+{
28c80a
+    S390TODState *td = S390_TOD(dev);
28c80a
+    S390TODClass *tdc = S390_TOD_GET_CLASS(td);
28c80a
+    Error *local_err = NULL;
28c80a
+
28c80a
+    tdc->parent_realize(dev, &local_err);
28c80a
+    if (local_err) {
28c80a
+        error_propagate(errp, local_err);
28c80a
+        return;
28c80a
+    }
28c80a
+
28c80a
+    /*
28c80a
+     * We need to know when the VM gets started/stopped to start/stop the TOD.
28c80a
+     * As we can never have more than one TOD instance (and that will never be
28c80a
+     * removed), registering here and never unregistering is good enough.
28c80a
+     */
28c80a
+    qemu_add_vm_change_state_handler(kvm_s390_tod_vm_state_change, td);
28c80a
+}
28c80a
+
28c80a
 static void kvm_s390_tod_class_init(ObjectClass *oc, void *data)
28c80a
 {
28c80a
     S390TODClass *tdc = S390_TOD_CLASS(oc);
28c80a
 
28c80a
+    device_class_set_parent_realize(DEVICE_CLASS(oc), kvm_s390_tod_realize,
28c80a
+                                    &tdc->parent_realize);
28c80a
     tdc->get = kvm_s390_tod_get;
28c80a
     tdc->set = kvm_s390_tod_set;
28c80a
 }
28c80a
 
28c80a
+static void kvm_s390_tod_init(Object *obj)
28c80a
+{
28c80a
+    S390TODState *td = S390_TOD(obj);
28c80a
+
28c80a
+    /*
28c80a
+     * The TOD is initially running (value stored in KVM). Avoid needless
28c80a
+     * loading/storing of the TOD when starting a simple VM, so let it
28c80a
+     * run although the (never started) VM is stopped. For migration, we
28c80a
+     * will properly set the TOD later.
28c80a
+     */
28c80a
+    td->stopped = false;
28c80a
+}
28c80a
+
28c80a
 static TypeInfo kvm_s390_tod_info = {
28c80a
     .name = TYPE_KVM_S390_TOD,
28c80a
     .parent = TYPE_S390_TOD,
28c80a
     .instance_size = sizeof(S390TODState),
28c80a
+    .instance_init = kvm_s390_tod_init,
28c80a
     .class_init = kvm_s390_tod_class_init,
28c80a
     .class_size = sizeof(S390TODClass),
28c80a
 };
28c80a
diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h
28c80a
index 413c0d7..cbd7552 100644
28c80a
--- a/include/hw/s390x/tod.h
28c80a
+++ b/include/hw/s390x/tod.h
28c80a
@@ -31,13 +31,19 @@ typedef struct S390TODState {
28c80a
     /* private */
28c80a
     DeviceState parent_obj;
28c80a
 
28c80a
-    /* unused by KVM implementation */
28c80a
+    /*
28c80a
+     * Used by TCG to remember the time base. Used by KVM to backup the TOD
28c80a
+     * while the TOD is stopped.
28c80a
+     */
28c80a
     S390TOD base;
28c80a
+    /* Used by KVM to remember if the TOD is stopped and base is valid. */
28c80a
+    bool stopped;
28c80a
 } S390TODState;
28c80a
 
28c80a
 typedef struct S390TODClass {
28c80a
     /* private */
28c80a
     DeviceClass parent_class;
28c80a
+    void (*parent_realize)(DeviceState *dev, Error **errp);
28c80a
 
28c80a
     /* public */
28c80a
     void (*get)(const S390TODState *td, S390TOD *tod, Error **errp);
28c80a
-- 
28c80a
1.8.3.1
28c80a