Blame SOURCES/kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch

ae23c9
From 9552c25dc788925f211daaa55518e5144f8f0cc7 Mon Sep 17 00:00:00 2001
ae23c9
From: David Hildenbrand <david@redhat.com>
ae23c9
Date: Fri, 21 Dec 2018 15:36:13 +0000
ae23c9
Subject: [PATCH 11/22] s390x/tod: Properly stop the KVM TOD while the guest is
ae23c9
 not running
ae23c9
ae23c9
RH-Author: David Hildenbrand <david@redhat.com>
ae23c9
Message-id: <20181221153614.27961-12-david@redhat.com>
ae23c9
Patchwork-id: 83755
ae23c9
O-Subject: [RHEL-8.0 qemu-kvm v2 PATCH 11/12] s390x/tod: Properly stop the KVM TOD while the guest is not running
ae23c9
Bugzilla: 1653569
ae23c9
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
ae23c9
RH-Acked-by: Thomas Huth <thuth@redhat.com>
ae23c9
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
ae23c9
ae23c9
Just like on other architectures, we should stop the clock while the guest
ae23c9
is not running. This is already properly done for TCG. Right now, doing an
ae23c9
offline migration (stop, migrate, cont) can easily trigger stalls in the
ae23c9
guest.
ae23c9
ae23c9
Even doing a
ae23c9
    (hmp) stop
ae23c9
    ... wait 2 minutes ...
ae23c9
    (hmp) cont
ae23c9
will already trigger stalls.
ae23c9
ae23c9
So whenever the guest stops, backup the KVM TOD. When continuing to run
ae23c9
the guest, restore the KVM TOD.
ae23c9
ae23c9
One special case is starting a simple VM: Reading the TOD from KVM to
ae23c9
stop it right away until the guest is actually started means that the
ae23c9
time of any simple VM will already differ to the host time. We can
ae23c9
simply leave the TOD running and the guest won't be able to recognize
ae23c9
it.
ae23c9
ae23c9
For migration, we actually want to keep the TOD stopped until really
ae23c9
starting the guest. To be able to catch most errors, we should however
ae23c9
try to set the TOD in addition to simply storing it. So we can still
ae23c9
catch basic migration problems.
ae23c9
ae23c9
If anything goes wrong while backing up/restoring the TOD, we have to
ae23c9
ignore it (but print a warning). This is then basically a fallback to
ae23c9
old behavior (TOD remains running).
ae23c9
ae23c9
I tested this very basically with an initrd:
ae23c9
    1. Start a simple VM. Observed that the TOD is kept running. Old
ae23c9
       behavior.
ae23c9
    2. Ordinary live migration. Observed that the TOD is temporarily
ae23c9
       stopped on the destination when setting the new value and
ae23c9
       correctly started when finally starting the guest.
ae23c9
    3. Offline live migration. (stop, migrate, cont). Observed that the
ae23c9
       TOD will be stopped on the source with the "stop" command. On the
ae23c9
       destination, the TOD is temporarily stopped when setting the new
ae23c9
       value and correctly started when finally starting the guest via
ae23c9
       "cont".
ae23c9
    4. Simple stop/cont correctly stops/starts the TOD. (multiple stops
ae23c9
       or conts in a row have no effect, so works as expected)
ae23c9
ae23c9
In the future, we might want to send the guest a special kind of time sync
ae23c9
interrupt under some conditions, so it can synchronize its tod to the
ae23c9
host tod. This is interesting for migration scenarios but also when we
ae23c9
get time sync interrupts ourselves. This however will most probably have
ae23c9
to be handled in KVM (e.g. when the tods differ too much) and is not
ae23c9
desired e.g. when debugging the guest (single stepping should not
ae23c9
result in permanent time syncs). I consider something like that an add-on
ae23c9
on top of this basic "don't break the guest" handling.
ae23c9
ae23c9
Signed-off-by: David Hildenbrand <david@redhat.com>
ae23c9
Message-Id: <20181130094957.4121-1-david@redhat.com>
ae23c9
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
ae23c9
Reviewed-by: Thomas Huth <thuth@redhat.com>
ae23c9
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
ae23c9
(cherry picked from commit 9bc9d3d1ae3bcd1caaad1946494726b52f58b291)
ae23c9
Signed-off-by: David Hildenbrand <david@redhat.com>
ae23c9
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ae23c9
---
ae23c9
 hw/s390x/tod-kvm.c     | 102 ++++++++++++++++++++++++++++++++++++++++++++++++-
ae23c9
 include/hw/s390x/tod.h |   8 +++-
ae23c9
 2 files changed, 107 insertions(+), 3 deletions(-)
ae23c9
ae23c9
diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c
ae23c9
index df564ab..2456bf7 100644
ae23c9
--- a/hw/s390x/tod-kvm.c
ae23c9
+++ b/hw/s390x/tod-kvm.c
ae23c9
@@ -10,10 +10,11 @@
ae23c9
 
ae23c9
 #include "qemu/osdep.h"
ae23c9
 #include "qapi/error.h"
ae23c9
+#include "sysemu/sysemu.h"
ae23c9
 #include "hw/s390x/tod.h"
ae23c9
 #include "kvm_s390x.h"
ae23c9
 
ae23c9
-static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp)
ae23c9
+static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp)
ae23c9
 {
ae23c9
     int r;
ae23c9
 
ae23c9
@@ -27,7 +28,17 @@ static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp)
ae23c9
     }
ae23c9
 }
ae23c9
 
ae23c9
-static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp)
ae23c9
+static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp)
ae23c9
+{
ae23c9
+    if (td->stopped) {
ae23c9
+        *tod = td->base;
ae23c9
+        return;
ae23c9
+    }
ae23c9
+
ae23c9
+    kvm_s390_get_tod_raw(tod, errp);
ae23c9
+}
ae23c9
+
ae23c9
+static void kvm_s390_set_tod_raw(const S390TOD *tod, Error **errp)
ae23c9
 {
ae23c9
     int r;
ae23c9
 
ae23c9
@@ -41,18 +52,105 @@ static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp)
ae23c9
     }
ae23c9
 }
ae23c9
 
ae23c9
+static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp)
ae23c9
+{
ae23c9
+    Error *local_err = NULL;
ae23c9
+
ae23c9
+    /*
ae23c9
+     * Somebody (e.g. migration) set the TOD. We'll store it into KVM to
ae23c9
+     * properly detect errors now but take a look at the runstate to decide
ae23c9
+     * whether really to keep the tod running. E.g. during migration, this
ae23c9
+     * is the point where we want to stop the initially running TOD to fire
ae23c9
+     * it back up when actually starting the migrated guest.
ae23c9
+     */
ae23c9
+    kvm_s390_set_tod_raw(tod, &local_err);
ae23c9
+    if (local_err) {
ae23c9
+        error_propagate(errp, local_err);
ae23c9
+        return;
ae23c9
+    }
ae23c9
+
ae23c9
+    if (runstate_is_running()) {
ae23c9
+        td->stopped = false;
ae23c9
+    } else {
ae23c9
+        td->stopped = true;
ae23c9
+        td->base = *tod;
ae23c9
+    }
ae23c9
+}
ae23c9
+
ae23c9
+static void kvm_s390_tod_vm_state_change(void *opaque, int running,
ae23c9
+                                         RunState state)
ae23c9
+{
ae23c9
+    S390TODState *td = opaque;
ae23c9
+    Error *local_err = NULL;
ae23c9
+
ae23c9
+    if (running && td->stopped) {
ae23c9
+        /* Set the old TOD when running the VM - start the TOD clock. */
ae23c9
+        kvm_s390_set_tod_raw(&td->base, &local_err);
ae23c9
+        if (local_err) {
ae23c9
+            warn_report_err(local_err);
ae23c9
+        }
ae23c9
+        /* Treat errors like the TOD was running all the time. */
ae23c9
+        td->stopped = false;
ae23c9
+    } else if (!running && !td->stopped) {
ae23c9
+        /* Store the TOD when stopping the VM - stop the TOD clock. */
ae23c9
+        kvm_s390_get_tod_raw(&td->base, &local_err);
ae23c9
+        if (local_err) {
ae23c9
+            /* Keep the TOD running in case we could not back it up. */
ae23c9
+            warn_report_err(local_err);
ae23c9
+        } else {
ae23c9
+            td->stopped = true;
ae23c9
+        }
ae23c9
+    }
ae23c9
+}
ae23c9
+
ae23c9
+static void kvm_s390_tod_realize(DeviceState *dev, Error **errp)
ae23c9
+{
ae23c9
+    S390TODState *td = S390_TOD(dev);
ae23c9
+    S390TODClass *tdc = S390_TOD_GET_CLASS(td);
ae23c9
+    Error *local_err = NULL;
ae23c9
+
ae23c9
+    tdc->parent_realize(dev, &local_err);
ae23c9
+    if (local_err) {
ae23c9
+        error_propagate(errp, local_err);
ae23c9
+        return;
ae23c9
+    }
ae23c9
+
ae23c9
+    /*
ae23c9
+     * We need to know when the VM gets started/stopped to start/stop the TOD.
ae23c9
+     * As we can never have more than one TOD instance (and that will never be
ae23c9
+     * removed), registering here and never unregistering is good enough.
ae23c9
+     */
ae23c9
+    qemu_add_vm_change_state_handler(kvm_s390_tod_vm_state_change, td);
ae23c9
+}
ae23c9
+
ae23c9
 static void kvm_s390_tod_class_init(ObjectClass *oc, void *data)
ae23c9
 {
ae23c9
     S390TODClass *tdc = S390_TOD_CLASS(oc);
ae23c9
 
ae23c9
+    device_class_set_parent_realize(DEVICE_CLASS(oc), kvm_s390_tod_realize,
ae23c9
+                                    &tdc->parent_realize);
ae23c9
     tdc->get = kvm_s390_tod_get;
ae23c9
     tdc->set = kvm_s390_tod_set;
ae23c9
 }
ae23c9
 
ae23c9
+static void kvm_s390_tod_init(Object *obj)
ae23c9
+{
ae23c9
+    S390TODState *td = S390_TOD(obj);
ae23c9
+
ae23c9
+    /*
ae23c9
+     * The TOD is initially running (value stored in KVM). Avoid needless
ae23c9
+     * loading/storing of the TOD when starting a simple VM, so let it
ae23c9
+     * run although the (never started) VM is stopped. For migration, we
ae23c9
+     * will properly set the TOD later.
ae23c9
+     */
ae23c9
+    td->stopped = false;
ae23c9
+}
ae23c9
+
ae23c9
 static TypeInfo kvm_s390_tod_info = {
ae23c9
     .name = TYPE_KVM_S390_TOD,
ae23c9
     .parent = TYPE_S390_TOD,
ae23c9
     .instance_size = sizeof(S390TODState),
ae23c9
+    .instance_init = kvm_s390_tod_init,
ae23c9
     .class_init = kvm_s390_tod_class_init,
ae23c9
     .class_size = sizeof(S390TODClass),
ae23c9
 };
ae23c9
diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h
ae23c9
index 413c0d7..cbd7552 100644
ae23c9
--- a/include/hw/s390x/tod.h
ae23c9
+++ b/include/hw/s390x/tod.h
ae23c9
@@ -31,13 +31,19 @@ typedef struct S390TODState {
ae23c9
     /* private */
ae23c9
     DeviceState parent_obj;
ae23c9
 
ae23c9
-    /* unused by KVM implementation */
ae23c9
+    /*
ae23c9
+     * Used by TCG to remember the time base. Used by KVM to backup the TOD
ae23c9
+     * while the TOD is stopped.
ae23c9
+     */
ae23c9
     S390TOD base;
ae23c9
+    /* Used by KVM to remember if the TOD is stopped and base is valid. */
ae23c9
+    bool stopped;
ae23c9
 } S390TODState;
ae23c9
 
ae23c9
 typedef struct S390TODClass {
ae23c9
     /* private */
ae23c9
     DeviceClass parent_class;
ae23c9
+    void (*parent_realize)(DeviceState *dev, Error **errp);
ae23c9
 
ae23c9
     /* public */
ae23c9
     void (*get)(const S390TODState *td, S390TOD *tod, Error **errp);
ae23c9
-- 
ae23c9
1.8.3.1
ae23c9