9c6c51
From 91a37e3641afbd29067cd945ca14a6572e4d4897 Mon Sep 17 00:00:00 2001
9c6c51
Message-Id: <91a37e3641afbd29067cd945ca14a6572e4d4897@dist-git>
9c6c51
From: Jiri Denemark <jdenemar@redhat.com>
9c6c51
Date: Thu, 15 Nov 2018 11:16:43 +0100
9c6c51
Subject: [PATCH] qemu: Fix post-copy migration on the source
9c6c51
MIME-Version: 1.0
9c6c51
Content-Type: text/plain; charset=UTF-8
9c6c51
Content-Transfer-Encoding: 8bit
9c6c51
9c6c51
Post-copy migration has been broken on the source since commit
9c6c51
v3.8.0-245-g32c29f10db which implemented support for
9c6c51
pause-before-switchover QEMU migration capability.
9c6c51
9c6c51
Even though the migration itself went well, the source did not really
9c6c51
know when it switched to the post-copy mode despite the messages logged
9c6c51
by MIGRATION event handler. As a result of this, the events emitted by
9c6c51
source libvirtd were not accurate and statistics of the completed
9c6c51
migration would cover only the pre-copy part of migration. Moreover, if
9c6c51
migration failed during the post-copy phase for some reason, the source
9c6c51
libvirtd would just happily resume the domain, which could lead to disk
9c6c51
corruption.
9c6c51
9c6c51
With the pause-before-switchover capability enabled, the order of events
9c6c51
emitted by QEMU changed:
9c6c51
9c6c51
                    pause-before-switchover
9c6c51
           disabled                        enabled
9c6c51
    MIGRATION, postcopy-active      STOP
9c6c51
    STOP                            MIGRATION, pre-switchover
9c6c51
                                    MIGRATION, postcopy-active
9c6c51
9c6c51
The STOP event handler checks the migration status (postcopy-active) and
9c6c51
sets the domain state accordingly. This is sufficient when
9c6c51
pause-before-switchover is disabled, but once we enable it, the
9c6c51
migration status is still active when we get STOP from QEMU. Thus the
9c6c51
domain state set in the STOP handler has to be corrected once we are
9c6c51
notified that migration changed to postcopy-active.
9c6c51
9c6c51
This results in two SUSPENDED events being emitted by the source
9c6c51
libvirtd during post-copy migration. The first one with
9c6c51
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED detail, while the second one reports
9c6c51
the corrected VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY detail. This is
9c6c51
inevitable because we don't know whether migration will eventually
9c6c51
switch to post-copy at the time we emit the first event.
9c6c51
9c6c51
https://bugzilla.redhat.com/show_bug.cgi?id=1647365
9c6c51
9c6c51
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
9c6c51
Reviewed-by: Ján Tomko <jtomko@redhat.com>
9c6c51
(cherry picked from commit eca9d21e6cc8129ec4426fbf1ace30e215b9cfbc)
9c6c51
9c6c51
https://bugzilla.redhat.com/show_bug.cgi?id=1649169
9c6c51
https://bugzilla.redhat.com/show_bug.cgi?id=1654732
9c6c51
9c6c51
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
9c6c51
---
9c6c51
 src/qemu/qemu_process.c | 26 +++++++++++++++++++++++++-
9c6c51
 1 file changed, 25 insertions(+), 1 deletion(-)
9c6c51
9c6c51
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
9c6c51
index 4b99fbd835..2d2954ba18 100644
9c6c51
--- a/src/qemu/qemu_process.c
9c6c51
+++ b/src/qemu/qemu_process.c
9c6c51
@@ -1522,9 +1522,13 @@ static int
9c6c51
 qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
9c6c51
                                  virDomainObjPtr vm,
9c6c51
                                  int status,
9c6c51
-                                 void *opaque ATTRIBUTE_UNUSED)
9c6c51
+                                 void *opaque)
9c6c51
 {
9c6c51
     qemuDomainObjPrivatePtr priv;
9c6c51
+    virQEMUDriverPtr driver = opaque;
9c6c51
+    virObjectEventPtr event = NULL;
9c6c51
+    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
9c6c51
+    int reason;
9c6c51
 
9c6c51
     virObjectLock(vm);
9c6c51
 
9c6c51
@@ -1541,8 +1545,28 @@ qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
9c6c51
     priv->job.current->stats.mig.status = status;
9c6c51
     virDomainObjBroadcast(vm);
9c6c51
 
9c6c51
+    if (status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY &&
9c6c51
+        virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
9c6c51
+        reason == VIR_DOMAIN_PAUSED_MIGRATION) {
9c6c51
+        VIR_DEBUG("Correcting paused state reason for domain %s to %s",
9c6c51
+                  vm->def->name,
9c6c51
+                  virDomainPausedReasonTypeToString(VIR_DOMAIN_PAUSED_POSTCOPY));
9c6c51
+
9c6c51
+        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY);
9c6c51
+        event = virDomainEventLifecycleNewFromObj(vm,
9c6c51
+                                                  VIR_DOMAIN_EVENT_SUSPENDED,
9c6c51
+                                                  VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY);
9c6c51
+
9c6c51
+        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
9c6c51
+            VIR_WARN("Unable to save status on vm %s after state change",
9c6c51
+                     vm->def->name);
9c6c51
+        }
9c6c51
+    }
9c6c51
+
9c6c51
  cleanup:
9c6c51
     virObjectUnlock(vm);
9c6c51
+    virObjectEventStateQueue(driver->domainEventState, event);
9c6c51
+    virObjectUnref(cfg);
9c6c51
     return 0;
9c6c51
 }
9c6c51
 
9c6c51
-- 
9c6c51
2.19.2
9c6c51