35ebd4
From 35c8afb44903ae12239323873af0c0376082b02b Mon Sep 17 00:00:00 2001
35ebd4
Message-Id: <35c8afb44903ae12239323873af0c0376082b02b@dist-git>
35ebd4
From: Jiri Denemark <jdenemar@redhat.com>
35ebd4
Date: Thu, 15 Nov 2018 11:16:43 +0100
35ebd4
Subject: [PATCH] qemu: Fix post-copy migration on the source
35ebd4
MIME-Version: 1.0
35ebd4
Content-Type: text/plain; charset=UTF-8
35ebd4
Content-Transfer-Encoding: 8bit
35ebd4
35ebd4
Post-copy migration has been broken on the source since commit
35ebd4
v3.8.0-245-g32c29f10db which implemented support for
35ebd4
pause-before-switchover QEMU migration capability.
35ebd4
35ebd4
Even though the migration itself went well, the source did not really
35ebd4
know when it switched to the post-copy mode despite the messages logged
35ebd4
by MIGRATION event handler. As a result of this, the events emitted by
35ebd4
source libvirtd were not accurate and statistics of the completed
35ebd4
migration would cover only the pre-copy part of migration. Moreover, if
35ebd4
migration failed during the post-copy phase for some reason, the source
35ebd4
libvirtd would just happily resume the domain, which could lead to disk
35ebd4
corruption.
35ebd4
35ebd4
With the pause-before-switchover capability enabled, the order of events
35ebd4
emitted by QEMU changed:
35ebd4
35ebd4
                    pause-before-switchover
35ebd4
           disabled                        enabled
35ebd4
    MIGRATION, postcopy-active      STOP
35ebd4
    STOP                            MIGRATION, pre-switchover
35ebd4
                                    MIGRATION, postcopy-active
35ebd4
35ebd4
The STOP event handler checks the migration status (postcopy-active) and
35ebd4
sets the domain state accordingly. This is sufficient when
35ebd4
pause-before-switchover is disabled, but once we enable it, the
35ebd4
migration status is still active when we get STOP from QEMU. Thus the
35ebd4
domain state set in the STOP handler has to be corrected once we are
35ebd4
notified that migration changed to postcopy-active.
35ebd4
35ebd4
This results in two SUSPENDED events being emitted by the source
35ebd4
libvirtd during post-copy migration. The first one with
35ebd4
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED detail, while the second one reports
35ebd4
the corrected VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY detail. This is
35ebd4
inevitable because we don't know whether migration will eventually
35ebd4
switch to post-copy at the time we emit the first event.
35ebd4
35ebd4
https://bugzilla.redhat.com/show_bug.cgi?id=1647365
35ebd4
35ebd4
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
35ebd4
Reviewed-by: Ján Tomko <jtomko@redhat.com>
35ebd4
(cherry picked from commit eca9d21e6cc8129ec4426fbf1ace30e215b9cfbc)
35ebd4
35ebd4
https://bugzilla.redhat.com/show_bug.cgi?id=1649169
35ebd4
https://bugzilla.redhat.com/show_bug.cgi?id=1654732
35ebd4
35ebd4
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
35ebd4
---
35ebd4
 src/qemu/qemu_process.c | 26 +++++++++++++++++++++++++-
35ebd4
 1 file changed, 25 insertions(+), 1 deletion(-)
35ebd4
35ebd4
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
35ebd4
index 9b5cb93325..485e455a44 100644
35ebd4
--- a/src/qemu/qemu_process.c
35ebd4
+++ b/src/qemu/qemu_process.c
35ebd4
@@ -1521,9 +1521,13 @@ static int
35ebd4
 qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
35ebd4
                                  virDomainObjPtr vm,
35ebd4
                                  int status,
35ebd4
-                                 void *opaque ATTRIBUTE_UNUSED)
35ebd4
+                                 void *opaque)
35ebd4
 {
35ebd4
     qemuDomainObjPrivatePtr priv;
35ebd4
+    virQEMUDriverPtr driver = opaque;
35ebd4
+    virObjectEventPtr event = NULL;
35ebd4
+    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
35ebd4
+    int reason;
35ebd4
 
35ebd4
     virObjectLock(vm);
35ebd4
 
35ebd4
@@ -1540,8 +1544,28 @@ qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
35ebd4
     priv->job.current->stats.mig.status = status;
35ebd4
     virDomainObjBroadcast(vm);
35ebd4
 
35ebd4
+    if (status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY &&
35ebd4
+        virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
35ebd4
+        reason == VIR_DOMAIN_PAUSED_MIGRATION) {
35ebd4
+        VIR_DEBUG("Correcting paused state reason for domain %s to %s",
35ebd4
+                  vm->def->name,
35ebd4
+                  virDomainPausedReasonTypeToString(VIR_DOMAIN_PAUSED_POSTCOPY));
35ebd4
+
35ebd4
+        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY);
35ebd4
+        event = virDomainEventLifecycleNewFromObj(vm,
35ebd4
+                                                  VIR_DOMAIN_EVENT_SUSPENDED,
35ebd4
+                                                  VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY);
35ebd4
+
35ebd4
+        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
35ebd4
+            VIR_WARN("Unable to save status on vm %s after state change",
35ebd4
+                     vm->def->name);
35ebd4
+        }
35ebd4
+    }
35ebd4
+
35ebd4
  cleanup:
35ebd4
     virObjectUnlock(vm);
35ebd4
+    virObjectEventStateQueue(driver->domainEventState, event);
35ebd4
+    virObjectUnref(cfg);
35ebd4
     return 0;
35ebd4
 }
35ebd4
 
35ebd4
-- 
35ebd4
2.20.1
35ebd4