af37ac
From 35c8afb44903ae12239323873af0c0376082b02b Mon Sep 17 00:00:00 2001
af37ac
Message-Id: <35c8afb44903ae12239323873af0c0376082b02b@dist-git>
af37ac
From: Jiri Denemark <jdenemar@redhat.com>
af37ac
Date: Thu, 15 Nov 2018 11:16:43 +0100
af37ac
Subject: [PATCH] qemu: Fix post-copy migration on the source
af37ac
MIME-Version: 1.0
af37ac
Content-Type: text/plain; charset=UTF-8
af37ac
Content-Transfer-Encoding: 8bit
af37ac
af37ac
Post-copy migration has been broken on the source since commit
af37ac
v3.8.0-245-g32c29f10db which implemented support for
af37ac
pause-before-switchover QEMU migration capability.
af37ac
af37ac
Even though the migration itself went well, the source did not really
af37ac
know when it switched to the post-copy mode despite the messages logged
af37ac
by MIGRATION event handler. As a result of this, the events emitted by
af37ac
source libvirtd were not accurate and statistics of the completed
af37ac
migration would cover only the pre-copy part of migration. Moreover, if
af37ac
migration failed during the post-copy phase for some reason, the source
af37ac
libvirtd would just happily resume the domain, which could lead to disk
af37ac
corruption.
af37ac
af37ac
With the pause-before-switchover capability enabled, the order of events
af37ac
emitted by QEMU changed:
af37ac
af37ac
                    pause-before-switchover
af37ac
           disabled                        enabled
af37ac
    MIGRATION, postcopy-active      STOP
af37ac
    STOP                            MIGRATION, pre-switchover
af37ac
                                    MIGRATION, postcopy-active
af37ac
af37ac
The STOP event handler checks the migration status (postcopy-active) and
af37ac
sets the domain state accordingly. This is sufficient when
af37ac
pause-before-switchover is disabled, but once we enable it, the
af37ac
migration status is still active when we get STOP from QEMU. Thus the
af37ac
domain state set in the STOP handler has to be corrected once we are
af37ac
notified that migration changed to postcopy-active.
af37ac
af37ac
This results in two SUSPENDED events being emitted by the source
af37ac
libvirtd during post-copy migration. The first one with
af37ac
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED detail, while the second one reports
af37ac
the corrected VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY detail. This is
af37ac
inevitable because we don't know whether migration will eventually
af37ac
switch to post-copy at the time we emit the first event.
af37ac
af37ac
https://bugzilla.redhat.com/show_bug.cgi?id=1647365
af37ac
af37ac
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
af37ac
Reviewed-by: Ján Tomko <jtomko@redhat.com>
af37ac
(cherry picked from commit eca9d21e6cc8129ec4426fbf1ace30e215b9cfbc)
af37ac
af37ac
https://bugzilla.redhat.com/show_bug.cgi?id=1649169
af37ac
https://bugzilla.redhat.com/show_bug.cgi?id=1654732
af37ac
af37ac
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
af37ac
---
af37ac
 src/qemu/qemu_process.c | 26 +++++++++++++++++++++++++-
af37ac
 1 file changed, 25 insertions(+), 1 deletion(-)
af37ac
af37ac
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
af37ac
index 9b5cb93325..485e455a44 100644
af37ac
--- a/src/qemu/qemu_process.c
af37ac
+++ b/src/qemu/qemu_process.c
af37ac
@@ -1521,9 +1521,13 @@ static int
af37ac
 qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
af37ac
                                  virDomainObjPtr vm,
af37ac
                                  int status,
af37ac
-                                 void *opaque ATTRIBUTE_UNUSED)
af37ac
+                                 void *opaque)
af37ac
 {
af37ac
     qemuDomainObjPrivatePtr priv;
af37ac
+    virQEMUDriverPtr driver = opaque;
af37ac
+    virObjectEventPtr event = NULL;
af37ac
+    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
af37ac
+    int reason;
af37ac
 
af37ac
     virObjectLock(vm);
af37ac
 
af37ac
@@ -1540,8 +1544,28 @@ qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
af37ac
     priv->job.current->stats.mig.status = status;
af37ac
     virDomainObjBroadcast(vm);
af37ac
 
af37ac
+    if (status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY &&
af37ac
+        virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
af37ac
+        reason == VIR_DOMAIN_PAUSED_MIGRATION) {
af37ac
+        VIR_DEBUG("Correcting paused state reason for domain %s to %s",
af37ac
+                  vm->def->name,
af37ac
+                  virDomainPausedReasonTypeToString(VIR_DOMAIN_PAUSED_POSTCOPY));
af37ac
+
af37ac
+        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY);
af37ac
+        event = virDomainEventLifecycleNewFromObj(vm,
af37ac
+                                                  VIR_DOMAIN_EVENT_SUSPENDED,
af37ac
+                                                  VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY);
af37ac
+
af37ac
+        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
af37ac
+            VIR_WARN("Unable to save status on vm %s after state change",
af37ac
+                     vm->def->name);
af37ac
+        }
af37ac
+    }
af37ac
+
af37ac
  cleanup:
af37ac
     virObjectUnlock(vm);
af37ac
+    virObjectEventStateQueue(driver->domainEventState, event);
af37ac
+    virObjectUnref(cfg);
af37ac
     return 0;
af37ac
 }
af37ac
 
af37ac
-- 
af37ac
2.20.1
af37ac