99cbc7
From 5d260e0603146e1bdb3246498126745ab1a4245a Mon Sep 17 00:00:00 2001
99cbc7
Message-Id: <5d260e0603146e1bdb3246498126745ab1a4245a@dist-git>
99cbc7
From: Jiri Denemark <jdenemar@redhat.com>
99cbc7
Date: Thu, 15 Nov 2018 11:16:43 +0100
99cbc7
Subject: [PATCH] qemu: Fix post-copy migration on the source
99cbc7
MIME-Version: 1.0
99cbc7
Content-Type: text/plain; charset=UTF-8
99cbc7
Content-Transfer-Encoding: 8bit
99cbc7
99cbc7
Post-copy migration has been broken on the source since commit
99cbc7
v3.8.0-245-g32c29f10db which implemented support for
99cbc7
pause-before-switchover QEMU migration capability.
99cbc7
99cbc7
Even though the migration itself went well, the source did not really
99cbc7
know when it switched to the post-copy mode despite the messages logged
99cbc7
by MIGRATION event handler. As a result of this, the events emitted by
99cbc7
source libvirtd were not accurate and statistics of the completed
99cbc7
migration would cover only the pre-copy part of migration. Moreover, if
99cbc7
migration failed during the post-copy phase for some reason, the source
99cbc7
libvirtd would just happily resume the domain, which could lead to disk
99cbc7
corruption.
99cbc7
99cbc7
With the pause-before-switchover capability enabled, the order of events
99cbc7
emitted by QEMU changed:
99cbc7
99cbc7
                    pause-before-switchover
99cbc7
           disabled                        enabled
99cbc7
    MIGRATION, postcopy-active      STOP
99cbc7
    STOP                            MIGRATION, pre-switchover
99cbc7
                                    MIGRATION, postcopy-active
99cbc7
99cbc7
The STOP event handler checks the migration status (postcopy-active) and
99cbc7
sets the domain state accordingly. This is sufficient when
99cbc7
pause-before-switchover is disabled, but once we enable it, the
99cbc7
migration status is still active when we get STOP from QEMU. Thus the
99cbc7
domain state set in the STOP handler has to be corrected once we are
99cbc7
notified that migration changed to postcopy-active.
99cbc7
99cbc7
This results in two SUSPENDED events being emitted by the source
99cbc7
libvirtd during post-copy migration. The first one with
99cbc7
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED detail, while the second one reports
99cbc7
the corrected VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY detail. This is
99cbc7
inevitable because we don't know whether migration will eventually
99cbc7
switch to post-copy at the time we emit the first event.
99cbc7
99cbc7
https://bugzilla.redhat.com/show_bug.cgi?id=1647365
99cbc7
99cbc7
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
99cbc7
Reviewed-by: Ján Tomko <jtomko@redhat.com>
99cbc7
(cherry picked from commit eca9d21e6cc8129ec4426fbf1ace30e215b9cfbc)
99cbc7
99cbc7
https://bugzilla.redhat.com/show_bug.cgi?id=1649169
99cbc7
https://bugzilla.redhat.com/show_bug.cgi?id=1654732
99cbc7
99cbc7
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
99cbc7
---
99cbc7
 src/qemu/qemu_process.c | 26 +++++++++++++++++++++++++-
99cbc7
 1 file changed, 25 insertions(+), 1 deletion(-)
99cbc7
99cbc7
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
99cbc7
index 9b5cb93325..485e455a44 100644
99cbc7
--- a/src/qemu/qemu_process.c
99cbc7
+++ b/src/qemu/qemu_process.c
99cbc7
@@ -1521,9 +1521,13 @@ static int
99cbc7
 qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
99cbc7
                                  virDomainObjPtr vm,
99cbc7
                                  int status,
99cbc7
-                                 void *opaque ATTRIBUTE_UNUSED)
99cbc7
+                                 void *opaque)
99cbc7
 {
99cbc7
     qemuDomainObjPrivatePtr priv;
99cbc7
+    virQEMUDriverPtr driver = opaque;
99cbc7
+    virObjectEventPtr event = NULL;
99cbc7
+    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
99cbc7
+    int reason;
99cbc7
 
99cbc7
     virObjectLock(vm);
99cbc7
 
99cbc7
@@ -1540,8 +1544,28 @@ qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
99cbc7
     priv->job.current->stats.mig.status = status;
99cbc7
     virDomainObjBroadcast(vm);
99cbc7
 
99cbc7
+    if (status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY &&
99cbc7
+        virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
99cbc7
+        reason == VIR_DOMAIN_PAUSED_MIGRATION) {
99cbc7
+        VIR_DEBUG("Correcting paused state reason for domain %s to %s",
99cbc7
+                  vm->def->name,
99cbc7
+                  virDomainPausedReasonTypeToString(VIR_DOMAIN_PAUSED_POSTCOPY));
99cbc7
+
99cbc7
+        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY);
99cbc7
+        event = virDomainEventLifecycleNewFromObj(vm,
99cbc7
+                                                  VIR_DOMAIN_EVENT_SUSPENDED,
99cbc7
+                                                  VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY);
99cbc7
+
99cbc7
+        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
99cbc7
+            VIR_WARN("Unable to save status on vm %s after state change",
99cbc7
+                     vm->def->name);
99cbc7
+        }
99cbc7
+    }
99cbc7
+
99cbc7
  cleanup:
99cbc7
     virObjectUnlock(vm);
99cbc7
+    virObjectEventStateQueue(driver->domainEventState, event);
99cbc7
+    virObjectUnref(cfg);
99cbc7
     return 0;
99cbc7
 }
99cbc7
 
99cbc7
-- 
99cbc7
2.21.0
99cbc7