|
|
af37ac |
From 35c8afb44903ae12239323873af0c0376082b02b Mon Sep 17 00:00:00 2001
|
|
|
af37ac |
Message-Id: <35c8afb44903ae12239323873af0c0376082b02b@dist-git>
|
|
|
af37ac |
From: Jiri Denemark <jdenemar@redhat.com>
|
|
|
af37ac |
Date: Thu, 15 Nov 2018 11:16:43 +0100
|
|
|
af37ac |
Subject: [PATCH] qemu: Fix post-copy migration on the source
|
|
|
af37ac |
MIME-Version: 1.0
|
|
|
af37ac |
Content-Type: text/plain; charset=UTF-8
|
|
|
af37ac |
Content-Transfer-Encoding: 8bit
|
|
|
af37ac |
|
|
|
af37ac |
Post-copy migration has been broken on the source since commit
|
|
|
af37ac |
v3.8.0-245-g32c29f10db which implemented support for
|
|
|
af37ac |
pause-before-switchover QEMU migration capability.
|
|
|
af37ac |
|
|
|
af37ac |
Even though the migration itself went well, the source did not really
|
|
|
af37ac |
know when it switched to the post-copy mode despite the messages logged
|
|
|
af37ac |
by MIGRATION event handler. As a result of this, the events emitted by
|
|
|
af37ac |
source libvirtd were not accurate and statistics of the completed
|
|
|
af37ac |
migration would cover only the pre-copy part of migration. Moreover, if
|
|
|
af37ac |
migration failed during the post-copy phase for some reason, the source
|
|
|
af37ac |
libvirtd would just happily resume the domain, which could lead to disk
|
|
|
af37ac |
corruption.
|
|
|
af37ac |
|
|
|
af37ac |
With the pause-before-switchover capability enabled, the order of events
|
|
|
af37ac |
emitted by QEMU changed:
|
|
|
af37ac |
|
|
|
af37ac |
                    pause-before-switchover
|
|
|
af37ac |
           disabled                        enabled
|
|
|
af37ac |
    MIGRATION, postcopy-active      STOP
|
|
|
af37ac |
    STOP                            MIGRATION, pre-switchover
|
|
|
af37ac |
                                    MIGRATION, postcopy-active
|
|
|
af37ac |
|
|
|
af37ac |
The STOP event handler checks the migration status (postcopy-active) and
|
|
|
af37ac |
sets the domain state accordingly, which is sufficient when
|
|
|
af37ac |
pause-before-switchover is disabled, but once we enable it, the
|
|
|
af37ac |
migration status is still active when we get STOP from QEMU. Thus the
|
|
|
af37ac |
domain state set in the STOP handler has to be corrected once we are
|
|
|
af37ac |
notified that migration changed to postcopy-active.
|
|
|
af37ac |
|
|
|
af37ac |
This results in two SUSPENDED events being emitted by the source
|
|
|
af37ac |
libvirtd during post-copy migration. The first one with
|
|
|
af37ac |
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED detail, while the second one reports
|
|
|
af37ac |
the corrected VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY detail. This is
|
|
|
af37ac |
inevitable because we don't know whether migration will eventually
|
|
|
af37ac |
switch to post-copy at the time we emit the first event.
|
|
|
af37ac |
|
|
|
af37ac |
https://bugzilla.redhat.com/show_bug.cgi?id=1647365
|
|
|
af37ac |
|
|
|
af37ac |
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
|
|
|
af37ac |
Reviewed-by: Ján Tomko <jtomko@redhat.com>
|
|
|
af37ac |
(cherry picked from commit eca9d21e6cc8129ec4426fbf1ace30e215b9cfbc)
|
|
|
af37ac |
|
|
|
af37ac |
https://bugzilla.redhat.com/show_bug.cgi?id=1649169
|
|
|
af37ac |
https://bugzilla.redhat.com/show_bug.cgi?id=1654732
|
|
|
af37ac |
|
|
|
af37ac |
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
|
|
|
af37ac |
---
|
|
|
af37ac |
src/qemu/qemu_process.c | 26 +++++++++++++++++++++++++-
|
|
|
af37ac |
1 file changed, 25 insertions(+), 1 deletion(-)
|
|
|
af37ac |
|
|
|
af37ac |
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
|
|
|
af37ac |
index 9b5cb93325..485e455a44 100644
|
|
|
af37ac |
--- a/src/qemu/qemu_process.c
|
|
|
af37ac |
+++ b/src/qemu/qemu_process.c
|
|
|
af37ac |
@@ -1521,9 +1521,13 @@ static int
|
|
|
af37ac |
qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
|
|
|
af37ac |
virDomainObjPtr vm,
|
|
|
af37ac |
int status,
|
|
|
af37ac |
- void *opaque ATTRIBUTE_UNUSED)
|
|
|
af37ac |
+ void *opaque)
|
|
|
af37ac |
{
|
|
|
af37ac |
qemuDomainObjPrivatePtr priv;
|
|
|
af37ac |
+ virQEMUDriverPtr driver = opaque;
|
|
|
af37ac |
+ virObjectEventPtr event = NULL;
|
|
|
af37ac |
+ virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
|
|
|
af37ac |
+ int reason;
|
|
|
af37ac |
|
|
|
af37ac |
virObjectLock(vm);
|
|
|
af37ac |
|
|
|
af37ac |
@@ -1540,8 +1544,28 @@ qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
|
|
|
af37ac |
priv->job.current->stats.mig.status = status;
|
|
|
af37ac |
virDomainObjBroadcast(vm);
|
|
|
af37ac |
|
|
|
af37ac |
+ if (status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY &&
|
|
|
af37ac |
+ virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
|
|
|
af37ac |
+ reason == VIR_DOMAIN_PAUSED_MIGRATION) {
|
|
|
af37ac |
+ VIR_DEBUG("Correcting paused state reason for domain %s to %s",
|
|
|
af37ac |
+ vm->def->name,
|
|
|
af37ac |
+ virDomainPausedReasonTypeToString(VIR_DOMAIN_PAUSED_POSTCOPY));
|
|
|
af37ac |
+
|
|
|
af37ac |
+ virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY);
|
|
|
af37ac |
+ event = virDomainEventLifecycleNewFromObj(vm,
|
|
|
af37ac |
+ VIR_DOMAIN_EVENT_SUSPENDED,
|
|
|
af37ac |
+ VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY);
|
|
|
af37ac |
+
|
|
|
af37ac |
+ if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
|
|
|
af37ac |
+ VIR_WARN("Unable to save status on vm %s after state change",
|
|
|
af37ac |
+ vm->def->name);
|
|
|
af37ac |
+ }
|
|
|
af37ac |
+ }
|
|
|
af37ac |
+
|
|
|
af37ac |
cleanup:
|
|
|
af37ac |
virObjectUnlock(vm);
|
|
|
af37ac |
+ virObjectEventStateQueue(driver->domainEventState, event);
|
|
|
af37ac |
+ virObjectUnref(cfg);
|
|
|
af37ac |
return 0;
|
|
|
af37ac |
}
|
|
|
af37ac |
|
|
|
af37ac |
--
|
|
|
af37ac |
2.20.1
|
|
|
af37ac |
|