|
|
0a7476 |
From 5d260e0603146e1bdb3246498126745ab1a4245a Mon Sep 17 00:00:00 2001
|
|
|
0a7476 |
Message-Id: <5d260e0603146e1bdb3246498126745ab1a4245a@dist-git>
|
|
|
35ebd4 |
From: Jiri Denemark <jdenemar@redhat.com>
|
|
|
35ebd4 |
Date: Thu, 15 Nov 2018 11:16:43 +0100
|
|
|
35ebd4 |
Subject: [PATCH] qemu: Fix post-copy migration on the source
|
|
|
35ebd4 |
MIME-Version: 1.0
|
|
|
35ebd4 |
Content-Type: text/plain; charset=UTF-8
|
|
|
35ebd4 |
Content-Transfer-Encoding: 8bit
|
|
|
35ebd4 |
|
|
|
35ebd4 |
Post-copy migration has been broken on the source since commit
|
|
|
35ebd4 |
v3.8.0-245-g32c29f10db which implemented support for
|
|
|
35ebd4 |
pause-before-switchover QEMU migration capability.
|
|
|
35ebd4 |
|
|
|
35ebd4 |
Even though the migration itself went well, the source did not really
|
|
|
35ebd4 |
know when it switched to the post-copy mode despite the messages logged
|
|
|
35ebd4 |
by the MIGRATION event handler. As a result, the events emitted by
|
|
|
35ebd4 |
source libvirtd were not accurate and statistics of the completed
|
|
|
35ebd4 |
migration would cover only the pre-copy part of migration. Moreover, if
|
|
|
35ebd4 |
migration failed during the post-copy phase for some reason, the source
|
|
|
35ebd4 |
libvirtd would just happily resume the domain, which could lead to disk
|
|
|
35ebd4 |
corruption.
|
|
|
35ebd4 |
|
|
|
35ebd4 |
With the pause-before-switchover capability enabled, the order of events
|
|
|
35ebd4 |
emitted by QEMU changed:
|
|
|
35ebd4 |
|
|
|
35ebd4 |
                    pause-before-switchover
|
|
|
35ebd4 |
           disabled                        enabled
|
|
|
35ebd4 |
    MIGRATION, postcopy-active      STOP
|
|
|
35ebd4 |
    STOP                            MIGRATION, pre-switchover
|
|
|
35ebd4 |
                                    MIGRATION, postcopy-active
|
|
|
35ebd4 |
|
|
|
35ebd4 |
The STOP event handler checks the migration status (postcopy-active) and
|
|
|
35ebd4 |
sets the domain state accordingly, which is sufficient when
|
|
|
35ebd4 |
pause-before-switchover is disabled, but once we enable it, the
|
|
|
35ebd4 |
migration status is still active when we get STOP from QEMU. Thus the
|
|
|
35ebd4 |
domain state set in the STOP handler has to be corrected once we are
|
|
|
35ebd4 |
notified that migration changed to postcopy-active.
|
|
|
35ebd4 |
|
|
|
35ebd4 |
This results in two SUSPENDED events being emitted by the source
|
|
|
35ebd4 |
libvirtd during post-copy migration. The first one with
|
|
|
35ebd4 |
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED detail, while the second one reports
|
|
|
35ebd4 |
the corrected VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY detail. This is
|
|
|
35ebd4 |
inevitable because we don't know whether migration will eventually
|
|
|
35ebd4 |
switch to post-copy at the time we emit the first event.
|
|
|
35ebd4 |
|
|
|
35ebd4 |
https://bugzilla.redhat.com/show_bug.cgi?id=1647365
|
|
|
35ebd4 |
|
|
|
35ebd4 |
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
|
|
|
35ebd4 |
Reviewed-by: Ján Tomko <jtomko@redhat.com>
|
|
|
35ebd4 |
(cherry picked from commit eca9d21e6cc8129ec4426fbf1ace30e215b9cfbc)
|
|
|
35ebd4 |
|
|
|
35ebd4 |
https://bugzilla.redhat.com/show_bug.cgi?id=1649169
|
|
|
35ebd4 |
https://bugzilla.redhat.com/show_bug.cgi?id=1654732
|
|
|
35ebd4 |
|
|
|
35ebd4 |
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
|
|
|
35ebd4 |
---
|
|
|
35ebd4 |
src/qemu/qemu_process.c | 26 +++++++++++++++++++++++++-
|
|
|
35ebd4 |
1 file changed, 25 insertions(+), 1 deletion(-)
|
|
|
35ebd4 |
|
|
|
35ebd4 |
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
|
|
|
35ebd4 |
index 9b5cb93325..485e455a44 100644
|
|
|
35ebd4 |
--- a/src/qemu/qemu_process.c
|
|
|
35ebd4 |
+++ b/src/qemu/qemu_process.c
|
|
|
35ebd4 |
@@ -1521,9 +1521,13 @@ static int
|
|
|
35ebd4 |
qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
|
|
|
35ebd4 |
virDomainObjPtr vm,
|
|
|
35ebd4 |
int status,
|
|
|
35ebd4 |
- void *opaque ATTRIBUTE_UNUSED)
|
|
|
35ebd4 |
+ void *opaque)
|
|
|
35ebd4 |
{
|
|
|
35ebd4 |
qemuDomainObjPrivatePtr priv;
|
|
|
35ebd4 |
+ virQEMUDriverPtr driver = opaque;
|
|
|
35ebd4 |
+ virObjectEventPtr event = NULL;
|
|
|
35ebd4 |
+ virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
|
|
|
35ebd4 |
+ int reason;
|
|
|
35ebd4 |
|
|
|
35ebd4 |
virObjectLock(vm);
|
|
|
35ebd4 |
|
|
|
35ebd4 |
@@ -1540,8 +1544,28 @@ qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
|
|
|
35ebd4 |
priv->job.current->stats.mig.status = status;
|
|
|
35ebd4 |
virDomainObjBroadcast(vm);
|
|
|
35ebd4 |
|
|
|
35ebd4 |
+ if (status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY &&
|
|
|
35ebd4 |
+ virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
|
|
|
35ebd4 |
+ reason == VIR_DOMAIN_PAUSED_MIGRATION) {
|
|
|
35ebd4 |
+ VIR_DEBUG("Correcting paused state reason for domain %s to %s",
|
|
|
35ebd4 |
+ vm->def->name,
|
|
|
35ebd4 |
+ virDomainPausedReasonTypeToString(VIR_DOMAIN_PAUSED_POSTCOPY));
|
|
|
35ebd4 |
+
|
|
|
35ebd4 |
+ virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY);
|
|
|
35ebd4 |
+ event = virDomainEventLifecycleNewFromObj(vm,
|
|
|
35ebd4 |
+ VIR_DOMAIN_EVENT_SUSPENDED,
|
|
|
35ebd4 |
+ VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY);
|
|
|
35ebd4 |
+
|
|
|
35ebd4 |
+ if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
|
|
|
35ebd4 |
+ VIR_WARN("Unable to save status on vm %s after state change",
|
|
|
35ebd4 |
+ vm->def->name);
|
|
|
35ebd4 |
+ }
|
|
|
35ebd4 |
+ }
|
|
|
35ebd4 |
+
|
|
|
35ebd4 |
cleanup:
|
|
|
35ebd4 |
virObjectUnlock(vm);
|
|
|
35ebd4 |
+ virObjectEventStateQueue(driver->domainEventState, event);
|
|
|
35ebd4 |
+ virObjectUnref(cfg);
|
|
|
35ebd4 |
return 0;
|
|
|
35ebd4 |
}
|
|
|
35ebd4 |
|
|
|
35ebd4 |
--
|
|
|
0a7476 |
2.21.0
|
|
|
35ebd4 |
|