|
|
c401cc |
From aa779e5b9449f52c9a7de83d63fd7f3e2cccbf7a Mon Sep 17 00:00:00 2001
|
|
|
c401cc |
Message-Id: <aa779e5b9449f52c9a7de83d63fd7f3e2cccbf7a@dist-git>
|
|
|
c401cc |
From: Martin Kletzander <mkletzan@redhat.com>
|
|
|
c401cc |
Date: Fri, 7 Feb 2014 11:39:30 +0100
|
|
|
c401cc |
Subject: [PATCH] qemu: keep pre-migration domain state after failed migration
|
|
|
c401cc |
|
|
|
c401cc |
A couple of code paths shared the same code, which can be moved out to a
|
|
|
c401cc |
function, and in one such place, qemuMigrationConfirmPhase(), the
|
|
|
c401cc |
domain was resumed even if it wasn't running before the migration
|
|
|
c401cc |
started.
|
|
|
c401cc |
|
|
|
c401cc |
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1057407
|
|
|
c401cc |
|
|
|
c401cc |
Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
|
|
|
c401cc |
(cherry picked from commit 440a1aa508f7abec635a035158e9b37e179f2db2)
|
|
|
c401cc |
|
|
|
c401cc |
Conflicts:
|
|
|
c401cc |
src/qemu/qemu_migration.c -- 6ffce0f6
|
|
|
c401cc |
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
|
|
|
c401cc |
---
|
|
|
c401cc |
src/qemu/qemu_domain.h | 3 +-
|
|
|
c401cc |
src/qemu/qemu_migration.c | 112 +++++++++++++++++++++++++---------------------
|
|
|
c401cc |
2 files changed, 63 insertions(+), 52 deletions(-)
|
|
|
c401cc |
|
|
|
c401cc |
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
|
|
|
c401cc |
index 04f08a3..9735761 100644
|
|
|
c401cc |
--- a/src/qemu/qemu_domain.h
|
|
|
c401cc |
+++ b/src/qemu/qemu_domain.h
|
|
|
c401cc |
@@ -1,7 +1,7 @@
|
|
|
c401cc |
/*
|
|
|
c401cc |
* qemu_domain.h: QEMU domain private state
|
|
|
c401cc |
*
|
|
|
c401cc |
- * Copyright (C) 2006-2013 Red Hat, Inc.
|
|
|
c401cc |
+ * Copyright (C) 2006-2014 Red Hat, Inc.
|
|
|
c401cc |
* Copyright (C) 2006 Daniel P. Berrange
|
|
|
c401cc |
*
|
|
|
c401cc |
* This library is free software; you can redistribute it and/or
|
|
|
c401cc |
@@ -161,6 +161,7 @@ struct _qemuDomainObjPrivate {
|
|
|
c401cc |
char *origname;
|
|
|
c401cc |
int nbdPort; /* Port used for migration with NBD */
|
|
|
c401cc |
unsigned short migrationPort;
|
|
|
c401cc |
+ int preMigrationState;
|
|
|
c401cc |
|
|
|
c401cc |
virChrdevsPtr devs;
|
|
|
c401cc |
|
|
|
c401cc |
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
|
|
|
c401cc |
index 8fd3c9e..03ae9e4 100644
|
|
|
c401cc |
--- a/src/qemu/qemu_migration.c
|
|
|
c401cc |
+++ b/src/qemu/qemu_migration.c
|
|
|
c401cc |
@@ -1078,6 +1078,53 @@ error:
|
|
|
c401cc |
return NULL;
|
|
|
c401cc |
}
|
|
|
c401cc |
|
|
|
c401cc |
+static void
|
|
|
c401cc |
+qemuMigrationStoreDomainState(virDomainObjPtr vm)
|
|
|
c401cc |
+{
|
|
|
c401cc |
+ qemuDomainObjPrivatePtr priv = vm->privateData;
|
|
|
c401cc |
+ priv->preMigrationState = virDomainObjGetState(vm, NULL);
|
|
|
c401cc |
+
|
|
|
c401cc |
+ VIR_DEBUG("Storing pre-migration state=%d domain=%p",
|
|
|
c401cc |
+ priv->preMigrationState, vm);
|
|
|
c401cc |
+}
|
|
|
c401cc |
+
|
|
|
c401cc |
+/* Returns true if the domain was resumed, false otherwise */
|
|
|
c401cc |
+static bool
|
|
|
c401cc |
+qemuMigrationRestoreDomainState(virConnectPtr conn, virDomainObjPtr vm)
|
|
|
c401cc |
+{
|
|
|
c401cc |
+ virQEMUDriverPtr driver = conn->privateData;
|
|
|
c401cc |
+ qemuDomainObjPrivatePtr priv = vm->privateData;
|
|
|
c401cc |
+ int state = virDomainObjGetState(vm, NULL);
|
|
|
c401cc |
+ bool ret = false;
|
|
|
c401cc |
+
|
|
|
c401cc |
+ VIR_DEBUG("driver=%p, vm=%p, pre-mig-state=%d, state=%d",
|
|
|
c401cc |
+ driver, vm, priv->preMigrationState, state);
|
|
|
c401cc |
+
|
|
|
c401cc |
+ if (state == VIR_DOMAIN_PAUSED &&
|
|
|
c401cc |
+ priv->preMigrationState == VIR_DOMAIN_RUNNING) {
|
|
|
c401cc |
+ /* This is basically the only restore possibility that's safe
|
|
|
c401cc |
+ * and we should attempt to do */
|
|
|
c401cc |
+
|
|
|
c401cc |
+ VIR_DEBUG("Restoring pre-migration state due to migration error");
|
|
|
c401cc |
+
|
|
|
c401cc |
+ /* we got here through some sort of failure; start the domain again */
|
|
|
c401cc |
+ if (qemuProcessStartCPUs(driver, vm, conn,
|
|
|
c401cc |
+ VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
|
|
|
c401cc |
+ QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
|
|
|
c401cc |
+ /* Hm, we already know we are in error here. We don't want to
|
|
|
c401cc |
+ * overwrite the previous error, though, so we just throw something
|
|
|
c401cc |
+ * to the logs and hope for the best */
|
|
|
c401cc |
+ VIR_ERROR(_("Failed to resume guest %s after failure"), vm->def->name);
|
|
|
c401cc |
+ goto cleanup;
|
|
|
c401cc |
+ }
|
|
|
c401cc |
+ ret = true;
|
|
|
c401cc |
+ }
|
|
|
c401cc |
+
|
|
|
c401cc |
+ cleanup:
|
|
|
c401cc |
+ priv->preMigrationState = VIR_DOMAIN_NOSTATE;
|
|
|
c401cc |
+ return ret;
|
|
|
c401cc |
+}
|
|
|
c401cc |
+
|
|
|
c401cc |
/**
|
|
|
c401cc |
* qemuMigrationStartNBDServer:
|
|
|
c401cc |
* @driver: qemu driver
|
|
|
c401cc |
@@ -2079,6 +2126,8 @@ qemuMigrationBegin(virConnectPtr conn,
|
|
|
c401cc |
asyncJob = QEMU_ASYNC_JOB_NONE;
|
|
|
c401cc |
}
|
|
|
c401cc |
|
|
|
c401cc |
+ qemuMigrationStoreDomainState(vm);
|
|
|
c401cc |
+
|
|
|
c401cc |
if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) {
|
|
|
c401cc |
virReportError(VIR_ERR_OPERATION_INVALID,
|
|
|
c401cc |
"%s", _("domain is not running"));
|
|
|
c401cc |
@@ -2750,22 +2799,12 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
|
|
|
c401cc |
/* cancel any outstanding NBD jobs */
|
|
|
c401cc |
qemuMigrationCancelDriveMirror(mig, driver, vm);
|
|
|
c401cc |
|
|
|
c401cc |
- /* run 'cont' on the destination, which allows migration on qemu
|
|
|
c401cc |
- * >= 0.10.6 to work properly. This isn't strictly necessary on
|
|
|
c401cc |
- * older qemu's, but it also doesn't hurt anything there
|
|
|
c401cc |
- */
|
|
|
c401cc |
- if (qemuProcessStartCPUs(driver, vm, conn,
|
|
|
c401cc |
- VIR_DOMAIN_RUNNING_MIGRATED,
|
|
|
c401cc |
- QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
|
|
|
c401cc |
- if (virGetLastError() == NULL)
|
|
|
c401cc |
- virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
c401cc |
- "%s", _("resume operation failed"));
|
|
|
c401cc |
- goto cleanup;
|
|
|
c401cc |
+ if (qemuMigrationRestoreDomainState(conn, vm)) {
|
|
|
c401cc |
+ event = virDomainEventNewFromObj(vm,
|
|
|
c401cc |
+ VIR_DOMAIN_EVENT_RESUMED,
|
|
|
c401cc |
+ VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
|
|
|
c401cc |
}
|
|
|
c401cc |
|
|
|
c401cc |
- event = virDomainEventNewFromObj(vm,
|
|
|
c401cc |
- VIR_DOMAIN_EVENT_RESUMED,
|
|
|
c401cc |
- VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
|
|
|
c401cc |
if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) {
|
|
|
c401cc |
VIR_WARN("Failed to save status on vm %s", vm->def->name);
|
|
|
c401cc |
goto cleanup;
|
|
|
c401cc |
@@ -4063,7 +4102,6 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
|
|
|
c401cc |
{
|
|
|
c401cc |
virDomainEventPtr event = NULL;
|
|
|
c401cc |
int ret = -1;
|
|
|
c401cc |
- int resume = 0;
|
|
|
c401cc |
virErrorPtr orig_err = NULL;
|
|
|
c401cc |
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
|
|
|
c401cc |
bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
|
|
|
c401cc |
@@ -4083,7 +4121,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
|
|
|
c401cc |
if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
|
|
|
c401cc |
goto endjob;
|
|
|
c401cc |
|
|
|
c401cc |
- resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
|
|
|
c401cc |
+ qemuMigrationStoreDomainState(vm);
|
|
|
c401cc |
|
|
|
c401cc |
if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) {
|
|
|
c401cc |
ret = doPeer2PeerMigrate(driver, conn, vm, xmlin,
|
|
|
c401cc |
@@ -4110,25 +4148,12 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
|
|
|
c401cc |
VIR_DOMAIN_EVENT_STOPPED,
|
|
|
c401cc |
VIR_DOMAIN_EVENT_STOPPED_MIGRATED);
|
|
|
c401cc |
}
|
|
|
c401cc |
- resume = 0;
|
|
|
c401cc |
|
|
|
c401cc |
endjob:
|
|
|
c401cc |
if (ret < 0)
|
|
|
c401cc |
orig_err = virSaveLastError();
|
|
|
c401cc |
|
|
|
c401cc |
- if (resume && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
|
|
|
c401cc |
- /* we got here through some sort of failure; start the domain again */
|
|
|
c401cc |
- if (qemuProcessStartCPUs(driver, vm, conn,
|
|
|
c401cc |
- VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
|
|
|
c401cc |
- QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
|
|
|
c401cc |
- /* Hm, we already know we are in error here. We don't want to
|
|
|
c401cc |
- * overwrite the previous error, though, so we just throw something
|
|
|
c401cc |
- * to the logs and hope for the best
|
|
|
c401cc |
- */
|
|
|
c401cc |
- VIR_ERROR(_("Failed to resume guest %s after failure"),
|
|
|
c401cc |
- vm->def->name);
|
|
|
c401cc |
- }
|
|
|
c401cc |
-
|
|
|
c401cc |
+ if (qemuMigrationRestoreDomainState(conn, vm)) {
|
|
|
c401cc |
event = virDomainEventNewFromObj(vm,
|
|
|
c401cc |
VIR_DOMAIN_EVENT_RESUMED,
|
|
|
c401cc |
VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
|
|
|
c401cc |
@@ -4177,7 +4202,6 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
|
|
|
c401cc |
{
|
|
|
c401cc |
virDomainEventPtr event = NULL;
|
|
|
c401cc |
int ret = -1;
|
|
|
c401cc |
- bool resume;
|
|
|
c401cc |
bool hasrefs;
|
|
|
c401cc |
|
|
|
c401cc |
/* If we didn't start the job in the begin phase, start it now. */
|
|
|
c401cc |
@@ -4192,32 +4216,18 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
|
|
|
c401cc |
virCloseCallbacksUnset(driver->closeCallbacks, vm,
|
|
|
c401cc |
qemuMigrationCleanup);
|
|
|
c401cc |
|
|
|
c401cc |
- resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
|
|
|
c401cc |
ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen,
|
|
|
c401cc |
cookieout, cookieoutlen,
|
|
|
c401cc |
flags, resource, NULL, graphicsuri);
|
|
|
c401cc |
|
|
|
c401cc |
- if (ret < 0 && resume &&
|
|
|
c401cc |
- virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
|
|
|
c401cc |
- /* we got here through some sort of failure; start the domain again */
|
|
|
c401cc |
- if (qemuProcessStartCPUs(driver, vm, conn,
|
|
|
c401cc |
- VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
|
|
|
c401cc |
- QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
|
|
|
c401cc |
- /* Hm, we already know we are in error here. We don't want to
|
|
|
c401cc |
- * overwrite the previous error, though, so we just throw something
|
|
|
c401cc |
- * to the logs and hope for the best
|
|
|
c401cc |
- */
|
|
|
c401cc |
- VIR_ERROR(_("Failed to resume guest %s after failure"),
|
|
|
c401cc |
- vm->def->name);
|
|
|
c401cc |
+ if (ret < 0) {
|
|
|
c401cc |
+ if (qemuMigrationRestoreDomainState(conn, vm)) {
|
|
|
c401cc |
+ event = virDomainEventNewFromObj(vm,
|
|
|
c401cc |
+ VIR_DOMAIN_EVENT_RESUMED,
|
|
|
c401cc |
+ VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
|
|
|
c401cc |
}
|
|
|
c401cc |
-
|
|
|
c401cc |
- event = virDomainEventNewFromObj(vm,
|
|
|
c401cc |
- VIR_DOMAIN_EVENT_RESUMED,
|
|
|
c401cc |
- VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
|
|
|
c401cc |
- }
|
|
|
c401cc |
-
|
|
|
c401cc |
- if (ret < 0)
|
|
|
c401cc |
goto endjob;
|
|
|
c401cc |
+ }
|
|
|
c401cc |
|
|
|
c401cc |
qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE);
|
|
|
c401cc |
|
|
|
c401cc |
--
|
|
|
c401cc |
1.8.5.4
|
|
|
c401cc |
|