From aa779e5b9449f52c9a7de83d63fd7f3e2cccbf7a Mon Sep 17 00:00:00 2001 Message-Id: From: Martin Kletzander Date: Fri, 7 Feb 2014 11:39:30 +0100 Subject: [PATCH] qemu: keep pre-migration domain state after failed migration Couple of codepaths shared the same code which can be moved out to a function and on one of such places, qemuMigrationConfirmPhase(), the domain was resumed even if it wasn't running before the migration started. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1057407 Signed-off-by: Martin Kletzander (cherry picked from commit 440a1aa508f7abec635a035158e9b37e179f2db2) Conflicts: src/qemu/qemu_migration.c -- 6ffce0f6 Signed-off-by: Jiri Denemark --- src/qemu/qemu_domain.h | 3 +- src/qemu/qemu_migration.c | 112 +++++++++++++++++++++++++--------------------- 2 files changed, 63 insertions(+), 52 deletions(-) diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 04f08a3..9735761 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -1,7 +1,7 @@ /* * qemu_domain.h: QEMU domain private state * - * Copyright (C) 2006-2013 Red Hat, Inc. + * Copyright (C) 2006-2014 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -161,6 +161,7 @@ struct _qemuDomainObjPrivate { char *origname; int nbdPort; /* Port used for migration with NBD */ unsigned short migrationPort; + int preMigrationState; virChrdevsPtr devs; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 8fd3c9e..03ae9e4 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1078,6 +1078,53 @@ error: return NULL; } +static void +qemuMigrationStoreDomainState(virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + priv->preMigrationState = virDomainObjGetState(vm, NULL); + + VIR_DEBUG("Storing pre-migration state=%d domain=%p", + priv->preMigrationState, vm); +} + +/* Returns true if the domain was resumed, false otherwise */ +static bool +qemuMigrationRestoreDomainState(virConnectPtr conn, virDomainObjPtr vm) +{ + virQEMUDriverPtr driver = conn->privateData; + qemuDomainObjPrivatePtr priv = vm->privateData; + int state = virDomainObjGetState(vm, NULL); + bool ret = false; + + VIR_DEBUG("driver=%p, vm=%p, pre-mig-state=%d, state=%d", + driver, vm, priv->preMigrationState, state); + + if (state == VIR_DOMAIN_PAUSED && + priv->preMigrationState == VIR_DOMAIN_RUNNING) { + /* This is basically the only restore possibility that's safe + * and we should attempt to do */ + + VIR_DEBUG("Restoring pre-migration state due to migration error"); + + /* we got here through some sort of failure; start the domain again */ + if (qemuProcessStartCPUs(driver, vm, conn, + VIR_DOMAIN_RUNNING_MIGRATION_CANCELED, + QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) { + /* Hm, we already know we are in error here. We don't want to + * overwrite the previous error, though, so we just throw something + * to the logs and hope for the best */ + VIR_ERROR(_("Failed to resume guest %s after failure"), vm->def->name); + goto cleanup; + } + ret = true; + } + + cleanup: + priv->preMigrationState = VIR_DOMAIN_NOSTATE; + return ret; +} + /** * qemuMigrationStartNBDServer: * @driver: qemu driver @@ -2079,6 +2126,8 @@ qemuMigrationBegin(virConnectPtr conn, asyncJob = QEMU_ASYNC_JOB_NONE; } + qemuMigrationStoreDomainState(vm); + if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("domain is not running")); @@ -2750,22 +2799,12 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver, /* cancel any outstanding NBD jobs */ qemuMigrationCancelDriveMirror(mig, driver, vm); - /* run 'cont' on the destination, which allows migration on qemu - * >= 0.10.6 to work properly. This isn't strictly necessary on - * older qemu's, but it also doesn't hurt anything there - */ - if (qemuProcessStartCPUs(driver, vm, conn, - VIR_DOMAIN_RUNNING_MIGRATED, - QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) { - if (virGetLastError() == NULL) - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("resume operation failed")); - goto cleanup; + if (qemuMigrationRestoreDomainState(conn, vm)) { + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_RESUMED, + VIR_DOMAIN_EVENT_RESUMED_MIGRATED); } - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_RESUMED, - VIR_DOMAIN_EVENT_RESUMED_MIGRATED); if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) { VIR_WARN("Failed to save status on vm %s", vm->def->name); goto cleanup; @@ -4063,7 +4102,6 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver, { virDomainEventPtr event = NULL; int ret = -1; - int resume = 0; virErrorPtr orig_err = NULL; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR); @@ -4083,7 +4121,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver, if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def)) goto endjob; - resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING; + qemuMigrationStoreDomainState(vm); if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) { ret = doPeer2PeerMigrate(driver, conn, vm, xmlin, @@ -4110,25 +4148,12 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver, VIR_DOMAIN_EVENT_STOPPED, VIR_DOMAIN_EVENT_STOPPED_MIGRATED); } - resume = 0; endjob: if (ret < 0) orig_err = virSaveLastError(); - if (resume && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) { - /* we got here through some sort of failure; start the domain again */ - if (qemuProcessStartCPUs(driver, vm, conn, - VIR_DOMAIN_RUNNING_MIGRATION_CANCELED, - QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) { - /* Hm, we already know we are in error here. We don't want to - * overwrite the previous error, though, so we just throw something - * to the logs and hope for the best - */ - VIR_ERROR(_("Failed to resume guest %s after failure"), - vm->def->name); - } - + if (qemuMigrationRestoreDomainState(conn, vm)) { event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_RESUMED, VIR_DOMAIN_EVENT_RESUMED_MIGRATED); @@ -4177,7 +4202,6 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver, { virDomainEventPtr event = NULL; int ret = -1; - bool resume; bool hasrefs; /* If we didn't start the job in the begin phase, start it now. */ @@ -4192,32 +4216,18 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver, virCloseCallbacksUnset(driver->closeCallbacks, vm, qemuMigrationCleanup); - resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING; ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen, cookieout, cookieoutlen, flags, resource, NULL, graphicsuri); - if (ret < 0 && resume && - virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) { - /* we got here through some sort of failure; start the domain again */ - if (qemuProcessStartCPUs(driver, vm, conn, - VIR_DOMAIN_RUNNING_MIGRATION_CANCELED, - QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) { - /* Hm, we already know we are in error here. We don't want to - * overwrite the previous error, though, so we just throw something - * to the logs and hope for the best - */ - VIR_ERROR(_("Failed to resume guest %s after failure"), - vm->def->name); + if (ret < 0) { + if (qemuMigrationRestoreDomainState(conn, vm)) { + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_RESUMED, + VIR_DOMAIN_EVENT_RESUMED_MIGRATED); } - - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_RESUMED, - VIR_DOMAIN_EVENT_RESUMED_MIGRATED); - } - - if (ret < 0) goto endjob; + } qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE); -- 1.8.5.4