fbe740
From 84c5cad5921e96c6106cfd217de2064b64e1464f Mon Sep 17 00:00:00 2001
fbe740
Message-Id: <84c5cad5921e96c6106cfd217de2064b64e1464f@dist-git>
fbe740
From: Michal Privoznik <mprivozn@redhat.com>
fbe740
Date: Thu, 16 Jan 2020 10:03:54 +0100
fbe740
Subject: [PATCH] qemu: Stop domain on failed restore
fbe740
MIME-Version: 1.0
fbe740
Content-Type: text/plain; charset=UTF-8
fbe740
Content-Transfer-Encoding: 8bit
fbe740
fbe740
When resuming a domain from a save file, we read the domain XML
fbe740
from the file, add it onto our internal list of domains, start
fbe740
the qemu process, let it load the incoming migration stream and
fbe740
resume its vCPUs afterwards. If anything goes wrong, the domain
fbe740
object is removed from the list of domains and an error is returned
fbe740
to the caller. However, the qemu process might be left behind -
fbe740
if resuming vCPUs fails (e.g. because qemu is unable to acquire
fbe740
a write lock on a disk) then due to a bug the qemu process is not
fbe740
killed but the domain object is removed from the list.
fbe740
fbe740
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1718707
fbe740
fbe740
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
fbe740
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
fbe740
(cherry picked from commit 4c581527d431939a63be70c201b4ddab703cddbe)
fbe740
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
fbe740
Message-Id: <4048f92488a8b8c31c7a17a14b579840a9492328.1579165329.git.mprivozn@redhat.com>
fbe740
Reviewed-by: Ján Tomko <jtomko@redhat.com>
fbe740
---
fbe740
 src/qemu/qemu_driver.c | 23 ++++++++++++-----------
fbe740
 1 file changed, 12 insertions(+), 11 deletions(-)
fbe740
fbe740
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
fbe740
index ce9b1772c1..217d873671 100644
fbe740
--- a/src/qemu/qemu_driver.c
fbe740
+++ b/src/qemu/qemu_driver.c
fbe740
@@ -6800,7 +6800,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
fbe740
 {
fbe740
     qemuDomainObjPrivatePtr priv = vm->privateData;
fbe740
     int ret = -1;
fbe740
-    bool restored = false;
fbe740
+    bool started = false;
fbe740
     virObjectEventPtr event;
fbe740
     VIR_AUTOCLOSE intermediatefd = -1;
fbe740
     g_autoptr(virCommand) cmd = NULL;
fbe740
@@ -6808,6 +6808,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
fbe740
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
fbe740
     virQEMUSaveHeaderPtr header = &data->header;
fbe740
     g_autoptr(qemuDomainSaveCookie) cookie = NULL;
fbe740
+    int rc = 0;
fbe740
 
fbe740
     if (virSaveCookieParseString(data->cookie, (virObjectPtr *)&cookie,
fbe740
                                  virDomainXMLOptionGetSaveCookie(driver->xmlopt)) < 0)
fbe740
@@ -6848,12 +6849,12 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
fbe740
                          VIR_NETDEV_VPORT_PROFILE_OP_RESTORE,
fbe740
                          VIR_QEMU_PROCESS_START_PAUSED |
fbe740
                          VIR_QEMU_PROCESS_START_GEN_VMID) == 0)
fbe740
-        restored = true;
fbe740
+        started = true;
fbe740
 
fbe740
     if (intermediatefd != -1) {
fbe740
         virErrorPtr orig_err = NULL;
fbe740
 
fbe740
-        if (!restored) {
fbe740
+        if (!started) {
fbe740
             /* if there was an error setting up qemu, the intermediate
fbe740
              * process will wait forever to write to stdout, so we
fbe740
              * must manually kill it and ignore any error related to
fbe740
@@ -6864,21 +6865,17 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
fbe740
             VIR_FORCE_CLOSE(*fd);
fbe740
         }
fbe740
 
fbe740
-        if (virCommandWait(cmd, NULL) < 0) {
fbe740
-            qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, 0);
fbe740
-            restored = false;
fbe740
-        }
fbe740
+        rc = virCommandWait(cmd, NULL);
fbe740
         VIR_DEBUG("Decompression binary stderr: %s", NULLSTR(errbuf));
fbe740
-
fbe740
         virErrorRestore(&orig_err);
fbe740
     }
fbe740
     if (VIR_CLOSE(*fd) < 0) {
fbe740
         virReportSystemError(errno, _("cannot close file: %s"), path);
fbe740
-        restored = false;
fbe740
+        rc = -1;
fbe740
     }
fbe740
 
fbe740
-    virDomainAuditStart(vm, "restored", restored);
fbe740
-    if (!restored)
fbe740
+    virDomainAuditStart(vm, "restored", started);
fbe740
+    if (!started || rc < 0)
fbe740
         goto cleanup;
fbe740
 
fbe740
     /* qemuProcessStart doesn't unset the qemu error reporting infrastructure
fbe740
@@ -6918,6 +6915,10 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
fbe740
     ret = 0;
fbe740
 
fbe740
  cleanup:
fbe740
+    if (ret < 0 && started) {
fbe740
+        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED,
fbe740
+                        asyncJob, VIR_QEMU_PROCESS_STOP_MIGRATED);
fbe740
+    }
fbe740
     if (qemuSecurityRestoreSavedStateLabel(driver, vm, path) < 0)
fbe740
         VIR_WARN("failed to restore save state label on %s", path);
fbe740
     return ret;
fbe740
-- 
fbe740
2.25.0
fbe740