a41c76
From 84c5cad5921e96c6106cfd217de2064b64e1464f Mon Sep 17 00:00:00 2001
a41c76
Message-Id: <84c5cad5921e96c6106cfd217de2064b64e1464f@dist-git>
a41c76
From: Michal Privoznik <mprivozn@redhat.com>
a41c76
Date: Thu, 16 Jan 2020 10:03:54 +0100
a41c76
Subject: [PATCH] qemu: Stop domain on failed restore
a41c76
MIME-Version: 1.0
a41c76
Content-Type: text/plain; charset=UTF-8
a41c76
Content-Transfer-Encoding: 8bit
a41c76
a41c76
When resuming a domain from a save file, we read the domain XML
a41c76
from the file, add it onto our internal list of domains, start
a41c76
the qemu process, let it load the incoming migration stream and
a41c76
resume its vCPUs afterwards. If anything goes wrong, the domain
a41c76
object is removed from the list of domains and an error is returned
a41c76
to the caller. However, the qemu process might be left behind -
a41c76
if resuming vCPUs fails (e.g. because qemu is unable to acquire
a41c76
a write lock on a disk) then due to a bug the qemu process is not
a41c76
killed but the domain object is removed from the list.
a41c76
a41c76
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1718707
a41c76
a41c76
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
a41c76
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
a41c76
(cherry picked from commit 4c581527d431939a63be70c201b4ddab703cddbe)
a41c76
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
a41c76
Message-Id: <4048f92488a8b8c31c7a17a14b579840a9492328.1579165329.git.mprivozn@redhat.com>
a41c76
Reviewed-by: Ján Tomko <jtomko@redhat.com>
a41c76
---
a41c76
 src/qemu/qemu_driver.c | 23 ++++++++++++-----------
a41c76
 1 file changed, 12 insertions(+), 11 deletions(-)
a41c76
a41c76
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
a41c76
index ce9b1772c1..217d873671 100644
a41c76
--- a/src/qemu/qemu_driver.c
a41c76
+++ b/src/qemu/qemu_driver.c
a41c76
@@ -6800,7 +6800,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
a41c76
 {
a41c76
     qemuDomainObjPrivatePtr priv = vm->privateData;
a41c76
     int ret = -1;
a41c76
-    bool restored = false;
a41c76
+    bool started = false;
a41c76
     virObjectEventPtr event;
a41c76
     VIR_AUTOCLOSE intermediatefd = -1;
a41c76
     g_autoptr(virCommand) cmd = NULL;
a41c76
@@ -6808,6 +6808,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
a41c76
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
a41c76
     virQEMUSaveHeaderPtr header = &data->header;
a41c76
     g_autoptr(qemuDomainSaveCookie) cookie = NULL;
a41c76
+    int rc = 0;
a41c76
 
a41c76
     if (virSaveCookieParseString(data->cookie, (virObjectPtr *)&cookie,
a41c76
                                  virDomainXMLOptionGetSaveCookie(driver->xmlopt)) < 0)
a41c76
@@ -6848,12 +6849,12 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
a41c76
                          VIR_NETDEV_VPORT_PROFILE_OP_RESTORE,
a41c76
                          VIR_QEMU_PROCESS_START_PAUSED |
a41c76
                          VIR_QEMU_PROCESS_START_GEN_VMID) == 0)
a41c76
-        restored = true;
a41c76
+        started = true;
a41c76
 
a41c76
     if (intermediatefd != -1) {
a41c76
         virErrorPtr orig_err = NULL;
a41c76
 
a41c76
-        if (!restored) {
a41c76
+        if (!started) {
a41c76
             /* if there was an error setting up qemu, the intermediate
a41c76
              * process will wait forever to write to stdout, so we
a41c76
              * must manually kill it and ignore any error related to
a41c76
@@ -6864,21 +6865,17 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
a41c76
             VIR_FORCE_CLOSE(*fd);
a41c76
         }
a41c76
 
a41c76
-        if (virCommandWait(cmd, NULL) < 0) {
a41c76
-            qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, 0);
a41c76
-            restored = false;
a41c76
-        }
a41c76
+        rc = virCommandWait(cmd, NULL);
a41c76
         VIR_DEBUG("Decompression binary stderr: %s", NULLSTR(errbuf));
a41c76
-
a41c76
         virErrorRestore(&orig_err);
a41c76
     }
a41c76
     if (VIR_CLOSE(*fd) < 0) {
a41c76
         virReportSystemError(errno, _("cannot close file: %s"), path);
a41c76
-        restored = false;
a41c76
+        rc = -1;
a41c76
     }
a41c76
 
a41c76
-    virDomainAuditStart(vm, "restored", restored);
a41c76
-    if (!restored)
a41c76
+    virDomainAuditStart(vm, "restored", started);
a41c76
+    if (!started || rc < 0)
a41c76
         goto cleanup;
a41c76
 
a41c76
     /* qemuProcessStart doesn't unset the qemu error reporting infrastructure
a41c76
@@ -6918,6 +6915,10 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
a41c76
     ret = 0;
a41c76
 
a41c76
  cleanup:
a41c76
+    if (ret < 0 && started) {
a41c76
+        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED,
a41c76
+                        asyncJob, VIR_QEMU_PROCESS_STOP_MIGRATED);
a41c76
+    }
a41c76
     if (qemuSecurityRestoreSavedStateLabel(driver, vm, path) < 0)
a41c76
         VIR_WARN("failed to restore save state label on %s", path);
a41c76
     return ret;
a41c76
-- 
a41c76
2.25.0
a41c76