d759b5
From 3e06f98a83d5f23c345a71e48115659a83f673b5 Mon Sep 17 00:00:00 2001
d759b5
Message-Id: <3e06f98a83d5f23c345a71e48115659a83f673b5@dist-git>
d759b5
From: Christian Ehrhardt <christian.ehrhardt@canonical.com>
d759b5
Date: Wed, 4 Dec 2019 16:18:13 +0100
d759b5
Subject: [PATCH] process: wait longer on kill per assigned Hostdev
d759b5
MIME-Version: 1.0
d759b5
Content-Type: text/plain; charset=UTF-8
d759b5
Content-Transfer-Encoding: 8bit
d759b5
d759b5
It was found that in cases with host devices virProcessKillPainfully
d759b5
might be able to send signal zero to the target PID for quite a while
d759b5
with the process already being gone from /proc/<PID>.
d759b5
d759b5
That is due to cleanup and reset of devices which might include a
d759b5
secondary bus reset that on top of the actions taken has a 1s delay
d759b5
to let the bus settle. Due to that guests with plenty of Host devices
d759b5
could easily exceed the default timeouts.
d759b5
d759b5
To solve that, this adds an extra delay of 2s per hostdev that is associated
d759b5
to a VM.
d759b5
d759b5
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
d759b5
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
d759b5
(cherry picked from commit be2ca0444728edd12a000653d3693d68a5c9102f)
d759b5
d759b5
https://bugzilla.redhat.com/show_bug.cgi?id=1771204
d759b5
d759b5
Signed-off-by: Ján Tomko <jtomko@redhat.com>
d759b5
Message-Id: <6d18f7131fe022cb2ee4613ad4c58faefa9d574c.1575472548.git.jtomko@redhat.com>
d759b5
Reviewed-by: Andrea Bolognani <abologna@redhat.com>
d759b5
---
d759b5
 src/libvirt_private.syms |  1 +
d759b5
 src/qemu/qemu_process.c  |  7 +++++--
d759b5
 src/util/virprocess.c    | 20 +++++++++++++++++---
d759b5
 src/util/virprocess.h    |  3 +++
d759b5
 4 files changed, 26 insertions(+), 5 deletions(-)
d759b5
d759b5
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
d759b5
index 53dbce7cfe..2a3950fd35 100644
d759b5
--- a/src/libvirt_private.syms
d759b5
+++ b/src/libvirt_private.syms
d759b5
@@ -2623,6 +2623,7 @@ virProcessGetPids;
d759b5
 virProcessGetStartTime;
d759b5
 virProcessKill;
d759b5
 virProcessKillPainfully;
d759b5
+virProcessKillPainfullyDelay;
d759b5
 virProcessNamespaceAvailable;
d759b5
 virProcessRunInMountNamespace;
d759b5
 virProcessSchedPolicyTypeFromString;
d759b5
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
d759b5
index 0add197af5..d813c59c0d 100644
d759b5
--- a/src/qemu/qemu_process.c
d759b5
+++ b/src/qemu/qemu_process.c
d759b5
@@ -6937,8 +6937,11 @@ qemuProcessKill(virDomainObjPtr vm, unsigned int flags)
d759b5
         return 0;
d759b5
     }
d759b5
 
d759b5
-    ret = virProcessKillPainfully(vm->pid,
d759b5
-                                  !!(flags & VIR_QEMU_PROCESS_KILL_FORCE));
d759b5
+    /* Request an extra delay of two seconds per current nhostdevs
d759b5
+     * to be safe against stalls by the kernel freeing up the resources */
d759b5
+    ret = virProcessKillPainfullyDelay(vm->pid,
d759b5
+                                       !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
d759b5
+                                       vm->def->nhostdevs * 2);
d759b5
 
d759b5
     return ret;
d759b5
 }
d759b5
diff --git a/src/util/virprocess.c b/src/util/virprocess.c
d759b5
index f92b0dce37..297c96a8e5 100644
d759b5
--- a/src/util/virprocess.c
d759b5
+++ b/src/util/virprocess.c
d759b5
@@ -344,15 +344,21 @@ int virProcessKill(pid_t pid, int sig)
d759b5
  * Returns 0 if it was killed gracefully, 1 if it
d759b5
  * was killed forcibly, -1 if it is still alive,
d759b5
  * or another error occurred.
d759b5
+ *
d759b5
+ * Callers can proide an extra delay in seconds to
d759b5
+ * wait longer than the default.
d759b5
  */
d759b5
 int
d759b5
-virProcessKillPainfully(pid_t pid, bool force)
d759b5
+virProcessKillPainfullyDelay(pid_t pid, bool force, unsigned int extradelay)
d759b5
 {
d759b5
     size_t i;
d759b5
     int ret = -1;
d759b5
+    /* This is in 1/5th seconds since polling is on a 0.2s interval */
d759b5
+    unsigned int polldelay = 75 + (extradelay*5);
d759b5
     const char *signame = "TERM";
d759b5
 
d759b5
-    VIR_DEBUG("vpid=%lld force=%d", (long long)pid, force);
d759b5
+    VIR_DEBUG("vpid=%lld force=%d extradelay=%u",
d759b5
+              (long long)pid, force, extradelay);
d759b5
 
d759b5
     /* This loop sends SIGTERM, then waits a few iterations (10 seconds)
d759b5
      * to see if it dies. If the process still hasn't exited, and
d759b5
@@ -360,9 +366,12 @@ virProcessKillPainfully(pid_t pid, bool force)
d759b5
      * wait up to 5 seconds more for the process to exit before
d759b5
      * returning.
d759b5
      *
d759b5
+     * An extra delay can be passed by the caller for cases that are
d759b5
+     * expected to clean up slower than usual.
d759b5
+     *
d759b5
      * Note that setting @force could result in dataloss for the process.
d759b5
      */
d759b5
-    for (i = 0; i < 75; i++) {
d759b5
+    for (i = 0; i < polldelay; i++) {
d759b5
         int signum;
d759b5
         if (i == 0) {
d759b5
             signum = SIGTERM; /* kindly suggest it should exit */
d759b5
@@ -405,6 +414,11 @@ virProcessKillPainfully(pid_t pid, bool force)
d759b5
 }
d759b5
 
d759b5
 
d759b5
+int virProcessKillPainfully(pid_t pid, bool force)
d759b5
+{
d759b5
+    return virProcessKillPainfullyDelay(pid, force, 0);
d759b5
+}
d759b5
+
d759b5
 #if HAVE_SCHED_GETAFFINITY
d759b5
 
d759b5
 int virProcessSetAffinity(pid_t pid, virBitmapPtr map)
d759b5
diff --git a/src/util/virprocess.h b/src/util/virprocess.h
d759b5
index 3c5a882772..5faa0892fe 100644
d759b5
--- a/src/util/virprocess.h
d759b5
+++ b/src/util/virprocess.h
d759b5
@@ -55,6 +55,9 @@ virProcessWait(pid_t pid, int *exitstatus, bool raw)
d759b5
 int virProcessKill(pid_t pid, int sig);
d759b5
 
d759b5
 int virProcessKillPainfully(pid_t pid, bool force);
d759b5
+int virProcessKillPainfullyDelay(pid_t pid,
d759b5
+                                 bool force,
d759b5
+                                 unsigned int extradelay);
d759b5
 
d759b5
 int virProcessSetAffinity(pid_t pid, virBitmapPtr map);
d759b5
 
d759b5
-- 
d759b5
2.24.0
d759b5