Pablo Greco 40546a
From 92027209ce5acc92b43dc15ef582f7c8c8095cf6 Mon Sep 17 00:00:00 2001
Pablo Greco 40546a
Message-Id: <92027209ce5acc92b43dc15ef582f7c8c8095cf6@dist-git>
Pablo Greco 40546a
From: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Pablo Greco 40546a
Date: Tue, 11 Feb 2020 18:47:09 +0100
Pablo Greco 40546a
Subject: [PATCH] process: wait longer on kill per assigned Hostdev
Pablo Greco 40546a
MIME-Version: 1.0
Pablo Greco 40546a
Content-Type: text/plain; charset=UTF-8
Pablo Greco 40546a
Content-Transfer-Encoding: 8bit
Pablo Greco 40546a
Pablo Greco 40546a
It was found that in cases with host devices virProcessKillPainfully
Pablo Greco 40546a
might be able to send signal zero to the target PID for quite a while
Pablo Greco 40546a
with the process already being gone from /proc/<PID>.
Pablo Greco 40546a
Pablo Greco 40546a
That is due to cleanup and reset of devices which might include a
Pablo Greco 40546a
secondary bus reset that on top of the actions taken has a 1s delay
Pablo Greco 40546a
to let the bus settle. Due to that guests with plenty of Host devices
Pablo Greco 40546a
could easily exceed the default timeouts.
Pablo Greco 40546a
Pablo Greco 40546a
To solve that, this adds an extra delay of 2s per hostdev that is associated
Pablo Greco 40546a
to a VM.
Pablo Greco 40546a
Pablo Greco 40546a
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Pablo Greco 40546a
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Pablo Greco 40546a
(cherry picked from commit be2ca0444728edd12a000653d3693d68a5c9102f)
Pablo Greco 40546a
Pablo Greco 40546a
https://bugzilla.redhat.com/show_bug.cgi?id=1785338
Pablo Greco 40546a
Pablo Greco 40546a
Signed-off-by: Andrea Bolognani <abologna@redhat.com>
Pablo Greco 40546a
Message-Id: <20200211174710.203500-2-abologna@redhat.com>
Pablo Greco 40546a
Reviewed-by: Ján Tomko <jtomko@redhat.com>
Pablo Greco 40546a
---
Pablo Greco 40546a
 src/libvirt_private.syms |  1 +
Pablo Greco 40546a
 src/qemu/qemu_process.c  |  7 +++++--
Pablo Greco 40546a
 src/util/virprocess.c    | 20 +++++++++++++++++---
Pablo Greco 40546a
 src/util/virprocess.h    |  3 +++
Pablo Greco 40546a
 4 files changed, 26 insertions(+), 5 deletions(-)
Pablo Greco 40546a
Pablo Greco 40546a
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
Pablo Greco 40546a
index f4b54cee0b..2ad21a68bc 100644
Pablo Greco 40546a
--- a/src/libvirt_private.syms
Pablo Greco 40546a
+++ b/src/libvirt_private.syms
Pablo Greco 40546a
@@ -2646,6 +2646,7 @@ virProcessGetPids;
Pablo Greco 40546a
 virProcessGetStartTime;
Pablo Greco 40546a
 virProcessKill;
Pablo Greco 40546a
 virProcessKillPainfully;
Pablo Greco 40546a
+virProcessKillPainfullyDelay;
Pablo Greco 40546a
 virProcessNamespaceAvailable;
Pablo Greco 40546a
 virProcessRunInMountNamespace;
Pablo Greco 40546a
 virProcessSchedPolicyTypeFromString;
Pablo Greco 40546a
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
Pablo Greco 40546a
index 17d48357b3..4d10a38f1d 100644
Pablo Greco 40546a
--- a/src/qemu/qemu_process.c
Pablo Greco 40546a
+++ b/src/qemu/qemu_process.c
Pablo Greco 40546a
@@ -6918,8 +6918,11 @@ qemuProcessKill(virDomainObjPtr vm, unsigned int flags)
Pablo Greco 40546a
         return 0;
Pablo Greco 40546a
     }
Pablo Greco 40546a
 
Pablo Greco 40546a
-    ret = virProcessKillPainfully(vm->pid,
Pablo Greco 40546a
-                                  !!(flags & VIR_QEMU_PROCESS_KILL_FORCE));
Pablo Greco 40546a
+    /* Request an extra delay of two seconds per current nhostdevs
Pablo Greco 40546a
+     * to be safe against stalls by the kernel freeing up the resources */
Pablo Greco 40546a
+    ret = virProcessKillPainfullyDelay(vm->pid,
Pablo Greco 40546a
+                                       !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
Pablo Greco 40546a
+                                       vm->def->nhostdevs * 2);
Pablo Greco 40546a
 
Pablo Greco 40546a
     return ret;
Pablo Greco 40546a
 }
Pablo Greco 40546a
diff --git a/src/util/virprocess.c b/src/util/virprocess.c
Pablo Greco 40546a
index f92b0dce37..297c96a8e5 100644
Pablo Greco 40546a
--- a/src/util/virprocess.c
Pablo Greco 40546a
+++ b/src/util/virprocess.c
Pablo Greco 40546a
@@ -344,15 +344,21 @@ int virProcessKill(pid_t pid, int sig)
Pablo Greco 40546a
  * Returns 0 if it was killed gracefully, 1 if it
Pablo Greco 40546a
  * was killed forcibly, -1 if it is still alive,
Pablo Greco 40546a
  * or another error occurred.
Pablo Greco 40546a
+ *
Pablo Greco 40546a
+ * Callers can proide an extra delay in seconds to
Pablo Greco 40546a
+ * wait longer than the default.
Pablo Greco 40546a
  */
Pablo Greco 40546a
 int
Pablo Greco 40546a
-virProcessKillPainfully(pid_t pid, bool force)
Pablo Greco 40546a
+virProcessKillPainfullyDelay(pid_t pid, bool force, unsigned int extradelay)
Pablo Greco 40546a
 {
Pablo Greco 40546a
     size_t i;
Pablo Greco 40546a
     int ret = -1;
Pablo Greco 40546a
+    /* This is in 1/5th seconds since polling is on a 0.2s interval */
Pablo Greco 40546a
+    unsigned int polldelay = 75 + (extradelay*5);
Pablo Greco 40546a
     const char *signame = "TERM";
Pablo Greco 40546a
 
Pablo Greco 40546a
-    VIR_DEBUG("vpid=%lld force=%d", (long long)pid, force);
Pablo Greco 40546a
+    VIR_DEBUG("vpid=%lld force=%d extradelay=%u",
Pablo Greco 40546a
+              (long long)pid, force, extradelay);
Pablo Greco 40546a
 
Pablo Greco 40546a
     /* This loop sends SIGTERM, then waits a few iterations (10 seconds)
Pablo Greco 40546a
      * to see if it dies. If the process still hasn't exited, and
Pablo Greco 40546a
@@ -360,9 +366,12 @@ virProcessKillPainfully(pid_t pid, bool force)
Pablo Greco 40546a
      * wait up to 5 seconds more for the process to exit before
Pablo Greco 40546a
      * returning.
Pablo Greco 40546a
      *
Pablo Greco 40546a
+     * An extra delay can be passed by the caller for cases that are
Pablo Greco 40546a
+     * expected to clean up slower than usual.
Pablo Greco 40546a
+     *
Pablo Greco 40546a
      * Note that setting @force could result in dataloss for the process.
Pablo Greco 40546a
      */
Pablo Greco 40546a
-    for (i = 0; i < 75; i++) {
Pablo Greco 40546a
+    for (i = 0; i < polldelay; i++) {
Pablo Greco 40546a
         int signum;
Pablo Greco 40546a
         if (i == 0) {
Pablo Greco 40546a
             signum = SIGTERM; /* kindly suggest it should exit */
Pablo Greco 40546a
@@ -405,6 +414,11 @@ virProcessKillPainfully(pid_t pid, bool force)
Pablo Greco 40546a
 }
Pablo Greco 40546a
 
Pablo Greco 40546a
 
Pablo Greco 40546a
+int virProcessKillPainfully(pid_t pid, bool force)
Pablo Greco 40546a
+{
Pablo Greco 40546a
+    return virProcessKillPainfullyDelay(pid, force, 0);
Pablo Greco 40546a
+}
Pablo Greco 40546a
+
Pablo Greco 40546a
 #if HAVE_SCHED_GETAFFINITY
Pablo Greco 40546a
 
Pablo Greco 40546a
 int virProcessSetAffinity(pid_t pid, virBitmapPtr map)
Pablo Greco 40546a
diff --git a/src/util/virprocess.h b/src/util/virprocess.h
Pablo Greco 40546a
index 3c5a882772..5faa0892fe 100644
Pablo Greco 40546a
--- a/src/util/virprocess.h
Pablo Greco 40546a
+++ b/src/util/virprocess.h
Pablo Greco 40546a
@@ -55,6 +55,9 @@ virProcessWait(pid_t pid, int *exitstatus, bool raw)
Pablo Greco 40546a
 int virProcessKill(pid_t pid, int sig);
Pablo Greco 40546a
 
Pablo Greco 40546a
 int virProcessKillPainfully(pid_t pid, bool force);
Pablo Greco 40546a
+int virProcessKillPainfullyDelay(pid_t pid,
Pablo Greco 40546a
+                                 bool force,
Pablo Greco 40546a
+                                 unsigned int extradelay);
Pablo Greco 40546a
 
Pablo Greco 40546a
 int virProcessSetAffinity(pid_t pid, virBitmapPtr map);
Pablo Greco 40546a
 
Pablo Greco 40546a
-- 
Pablo Greco 40546a
2.25.0
Pablo Greco 40546a