Blame SOURCES/kvm-spapr-reset-DRCs-after-devices.patch

9bac43
From 4dcd885d5308e34fb4550c20fc7ffc050e014c91 Mon Sep 17 00:00:00 2001
9bac43
From: Laurent Vivier <lvivier@redhat.com>
9bac43
Date: Mon, 27 Nov 2017 09:03:20 +0100
9bac43
Subject: [PATCH 6/7] spapr: reset DRCs after devices
9bac43
9bac43
RH-Author: Laurent Vivier <lvivier@redhat.com>
9bac43
Message-id: <20171127090320.32307-1-lvivier@redhat.com>
9bac43
Patchwork-id: 77902
9bac43
O-Subject: [RHV7.5 qemu-kvm-rhev PATCH] spapr: reset DRCs after devices
9bac43
Bugzilla: 1516145
9bac43
RH-Acked-by: Serhii Popovych <spopovyc@redhat.com>
9bac43
RH-Acked-by: David Gibson <dgibson@redhat.com>
9bac43
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
9bac43
RH-Acked-by: Thomas Huth <thuth@redhat.com>
9bac43
9bac43
From: Greg Kurz <groug@kaod.org>
9bac43
9bac43
A DRC with a pending unplug request releases its associated device at
9bac43
machine reset time.
9bac43
9bac43
In the case of LMB, when all DRCs for a DIMM device have been reset,
9bac43
the DIMM gets unplugged, causing guest memory to disappear. This may
9bac43
be very confusing for anything still using this memory.
9bac43
9bac43
This is exactly what happens with vhost backends, and QEMU aborts
9bac43
with:
9bac43
9bac43
qemu-system-ppc64: used ring relocated for ring 2
9bac43
qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
9bac43
 `r >= 0' failed.
9bac43
9bac43
The issue is that each DRC registers a QEMU reset handler, and we
9bac43
don't control the order in which these handlers are called (i.e.,
9bac43
a LMB DRC will unplug a DIMM before the virtio device using the
9bac43
memory on this DIMM could stop its vhost backend).
9bac43
9bac43
To avoid such situations, let's reset DRCs after all devices
9bac43
have been reset.
9bac43
9bac43
Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
9bac43
Signed-off-by: Greg Kurz <groug@kaod.org>
9bac43
Reviewed-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
9bac43
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
9bac43
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
9bac43
(cherry picked from commit 82512483940c756e2db1bd67ea91b02bc29c5e01)
9bac43
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
9bac43
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
9bac43
---
9bac43
 hw/ppc/spapr.c     | 21 +++++++++++++++++++++
9bac43
 hw/ppc/spapr_drc.c |  7 -------
9bac43
 2 files changed, 21 insertions(+), 7 deletions(-)
9bac43
9bac43
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
9bac43
index 2065f09..6c64c55 100644
9bac43
--- a/hw/ppc/spapr.c
9bac43
+++ b/hw/ppc/spapr.c
9bac43
@@ -1394,6 +1394,19 @@ static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
9bac43
     }
9bac43
 }
9bac43
 
9bac43
+static int spapr_reset_drcs(Object *child, void *opaque)
9bac43
+{
9bac43
+    sPAPRDRConnector *drc =
9bac43
+        (sPAPRDRConnector *) object_dynamic_cast(child,
9bac43
+                                                 TYPE_SPAPR_DR_CONNECTOR);
9bac43
+
9bac43
+    if (drc) {
9bac43
+        spapr_drc_reset(drc);
9bac43
+    }
9bac43
+
9bac43
+    return 0;
9bac43
+}
9bac43
+
9bac43
 static void ppc_spapr_reset(void)
9bac43
 {
9bac43
     MachineState *machine = MACHINE(qdev_get_machine());
9bac43
@@ -1417,6 +1430,14 @@ static void ppc_spapr_reset(void)
9bac43
     }
9bac43
 
9bac43
     qemu_devices_reset();
9bac43
+
9bac43
+    /* DRC reset may cause a device to be unplugged. This will cause troubles
9bac43
+     * if this device is used by another device (eg, a running vhost backend
9bac43
+     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
9bac43
+     * situations, we reset DRCs after all devices have been reset.
9bac43
+     */
9bac43
+    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);
9bac43
+
9bac43
     spapr_clear_pending_events(spapr);
9bac43
 
9bac43
     /*
9bac43
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
9bac43
index 85c999d..7d33e4c 100644
9bac43
--- a/hw/ppc/spapr_drc.c
9bac43
+++ b/hw/ppc/spapr_drc.c
9bac43
@@ -455,11 +455,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
9bac43
     }
9bac43
 }
9bac43
 
9bac43
-static void drc_reset(void *opaque)
9bac43
-{
9bac43
-    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
9bac43
-}
9bac43
-
9bac43
 bool spapr_drc_needed(void *opaque)
9bac43
 {
9bac43
     sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
9bac43
@@ -517,7 +512,6 @@ static void realize(DeviceState *d, Error **errp)
9bac43
     }
9bac43
     vmstate_register(DEVICE(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
9bac43
                      drc);
9bac43
-    qemu_register_reset(drc_reset, drc);
9bac43
     trace_spapr_drc_realize_complete(spapr_drc_index(drc));
9bac43
 }
9bac43
 
9bac43
@@ -528,7 +522,6 @@ static void unrealize(DeviceState *d, Error **errp)
9bac43
     char name[256];
9bac43
 
9bac43
     trace_spapr_drc_unrealize(spapr_drc_index(drc));
9bac43
-    qemu_unregister_reset(drc_reset, drc);
9bac43
     vmstate_unregister(DEVICE(drc), &vmstate_spapr_drc, drc);
9bac43
     root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
9bac43
     snprintf(name, sizeof(name), "%x", spapr_drc_index(drc));
9bac43
-- 
9bac43
1.8.3.1
9bac43