Blame SOURCES/kvm-intel_iommu-better-handling-of-dmar-state-switch.patch

7711c0
From ad285b807f37f16571d03e7667ea7200fb9b25a7 Mon Sep 17 00:00:00 2001
7711c0
From: Peter Xu <peterx@redhat.com>
7711c0
Date: Thu, 8 Nov 2018 05:37:19 +0100
7711c0
Subject: [PATCH 12/22] intel_iommu: better handling of dmar state switch
7711c0
MIME-Version: 1.0
7711c0
Content-Type: text/plain; charset=UTF-8
7711c0
Content-Transfer-Encoding: 8bit
7711c0
7711c0
RH-Author: Peter Xu <peterx@redhat.com>
7711c0
Message-id: <20181108053721.13162-6-peterx@redhat.com>
7711c0
Patchwork-id: 82956
7711c0
O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 5/7] intel_iommu: better handling of dmar state switch
7711c0
Bugzilla: 1627272
7711c0
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
7711c0
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
7711c0
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7711c0
7711c0
QEMU is not handling the global DMAR switch well, especially when from
7711c0
"on" to "off".
7711c0
7711c0
Let's first take the example of system reset.
7711c0
7711c0
Assuming that a guest has IOMMU enabled.  When it reboots, we will drop
7711c0
all the existing DMAR mappings to handle the system reset, however we'll
7711c0
still keep the existing memory layouts which has the IOMMU memory region
7711c0
enabled.  So after the reboot and before the kernel reloads again, there
7711c0
will be no mapping at all for the host device.  That's problematic since
7711c0
any software (for example, SeaBIOS) that runs earlier than the kernel
7711c0
after the reboot will assume the IOMMU is disabled, so any DMA from the
7711c0
software will fail.
7711c0
7711c0
For example, a guest that boots on an assigned NVMe device might fail to
7711c0
find the boot device after a system reboot/reset and we'll be able to
7711c0
observe SeaBIOS errors if we capture the debugging log:
7711c0
7711c0
  WARNING - Timeout at nvme_wait:144!
7711c0
7711c0
Meanwhile, we should see DMAR errors on the host of that NVMe device.
7711c0
It's the DMA fault that caused a NVMe driver timeout.
7711c0
7711c0
The correct fix should be that we do proper switching of device DMA
7711c0
address spaces when system resets, which will setup correct memory
7711c0
regions and notify the backend of the devices.  This might not affect
7711c0
much on non-assigned devices since QEMU VT-d emulation will assume a
7711c0
default passthrough mapping if DMAR is not enabled in the GCMD
7711c0
register (please refer to vtd_iommu_translate).  However that's required
7711c0
for an assigned devices, since that'll rebuild the correct GPA to HPA
7711c0
mapping that is needed for any DMA operation during guest bootstrap.
7711c0
7711c0
Besides the system reset, we have some other places that might change
7711c0
the global DMAR status and we'd better do the same thing there.  For
7711c0
example, when we change the state of GCMD register, or the DMAR root
7711c0
pointer.  Do the same refresh for all these places.  For these two
7711c0
places we'll also need to explicitly invalidate the context entry cache
7711c0
and iotlb cache.
7711c0
7711c0
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1625173
7711c0
CC: QEMU Stable <qemu-stable@nongnu.org>
7711c0
Reported-by: Cong Li <coli@redhat.com>
7711c0
Signed-off-by: Peter Xu <peterx@redhat.com>
7711c0
--
7711c0
v2:
7711c0
- do the same for GCMD write, or root pointer update [Alex]
7711c0
- test is carried out by me this time, by observing the
7711c0
  vtd_switch_address_space tracepoint after system reboot
7711c0
v3:
7711c0
- rewrite commit message as suggested by Alex
7711c0
Signed-off-by: Peter Xu <peterx@redhat.com>
7711c0
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7711c0
Reviewed-by: Jason Wang <jasowang@redhat.com>
7711c0
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
7711c0
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
7711c0
(cherry picked from commit 2cc9ddccebcaa48b3debfc279a83761fcbb7616c)
7711c0
Signed-off-by: Peter Xu <peterx@redhat.com>
7711c0
7711c0
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
7711c0
---
7711c0
 hw/i386/intel_iommu.c | 21 ++++++++++++++-------
7711c0
 1 file changed, 14 insertions(+), 7 deletions(-)
7711c0
7711c0
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
7711c0
index 48d0ba3..a6e87a9 100644
7711c0
--- a/hw/i386/intel_iommu.c
7711c0
+++ b/hw/i386/intel_iommu.c
7711c0
@@ -37,6 +37,8 @@
7711c0
 #include "kvm_i386.h"
7711c0
 #include "trace.h"
7711c0
 
7711c0
+static void vtd_address_space_refresh_all(IntelIOMMUState *s);
7711c0
+
7711c0
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
7711c0
                             uint64_t wmask, uint64_t w1cmask)
7711c0
 {
7711c0
@@ -1436,7 +1438,7 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
7711c0
         vtd_reset_context_cache_locked(s);
7711c0
     }
7711c0
     vtd_iommu_unlock(s);
7711c0
-    vtd_switch_address_space_all(s);
7711c0
+    vtd_address_space_refresh_all(s);
7711c0
     /*
7711c0
      * From VT-d spec 6.5.2.1, a global context entry invalidation
7711c0
      * should be followed by a IOTLB global invalidation, so we should
7711c0
@@ -1727,6 +1729,8 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
7711c0
     vtd_root_table_setup(s);
7711c0
     /* Ok - report back to driver */
7711c0
     vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
7711c0
+    vtd_reset_caches(s);
7711c0
+    vtd_address_space_refresh_all(s);
7711c0
 }
7711c0
 
7711c0
 /* Set Interrupt Remap Table Pointer */
7711c0
@@ -1759,7 +1763,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
7711c0
         vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
7711c0
     }
7711c0
 
7711c0
-    vtd_switch_address_space_all(s);
7711c0
+    vtd_reset_caches(s);
7711c0
+    vtd_address_space_refresh_all(s);
7711c0
 }
7711c0
 
7711c0
 /* Handle Interrupt Remap Enable/Disable */
7711c0
@@ -3059,6 +3064,12 @@ static void vtd_address_space_unmap_all(IntelIOMMUState *s)
7711c0
     }
7711c0
 }
7711c0
 
7711c0
+static void vtd_address_space_refresh_all(IntelIOMMUState *s)
7711c0
+{
7711c0
+    vtd_address_space_unmap_all(s);
7711c0
+    vtd_switch_address_space_all(s);
7711c0
+}
7711c0
+
7711c0
 static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
7711c0
 {
7711c0
     memory_region_notify_one((IOMMUNotifier *)private, entry);
7711c0
@@ -3231,11 +3242,7 @@ static void vtd_reset(DeviceState *dev)
7711c0
     IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
7711c0
 
7711c0
     vtd_init(s);
7711c0
-
7711c0
-    /*
7711c0
-     * When device reset, throw away all mappings and external caches
7711c0
-     */
7711c0
-    vtd_address_space_unmap_all(s);
7711c0
+    vtd_address_space_refresh_all(s);
7711c0
 }
7711c0
 
7711c0
 static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
7711c0
-- 
7711c0
1.8.3.1
7711c0