|
|
7711c0 |
From ad285b807f37f16571d03e7667ea7200fb9b25a7 Mon Sep 17 00:00:00 2001
|
|
|
7711c0 |
From: Peter Xu <peterx@redhat.com>
|
|
|
7711c0 |
Date: Thu, 8 Nov 2018 05:37:19 +0100
|
|
|
7711c0 |
Subject: [PATCH 12/22] intel_iommu: better handling of dmar state switch
|
|
|
7711c0 |
MIME-Version: 1.0
|
|
|
7711c0 |
Content-Type: text/plain; charset=UTF-8
|
|
|
7711c0 |
Content-Transfer-Encoding: 8bit
|
|
|
7711c0 |
|
|
|
7711c0 |
RH-Author: Peter Xu <peterx@redhat.com>
|
|
|
7711c0 |
Message-id: <20181108053721.13162-6-peterx@redhat.com>
|
|
|
7711c0 |
Patchwork-id: 82956
|
|
|
7711c0 |
O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 5/7] intel_iommu: better handling of dmar state switch
|
|
|
7711c0 |
Bugzilla: 1627272
|
|
|
7711c0 |
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
|
|
|
7711c0 |
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
7711c0 |
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
|
|
7711c0 |
|
|
|
7711c0 |
QEMU is not handling the global DMAR switch well, especially when from
|
|
|
7711c0 |
"on" to "off".
|
|
|
7711c0 |
|
|
|
7711c0 |
Let's first take the example of system reset.
|
|
|
7711c0 |
|
|
|
7711c0 |
Assuming that a guest has IOMMU enabled. When it reboots, we will drop
|
|
|
7711c0 |
all the existing DMAR mappings to handle the system reset, however we'll
|
|
|
7711c0 |
still keep the existing memory layouts which has the IOMMU memory region
|
|
|
7711c0 |
enabled. So after the reboot and before the kernel reloads again, there
|
|
|
7711c0 |
will be no mapping at all for the host device. That's problematic since
|
|
|
7711c0 |
any software (for example, SeaBIOS) that runs earlier than the kernel
|
|
|
7711c0 |
after the reboot will assume the IOMMU is disabled, so any DMA from the
|
|
|
7711c0 |
software will fail.
|
|
|
7711c0 |
|
|
|
7711c0 |
For example, a guest that boots on an assigned NVMe device might fail to
|
|
|
7711c0 |
find the boot device after a system reboot/reset and we'll be able to
|
|
|
7711c0 |
observe SeaBIOS errors if we capture the debugging log:
|
|
|
7711c0 |
|
|
|
7711c0 |
WARNING - Timeout at nvme_wait:144!
|
|
|
7711c0 |
|
|
|
7711c0 |
Meanwhile, we should see DMAR errors on the host of that NVMe device.
|
|
|
7711c0 |
It's the DMA fault that caused a NVMe driver timeout.
|
|
|
7711c0 |
|
|
|
7711c0 |
The correct fix should be that we do proper switching of device DMA
|
|
|
7711c0 |
address spaces when system resets, which will setup correct memory
|
|
|
7711c0 |
regions and notify the backend of the devices. This might not affect
|
|
|
7711c0 |
much on non-assigned devices since QEMU VT-d emulation will assume a
|
|
|
7711c0 |
default passthrough mapping if DMAR is not enabled in the GCMD
|
|
|
7711c0 |
register (please refer to vtd_iommu_translate). However that's required
|
|
|
7711c0 |
for an assigned devices, since that'll rebuild the correct GPA to HPA
|
|
|
7711c0 |
mapping that is needed for any DMA operation during guest bootstrap.
|
|
|
7711c0 |
|
|
|
7711c0 |
Besides the system reset, we have some other places that might change
|
|
|
7711c0 |
the global DMAR status and we'd better do the same thing there. For
|
|
|
7711c0 |
example, when we change the state of GCMD register, or the DMAR root
|
|
|
7711c0 |
pointer. Do the same refresh for all these places. For these two
|
|
|
7711c0 |
places we'll also need to explicitly invalidate the context entry cache
|
|
|
7711c0 |
and iotlb cache.
|
|
|
7711c0 |
|
|
|
7711c0 |
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1625173
|
|
|
7711c0 |
CC: QEMU Stable <qemu-stable@nongnu.org>
|
|
|
7711c0 |
Reported-by: Cong Li <coli@redhat.com>
|
|
|
7711c0 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
7711c0 |
--
|
|
|
7711c0 |
v2:
|
|
|
7711c0 |
- do the same for GCMD write, or root pointer update [Alex]
|
|
|
7711c0 |
- test is carried out by me this time, by observing the
|
|
|
7711c0 |
vtd_switch_address_space tracepoint after system reboot
|
|
|
7711c0 |
v3:
|
|
|
7711c0 |
- rewrite commit message as suggested by Alex
|
|
|
7711c0 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
7711c0 |
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
|
|
7711c0 |
Reviewed-by: Jason Wang <jasowang@redhat.com>
|
|
|
7711c0 |
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
7711c0 |
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
7711c0 |
(cherry picked from commit 2cc9ddccebcaa48b3debfc279a83761fcbb7616c)
|
|
|
7711c0 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
7711c0 |
|
|
|
7711c0 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
7711c0 |
---
|
|
|
7711c0 |
hw/i386/intel_iommu.c | 21 ++++++++++++++-------
|
|
|
7711c0 |
1 file changed, 14 insertions(+), 7 deletions(-)
|
|
|
7711c0 |
|
|
|
7711c0 |
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
|
|
|
7711c0 |
index 48d0ba3..a6e87a9 100644
|
|
|
7711c0 |
--- a/hw/i386/intel_iommu.c
|
|
|
7711c0 |
+++ b/hw/i386/intel_iommu.c
|
|
|
7711c0 |
@@ -37,6 +37,8 @@
|
|
|
7711c0 |
#include "kvm_i386.h"
|
|
|
7711c0 |
#include "trace.h"
|
|
|
7711c0 |
|
|
|
7711c0 |
+static void vtd_address_space_refresh_all(IntelIOMMUState *s);
|
|
|
7711c0 |
+
|
|
|
7711c0 |
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
|
|
|
7711c0 |
uint64_t wmask, uint64_t w1cmask)
|
|
|
7711c0 |
{
|
|
|
7711c0 |
@@ -1436,7 +1438,7 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
|
|
|
7711c0 |
vtd_reset_context_cache_locked(s);
|
|
|
7711c0 |
}
|
|
|
7711c0 |
vtd_iommu_unlock(s);
|
|
|
7711c0 |
- vtd_switch_address_space_all(s);
|
|
|
7711c0 |
+ vtd_address_space_refresh_all(s);
|
|
|
7711c0 |
/*
|
|
|
7711c0 |
* From VT-d spec 6.5.2.1, a global context entry invalidation
|
|
|
7711c0 |
* should be followed by a IOTLB global invalidation, so we should
|
|
|
7711c0 |
@@ -1727,6 +1729,8 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
|
|
|
7711c0 |
vtd_root_table_setup(s);
|
|
|
7711c0 |
/* Ok - report back to driver */
|
|
|
7711c0 |
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
|
|
|
7711c0 |
+ vtd_reset_caches(s);
|
|
|
7711c0 |
+ vtd_address_space_refresh_all(s);
|
|
|
7711c0 |
}
|
|
|
7711c0 |
|
|
|
7711c0 |
/* Set Interrupt Remap Table Pointer */
|
|
|
7711c0 |
@@ -1759,7 +1763,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
|
|
|
7711c0 |
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
|
|
|
7711c0 |
}
|
|
|
7711c0 |
|
|
|
7711c0 |
- vtd_switch_address_space_all(s);
|
|
|
7711c0 |
+ vtd_reset_caches(s);
|
|
|
7711c0 |
+ vtd_address_space_refresh_all(s);
|
|
|
7711c0 |
}
|
|
|
7711c0 |
|
|
|
7711c0 |
/* Handle Interrupt Remap Enable/Disable */
|
|
|
7711c0 |
@@ -3059,6 +3064,12 @@ static void vtd_address_space_unmap_all(IntelIOMMUState *s)
|
|
|
7711c0 |
}
|
|
|
7711c0 |
}
|
|
|
7711c0 |
|
|
|
7711c0 |
+static void vtd_address_space_refresh_all(IntelIOMMUState *s)
|
|
|
7711c0 |
+{
|
|
|
7711c0 |
+ vtd_address_space_unmap_all(s);
|
|
|
7711c0 |
+ vtd_switch_address_space_all(s);
|
|
|
7711c0 |
+}
|
|
|
7711c0 |
+
|
|
|
7711c0 |
static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
|
|
|
7711c0 |
{
|
|
|
7711c0 |
memory_region_notify_one((IOMMUNotifier *)private, entry);
|
|
|
7711c0 |
@@ -3231,11 +3242,7 @@ static void vtd_reset(DeviceState *dev)
|
|
|
7711c0 |
IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
|
|
|
7711c0 |
|
|
|
7711c0 |
vtd_init(s);
|
|
|
7711c0 |
-
|
|
|
7711c0 |
- /*
|
|
|
7711c0 |
- * When device reset, throw away all mappings and external caches
|
|
|
7711c0 |
- */
|
|
|
7711c0 |
- vtd_address_space_unmap_all(s);
|
|
|
7711c0 |
+ vtd_address_space_refresh_all(s);
|
|
|
7711c0 |
}
|
|
|
7711c0 |
|
|
|
7711c0 |
static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
|
|
|
7711c0 |
--
|
|
|
7711c0 |
1.8.3.1
|
|
|
7711c0 |
|