| From 467b5e3127faf168fba6ce61a602f82d34a699c7 Mon Sep 17 00:00:00 2001 |
| From: Peter Xu <peterx@redhat.com> |
| Date: Thu, 8 Nov 2018 06:29:36 +0000 |
| Subject: [PATCH 08/35] intel_iommu: better handling of dmar state switch |
| |
| RH-Author: Peter Xu <peterx@redhat.com> |
| Message-id: <20181108062938.21143-6-peterx@redhat.com> |
| Patchwork-id: 82964 |
| O-Subject: [RHEL-8 qemu-kvm PATCH 5/7] intel_iommu: better handling of dmar state switch |
| Bugzilla: 1625173 |
| RH-Acked-by: Auger Eric <eric.auger@redhat.com> |
| RH-Acked-by: Michael S. Tsirkin <mst@redhat.com> |
| RH-Acked-by: Laurent Vivier <lvivier@redhat.com> |
| |
| Bugzilla: 1625173 |
| |
| QEMU is not handling the global DMAR switch well, especially when from |
| "on" to "off". |
| |
| Let's first take the example of system reset. |
| |
| Assuming that a guest has IOMMU enabled. When it reboots, we will drop |
| all the existing DMAR mappings to handle the system reset, however we'll |
| still keep the existing memory layouts which has the IOMMU memory region |
| enabled. So after the reboot and before the kernel reloads again, there |
| will be no mapping at all for the host device. That's problematic since |
| any software (for example, SeaBIOS) that runs earlier than the kernel |
| after the reboot will assume the IOMMU is disabled, so any DMA from the |
| software will fail. |
| |
| For example, a guest that boots on an assigned NVMe device might fail to |
| find the boot device after a system reboot/reset and we'll be able to |
| observe SeaBIOS errors if we capture the debugging log: |
| |
| WARNING - Timeout at nvme_wait:144! |
| |
| Meanwhile, we should see DMAR errors on the host of that NVMe device. |
| It's the DMA fault that caused a NVMe driver timeout. |
| |
| The correct fix should be that we do proper switching of device DMA |
| address spaces when system resets, which will setup correct memory |
| regions and notify the backend of the devices. This might not affect |
| much on non-assigned devices since QEMU VT-d emulation will assume a |
| default passthrough mapping if DMAR is not enabled in the GCMD |
| register (please refer to vtd_iommu_translate). However that's required |
| for an assigned devices, since that'll rebuild the correct GPA to HPA |
| mapping that is needed for any DMA operation during guest bootstrap. |
| |
| Besides the system reset, we have some other places that might change |
| the global DMAR status and we'd better do the same thing there. For |
| example, when we change the state of GCMD register, or the DMAR root |
| pointer. Do the same refresh for all these places. For these two |
| places we'll also need to explicitly invalidate the context entry cache |
| and iotlb cache. |
| |
| Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1625173 |
| CC: QEMU Stable <qemu-stable@nongnu.org> |
| Reported-by: Cong Li <coli@redhat.com> |
| Signed-off-by: Peter Xu <peterx@redhat.com> |
| -- |
| v2: |
| - do the same for GCMD write, or root pointer update [Alex] |
| - test is carried out by me this time, by observing the |
| vtd_switch_address_space tracepoint after system reboot |
| v3: |
| - rewrite commit message as suggested by Alex |
| Signed-off-by: Peter Xu <peterx@redhat.com> |
| Reviewed-by: Eric Auger <eric.auger@redhat.com> |
| Reviewed-by: Jason Wang <jasowang@redhat.com> |
| Reviewed-by: Michael S. Tsirkin <mst@redhat.com> |
| Signed-off-by: Michael S. Tsirkin <mst@redhat.com> |
| (cherry picked from commit 2cc9ddccebcaa48b3debfc279a83761fcbb7616c) |
| Signed-off-by: Peter Xu <peterx@redhat.com> |
| |
| Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com> |
| |
| hw/i386/intel_iommu.c | 21 ++++++++++++++------- |
| 1 file changed, 14 insertions(+), 7 deletions(-) |
| |
| diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c |
| index 48d0ba3..a6e87a9 100644 |
| |
| |
| @@ -37,6 +37,8 @@ |
| #include "kvm_i386.h" |
| #include "trace.h" |
| |
| +static void vtd_address_space_refresh_all(IntelIOMMUState *s); |
| + |
| static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, |
| uint64_t wmask, uint64_t w1cmask) |
| { |
| @@ -1436,7 +1438,7 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) |
| vtd_reset_context_cache_locked(s); |
| } |
| vtd_iommu_unlock(s); |
| - vtd_switch_address_space_all(s); |
| + vtd_address_space_refresh_all(s); |
| /* |
| * From VT-d spec 6.5.2.1, a global context entry invalidation |
| * should be followed by a IOTLB global invalidation, so we should |
| @@ -1727,6 +1729,8 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) |
| vtd_root_table_setup(s); |
| /* Ok - report back to driver */ |
| vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS); |
| + vtd_reset_caches(s); |
| + vtd_address_space_refresh_all(s); |
| } |
| |
| /* Set Interrupt Remap Table Pointer */ |
| @@ -1759,7 +1763,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) |
| vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0); |
| } |
| |
| - vtd_switch_address_space_all(s); |
| + vtd_reset_caches(s); |
| + vtd_address_space_refresh_all(s); |
| } |
| |
| /* Handle Interrupt Remap Enable/Disable */ |
| @@ -3059,6 +3064,12 @@ static void vtd_address_space_unmap_all(IntelIOMMUState *s) |
| } |
| } |
| |
| +static void vtd_address_space_refresh_all(IntelIOMMUState *s) |
| +{ |
| + vtd_address_space_unmap_all(s); |
| + vtd_switch_address_space_all(s); |
| +} |
| + |
| static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) |
| { |
| memory_region_notify_one((IOMMUNotifier *)private, entry); |
| @@ -3231,11 +3242,7 @@ static void vtd_reset(DeviceState *dev) |
| IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); |
| |
| vtd_init(s); |
| - |
| - /* |
| - * When device reset, throw away all mappings and external caches |
| - */ |
| - vtd_address_space_unmap_all(s); |
| + vtd_address_space_refresh_all(s); |
| } |
| |
| static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) |
| -- |
| 1.8.3.1 |
| |