|
|
1bdc94 |
From db590f2a02907a6762edd0877b32e79405ed4932 Mon Sep 17 00:00:00 2001
|
|
|
1bdc94 |
From: Peter Xu <peterx@redhat.com>
|
|
|
1bdc94 |
Date: Mon, 3 Sep 2018 04:52:33 +0200
|
|
|
1bdc94 |
Subject: [PATCH 18/29] intel-iommu: send PSI always even if across PDEs
|
|
|
1bdc94 |
|
|
|
1bdc94 |
RH-Author: Peter Xu <peterx@redhat.com>
|
|
|
1bdc94 |
Message-id: <20180903045241.6456-2-peterx@redhat.com>
|
|
|
1bdc94 |
Patchwork-id: 82021
|
|
|
1bdc94 |
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 1/9] intel-iommu: send PSI always even if across PDEs
|
|
|
1bdc94 |
Bugzilla: 1623859
|
|
|
1bdc94 |
RH-Acked-by: Xiao Wang <jasowang@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
1bdc94 |
|
|
|
1bdc94 |
SECURITY IMPLICATION: without this patch, any guest with both an assigned
|
|
|
1bdc94 |
device and a vIOMMU might encounter stale IO page mappings even if the guest
|
|
|
1bdc94 |
has already unmapped the page, which may lead to guest memory
|
|
|
1bdc94 |
corruption. The stale mappings will only be limited to the guest's own
|
|
|
1bdc94 |
memory range, so it should not affect the host memory or other guests on
|
|
|
1bdc94 |
the host.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
During IOVA page table walking, there is a special case when the PSI
|
|
|
1bdc94 |
covers one whole PDE (Page Directory Entry, which contains 512 Page
|
|
|
1bdc94 |
Table Entries) or more. In the past, we skipped that entry and did not
|
|
|
1bdc94 |
notify the IOMMU notifiers. This is not correct. We should send UNMAP
|
|
|
1bdc94 |
notification to registered UNMAP notifiers in this case.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
For UNMAP-only notifiers, this might leave IOTLBs cached in the devices
|
|
|
1bdc94 |
even if they were already invalid. For MAP/UNMAP notifiers like
|
|
|
1bdc94 |
vfio-pci, this will cause stale page mappings.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
This special case doesn't trigger often, but it can very easily be
|
|
|
1bdc94 |
triggered by nested device assignments, since in that case we'll
|
|
|
1bdc94 |
possibly map the whole L2 guest RAM region into the device's IOVA
|
|
|
1bdc94 |
address space (several GBs at least), which is far bigger than normal
|
|
|
1bdc94 |
kernel driver usages of the device (tens of MBs normally).
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Without this patch applied to L1 QEMU, nested device assignment to L2
|
|
|
1bdc94 |
guests will dump some errors like:
|
|
|
1bdc94 |
|
|
|
1bdc94 |
qemu-system-x86_64: VFIO_MAP_DMA: -17
|
|
|
1bdc94 |
qemu-system-x86_64: vfio_dma_map(0x557305420c30, 0xad000, 0x1000,
|
|
|
1bdc94 |
0x7f89a920d000) = -17 (File exists)
|
|
|
1bdc94 |
|
|
|
1bdc94 |
CC: QEMU Stable <qemu-stable@nongnu.org>
|
|
|
1bdc94 |
Acked-by: Jason Wang <jasowang@redhat.com>
|
|
|
1bdc94 |
[peterx: rewrite the commit message]
|
|
|
1bdc94 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
1bdc94 |
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
1bdc94 |
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
1bdc94 |
(cherry picked from commit 36d2d52bdb45f5b753a61fdaf0fe7891f1f5b61d)
|
|
|
1bdc94 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
1bdc94 |
---
|
|
|
1bdc94 |
hw/i386/intel_iommu.c | 42 ++++++++++++++++++++++++++++++------------
|
|
|
1bdc94 |
1 file changed, 30 insertions(+), 12 deletions(-)
|
|
|
1bdc94 |
|
|
|
1bdc94 |
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
|
|
|
1bdc94 |
index fb31de9..b359efd 100644
|
|
|
1bdc94 |
--- a/hw/i386/intel_iommu.c
|
|
|
1bdc94 |
+++ b/hw/i386/intel_iommu.c
|
|
|
1bdc94 |
@@ -722,6 +722,15 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
|
|
|
1bdc94 |
|
|
|
1bdc94 |
typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+static int vtd_page_walk_one(IOMMUTLBEntry *entry, int level,
|
|
|
1bdc94 |
+ vtd_page_walk_hook hook_fn, void *private)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ assert(hook_fn);
|
|
|
1bdc94 |
+ trace_vtd_page_walk_one(level, entry->iova, entry->translated_addr,
|
|
|
1bdc94 |
+ entry->addr_mask, entry->perm);
|
|
|
1bdc94 |
+ return hook_fn(entry, private);
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
/**
|
|
|
1bdc94 |
* vtd_page_walk_level - walk over specific level for IOVA range
|
|
|
1bdc94 |
*
|
|
|
1bdc94 |
@@ -781,28 +790,37 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
|
|
|
1bdc94 |
*/
|
|
|
1bdc94 |
entry_valid = read_cur | write_cur;
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+ entry.target_as = &address_space_memory;
|
|
|
1bdc94 |
+ entry.iova = iova & subpage_mask;
|
|
|
1bdc94 |
+ entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
|
|
|
1bdc94 |
+ entry.addr_mask = ~subpage_mask;
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
if (vtd_is_last_slpte(slpte, level)) {
|
|
|
1bdc94 |
- entry.target_as = &address_space_memory;
|
|
|
1bdc94 |
- entry.iova = iova & subpage_mask;
|
|
|
1bdc94 |
/* NOTE: this is only meaningful if entry_valid == true */
|
|
|
1bdc94 |
entry.translated_addr = vtd_get_slpte_addr(slpte, aw);
|
|
|
1bdc94 |
- entry.addr_mask = ~subpage_mask;
|
|
|
1bdc94 |
- entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
|
|
|
1bdc94 |
if (!entry_valid && !notify_unmap) {
|
|
|
1bdc94 |
trace_vtd_page_walk_skip_perm(iova, iova_next);
|
|
|
1bdc94 |
goto next;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
- trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr,
|
|
|
1bdc94 |
- entry.addr_mask, entry.perm);
|
|
|
1bdc94 |
- if (hook_fn) {
|
|
|
1bdc94 |
- ret = hook_fn(&entry, private);
|
|
|
1bdc94 |
- if (ret < 0) {
|
|
|
1bdc94 |
- return ret;
|
|
|
1bdc94 |
- }
|
|
|
1bdc94 |
+ ret = vtd_page_walk_one(&entry, level, hook_fn, private);
|
|
|
1bdc94 |
+ if (ret < 0) {
|
|
|
1bdc94 |
+ return ret;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
} else {
|
|
|
1bdc94 |
if (!entry_valid) {
|
|
|
1bdc94 |
- trace_vtd_page_walk_skip_perm(iova, iova_next);
|
|
|
1bdc94 |
+ if (notify_unmap) {
|
|
|
1bdc94 |
+ /*
|
|
|
1bdc94 |
+ * The whole entry is invalid; unmap it all.
|
|
|
1bdc94 |
+ * Translated address is meaningless, zero it.
|
|
|
1bdc94 |
+ */
|
|
|
1bdc94 |
+ entry.translated_addr = 0x0;
|
|
|
1bdc94 |
+ ret = vtd_page_walk_one(&entry, level, hook_fn, private);
|
|
|
1bdc94 |
+ if (ret < 0) {
|
|
|
1bdc94 |
+ return ret;
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
+ } else {
|
|
|
1bdc94 |
+ trace_vtd_page_walk_skip_perm(iova, iova_next);
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
goto next;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, aw), iova,
|
|
|
1bdc94 |
--
|
|
|
1bdc94 |
1.8.3.1
|
|
|
1bdc94 |
|