Blame SOURCES/kvm-intel-iommu-send-PSI-always-even-if-across-PDEs.patch

357786
From db590f2a02907a6762edd0877b32e79405ed4932 Mon Sep 17 00:00:00 2001
357786
From: Peter Xu <peterx@redhat.com>
357786
Date: Mon, 3 Sep 2018 04:52:33 +0200
357786
Subject: [PATCH 18/29] intel-iommu: send PSI always even if across PDEs
357786
357786
RH-Author: Peter Xu <peterx@redhat.com>
357786
Message-id: <20180903045241.6456-2-peterx@redhat.com>
357786
Patchwork-id: 82021
357786
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 1/9] intel-iommu: send PSI always even if across PDEs
357786
Bugzilla: 1623859
357786
RH-Acked-by: Xiao Wang <jasowang@redhat.com>
357786
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
357786
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
357786
357786
SECURITY IMPLICATION: without this patch, any guest with both an assigned
357786
device and a vIOMMU might encounter stale IO page mappings even if guest
357786
has already unmapped the page, which may lead to guest memory
357786
corruption.  The stale mappings will only be limited to the guest's own
357786
memory range, so it should not affect the host memory or other guests on
357786
the host.
357786
357786
During IOVA page table walking, there is a special case when the PSI
357786
covers one whole PDE (Page Directory Entry, which contains 512 Page
357786
Table Entries) or more.  In the past, we skipped that entry and didn't
357786
notify the IOMMU notifiers.  This is not correct.  We should send UNMAP
357786
notification to registered UNMAP notifiers in this case.
357786
357786
For UNMAP-only notifiers, this might leave IOTLBs cached in the devices
357786
even if they were already invalid.  For MAP/UNMAP notifiers like
357786
vfio-pci, this will cause stale page mappings.
357786
357786
This special case doesn't trigger often, but it is very easily
357786
triggered by nested device assignments, since in that case we'll
357786
possibly map the whole L2 guest RAM region into the device's IOVA
357786
address space (several GBs at least), which is far bigger than normal
357786
kernel driver usages of the device (tens of MBs normally).
357786
357786
Without this patch applied to L1 QEMU, nested device assignment to L2
357786
guests will dump some errors like:
357786
357786
qemu-system-x86_64: VFIO_MAP_DMA: -17
357786
qemu-system-x86_64: vfio_dma_map(0x557305420c30, 0xad000, 0x1000,
357786
                    0x7f89a920d000) = -17 (File exists)
357786
357786
CC: QEMU Stable <qemu-stable@nongnu.org>
357786
Acked-by: Jason Wang <jasowang@redhat.com>
357786
[peterx: rewrite the commit message]
357786
Signed-off-by: Peter Xu <peterx@redhat.com>
357786
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
357786
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
357786
(cherry picked from commit 36d2d52bdb45f5b753a61fdaf0fe7891f1f5b61d)
357786
Signed-off-by: Peter Xu <peterx@redhat.com>
357786
357786
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
357786
---
357786
 hw/i386/intel_iommu.c | 42 ++++++++++++++++++++++++++++++------------
357786
 1 file changed, 30 insertions(+), 12 deletions(-)
357786
357786
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
357786
index fb31de9..b359efd 100644
357786
--- a/hw/i386/intel_iommu.c
357786
+++ b/hw/i386/intel_iommu.c
357786
@@ -722,6 +722,15 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
357786
 
357786
 typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
357786
 
357786
+static int vtd_page_walk_one(IOMMUTLBEntry *entry, int level,
357786
+                             vtd_page_walk_hook hook_fn, void *private)
357786
+{
357786
+    assert(hook_fn);
357786
+    trace_vtd_page_walk_one(level, entry->iova, entry->translated_addr,
357786
+                            entry->addr_mask, entry->perm);
357786
+    return hook_fn(entry, private);
357786
+}
357786
+
357786
 /**
357786
  * vtd_page_walk_level - walk over specific level for IOVA range
357786
  *
357786
@@ -781,28 +790,37 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
357786
          */
357786
         entry_valid = read_cur | write_cur;
357786
 
357786
+        entry.target_as = &address_space_memory;
357786
+        entry.iova = iova & subpage_mask;
357786
+        entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
357786
+        entry.addr_mask = ~subpage_mask;
357786
+
357786
         if (vtd_is_last_slpte(slpte, level)) {
357786
-            entry.target_as = &address_space_memory;
357786
-            entry.iova = iova & subpage_mask;
357786
             /* NOTE: this is only meaningful if entry_valid == true */
357786
             entry.translated_addr = vtd_get_slpte_addr(slpte, aw);
357786
-            entry.addr_mask = ~subpage_mask;
357786
-            entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
357786
             if (!entry_valid && !notify_unmap) {
357786
                 trace_vtd_page_walk_skip_perm(iova, iova_next);
357786
                 goto next;
357786
             }
357786
-            trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr,
357786
-                                    entry.addr_mask, entry.perm);
357786
-            if (hook_fn) {
357786
-                ret = hook_fn(&entry, private);
357786
-                if (ret < 0) {
357786
-                    return ret;
357786
-                }
357786
+            ret = vtd_page_walk_one(&entry, level, hook_fn, private);
357786
+            if (ret < 0) {
357786
+                return ret;
357786
             }
357786
         } else {
357786
             if (!entry_valid) {
357786
-                trace_vtd_page_walk_skip_perm(iova, iova_next);
357786
+                if (notify_unmap) {
357786
+                    /*
357786
+                     * The whole entry is invalid; unmap it all.
357786
+                     * Translated address is meaningless, zero it.
357786
+                     */
357786
+                    entry.translated_addr = 0x0;
357786
+                    ret = vtd_page_walk_one(&entry, level, hook_fn, private);
357786
+                    if (ret < 0) {
357786
+                        return ret;
357786
+                    }
357786
+                } else {
357786
+                    trace_vtd_page_walk_skip_perm(iova, iova_next);
357786
+                }
357786
                 goto next;
357786
             }
357786
             ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, aw), iova,
357786
-- 
357786
1.8.3.1
357786