Blame SOURCES/kvm-intel-iommu-add-iommu-lock.patch

1bdc94
From c865a8af8574b64c06b9cbdf080d93e75dd8019c Mon Sep 17 00:00:00 2001
1bdc94
From: Peter Xu <peterx@redhat.com>
1bdc94
Date: Mon, 3 Sep 2018 04:52:35 +0200
1bdc94
Subject: [PATCH 20/29] intel-iommu: add iommu lock
1bdc94
1bdc94
RH-Author: Peter Xu <peterx@redhat.com>
1bdc94
Message-id: <20180903045241.6456-4-peterx@redhat.com>
1bdc94
Patchwork-id: 82022
1bdc94
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 3/9] intel-iommu: add iommu lock
1bdc94
Bugzilla: 1623859
1bdc94
RH-Acked-by: Xiao Wang <jasowang@redhat.com>
1bdc94
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
1bdc94
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
1bdc94
SECURITY IMPLICATION: this patch fixes a potential race when multiple
1bdc94
threads access the IOMMU IOTLB cache.
1bdc94
1bdc94
Add a per-iommu big lock to protect IOMMU status.  Currently the only
1bdc94
thing to be protected is the IOTLB/context cache, since that can be
1bdc94
accessed even without BQL, e.g., in IO dataplane.
1bdc94
1bdc94
Note that we don't need to protect device page tables since that's fully
1bdc94
controlled by the guest kernel.  However there is still possibility that
1bdc94
malicious drivers will program the device to not obey the rule.  In that
1bdc94
case QEMU can't really do anything useful, instead the guest itself will
1bdc94
be responsible for all uncertainties.
1bdc94
1bdc94
CC: QEMU Stable <qemu-stable@nongnu.org>
1bdc94
Reported-by: Fam Zheng <famz@redhat.com>
1bdc94
Signed-off-by: Peter Xu <peterx@redhat.com>
1bdc94
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
(cherry picked from commit 1d9efa73e12ddf361ea997c2d532cc4afa6674d1)
1bdc94
Signed-off-by: Peter Xu <peterx@redhat.com>
1bdc94
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
1bdc94
---
1bdc94
 hw/i386/intel_iommu.c         | 56 ++++++++++++++++++++++++++++++++++++-------
1bdc94
 include/hw/i386/intel_iommu.h |  6 +++++
1bdc94
 2 files changed, 53 insertions(+), 9 deletions(-)
1bdc94
1bdc94
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
1bdc94
index 3df9045..8d4069d 100644
1bdc94
--- a/hw/i386/intel_iommu.c
1bdc94
+++ b/hw/i386/intel_iommu.c
1bdc94
@@ -128,6 +128,16 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
1bdc94
     return new_val;
1bdc94
 }
1bdc94
 
1bdc94
+static inline void vtd_iommu_lock(IntelIOMMUState *s)
1bdc94
+{
1bdc94
+    qemu_mutex_lock(&s->iommu_lock);
1bdc94
+}
1bdc94
+
1bdc94
+static inline void vtd_iommu_unlock(IntelIOMMUState *s)
1bdc94
+{
1bdc94
+    qemu_mutex_unlock(&s->iommu_lock);
1bdc94
+}
1bdc94
+
1bdc94
 /* GHashTable functions */
1bdc94
 static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
1bdc94
 {
1bdc94
@@ -172,9 +182,9 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
1bdc94
 }
1bdc94
 
1bdc94
 /* Reset all the gen of VTDAddressSpace to zero and set the gen of
1bdc94
- * IntelIOMMUState to 1.
1bdc94
+ * IntelIOMMUState to 1.  Must be called with IOMMU lock held.
1bdc94
  */
1bdc94
-static void vtd_reset_context_cache(IntelIOMMUState *s)
1bdc94
+static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
1bdc94
 {
1bdc94
     VTDAddressSpace *vtd_as;
1bdc94
     VTDBus *vtd_bus;
1bdc94
@@ -197,12 +207,20 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
1bdc94
     s->context_cache_gen = 1;
1bdc94
 }
1bdc94
 
1bdc94
-static void vtd_reset_iotlb(IntelIOMMUState *s)
1bdc94
+/* Must be called with IOMMU lock held. */
1bdc94
+static void vtd_reset_iotlb_locked(IntelIOMMUState *s)
1bdc94
 {
1bdc94
     assert(s->iotlb);
1bdc94
     g_hash_table_remove_all(s->iotlb);
1bdc94
 }
1bdc94
 
1bdc94
+static void vtd_reset_iotlb(IntelIOMMUState *s)
1bdc94
+{
1bdc94
+    vtd_iommu_lock(s);
1bdc94
+    vtd_reset_iotlb_locked(s);
1bdc94
+    vtd_iommu_unlock(s);
1bdc94
+}
1bdc94
+
1bdc94
 static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
1bdc94
                                   uint32_t level)
1bdc94
 {
1bdc94
@@ -215,6 +233,7 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
1bdc94
     return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
1bdc94
 }
1bdc94
 
1bdc94
+/* Must be called with IOMMU lock held */
1bdc94
 static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
1bdc94
                                        hwaddr addr)
1bdc94
 {
1bdc94
@@ -235,6 +254,7 @@ out:
1bdc94
     return entry;
1bdc94
 }
1bdc94
 
1bdc94
+/* Must be with IOMMU lock held */
1bdc94
 static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
1bdc94
                              uint16_t domain_id, hwaddr addr, uint64_t slpte,
1bdc94
                              uint8_t access_flags, uint32_t level)
1bdc94
@@ -246,7 +266,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
1bdc94
     trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
1bdc94
     if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
1bdc94
         trace_vtd_iotlb_reset("iotlb exceeds size limit");
1bdc94
-        vtd_reset_iotlb(s);
1bdc94
+        vtd_reset_iotlb_locked(s);
1bdc94
     }
1bdc94
 
1bdc94
     entry->gfn = gfn;
1bdc94
@@ -1106,7 +1126,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
1bdc94
     IntelIOMMUState *s = vtd_as->iommu_state;
1bdc94
     VTDContextEntry ce;
1bdc94
     uint8_t bus_num = pci_bus_num(bus);
1bdc94
-    VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
1bdc94
+    VTDContextCacheEntry *cc_entry;
1bdc94
     uint64_t slpte, page_mask;
1bdc94
     uint32_t level;
1bdc94
     uint16_t source_id = vtd_make_source_id(bus_num, devfn);
1bdc94
@@ -1123,6 +1143,10 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
1bdc94
      */
1bdc94
     assert(!vtd_is_interrupt_addr(addr));
1bdc94
 
1bdc94
+    vtd_iommu_lock(s);
1bdc94
+
1bdc94
+    cc_entry = &vtd_as->context_cache_entry;
1bdc94
+
1bdc94
     /* Try to fetch slpte form IOTLB */
1bdc94
     iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
1bdc94
     if (iotlb_entry) {
1bdc94
@@ -1182,7 +1206,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
1bdc94
          * IOMMU region can be swapped back.
1bdc94
          */
1bdc94
         vtd_pt_enable_fast_path(s, source_id);
1bdc94
-
1bdc94
+        vtd_iommu_unlock(s);
1bdc94
         return true;
1bdc94
     }
1bdc94
 
1bdc94
@@ -1203,6 +1227,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
1bdc94
     vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
1bdc94
                      access_flags, level);
1bdc94
 out:
1bdc94
+    vtd_iommu_unlock(s);
1bdc94
     entry->iova = addr & page_mask;
1bdc94
     entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
1bdc94
     entry->addr_mask = ~page_mask;
1bdc94
@@ -1210,6 +1235,7 @@ out:
1bdc94
     return true;
1bdc94
 
1bdc94
 error:
1bdc94
+    vtd_iommu_unlock(s);
1bdc94
     entry->iova = 0;
1bdc94
     entry->translated_addr = 0;
1bdc94
     entry->addr_mask = 0;
1bdc94
@@ -1258,10 +1284,13 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
1bdc94
 static void vtd_context_global_invalidate(IntelIOMMUState *s)
1bdc94
 {
1bdc94
     trace_vtd_inv_desc_cc_global();
1bdc94
+    /* Protects context cache */
1bdc94
+    vtd_iommu_lock(s);
1bdc94
     s->context_cache_gen++;
1bdc94
     if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
1bdc94
-        vtd_reset_context_cache(s);
1bdc94
+        vtd_reset_context_cache_locked(s);
1bdc94
     }
1bdc94
+    vtd_iommu_unlock(s);
1bdc94
     vtd_switch_address_space_all(s);
1bdc94
     /*
1bdc94
      * From VT-d spec 6.5.2.1, a global context entry invalidation
1bdc94
@@ -1313,7 +1342,9 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
1bdc94
             if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
1bdc94
                 trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
1bdc94
                                              VTD_PCI_FUNC(devfn_it));
1bdc94
+                vtd_iommu_lock(s);
1bdc94
                 vtd_as->context_cache_entry.context_cache_gen = 0;
1bdc94
+                vtd_iommu_unlock(s);
1bdc94
                 /*
1bdc94
                  * Do switch address space when needed, in case if the
1bdc94
                  * device passthrough bit is switched.
1bdc94
@@ -1377,8 +1408,10 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
1bdc94
 
1bdc94
     trace_vtd_inv_desc_iotlb_domain(domain_id);
1bdc94
 
1bdc94
+    vtd_iommu_lock(s);
1bdc94
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
1bdc94
                                 &domain_id);
1bdc94
+    vtd_iommu_unlock(s);
1bdc94
 
1bdc94
     QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
1bdc94
         if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
1bdc94
@@ -1426,7 +1459,9 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
1bdc94
     info.domain_id = domain_id;
1bdc94
     info.addr = addr;
1bdc94
     info.mask = ~((1 << am) - 1);
1bdc94
+    vtd_iommu_lock(s);
1bdc94
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
1bdc94
+    vtd_iommu_unlock(s);
1bdc94
     vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
1bdc94
 }
1bdc94
 
1bdc94
@@ -2929,8 +2964,10 @@ static void vtd_init(IntelIOMMUState *s)
1bdc94
         s->cap |= VTD_CAP_CM;
1bdc94
     }
1bdc94
 
1bdc94
-    vtd_reset_context_cache(s);
1bdc94
-    vtd_reset_iotlb(s);
1bdc94
+    vtd_iommu_lock(s);
1bdc94
+    vtd_reset_context_cache_locked(s);
1bdc94
+    vtd_reset_iotlb_locked(s);
1bdc94
+    vtd_iommu_unlock(s);
1bdc94
 
1bdc94
     /* Define registers with default values and bit semantics */
1bdc94
     vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
1bdc94
@@ -3070,6 +3107,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
1bdc94
     }
1bdc94
 
1bdc94
     QLIST_INIT(&s->vtd_as_with_notifiers);
1bdc94
+    qemu_mutex_init(&s->iommu_lock);
1bdc94
     memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
1bdc94
     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
1bdc94
                           "intel_iommu", DMAR_REG_SIZE);
1bdc94
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
1bdc94
index 032e33b..016e74b 100644
1bdc94
--- a/include/hw/i386/intel_iommu.h
1bdc94
+++ b/include/hw/i386/intel_iommu.h
1bdc94
@@ -300,6 +300,12 @@ struct IntelIOMMUState {
1bdc94
     OnOffAuto intr_eim;             /* Toggle for EIM cabability */
1bdc94
     bool buggy_eim;                 /* Force buggy EIM unless eim=off */
1bdc94
     uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
1bdc94
+
1bdc94
+    /*
1bdc94
+     * Protects IOMMU states in general.  Currently it protects the
1bdc94
+     * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace.
1bdc94
+     */
1bdc94
+    QemuMutex iommu_lock;
1bdc94
 };
1bdc94
 
1bdc94
 /* Find the VTD Address space associated with the given bus pointer,
1bdc94
-- 
1bdc94
1.8.3.1
1bdc94