26ba25
From a4d88508d1d4f8995d15c1ed822104e46c7b9624 Mon Sep 17 00:00:00 2001
26ba25
From: Peter Xu <peterx@redhat.com>
26ba25
Date: Fri, 12 Oct 2018 07:58:40 +0100
26ba25
Subject: [PATCH 10/17] intel-iommu: add iommu lock
26ba25
26ba25
RH-Author: Peter Xu <peterx@redhat.com>
26ba25
Message-id: <20181012075846.25449-4-peterx@redhat.com>
26ba25
Patchwork-id: 82675
26ba25
O-Subject: [RHEL-8 qemu-kvm PATCH 3/9] intel-iommu: add iommu lock
26ba25
Bugzilla: 1450712
26ba25
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
26ba25
RH-Acked-by: Xiao Wang <jasowang@redhat.com>
26ba25
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
26ba25
26ba25
SECURITY IMPLICATION: this patch fixes a potential race when multiple
26ba25
threads access the IOMMU IOTLB cache.
26ba25
26ba25
Add a per-iommu big lock to protect IOMMU status.  Currently the only
26ba25
thing to be protected is the IOTLB/context cache, since that can be
26ba25
accessed even without BQL, e.g., in IO dataplane.
26ba25
26ba25
Note that we don't need to protect device page tables since that's fully
26ba25
controlled by the guest kernel.  However there is still possibility that
26ba25
malicious drivers will program the device to not obey the rule.  In that
26ba25
case QEMU can't really do anything useful, instead the guest itself will
26ba25
be responsible for all uncertainties.
26ba25
26ba25
CC: QEMU Stable <qemu-stable@nongnu.org>
26ba25
Reported-by: Fam Zheng <famz@redhat.com>
26ba25
Signed-off-by: Peter Xu <peterx@redhat.com>
26ba25
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
26ba25
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
26ba25
(cherry picked from commit 1d9efa73e12ddf361ea997c2d532cc4afa6674d1)
26ba25
Signed-off-by: Peter Xu <peterx@redhat.com>
26ba25
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
26ba25
---
26ba25
 hw/i386/intel_iommu.c         | 56 ++++++++++++++++++++++++++++++++++++-------
26ba25
 include/hw/i386/intel_iommu.h |  6 +++++
26ba25
 2 files changed, 53 insertions(+), 9 deletions(-)
26ba25
26ba25
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
26ba25
index 3df9045..8d4069d 100644
26ba25
--- a/hw/i386/intel_iommu.c
26ba25
+++ b/hw/i386/intel_iommu.c
26ba25
@@ -128,6 +128,16 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
26ba25
     return new_val;
26ba25
 }
26ba25
 
26ba25
+static inline void vtd_iommu_lock(IntelIOMMUState *s)
26ba25
+{
26ba25
+    qemu_mutex_lock(&s->iommu_lock);
26ba25
+}
26ba25
+
26ba25
+static inline void vtd_iommu_unlock(IntelIOMMUState *s)
26ba25
+{
26ba25
+    qemu_mutex_unlock(&s->iommu_lock);
26ba25
+}
26ba25
+
26ba25
 /* GHashTable functions */
26ba25
 static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
26ba25
 {
26ba25
@@ -172,9 +182,9 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
26ba25
 }
26ba25
 
26ba25
 /* Reset all the gen of VTDAddressSpace to zero and set the gen of
26ba25
- * IntelIOMMUState to 1.
26ba25
+ * IntelIOMMUState to 1.  Must be called with IOMMU lock held.
26ba25
  */
26ba25
-static void vtd_reset_context_cache(IntelIOMMUState *s)
26ba25
+static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
26ba25
 {
26ba25
     VTDAddressSpace *vtd_as;
26ba25
     VTDBus *vtd_bus;
26ba25
@@ -197,12 +207,20 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
26ba25
     s->context_cache_gen = 1;
26ba25
 }
26ba25
 
26ba25
-static void vtd_reset_iotlb(IntelIOMMUState *s)
26ba25
+/* Must be called with IOMMU lock held. */
26ba25
+static void vtd_reset_iotlb_locked(IntelIOMMUState *s)
26ba25
 {
26ba25
     assert(s->iotlb);
26ba25
     g_hash_table_remove_all(s->iotlb);
26ba25
 }
26ba25
 
26ba25
+static void vtd_reset_iotlb(IntelIOMMUState *s)
26ba25
+{
26ba25
+    vtd_iommu_lock(s);
26ba25
+    vtd_reset_iotlb_locked(s);
26ba25
+    vtd_iommu_unlock(s);
26ba25
+}
26ba25
+
26ba25
 static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
26ba25
                                   uint32_t level)
26ba25
 {
26ba25
@@ -215,6 +233,7 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
26ba25
     return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
26ba25
 }
26ba25
 
26ba25
+/* Must be called with IOMMU lock held */
26ba25
 static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
26ba25
                                        hwaddr addr)
26ba25
 {
26ba25
@@ -235,6 +254,7 @@ out:
26ba25
     return entry;
26ba25
 }
26ba25
 
26ba25
+/* Must be with IOMMU lock held */
26ba25
 static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
26ba25
                              uint16_t domain_id, hwaddr addr, uint64_t slpte,
26ba25
                              uint8_t access_flags, uint32_t level)
26ba25
@@ -246,7 +266,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
26ba25
     trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
26ba25
     if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
26ba25
         trace_vtd_iotlb_reset("iotlb exceeds size limit");
26ba25
-        vtd_reset_iotlb(s);
26ba25
+        vtd_reset_iotlb_locked(s);
26ba25
     }
26ba25
 
26ba25
     entry->gfn = gfn;
26ba25
@@ -1106,7 +1126,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
26ba25
     IntelIOMMUState *s = vtd_as->iommu_state;
26ba25
     VTDContextEntry ce;
26ba25
     uint8_t bus_num = pci_bus_num(bus);
26ba25
-    VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
26ba25
+    VTDContextCacheEntry *cc_entry;
26ba25
     uint64_t slpte, page_mask;
26ba25
     uint32_t level;
26ba25
     uint16_t source_id = vtd_make_source_id(bus_num, devfn);
26ba25
@@ -1123,6 +1143,10 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
26ba25
      */
26ba25
     assert(!vtd_is_interrupt_addr(addr));
26ba25
 
26ba25
+    vtd_iommu_lock(s);
26ba25
+
26ba25
+    cc_entry = &vtd_as->context_cache_entry;
26ba25
+
26ba25
     /* Try to fetch slpte form IOTLB */
26ba25
     iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
26ba25
     if (iotlb_entry) {
26ba25
@@ -1182,7 +1206,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
26ba25
          * IOMMU region can be swapped back.
26ba25
          */
26ba25
         vtd_pt_enable_fast_path(s, source_id);
26ba25
-
26ba25
+        vtd_iommu_unlock(s);
26ba25
         return true;
26ba25
     }
26ba25
 
26ba25
@@ -1203,6 +1227,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
26ba25
     vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
26ba25
                      access_flags, level);
26ba25
 out:
26ba25
+    vtd_iommu_unlock(s);
26ba25
     entry->iova = addr & page_mask;
26ba25
     entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
26ba25
     entry->addr_mask = ~page_mask;
26ba25
@@ -1210,6 +1235,7 @@ out:
26ba25
     return true;
26ba25
 
26ba25
 error:
26ba25
+    vtd_iommu_unlock(s);
26ba25
     entry->iova = 0;
26ba25
     entry->translated_addr = 0;
26ba25
     entry->addr_mask = 0;
26ba25
@@ -1258,10 +1284,13 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
26ba25
 static void vtd_context_global_invalidate(IntelIOMMUState *s)
26ba25
 {
26ba25
     trace_vtd_inv_desc_cc_global();
26ba25
+    /* Protects context cache */
26ba25
+    vtd_iommu_lock(s);
26ba25
     s->context_cache_gen++;
26ba25
     if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
26ba25
-        vtd_reset_context_cache(s);
26ba25
+        vtd_reset_context_cache_locked(s);
26ba25
     }
26ba25
+    vtd_iommu_unlock(s);
26ba25
     vtd_switch_address_space_all(s);
26ba25
     /*
26ba25
      * From VT-d spec 6.5.2.1, a global context entry invalidation
26ba25
@@ -1313,7 +1342,9 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
26ba25
             if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
26ba25
                 trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
26ba25
                                              VTD_PCI_FUNC(devfn_it));
26ba25
+                vtd_iommu_lock(s);
26ba25
                 vtd_as->context_cache_entry.context_cache_gen = 0;
26ba25
+                vtd_iommu_unlock(s);
26ba25
                 /*
26ba25
                  * Do switch address space when needed, in case if the
26ba25
                  * device passthrough bit is switched.
26ba25
@@ -1377,8 +1408,10 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
26ba25
 
26ba25
     trace_vtd_inv_desc_iotlb_domain(domain_id);
26ba25
 
26ba25
+    vtd_iommu_lock(s);
26ba25
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
26ba25
                                 &domain_id);
26ba25
+    vtd_iommu_unlock(s);
26ba25
 
26ba25
     QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
26ba25
         if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
26ba25
@@ -1426,7 +1459,9 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
26ba25
     info.domain_id = domain_id;
26ba25
     info.addr = addr;
26ba25
     info.mask = ~((1 << am) - 1);
26ba25
+    vtd_iommu_lock(s);
26ba25
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
26ba25
+    vtd_iommu_unlock(s);
26ba25
     vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
26ba25
 }
26ba25
 
26ba25
@@ -2929,8 +2964,10 @@ static void vtd_init(IntelIOMMUState *s)
26ba25
         s->cap |= VTD_CAP_CM;
26ba25
     }
26ba25
 
26ba25
-    vtd_reset_context_cache(s);
26ba25
-    vtd_reset_iotlb(s);
26ba25
+    vtd_iommu_lock(s);
26ba25
+    vtd_reset_context_cache_locked(s);
26ba25
+    vtd_reset_iotlb_locked(s);
26ba25
+    vtd_iommu_unlock(s);
26ba25
 
26ba25
     /* Define registers with default values and bit semantics */
26ba25
     vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
26ba25
@@ -3070,6 +3107,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
26ba25
     }
26ba25
 
26ba25
     QLIST_INIT(&s->vtd_as_with_notifiers);
26ba25
+    qemu_mutex_init(&s->iommu_lock);
26ba25
     memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
26ba25
     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
26ba25
                           "intel_iommu", DMAR_REG_SIZE);
26ba25
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
26ba25
index 032e33b..016e74b 100644
26ba25
--- a/include/hw/i386/intel_iommu.h
26ba25
+++ b/include/hw/i386/intel_iommu.h
26ba25
@@ -300,6 +300,12 @@ struct IntelIOMMUState {
26ba25
     OnOffAuto intr_eim;             /* Toggle for EIM cabability */
26ba25
     bool buggy_eim;                 /* Force buggy EIM unless eim=off */
26ba25
     uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
26ba25
+
26ba25
+    /*
26ba25
+     * Protects IOMMU states in general.  Currently it protects the
26ba25
+     * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace.
26ba25
+     */
26ba25
+    QemuMutex iommu_lock;
26ba25
 };
26ba25
 
26ba25
 /* Find the VTD Address space associated with the given bus pointer,
26ba25
-- 
26ba25
1.8.3.1
26ba25