From a4d88508d1d4f8995d15c1ed822104e46c7b9624 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 12 Oct 2018 07:58:40 +0100
Subject: [PATCH 10/17] intel-iommu: add iommu lock

RH-Author: Peter Xu <peterx@redhat.com>
Message-id: <20181012075846.25449-4-peterx@redhat.com>
Patchwork-id: 82675
O-Subject: [RHEL-8 qemu-kvm PATCH 3/9] intel-iommu: add iommu lock
Bugzilla: 1450712
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
RH-Acked-by: Xiao Wang <jasowang@redhat.com>
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>

SECURITY IMPLICATION: this patch fixes a potential race when multiple
threads access the IOMMU IOTLB cache.

Add a per-iommu big lock to protect IOMMU status.  Currently the only
thing to be protected is the IOTLB/context cache, since that can be
accessed even without BQL, e.g., in IO dataplane.

Note that we don't need to protect device page tables since that's fully
controlled by the guest kernel.  However there is still possibility that
malicious drivers will program the device to not obey the rule.  In that
case QEMU can't really do anything useful, instead the guest itself will
be responsible for all uncertainties.

CC: QEMU Stable <qemu-stable@nongnu.org>
Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 1d9efa73e12ddf361ea997c2d532cc4afa6674d1)
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
---
 hw/i386/intel_iommu.c         | 56 ++++++++++++++++++++++++++++++++++++-------
 include/hw/i386/intel_iommu.h |  6 +++++
 2 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3df9045..8d4069d 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -128,6 +128,16 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
     return new_val;
 }
 
+static inline void vtd_iommu_lock(IntelIOMMUState *s)
+{
+    qemu_mutex_lock(&s->iommu_lock);
+}
+
+static inline void vtd_iommu_unlock(IntelIOMMUState *s)
+{
+    qemu_mutex_unlock(&s->iommu_lock);
+}
+
 /* GHashTable functions */
 static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
 {
@@ -172,9 +182,9 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
 }
 
 /* Reset all the gen of VTDAddressSpace to zero and set the gen of
- * IntelIOMMUState to 1.
+ * IntelIOMMUState to 1.  Must be called with IOMMU lock held.
  */
-static void vtd_reset_context_cache(IntelIOMMUState *s)
+static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
 {
     VTDAddressSpace *vtd_as;
     VTDBus *vtd_bus;
@@ -197,12 +207,20 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
     s->context_cache_gen = 1;
 }
 
-static void vtd_reset_iotlb(IntelIOMMUState *s)
+/* Must be called with IOMMU lock held. */
+static void vtd_reset_iotlb_locked(IntelIOMMUState *s)
 {
     assert(s->iotlb);
     g_hash_table_remove_all(s->iotlb);
 }
 
+static void vtd_reset_iotlb(IntelIOMMUState *s)
+{
+    vtd_iommu_lock(s);
+    vtd_reset_iotlb_locked(s);
+    vtd_iommu_unlock(s);
+}
+
 static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
                                   uint32_t level)
 {
@@ -215,6 +233,7 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
     return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
 }
 
+/* Must be called with IOMMU lock held */
 static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
                                        hwaddr addr)
 {
@@ -235,6 +254,7 @@ out:
     return entry;
 }
 
+/* Must be with IOMMU lock held */
 static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
                              uint16_t domain_id, hwaddr addr, uint64_t slpte,
                              uint8_t access_flags, uint32_t level)
@@ -246,7 +266,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
     trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
     if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
         trace_vtd_iotlb_reset("iotlb exceeds size limit");
-        vtd_reset_iotlb(s);
+        vtd_reset_iotlb_locked(s);
     }
 
     entry->gfn = gfn;
@@ -1106,7 +1126,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
     IntelIOMMUState *s = vtd_as->iommu_state;
     VTDContextEntry ce;
     uint8_t bus_num = pci_bus_num(bus);
-    VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
+    VTDContextCacheEntry *cc_entry;
     uint64_t slpte, page_mask;
     uint32_t level;
     uint16_t source_id = vtd_make_source_id(bus_num, devfn);
@@ -1123,6 +1143,10 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
      */
     assert(!vtd_is_interrupt_addr(addr));
 
+    vtd_iommu_lock(s);
+
+    cc_entry = &vtd_as->context_cache_entry;
+
     /* Try to fetch slpte form IOTLB */
     iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
     if (iotlb_entry) {
@@ -1182,7 +1206,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
          * IOMMU region can be swapped back.
          */
         vtd_pt_enable_fast_path(s, source_id);
-
+        vtd_iommu_unlock(s);
         return true;
     }
 
@@ -1203,6 +1227,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
     vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
                      access_flags, level);
 out:
+    vtd_iommu_unlock(s);
     entry->iova = addr & page_mask;
     entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
     entry->addr_mask = ~page_mask;
@@ -1210,6 +1235,7 @@ out:
     return true;
 
 error:
+    vtd_iommu_unlock(s);
     entry->iova = 0;
     entry->translated_addr = 0;
     entry->addr_mask = 0;
@@ -1258,10 +1284,13 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
 static void vtd_context_global_invalidate(IntelIOMMUState *s)
 {
     trace_vtd_inv_desc_cc_global();
+    /* Protects context cache */
+    vtd_iommu_lock(s);
     s->context_cache_gen++;
     if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
-        vtd_reset_context_cache(s);
+        vtd_reset_context_cache_locked(s);
     }
+    vtd_iommu_unlock(s);
     vtd_switch_address_space_all(s);
     /*
      * From VT-d spec 6.5.2.1, a global context entry invalidation
@@ -1313,7 +1342,9 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
             if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
                 trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
                                              VTD_PCI_FUNC(devfn_it));
+                vtd_iommu_lock(s);
                 vtd_as->context_cache_entry.context_cache_gen = 0;
+                vtd_iommu_unlock(s);
                 /*
                  * Do switch address space when needed, in case if the
                  * device passthrough bit is switched.
@@ -1377,8 +1408,10 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
 
     trace_vtd_inv_desc_iotlb_domain(domain_id);
 
+    vtd_iommu_lock(s);
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
                                 &domain_id);
+    vtd_iommu_unlock(s);
 
     QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
         if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
@@ -1426,7 +1459,9 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
     info.domain_id = domain_id;
     info.addr = addr;
     info.mask = ~((1 << am) - 1);
+    vtd_iommu_lock(s);
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
+    vtd_iommu_unlock(s);
     vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
 }
 
@@ -2929,8 +2964,10 @@ static void vtd_init(IntelIOMMUState *s)
         s->cap |= VTD_CAP_CM;
     }
 
-    vtd_reset_context_cache(s);
-    vtd_reset_iotlb(s);
+    vtd_iommu_lock(s);
+    vtd_reset_context_cache_locked(s);
+    vtd_reset_iotlb_locked(s);
+    vtd_iommu_unlock(s);
 
     /* Define registers with default values and bit semantics */
     vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
@@ -3070,6 +3107,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     }
 
     QLIST_INIT(&s->vtd_as_with_notifiers);
+    qemu_mutex_init(&s->iommu_lock);
     memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                           "intel_iommu", DMAR_REG_SIZE);
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 032e33b..016e74b 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -300,6 +300,12 @@ struct IntelIOMMUState {
     OnOffAuto intr_eim;             /* Toggle for EIM cabability */
     bool buggy_eim;                 /* Force buggy EIM unless eim=off */
     uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
+
+    /*
+     * Protects IOMMU states in general.  Currently it protects the
+     * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace.
+     */
+    QemuMutex iommu_lock;
 };
 
 /* Find the VTD Address space associated with the given bus pointer,
-- 
1.8.3.1