|
|
1bdc94 |
From c865a8af8574b64c06b9cbdf080d93e75dd8019c Mon Sep 17 00:00:00 2001
|
|
|
1bdc94 |
From: Peter Xu <peterx@redhat.com>
|
|
|
1bdc94 |
Date: Mon, 3 Sep 2018 04:52:35 +0200
|
|
|
1bdc94 |
Subject: [PATCH 20/29] intel-iommu: add iommu lock
|
|
|
1bdc94 |
|
|
|
1bdc94 |
RH-Author: Peter Xu <peterx@redhat.com>
|
|
|
1bdc94 |
Message-id: <20180903045241.6456-4-peterx@redhat.com>
|
|
|
1bdc94 |
Patchwork-id: 82022
|
|
|
1bdc94 |
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 3/9] intel-iommu: add iommu lock
|
|
|
1bdc94 |
Bugzilla: 1623859
|
|
|
1bdc94 |
RH-Acked-by: Xiao Wang <jasowang@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
1bdc94 |
|
|
|
1bdc94 |
SECURITY IMPLICATION: this patch fixes a potential race when multiple
|
|
|
1bdc94 |
threads access the IOMMU IOTLB cache.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Add a per-iommu big lock to protect IOMMU status. Currently the only
|
|
|
1bdc94 |
thing to be protected is the IOTLB/context cache, since that can be
|
|
|
1bdc94 |
accessed even without BQL, e.g., in IO dataplane.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Note that we don't need to protect device page tables since that's fully
|
|
|
1bdc94 |
controlled by the guest kernel. However, there is still a possibility that
|
|
|
1bdc94 |
malicious drivers will program the device to not obey the rule. In that
|
|
|
1bdc94 |
case QEMU can't really do anything useful; instead, the guest itself will
|
|
|
1bdc94 |
be responsible for all uncertainties.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
CC: QEMU Stable <qemu-stable@nongnu.org>
|
|
|
1bdc94 |
Reported-by: Fam Zheng <famz@redhat.com>
|
|
|
1bdc94 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
1bdc94 |
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
1bdc94 |
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
1bdc94 |
(cherry picked from commit 1d9efa73e12ddf361ea997c2d532cc4afa6674d1)
|
|
|
1bdc94 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
1bdc94 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
1bdc94 |
---
|
|
|
1bdc94 |
hw/i386/intel_iommu.c | 56 ++++++++++++++++++++++++++++++++++++-------
|
|
|
1bdc94 |
include/hw/i386/intel_iommu.h | 6 +++++
|
|
|
1bdc94 |
2 files changed, 53 insertions(+), 9 deletions(-)
|
|
|
1bdc94 |
|
|
|
1bdc94 |
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
|
|
|
1bdc94 |
index 3df9045..8d4069d 100644
|
|
|
1bdc94 |
--- a/hw/i386/intel_iommu.c
|
|
|
1bdc94 |
+++ b/hw/i386/intel_iommu.c
|
|
|
1bdc94 |
@@ -128,6 +128,16 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
|
|
|
1bdc94 |
return new_val;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+static inline void vtd_iommu_lock(IntelIOMMUState *s)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ qemu_mutex_lock(&s->iommu_lock);
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+static inline void vtd_iommu_unlock(IntelIOMMUState *s)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ qemu_mutex_unlock(&s->iommu_lock);
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
/* GHashTable functions */
|
|
|
1bdc94 |
static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
|
|
|
1bdc94 |
{
|
|
|
1bdc94 |
@@ -172,9 +182,9 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
/* Reset all the gen of VTDAddressSpace to zero and set the gen of
|
|
|
1bdc94 |
- * IntelIOMMUState to 1.
|
|
|
1bdc94 |
+ * IntelIOMMUState to 1. Must be called with IOMMU lock held.
|
|
|
1bdc94 |
*/
|
|
|
1bdc94 |
-static void vtd_reset_context_cache(IntelIOMMUState *s)
|
|
|
1bdc94 |
+static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
|
|
|
1bdc94 |
{
|
|
|
1bdc94 |
VTDAddressSpace *vtd_as;
|
|
|
1bdc94 |
VTDBus *vtd_bus;
|
|
|
1bdc94 |
@@ -197,12 +207,20 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
|
|
|
1bdc94 |
s->context_cache_gen = 1;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
-static void vtd_reset_iotlb(IntelIOMMUState *s)
|
|
|
1bdc94 |
+/* Must be called with IOMMU lock held. */
|
|
|
1bdc94 |
+static void vtd_reset_iotlb_locked(IntelIOMMUState *s)
|
|
|
1bdc94 |
{
|
|
|
1bdc94 |
assert(s->iotlb);
|
|
|
1bdc94 |
g_hash_table_remove_all(s->iotlb);
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+static void vtd_reset_iotlb(IntelIOMMUState *s)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ vtd_iommu_lock(s);
|
|
|
1bdc94 |
+ vtd_reset_iotlb_locked(s);
|
|
|
1bdc94 |
+ vtd_iommu_unlock(s);
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
|
|
|
1bdc94 |
uint32_t level)
|
|
|
1bdc94 |
{
|
|
|
1bdc94 |
@@ -215,6 +233,7 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
|
|
|
1bdc94 |
return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+/* Must be called with IOMMU lock held */
|
|
|
1bdc94 |
static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
|
|
|
1bdc94 |
hwaddr addr)
|
|
|
1bdc94 |
{
|
|
|
1bdc94 |
@@ -235,6 +254,7 @@ out:
|
|
|
1bdc94 |
return entry;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+/* Must be called with IOMMU lock held */
|
|
|
1bdc94 |
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
|
|
|
1bdc94 |
uint16_t domain_id, hwaddr addr, uint64_t slpte,
|
|
|
1bdc94 |
uint8_t access_flags, uint32_t level)
|
|
|
1bdc94 |
@@ -246,7 +266,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
|
|
|
1bdc94 |
trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
|
|
|
1bdc94 |
if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
|
|
|
1bdc94 |
trace_vtd_iotlb_reset("iotlb exceeds size limit");
|
|
|
1bdc94 |
- vtd_reset_iotlb(s);
|
|
|
1bdc94 |
+ vtd_reset_iotlb_locked(s);
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
entry->gfn = gfn;
|
|
|
1bdc94 |
@@ -1106,7 +1126,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
|
|
|
1bdc94 |
IntelIOMMUState *s = vtd_as->iommu_state;
|
|
|
1bdc94 |
VTDContextEntry ce;
|
|
|
1bdc94 |
uint8_t bus_num = pci_bus_num(bus);
|
|
|
1bdc94 |
- VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
|
|
|
1bdc94 |
+ VTDContextCacheEntry *cc_entry;
|
|
|
1bdc94 |
uint64_t slpte, page_mask;
|
|
|
1bdc94 |
uint32_t level;
|
|
|
1bdc94 |
uint16_t source_id = vtd_make_source_id(bus_num, devfn);
|
|
|
1bdc94 |
@@ -1123,6 +1143,10 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
|
|
|
1bdc94 |
*/
|
|
|
1bdc94 |
assert(!vtd_is_interrupt_addr(addr));
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+ vtd_iommu_lock(s);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ cc_entry = &vtd_as->context_cache_entry;
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
/* Try to fetch slpte form IOTLB */
|
|
|
1bdc94 |
iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
|
|
|
1bdc94 |
if (iotlb_entry) {
|
|
|
1bdc94 |
@@ -1182,7 +1206,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
|
|
|
1bdc94 |
* IOMMU region can be swapped back.
|
|
|
1bdc94 |
*/
|
|
|
1bdc94 |
vtd_pt_enable_fast_path(s, source_id);
|
|
|
1bdc94 |
-
|
|
|
1bdc94 |
+ vtd_iommu_unlock(s);
|
|
|
1bdc94 |
return true;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
@@ -1203,6 +1227,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
|
|
|
1bdc94 |
vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
|
|
|
1bdc94 |
access_flags, level);
|
|
|
1bdc94 |
out:
|
|
|
1bdc94 |
+ vtd_iommu_unlock(s);
|
|
|
1bdc94 |
entry->iova = addr & page_mask;
|
|
|
1bdc94 |
entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
|
|
|
1bdc94 |
entry->addr_mask = ~page_mask;
|
|
|
1bdc94 |
@@ -1210,6 +1235,7 @@ out:
|
|
|
1bdc94 |
return true;
|
|
|
1bdc94 |
|
|
|
1bdc94 |
error:
|
|
|
1bdc94 |
+ vtd_iommu_unlock(s);
|
|
|
1bdc94 |
entry->iova = 0;
|
|
|
1bdc94 |
entry->translated_addr = 0;
|
|
|
1bdc94 |
entry->addr_mask = 0;
|
|
|
1bdc94 |
@@ -1258,10 +1284,13 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
|
|
|
1bdc94 |
static void vtd_context_global_invalidate(IntelIOMMUState *s)
|
|
|
1bdc94 |
{
|
|
|
1bdc94 |
trace_vtd_inv_desc_cc_global();
|
|
|
1bdc94 |
+ /* Protects context cache */
|
|
|
1bdc94 |
+ vtd_iommu_lock(s);
|
|
|
1bdc94 |
s->context_cache_gen++;
|
|
|
1bdc94 |
if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
|
|
|
1bdc94 |
- vtd_reset_context_cache(s);
|
|
|
1bdc94 |
+ vtd_reset_context_cache_locked(s);
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
+ vtd_iommu_unlock(s);
|
|
|
1bdc94 |
vtd_switch_address_space_all(s);
|
|
|
1bdc94 |
/*
|
|
|
1bdc94 |
* From VT-d spec 6.5.2.1, a global context entry invalidation
|
|
|
1bdc94 |
@@ -1313,7 +1342,9 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
|
|
|
1bdc94 |
if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
|
|
|
1bdc94 |
trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
|
|
|
1bdc94 |
VTD_PCI_FUNC(devfn_it));
|
|
|
1bdc94 |
+ vtd_iommu_lock(s);
|
|
|
1bdc94 |
vtd_as->context_cache_entry.context_cache_gen = 0;
|
|
|
1bdc94 |
+ vtd_iommu_unlock(s);
|
|
|
1bdc94 |
/*
|
|
|
1bdc94 |
* Do switch address space when needed, in case if the
|
|
|
1bdc94 |
* device passthrough bit is switched.
|
|
|
1bdc94 |
@@ -1377,8 +1408,10 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
|
|
|
1bdc94 |
|
|
|
1bdc94 |
trace_vtd_inv_desc_iotlb_domain(domain_id);
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+ vtd_iommu_lock(s);
|
|
|
1bdc94 |
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
|
|
|
1bdc94 |
&domain_id);
|
|
|
1bdc94 |
+ vtd_iommu_unlock(s);
|
|
|
1bdc94 |
|
|
|
1bdc94 |
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
|
|
|
1bdc94 |
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
|
|
|
1bdc94 |
@@ -1426,7 +1459,9 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
|
|
|
1bdc94 |
info.domain_id = domain_id;
|
|
|
1bdc94 |
info.addr = addr;
|
|
|
1bdc94 |
info.mask = ~((1 << am) - 1);
|
|
|
1bdc94 |
+ vtd_iommu_lock(s);
|
|
|
1bdc94 |
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
|
|
|
1bdc94 |
+ vtd_iommu_unlock(s);
|
|
|
1bdc94 |
vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
@@ -2929,8 +2964,10 @@ static void vtd_init(IntelIOMMUState *s)
|
|
|
1bdc94 |
s->cap |= VTD_CAP_CM;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
- vtd_reset_context_cache(s);
|
|
|
1bdc94 |
- vtd_reset_iotlb(s);
|
|
|
1bdc94 |
+ vtd_iommu_lock(s);
|
|
|
1bdc94 |
+ vtd_reset_context_cache_locked(s);
|
|
|
1bdc94 |
+ vtd_reset_iotlb_locked(s);
|
|
|
1bdc94 |
+ vtd_iommu_unlock(s);
|
|
|
1bdc94 |
|
|
|
1bdc94 |
/* Define registers with default values and bit semantics */
|
|
|
1bdc94 |
vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
|
|
|
1bdc94 |
@@ -3070,6 +3107,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
QLIST_INIT(&s->vtd_as_with_notifiers);
|
|
|
1bdc94 |
+ qemu_mutex_init(&s->iommu_lock);
|
|
|
1bdc94 |
memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
|
|
|
1bdc94 |
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
|
|
|
1bdc94 |
"intel_iommu", DMAR_REG_SIZE);
|
|
|
1bdc94 |
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
|
|
|
1bdc94 |
index 032e33b..016e74b 100644
|
|
|
1bdc94 |
--- a/include/hw/i386/intel_iommu.h
|
|
|
1bdc94 |
+++ b/include/hw/i386/intel_iommu.h
|
|
|
1bdc94 |
@@ -300,6 +300,12 @@ struct IntelIOMMUState {
|
|
|
1bdc94 |
OnOffAuto intr_eim; /* Toggle for EIM cabability */
|
|
|
1bdc94 |
bool buggy_eim; /* Force buggy EIM unless eim=off */
|
|
|
1bdc94 |
uint8_t aw_bits; /* Host/IOVA address width (in bits) */
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ /*
|
|
|
1bdc94 |
+ * Protects IOMMU states in general. Currently it protects the
|
|
|
1bdc94 |
+ * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace.
|
|
|
1bdc94 |
+ */
|
|
|
1bdc94 |
+ QemuMutex iommu_lock;
|
|
|
1bdc94 |
};
|
|
|
1bdc94 |
|
|
|
1bdc94 |
/* Find the VTD Address space associated with the given bus pointer,
|
|
|
1bdc94 |
--
|
|
|
1bdc94 |
1.8.3.1
|
|
|
1bdc94 |
|