yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone
5d360b
From a33e922436f708fe4881da4b6f363c49db5af581 Mon Sep 17 00:00:00 2001
5d360b
From: Alex Williamson <alex.williamson@redhat.com>
5d360b
Date: Fri, 29 Sep 2017 21:46:02 +0200
5d360b
Subject: [PATCH 14/27] vfio: Generalize region support
5d360b
5d360b
RH-Author: Alex Williamson <alex.williamson@redhat.com>
5d360b
Message-id: <20170929214601.16765.68107.stgit@gimli.home>
5d360b
Patchwork-id: 76772
5d360b
O-Subject: [RHEL-7.5 qemu-kvm PATCH 14/16] vfio: Generalize region support
5d360b
Bugzilla: 1494181
5d360b
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
5d360b
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
5d360b
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
5d360b
5d360b
Upstream: db0da029a1853d46c90a6c0790ce6ca77fd46ea3
5d360b
RHEL: MemoryRegions still destroyed from exitfn, so finalize is called
5d360b
      immediately after exit with memory_region_destroy().
5d360b
5d360b
Both platform and PCI vfio drivers create a "slow", I/O memory region
5d360b
with one or more mmap memory regions overlaid when supported by the
5d360b
device. Generalize this to a set of common helpers in the core that
5d360b
pulls the region info from vfio, fills the region data, configures
5d360b
slow mapping, and adds helpers for completing the mmap, enable/disable,
5d360b
and teardown.  This can be immediately used by the PCI MSI-X code,
5d360b
which needs to mmap around the MSI-X vector table.
5d360b
5d360b
This also changes VFIORegion.mem to be dynamically allocated because
5d360b
otherwise we don't know how the caller has allocated VFIORegion and
5d360b
therefore don't know whether to unreference it to destroy the
5d360b
MemoryRegion or not.
5d360b
5d360b
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
5d360b
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
5d360b
---
5d360b
 hw/misc/vfio.c | 360 +++++++++++++++++++++++++++++++++++++++------------------
5d360b
 trace-events   |   9 ++
5d360b
 2 files changed, 258 insertions(+), 111 deletions(-)
5d360b
5d360b
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
5d360b
index 57a0065..d634531 100644
5d360b
--- a/hw/misc/vfio.c
5d360b
+++ b/hw/misc/vfio.c
5d360b
@@ -39,6 +39,7 @@
5d360b
 #include "qemu/range.h"
5d360b
 #include "sysemu/kvm.h"
5d360b
 #include "sysemu/sysemu.h"
5d360b
+#include "trace.h"
5d360b
 
5d360b
 /* #define DEBUG_VFIO */
5d360b
 #ifdef DEBUG_VFIO
5d360b
@@ -84,14 +85,21 @@ typedef struct VFIOQuirk {
5d360b
     } data;
5d360b
 } VFIOQuirk;
5d360b
 
5d360b
+typedef struct VFIOMmap {
5d360b
+    MemoryRegion mem;
5d360b
+    void *mmap;
5d360b
+    off_t offset;
5d360b
+    size_t size;
5d360b
+} VFIOMmap;
5d360b
+
5d360b
 typedef struct VFIORegion {
5d360b
     struct VFIODevice *vbasedev;
5d360b
     off_t fd_offset; /* offset of region within device fd */
5d360b
-    MemoryRegion mem; /* slow, read/write access */
5d360b
-    MemoryRegion mmap_mem; /* direct mapped access */
5d360b
-    void *mmap;
5d360b
+    MemoryRegion *mem; /* slow, read/write access */
5d360b
     size_t size;
5d360b
     uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
5d360b
+    uint32_t nr_mmaps;
5d360b
+    VFIOMmap *mmaps;
5d360b
     uint8_t nr; /* cache the region number for debug */
5d360b
 } VFIORegion;
5d360b
 
5d360b
@@ -294,6 +302,9 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
5d360b
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
5d360b
 static int vfio_get_region_info(VFIODevice *vbasedev, int index,
5d360b
                                 struct vfio_region_info **info);
5d360b
+static void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
5d360b
+static void vfio_region_exit(VFIORegion *region);
5d360b
+static void vfio_region_finalize(VFIORegion *region);
5d360b
 
5d360b
 /*
5d360b
  * Common VFIO interrupt disable
5d360b
@@ -1681,7 +1692,7 @@ static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr)
5d360b
     memory_region_init_io(&quirk->mem,
5d360b
                           &vfio_generic_window_quirk, quirk,
5d360b
                           "vfio-ati-bar4-window-quirk", 8);
5d360b
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
5d360b
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
5d360b
                           quirk->data.base_offset, &quirk->mem, 1);
5d360b
 
5d360b
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
5d360b
@@ -1714,7 +1725,7 @@ static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr)
5d360b
     memory_region_init_io(&quirk->mem, &vfio_generic_quirk, quirk,
5d360b
                           "vfio-ati-bar2-4000-quirk",
5d360b
                           TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
5d360b
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
5d360b
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
5d360b
                           quirk->data.address_match & TARGET_PAGE_MASK,
5d360b
                           &quirk->mem, 1);
5d360b
 
5d360b
@@ -1939,7 +1950,7 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr)
5d360b
     memory_region_init_io(&quirk->mem,
5d360b
                           &vfio_nvidia_bar5_window_quirk, quirk,
5d360b
                           "vfio-nvidia-bar5-window-quirk", 16);
5d360b
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
5d360b
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
5d360b
                                         0, &quirk->mem, 1);
5d360b
 
5d360b
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
5d360b
@@ -1977,7 +1988,7 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr)
5d360b
     memory_region_init_io(&quirk->mem, &vfio_generic_quirk,
5d360b
                           quirk, "vfio-nvidia-bar0-88000-quirk",
5d360b
                           TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
5d360b
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
5d360b
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
5d360b
                           quirk->data.address_match & TARGET_PAGE_MASK,
5d360b
                           &quirk->mem, 1);
5d360b
 
5d360b
@@ -2015,7 +2026,7 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
5d360b
     memory_region_init_io(&quirk->mem, &vfio_generic_quirk, quirk,
5d360b
                           "vfio-nvidia-bar0-1800-quirk",
5d360b
                           TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
5d360b
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
5d360b
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
5d360b
                           quirk->data.address_match & TARGET_PAGE_MASK,
5d360b
                           &quirk->mem, 1);
5d360b
 
5d360b
@@ -2070,7 +2081,7 @@ static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
5d360b
 
5d360b
     while (!QLIST_EMPTY(&bar->quirks)) {
5d360b
         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
5d360b
-        memory_region_del_subregion(&bar->region.mem, &quirk->mem);
5d360b
+        memory_region_del_subregion(bar->region.mem, &quirk->mem);
5d360b
         memory_region_destroy(&quirk->mem);
5d360b
         QLIST_REMOVE(quirk, next);
5d360b
         g_free(quirk);
5d360b
@@ -2384,6 +2395,74 @@ static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos)
5d360b
     return 0;
5d360b
 }
5d360b
 
5d360b
+static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev)
5d360b
+{
5d360b
+    off_t start, end;
5d360b
+    VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region;
5d360b
+
5d360b
+    /*
5d360b
+     * We expect to find a single mmap covering the whole BAR, anything else
5d360b
+     * means it's either unsupported or already setup.
5d360b
+     */
5d360b
+    if (region->nr_mmaps != 1 || region->mmaps[0].offset ||
5d360b
+        region->size != region->mmaps[0].size) {
5d360b
+        return;
5d360b
+    }
5d360b
+
5d360b
+    /* MSI-X table start and end aligned to host page size */
5d360b
+    start = vdev->msix->table_offset & TARGET_PAGE_MASK;
5d360b
+    end = TARGET_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
5d360b
+                            (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
5d360b
+
5d360b
+    /*
5d360b
+     * Does the MSI-X table cover the beginning of the BAR?  The whole BAR?
5d360b
+     * NB - Host page size is necessarily a power of two and so is the PCI
5d360b
+     * BAR (not counting EA yet), therefore if we have host page aligned
5d360b
+     * @start and @end, then any remainder of the BAR before or after those
5d360b
+     * must be at least host page sized and therefore mmap'able.
5d360b
+     */
5d360b
+    if (!start) {
5d360b
+        if (end >= region->size) {
5d360b
+            region->nr_mmaps = 0;
5d360b
+            g_free(region->mmaps);
5d360b
+            region->mmaps = NULL;
5d360b
+            trace_vfio_msix_fixup(vdev->vbasedev.name,
5d360b
+                                  vdev->msix->table_bar, 0, 0);
5d360b
+        } else {
5d360b
+            region->mmaps[0].offset = end;
5d360b
+            region->mmaps[0].size = region->size - end;
5d360b
+            trace_vfio_msix_fixup(vdev->vbasedev.name,
5d360b
+                              vdev->msix->table_bar, region->mmaps[0].offset,
5d360b
+                              region->mmaps[0].offset + region->mmaps[0].size);
5d360b
+        }
5d360b
+
5d360b
+    /* Maybe it's aligned at the end of the BAR */
5d360b
+    } else if (end >= region->size) {
5d360b
+        region->mmaps[0].size = start;
5d360b
+        trace_vfio_msix_fixup(vdev->vbasedev.name,
5d360b
+                              vdev->msix->table_bar, region->mmaps[0].offset,
5d360b
+                              region->mmaps[0].offset + region->mmaps[0].size);
5d360b
+
5d360b
+    /* Otherwise it must split the BAR */
5d360b
+    } else {
5d360b
+        region->nr_mmaps = 2;
5d360b
+        region->mmaps = g_renew(VFIOMmap, region->mmaps, 2);
5d360b
+
5d360b
+        memcpy(&region->mmaps[1], &region->mmaps[0], sizeof(VFIOMmap));
5d360b
+
5d360b
+        region->mmaps[0].size = start;
5d360b
+        trace_vfio_msix_fixup(vdev->vbasedev.name,
5d360b
+                              vdev->msix->table_bar, region->mmaps[0].offset,
5d360b
+                              region->mmaps[0].offset + region->mmaps[0].size);
5d360b
+
5d360b
+        region->mmaps[1].offset = end;
5d360b
+        region->mmaps[1].size = region->size - end;
5d360b
+        trace_vfio_msix_fixup(vdev->vbasedev.name,
5d360b
+                              vdev->msix->table_bar, region->mmaps[1].offset,
5d360b
+                              region->mmaps[1].offset + region->mmaps[1].size);
5d360b
+    }
5d360b
+}
5d360b
+
5d360b
 /*
5d360b
  * We don't have any control over how pci_add_capability() inserts
5d360b
  * capabilities into the chain.  In order to setup MSI-X we need a
5d360b
@@ -2461,6 +2540,8 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev)
5d360b
         }
5d360b
     }
5d360b
 
5d360b
+    vfio_pci_fixup_msix_region(vdev);
5d360b
+
5d360b
     return 0;
5d360b
 }
5d360b
 
5d360b
@@ -2469,9 +2550,9 @@ static int vfio_setup_msix(VFIOPCIDevice *vdev, int pos)
5d360b
     int ret;
5d360b
 
5d360b
     ret = msix_init(&vdev->pdev, vdev->msix->entries,
5d360b
-                    &vdev->bars[vdev->msix->table_bar].region.mem,
5d360b
+                    vdev->bars[vdev->msix->table_bar].region.mem,
5d360b
                     vdev->msix->table_bar, vdev->msix->table_offset,
5d360b
-                    &vdev->bars[vdev->msix->pba_bar].region.mem,
5d360b
+                    vdev->bars[vdev->msix->pba_bar].region.mem,
5d360b
                     vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
5d360b
     if (ret < 0) {
5d360b
         if (ret == -ENOTSUP) {
5d360b
@@ -2490,8 +2571,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev)
5d360b
 
5d360b
     if (vdev->msix) {
5d360b
         msix_uninit(&vdev->pdev,
5d360b
-                    &vdev->bars[vdev->msix->table_bar].region.mem,
5d360b
-                    &vdev->bars[vdev->msix->pba_bar].region.mem);
5d360b
+                    vdev->bars[vdev->msix->table_bar].region.mem,
5d360b
+                    vdev->bars[vdev->msix->pba_bar].region.mem);
5d360b
     }
5d360b
 }
5d360b
 
5d360b
@@ -2503,16 +2584,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled)
5d360b
     int i;
5d360b
 
5d360b
     for (i = 0; i < PCI_ROM_SLOT; i++) {
5d360b
-        VFIOBAR *bar = &vdev->bars[i];
5d360b
-
5d360b
-        if (!bar->region.size) {
5d360b
-            continue;
5d360b
-        }
5d360b
-
5d360b
-        memory_region_set_enabled(&bar->region.mmap_mem, enabled);
5d360b
-        if (vdev->msix && vdev->msix->table_bar == i) {
5d360b
-            memory_region_set_enabled(&vdev->msix->mmap_mem, enabled);
5d360b
-        }
5d360b
+        vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled);
5d360b
     }
5d360b
 }
5d360b
 
5d360b
@@ -2526,65 +2598,171 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
5d360b
 
5d360b
     vfio_bar_quirk_teardown(vdev, nr);
5d360b
 
5d360b
-    memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem);
5d360b
-    munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
5d360b
-    memory_region_destroy(&bar->region.mmap_mem);
5d360b
+    vfio_region_exit(&bar->region);
5d360b
+    vfio_region_finalize(&bar->region);
5d360b
+}
5d360b
+
5d360b
+static int vfio_region_setup(Object *obj, VFIODevice *vbasedev,
5d360b
+                             VFIORegion *region, int index, const char *name)
5d360b
+{
5d360b
+    struct vfio_region_info *info;
5d360b
+    int ret;
5d360b
+
5d360b
+    ret = vfio_get_region_info(vbasedev, index, &info);
5d360b
+    if (ret) {
5d360b
+        return ret;
5d360b
+    }
5d360b
+
5d360b
+    region->vbasedev = vbasedev;
5d360b
+    region->flags = info->flags;
5d360b
+    region->size = info->size;
5d360b
+    region->fd_offset = info->offset;
5d360b
+    region->nr = index;
5d360b
 
5d360b
-    if (vdev->msix && vdev->msix->table_bar == nr) {
5d360b
-        memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem);
5d360b
-        munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
5d360b
-        memory_region_destroy(&vdev->msix->mmap_mem);
5d360b
+    if (region->size) {
5d360b
+        region->mem = g_new0(MemoryRegion, 1);
5d360b
+        memory_region_init_io(region->mem, &vfio_region_ops,
5d360b
+                              region, name, region->size);
5d360b
+
5d360b
+        if (VFIO_ALLOW_MMAP &&
5d360b
+            region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
5d360b
+            !(region->size & ~TARGET_PAGE_MASK)) {
5d360b
+
5d360b
+            region->nr_mmaps = 1;
5d360b
+            region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
5d360b
+
5d360b
+            region->mmaps[0].offset = 0;
5d360b
+            region->mmaps[0].size = region->size;
5d360b
+        }
5d360b
     }
5d360b
 
5d360b
-    memory_region_destroy(&bar->region.mem);
5d360b
+    g_free(info);
5d360b
+
5d360b
+    trace_vfio_region_setup(vbasedev->name, index, name,
5d360b
+                            region->flags, region->fd_offset, region->size);
5d360b
+    return 0;
5d360b
 }
5d360b
 
5d360b
-static int vfio_mmap_region(Object *obj, VFIORegion *region,
5d360b
-                            MemoryRegion *mem, MemoryRegion *submem,
5d360b
-                            void **map, size_t size, off_t offset,
5d360b
-                            const char *name)
5d360b
+static int vfio_region_mmap(VFIORegion *region)
5d360b
 {
5d360b
-    int ret = 0;
5d360b
-    VFIODevice *vbasedev = region->vbasedev;
5d360b
+    int i, prot = 0;
5d360b
+    char *name;
5d360b
+
5d360b
+    if (!region->mem) {
5d360b
+        return 0;
5d360b
+    }
5d360b
+
5d360b
+    prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
5d360b
+    prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
5d360b
+
5d360b
+    for (i = 0; i < region->nr_mmaps; i++) {
5d360b
+        region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
5d360b
+                                     MAP_SHARED, region->vbasedev->fd,
5d360b
+                                     region->fd_offset +
5d360b
+                                     region->mmaps[i].offset);
5d360b
+        if (region->mmaps[i].mmap == MAP_FAILED) {
5d360b
+            int ret = -errno;
5d360b
 
5d360b
-    if (VFIO_ALLOW_MMAP && size && region->flags &
5d360b
-        VFIO_REGION_INFO_FLAG_MMAP) {
5d360b
-        int prot = 0;
5d360b
+            trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
5d360b
+                                         region->fd_offset +
5d360b
+                                         region->mmaps[i].offset,
5d360b
+                                         region->fd_offset +
5d360b
+                                         region->mmaps[i].offset +
5d360b
+                                         region->mmaps[i].size - 1, ret);
5d360b
 
5d360b
-        if (region->flags & VFIO_REGION_INFO_FLAG_READ) {
5d360b
-            prot |= PROT_READ;
5d360b
+            region->mmaps[i].mmap = NULL;
5d360b
+
5d360b
+            for (i--; i >= 0; i--) {
5d360b
+                memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
5d360b
+                munmap(region->mmaps[i].mmap, region->mmaps[i].size);
5d360b
+                memory_region_destroy(&region->mmaps[i].mem);
5d360b
+                region->mmaps[i].mmap = NULL;
5d360b
+            }
5d360b
+
5d360b
+            return ret;
5d360b
         }
5d360b
 
5d360b
-        if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) {
5d360b
-            prot |= PROT_WRITE;
5d360b
+        name = g_strdup_printf("%s mmaps[%d]",
5d360b
+                               memory_region_name(region->mem), i);
5d360b
+        memory_region_init_ram_ptr(&region->mmaps[i].mem,
5d360b
+                                   name, region->mmaps[i].size,
5d360b
+                                   region->mmaps[i].mmap);
5d360b
+        g_free(name);
5d360b
+        memory_region_set_skip_dump(&region->mmaps[i].mem);
5d360b
+        memory_region_add_subregion(region->mem, region->mmaps[i].offset,
5d360b
+                                    &region->mmaps[i].mem);
5d360b
+
5d360b
+        trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem),
5d360b
+                               region->mmaps[i].offset,
5d360b
+                               region->mmaps[i].offset +
5d360b
+                               region->mmaps[i].size - 1);
5d360b
+    }
5d360b
+
5d360b
+    return 0;
5d360b
+}
5d360b
+
5d360b
+static void vfio_region_exit(VFIORegion *region)
5d360b
+{
5d360b
+    int i;
5d360b
+
5d360b
+    if (!region->mem) {
5d360b
+        return;
5d360b
+    }
5d360b
+
5d360b
+    for (i = 0; i < region->nr_mmaps; i++) {
5d360b
+        if (region->mmaps[i].mmap) {
5d360b
+            memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
5d360b
         }
5d360b
+    }
5d360b
+
5d360b
+    trace_vfio_region_exit(region->vbasedev->name, region->nr);
5d360b
+}
5d360b
+
5d360b
+static void vfio_region_finalize(VFIORegion *region)
5d360b
+{
5d360b
+    int i;
5d360b
+
5d360b
+    if (!region->mem) {
5d360b
+        return;
5d360b
+    }
5d360b
 
5d360b
-        *map = mmap(NULL, size, prot, MAP_SHARED,
5d360b
-                    vbasedev->fd, region->fd_offset + offset);
5d360b
-        if (*map == MAP_FAILED) {
5d360b
-            *map = NULL;
5d360b
-            ret = -errno;
5d360b
-            goto empty_region;
5d360b
+    for (i = 0; i < region->nr_mmaps; i++) {
5d360b
+        if (region->mmaps[i].mmap) {
5d360b
+            munmap(region->mmaps[i].mmap, region->mmaps[i].size);
5d360b
+            memory_region_destroy(&region->mmaps[i].mem);
5d360b
         }
5d360b
+    }
5d360b
 
5d360b
-        memory_region_init_ram_ptr(submem, name, size, *map);
5d360b
-        memory_region_set_skip_dump(submem);
5d360b
-    } else {
5d360b
-empty_region:
5d360b
-        /* Create a zero sized sub-region to make cleanup easy. */
5d360b
-        memory_region_init(submem, name, 0);
5d360b
+    memory_region_destroy(region->mem);
5d360b
+
5d360b
+    g_free(region->mem);
5d360b
+    g_free(region->mmaps);
5d360b
+
5d360b
+    trace_vfio_region_finalize(region->vbasedev->name, region->nr);
5d360b
+}
5d360b
+
5d360b
+static void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
5d360b
+{
5d360b
+    int i;
5d360b
+
5d360b
+    if (!region->mem) {
5d360b
+        return;
5d360b
     }
5d360b
 
5d360b
-    memory_region_add_subregion(mem, offset, submem);
5d360b
+    for (i = 0; i < region->nr_mmaps; i++) {
5d360b
+        if (region->mmaps[i].mmap) {
5d360b
+            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
5d360b
+        }
5d360b
+    }
5d360b
 
5d360b
-    return ret;
5d360b
+    trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
5d360b
+                                        enabled);
5d360b
 }
5d360b
 
5d360b
 static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
5d360b
 {
5d360b
     VFIOBAR *bar = &vdev->bars[nr];
5d360b
     uint64_t size = bar->region.size;
5d360b
-    char name[64];
5d360b
     uint32_t pci_bar;
5d360b
     uint8_t type;
5d360b
     int ret;
5d360b
@@ -2594,8 +2772,6 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
5d360b
         return;
5d360b
     }
5d360b
 
5d360b
-    snprintf(name, sizeof(name), "VFIO %s BAR %d", vdev->vbasedev.name, nr);
5d360b
-
5d360b
     /* Determine what type of BAR this is for registration */
5d360b
     ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
5d360b
                 vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
5d360b
@@ -2610,40 +2786,11 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
5d360b
     type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
5d360b
                                     ~PCI_BASE_ADDRESS_MEM_MASK);
5d360b
 
5d360b
-    /* A "slow" read/write mapping underlies all BARs */
5d360b
-    memory_region_init_io(&bar->region.mem, &vfio_region_ops,
5d360b
-                          bar, name, size);
5d360b
-    pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem);
5d360b
-
5d360b
-    /*
5d360b
-     * We can't mmap areas overlapping the MSIX vector table, so we
5d360b
-     * potentially insert a direct-mapped subregion before and after it.
5d360b
-     */
5d360b
-    if (vdev->msix && vdev->msix->table_bar == nr) {
5d360b
-        size = vdev->msix->table_offset & TARGET_PAGE_MASK;
5d360b
-    }
5d360b
-
5d360b
-    strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
5d360b
-    if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
5d360b
-                      &bar->region.mmap_mem, &bar->region.mmap,
5d360b
-                      size, 0, name)) {
5d360b
-        error_report("%s unsupported. Performance may be slow", name);
5d360b
-    }
5d360b
-
5d360b
-    if (vdev->msix && vdev->msix->table_bar == nr) {
5d360b
-        uint64_t start;
5d360b
+    pci_register_bar(&vdev->pdev, nr, type, bar->region.mem);
5d360b
 
5d360b
-        start = TARGET_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
5d360b
-                                  (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
5d360b
-
5d360b
-        size = start < bar->region.size ? bar->region.size - start : 0;
5d360b
-        strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
5d360b
-        /* VFIOMSIXInfo contains another MemoryRegion for this mapping */
5d360b
-        if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
5d360b
-                          &vdev->msix->mmap_mem,
5d360b
-                          &vdev->msix->mmap, size, start, name)) {
5d360b
-            error_report("%s unsupported. Performance may be slow", name);
5d360b
-        }
5d360b
+    if (vfio_region_mmap(&bar->region)) {
5d360b
+        error_report("Failed to mmap %s BAR %d. Performance may be slow",
5d360b
+                     vdev->vbasedev.name, nr);
5d360b
     }
5d360b
 
5d360b
     vfio_bar_quirk_setup(vdev, nr);
5d360b
@@ -3531,25 +3678,18 @@ static int vfio_get_device(VFIOGroup *group, const char *name,
5d360b
     }
5d360b
 
5d360b
     for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
5d360b
-        ret = vfio_get_region_info(&vdev->vbasedev, i, &reg_info);
5d360b
+        char *name = g_strdup_printf("%s BAR %d", vdev->vbasedev.name, i);
5d360b
+
5d360b
+        ret = vfio_region_setup(OBJECT(vdev), &vdev->vbasedev,
5d360b
+                                &vdev->bars[i].region, i, name);
5d360b
+        g_free(name);
5d360b
+
5d360b
         if (ret) {
5d360b
             error_report("vfio: Error getting region %d info: %m", i);
5d360b
             goto error;
5d360b
         }
5d360b
 
5d360b
-        DPRINTF("Device %s region %d:\n", name, i);
5d360b
-        DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
5d360b
-                (unsigned long)reg_info->size, (unsigned long)reg_info->offset,
5d360b
-                (unsigned long)reg_info->flags);
5d360b
-
5d360b
-        vdev->bars[i].region.vbasedev = &vdev->vbasedev;
5d360b
-        vdev->bars[i].region.flags = reg_info->flags;
5d360b
-        vdev->bars[i].region.size = reg_info->size;
5d360b
-        vdev->bars[i].region.fd_offset = reg_info->offset;
5d360b
-        vdev->bars[i].region.nr = i;
5d360b
         QLIST_INIT(&vdev->bars[i].quirks);
5d360b
-
5d360b
-        g_free(reg_info);
5d360b
     }
5d360b
 
5d360b
     ret = vfio_get_region_info(&vdev->vbasedev,
5d360b
@@ -3644,10 +3784,8 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
5d360b
     DPRINTF("vfio_put_device: close vdev->vbasedev.fd\n");
5d360b
     close(vdev->vbasedev.fd);
5d360b
     g_free(vdev->vbasedev.name);
5d360b
-    if (vdev->msix) {
5d360b
-        g_free(vdev->msix);
5d360b
-        vdev->msix = NULL;
5d360b
-    }
5d360b
+    g_free(vdev->msix);
5d360b
+
5d360b
 }
5d360b
 
5d360b
 static int vfio_get_region_info(VFIODevice *vbasedev, int index,
5d360b
diff --git a/trace-events b/trace-events
5d360b
index 6cd46e9..cc62b0b 100644
5d360b
--- a/trace-events
5d360b
+++ b/trace-events
5d360b
@@ -1155,3 +1155,12 @@ kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
5d360b
 # qom/object.c
5d360b
 object_dynamic_cast_assert(const char *type, const char *target, const char *file, int line, const char *func) "%s->%s (%s:%d:%s)"
5d360b
 object_class_dynamic_cast_assert(const char *type, const char *target, const char *file, int line, const char *func) "%s->%s (%s:%d:%s)"
5d360b
+
5d360b
+# hw/misc/vfio.c
5d360b
+vfio_msix_fixup(const char *name, int bar, uint64_t start, uint64_t end) " (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]"
5d360b
+vfio_region_setup(const char *dev, int index, const char *name, unsigned long flags, unsigned long offset, unsigned long size) "Device %s, region %d \"%s\", flags: %lx, offset: %lx, size: %lx"
5d360b
+vfio_region_mmap_fault(const char *name, int index, unsigned long offset, unsigned long size, int fault) "Region %s mmaps[%d], [%lx - %lx], fault: %d"
5d360b
+vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Region %s [%lx - %lx]"
5d360b
+vfio_region_exit(const char *name, int index) "Device %s, region %d"
5d360b
+vfio_region_finalize(const char *name, int index) "Device %s, region %d"
5d360b
+vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
5d360b
-- 
5d360b
1.8.3.1
5d360b