From 725ad24f771adcc8aeacaf27b21a6d95cab41e69 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 29 Sep 2017 21:45:02 +0200
Subject: [PATCH 07/27] hw/vfio/pci: Introduce VFIORegion
RH-Author: Alex Williamson <alex.williamson@redhat.com>
Message-id: <20170929214502.16765.46217.stgit@gimli.home>
Patchwork-id: 76765
O-Subject: [RHEL-7.5 qemu-kvm PATCH 07/16] hw/vfio/pci: Introduce VFIORegion
Bugzilla: 1494181
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
From: Eric Auger <eric.auger@linaro.org>
Upstream: a664477db8dac84cc046e9d79701eefda1d58703
This structure is going to be shared by VFIOPCIDevice and
VFIOPlatformDevice. VFIOBAR includes it.
vfio_eoi becomes an ops of VFIODevice specialized by parent device.
This makes possible to transform vfio_bar_write/read into generic
vfio_region_write/read that will be used by VFIOPlatformDevice too.
vfio_mmap_bar becomes vfio_map_region
Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/misc/vfio.c | 188 +++++++++++++++++++++++++++++----------------------------
1 file changed, 97 insertions(+), 91 deletions(-)
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 3e559ed..92414b9 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -84,15 +84,19 @@ typedef struct VFIOQuirk {
} data;
} VFIOQuirk;
-typedef struct VFIOBAR {
- off_t fd_offset; /* offset of BAR within device fd */
- int fd; /* device fd, allows us to pass VFIOBAR as opaque data */
+typedef struct VFIORegion {
+ struct VFIODevice *vbasedev;
+ off_t fd_offset; /* offset of region within device fd */
MemoryRegion mem; /* slow, read/write access */
MemoryRegion mmap_mem; /* direct mapped access */
void *mmap;
size_t size;
uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
- uint8_t nr; /* cache the BAR number for debug */
+ uint8_t nr; /* cache the region number for debug */
+} VFIORegion;
+
+typedef struct VFIOBAR {
+ VFIORegion region;
bool ioport;
bool mem64;
QLIST_HEAD(, VFIOQuirk) quirks;
@@ -194,6 +198,7 @@ typedef struct VFIODevice {
struct VFIODeviceOps {
void (*vfio_compute_needs_reset)(VFIODevice *vdev);
int (*vfio_hot_reset_multi)(VFIODevice *vdev);
+ void (*vfio_eoi)(VFIODevice *vdev);
};
typedef struct VFIOPCIDevice {
@@ -383,8 +388,10 @@ static void vfio_intx_interrupt(void *opaque)
}
}
-static void vfio_eoi(VFIOPCIDevice *vdev)
+static void vfio_eoi(VFIODevice *vbasedev)
{
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+
if (!vdev->intx.pending) {
return;
}
@@ -394,7 +401,7 @@ static void vfio_eoi(VFIOPCIDevice *vdev)
vdev->intx.pending = false;
qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
- vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
+ vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
}
static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev)
@@ -549,7 +556,7 @@ static void vfio_update_irq(PCIDevice *pdev)
vfio_enable_intx_kvm(vdev);
/* Re-enable the interrupt in cased we missed an EOI */
- vfio_eoi(vdev);
+ vfio_eoi(&vdev->vbasedev);
}
static int vfio_enable_intx(VFIOPCIDevice *vdev)
@@ -1086,10 +1093,11 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
/*
* IO Port/MMIO - Beware of the endians, VFIO is always little endian
*/
-static void vfio_bar_write(void *opaque, hwaddr addr,
- uint64_t data, unsigned size)
+static void vfio_region_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
{
- VFIOBAR *bar = opaque;
+ VFIORegion *region = opaque;
+ VFIODevice *vbasedev = region->vbasedev;
union {
uint8_t byte;
uint16_t word;
@@ -1112,21 +1120,15 @@ static void vfio_bar_write(void *opaque, hwaddr addr,
break;
}
- if (pwrite(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
- error_report("%s(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m",
- __func__, addr, data, size);
+ if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
+ error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
+ ",%d) failed: %m",
+ __func__, vbasedev->name, region->nr,
+ addr, data, size);
}
-#ifdef DEBUG_VFIO
- {
- VFIOPCIDevice *vdev = container_of(bar, VFIOPCIDevice, bars[bar->nr]);
-
- DPRINTF("%s(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx", 0x%"PRIx64
- ", %d)\n", __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function, bar->nr, addr,
- data, size);
- }
-#endif
+ DPRINTF("%s(%s:BAR%d+0x%"HWADDR_PRIx", 0x%"PRIx64", %d)\n", __func__,
+ vbasedev->name, region->nr, addr, data, size);
/*
* A read or write to a BAR always signals an INTx EOI. This will
@@ -1136,13 +1138,14 @@ static void vfio_bar_write(void *opaque, hwaddr addr,
* which access will service the interrupt, so we're potentially
* getting quite a few host interrupts per guest interrupt.
*/
- vfio_eoi(container_of(bar, VFIOPCIDevice, bars[bar->nr]));
+ vbasedev->ops->vfio_eoi(vbasedev);
}
-static uint64_t vfio_bar_read(void *opaque,
- hwaddr addr, unsigned size)
+static uint64_t vfio_region_read(void *opaque,
+ hwaddr addr, unsigned size)
{
- VFIOBAR *bar = opaque;
+ VFIORegion *region = opaque;
+ VFIODevice *vbasedev = region->vbasedev;
union {
uint8_t byte;
uint16_t word;
@@ -1151,9 +1154,10 @@ static uint64_t vfio_bar_read(void *opaque,
} buf;
uint64_t data = 0;
- if (pread(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
- error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m",
- __func__, addr, size);
+ if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
+ error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
+ __func__, vbasedev->name, region->nr,
+ addr, size);
return (uint64_t)-1;
}
@@ -1172,26 +1176,18 @@ static uint64_t vfio_bar_read(void *opaque,
break;
}
-#ifdef DEBUG_VFIO
- {
- VFIOPCIDevice *vdev = container_of(bar, VFIOPCIDevice, bars[bar->nr]);
-
- DPRINTF("%s(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx
- ", %d) = 0x%"PRIx64"\n", __func__, vdev->host.domain,
- vdev->host.bus, vdev->host.slot, vdev->host.function,
- bar->nr, addr, size, data);
- }
-#endif
+ DPRINTF("%s(%s:BAR%d+0x%"HWADDR_PRIx", %d) = 0x%"PRIx64"\n", __func__,
+ vbasedev->name, region->nr, addr, size, data);
/* Same as write above */
- vfio_eoi(container_of(bar, VFIOPCIDevice, bars[bar->nr]));
+ vbasedev->ops->vfio_eoi(vbasedev);
return data;
}
-static const MemoryRegionOps vfio_bar_ops = {
- .read = vfio_bar_read,
- .write = vfio_bar_write,
+static const MemoryRegionOps vfio_region_ops = {
+ .read = vfio_region_read,
+ .write = vfio_region_write,
.endianness = DEVICE_LITTLE_ENDIAN,
};
@@ -1505,8 +1501,8 @@ static uint64_t vfio_generic_window_quirk_read(void *opaque,
vdev->host.bus, vdev->host.slot, vdev->host.function,
quirk->data.bar, addr, size, data);
} else {
- data = vfio_bar_read(&vdev->bars[quirk->data.bar],
- addr + quirk->data.base_offset, size);
+ data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
+ addr + quirk->data.base_offset, size);
}
return data;
@@ -1556,7 +1552,7 @@ static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr,
return;
}
- vfio_bar_write(&vdev->bars[quirk->data.bar],
+ vfio_region_write(&vdev->bars[quirk->data.bar].region,
addr + quirk->data.base_offset, data, size);
}
@@ -1590,7 +1586,8 @@ static uint64_t vfio_generic_quirk_read(void *opaque,
vdev->host.bus, vdev->host.slot, vdev->host.function,
quirk->data.bar, addr + base, size, data);
} else {
- data = vfio_bar_read(&vdev->bars[quirk->data.bar], addr + base, size);
+ data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
+ addr + base, size);
}
return data;
@@ -1619,7 +1616,8 @@ static void vfio_generic_quirk_write(void *opaque, hwaddr addr,
vdev->host.domain, vdev->host.bus, vdev->host.slot,
vdev->host.function, quirk->data.bar, addr + base, data, size);
} else {
- vfio_bar_write(&vdev->bars[quirk->data.bar], addr + base, data, size);
+ vfio_region_write(&vdev->bars[quirk->data.bar].region,
+ addr + base, data, size);
}
}
@@ -1672,7 +1670,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
* As long as the BAR is >= 256 bytes it will be aligned such that the
* lower byte is always zero. Filter out anything else, if it exists.
*/
- if (!vdev->bars[4].ioport || vdev->bars[4].size < 256) {
+ if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
return;
}
@@ -1725,7 +1723,7 @@ static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(&quirk->mem,
&vfio_generic_window_quirk, quirk,
"vfio-ati-bar4-window-quirk", 8);
- memory_region_add_subregion_overlap(&vdev->bars[nr].mem,
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
quirk->data.base_offset, &quirk->mem, 1);
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -1759,7 +1757,7 @@ static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(&quirk->mem, &vfio_generic_quirk, quirk,
"vfio-ati-bar2-4000-quirk",
TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
- memory_region_add_subregion_overlap(&vdev->bars[nr].mem,
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
quirk->data.address_match & TARGET_PAGE_MASK,
&quirk->mem, 1);
@@ -1878,7 +1876,7 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
VFIOQuirk *quirk;
if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA ||
- !vdev->bars[1].size) {
+ !vdev->bars[1].region.size) {
return;
}
@@ -1986,7 +1984,8 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(&quirk->mem,
&vfio_nvidia_bar5_window_quirk, quirk,
"vfio-nvidia-bar5-window-quirk", 16);
- memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0, &quirk->mem, 1);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ 0, &quirk->mem, 1);
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -2024,7 +2023,7 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(&quirk->mem, &vfio_generic_quirk,
quirk, "vfio-nvidia-bar0-88000-quirk",
TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
- memory_region_add_subregion_overlap(&vdev->bars[nr].mem,
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
quirk->data.address_match & TARGET_PAGE_MASK,
&quirk->mem, 1);
@@ -2050,7 +2049,8 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
/* Log the chipset ID */
DPRINTF("Nvidia NV%02x\n",
- (unsigned int)(vfio_bar_read(&vdev->bars[0], 0, 4) >> 20) & 0xff);
+ (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20)
+ & 0xff);
quirk = g_malloc0(sizeof(*quirk));
quirk->vdev = vdev;
@@ -2062,7 +2062,7 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(&quirk->mem, &vfio_generic_quirk, quirk,
"vfio-nvidia-bar0-1800-quirk",
TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
- memory_region_add_subregion_overlap(&vdev->bars[nr].mem,
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
quirk->data.address_match & TARGET_PAGE_MASK,
&quirk->mem, 1);
@@ -2118,7 +2118,7 @@ static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
while (!QLIST_EMPTY(&bar->quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
- memory_region_del_subregion(&bar->mem, &quirk->mem);
+ memory_region_del_subregion(&bar->region.mem, &quirk->mem);
memory_region_destroy(&quirk->mem);
QLIST_REMOVE(quirk, next);
g_free(quirk);
@@ -2495,7 +2495,7 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev)
* specific quirk if the device is known or we have a broken configuration.
*/
if (vdev->msix->pba_offset >=
- vdev->bars[vdev->msix->pba_bar].size) {
+ vdev->bars[vdev->msix->pba_bar].region.size) {
PCIDevice *pdev = &vdev->pdev;
uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
@@ -2524,9 +2524,9 @@ static int vfio_setup_msix(VFIOPCIDevice *vdev, int pos)
int ret;
ret = msix_init(&vdev->pdev, vdev->msix->entries,
- &vdev->bars[vdev->msix->table_bar].mem,
+ &vdev->bars[vdev->msix->table_bar].region.mem,
vdev->msix->table_bar, vdev->msix->table_offset,
- &vdev->bars[vdev->msix->pba_bar].mem,
+ &vdev->bars[vdev->msix->pba_bar].region.mem,
vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
if (ret < 0) {
if (ret == -ENOTSUP) {
@@ -2544,8 +2544,9 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev)
msi_uninit(&vdev->pdev);
if (vdev->msix) {
- msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem,
- &vdev->bars[vdev->msix->pba_bar].mem);
+ msix_uninit(&vdev->pdev,
+ &vdev->bars[vdev->msix->table_bar].region.mem,
+ &vdev->bars[vdev->msix->pba_bar].region.mem);
}
}
@@ -2559,11 +2560,11 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled)
for (i = 0; i < PCI_ROM_SLOT; i++) {
VFIOBAR *bar = &vdev->bars[i];
- if (!bar->size) {
+ if (!bar->region.size) {
continue;
}
- memory_region_set_enabled(&bar->mmap_mem, enabled);
+ memory_region_set_enabled(&bar->region.mmap_mem, enabled);
if (vdev->msix && vdev->msix->table_bar == i) {
memory_region_set_enabled(&vdev->msix->mmap_mem, enabled);
}
@@ -2574,45 +2575,47 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
- if (!bar->size) {
+ if (!bar->region.size) {
return;
}
vfio_bar_quirk_teardown(vdev, nr);
- memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
- munmap(bar->mmap, memory_region_size(&bar->mmap_mem));
- memory_region_destroy(&bar->mmap_mem);
+ memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem);
+ munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
+ memory_region_destroy(&bar->region.mmap_mem);
if (vdev->msix && vdev->msix->table_bar == nr) {
- memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem);
+ memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem);
munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
memory_region_destroy(&vdev->msix->mmap_mem);
}
- memory_region_destroy(&bar->mem);
+ memory_region_destroy(&bar->region.mem);
}
-static int vfio_mmap_bar(VFIOPCIDevice *vdev, VFIOBAR *bar,
- MemoryRegion *mem, MemoryRegion *submem,
- void **map, size_t size, off_t offset,
- const char *name)
+static int vfio_mmap_region(Object *obj, VFIORegion *region,
+ MemoryRegion *mem, MemoryRegion *submem,
+ void **map, size_t size, off_t offset,
+ const char *name)
{
int ret = 0;
+ VFIODevice *vbasedev = region->vbasedev;
- if (VFIO_ALLOW_MMAP && size && bar->flags & VFIO_REGION_INFO_FLAG_MMAP) {
+ if (VFIO_ALLOW_MMAP && size && region->flags &
+ VFIO_REGION_INFO_FLAG_MMAP) {
int prot = 0;
- if (bar->flags & VFIO_REGION_INFO_FLAG_READ) {
+ if (region->flags & VFIO_REGION_INFO_FLAG_READ) {
prot |= PROT_READ;
}
- if (bar->flags & VFIO_REGION_INFO_FLAG_WRITE) {
+ if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) {
prot |= PROT_WRITE;
}
*map = mmap(NULL, size, prot, MAP_SHARED,
- bar->fd, bar->fd_offset + offset);
+ vbasedev->fd, region->fd_offset + offset);
if (*map == MAP_FAILED) {
*map = NULL;
ret = -errno;
@@ -2635,7 +2638,7 @@ empty_region:
static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
- uint64_t size = bar->size;
+ uint64_t size = bar->region.size;
char name[64];
uint32_t pci_bar;
uint8_t type;
@@ -2665,9 +2668,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
~PCI_BASE_ADDRESS_MEM_MASK);
/* A "slow" read/write mapping underlies all BARs */
- memory_region_init_io(&bar->mem, &vfio_bar_ops,
+ memory_region_init_io(&bar->region.mem, &vfio_region_ops,
bar, name, size);
- pci_register_bar(&vdev->pdev, nr, type, &bar->mem);
+ pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem);
/*
* We can't mmap areas overlapping the MSIX vector table, so we
@@ -2678,8 +2681,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
}
strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
- if (vfio_mmap_bar(vdev, bar, &bar->mem,
- &bar->mmap_mem, &bar->mmap, size, 0, name)) {
+ if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
+ &bar->region.mmap_mem, &bar->region.mmap,
+ size, 0, name)) {
error_report("%s unsupported. Performance may be slow", name);
}
@@ -2689,10 +2693,11 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
start = TARGET_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
(vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
- size = start < bar->size ? bar->size - start : 0;
+ size = start < bar->region.size ? bar->region.size - start : 0;
strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
/* VFIOMSIXInfo contains another MemoryRegion for this mapping */
- if (vfio_mmap_bar(vdev, bar, &bar->mem, &vdev->msix->mmap_mem,
+ if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
+ &vdev->msix->mmap_mem,
&vdev->msix->mmap, size, start, name)) {
error_report("%s unsupported. Performance may be slow", name);
}
@@ -3281,6 +3286,7 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev)
static VFIODeviceOps vfio_pci_ops = {
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
+ .vfio_eoi = vfio_eoi,
};
static void vfio_reset_handler(void *opaque)
@@ -3607,11 +3613,11 @@ static int vfio_get_device(VFIOGroup *group, const char *name,
(unsigned long)reg_info.size, (unsigned long)reg_info.offset,
(unsigned long)reg_info.flags);
- vdev->bars[i].flags = reg_info.flags;
- vdev->bars[i].size = reg_info.size;
- vdev->bars[i].fd_offset = reg_info.offset;
- vdev->bars[i].fd = vdev->vbasedev.fd;
- vdev->bars[i].nr = i;
+ vdev->bars[i].region.vbasedev = &vdev->vbasedev;
+ vdev->bars[i].region.flags = reg_info.flags;
+ vdev->bars[i].region.size = reg_info.size;
+ vdev->bars[i].region.fd_offset = reg_info.offset;
+ vdev->bars[i].region.nr = i;
QLIST_INIT(&vdev->bars[i].quirks);
}
--
1.8.3.1