From 725ad24f771adcc8aeacaf27b21a6d95cab41e69 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Sep 2017 21:45:02 +0200 Subject: [PATCH 07/27] hw/vfio/pci: Introduce VFIORegion RH-Author: Alex Williamson Message-id: <20170929214502.16765.46217.stgit@gimli.home> Patchwork-id: 76765 O-Subject: [RHEL-7.5 qemu-kvm PATCH 07/16] hw/vfio/pci: Introduce VFIORegion Bugzilla: 1494181 RH-Acked-by: Paolo Bonzini RH-Acked-by: Auger Eric RH-Acked-by: Miroslav Rezanina From: Eric Auger Upstream: a664477db8dac84cc046e9d79701eefda1d58703 This structure is going to be shared by VFIOPCIDevice and VFIOPlatformDevice. VFIOBAR includes it. vfio_eoi becomes an ops of VFIODevice specialized by parent device. This makes possible to transform vfio_bar_write/read into generic vfio_region_write/read that will be used by VFIOPlatformDevice too. vfio_mmap_bar becomes vfio_map_region Signed-off-by: Eric Auger Signed-off-by: Alex Williamson Signed-off-by: Miroslav Rezanina --- hw/misc/vfio.c | 188 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 97 insertions(+), 91 deletions(-) diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 3e559ed..92414b9 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -84,15 +84,19 @@ typedef struct VFIOQuirk { } data; } VFIOQuirk; -typedef struct VFIOBAR { - off_t fd_offset; /* offset of BAR within device fd */ - int fd; /* device fd, allows us to pass VFIOBAR as opaque data */ +typedef struct VFIORegion { + struct VFIODevice *vbasedev; + off_t fd_offset; /* offset of region within device fd */ MemoryRegion mem; /* slow, read/write access */ MemoryRegion mmap_mem; /* direct mapped access */ void *mmap; size_t size; uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ - uint8_t nr; /* cache the BAR number for debug */ + uint8_t nr; /* cache the region number for debug */ +} VFIORegion; + +typedef struct VFIOBAR { + VFIORegion region; bool ioport; bool mem64; QLIST_HEAD(, VFIOQuirk) quirks; @@ -194,6 +198,7 @@ typedef struct VFIODevice { struct VFIODeviceOps { void (*vfio_compute_needs_reset)(VFIODevice *vdev); int (*vfio_hot_reset_multi)(VFIODevice *vdev); + void (*vfio_eoi)(VFIODevice *vdev); }; typedef struct VFIOPCIDevice { @@ -383,8 +388,10 @@ static void vfio_intx_interrupt(void *opaque) } } -static void vfio_eoi(VFIOPCIDevice *vdev) +static void vfio_eoi(VFIODevice *vbasedev) { + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + if (!vdev->intx.pending) { return; } @@ -394,7 +401,7 @@ static void vfio_eoi(VFIOPCIDevice *vdev) vdev->intx.pending = false; qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0); - vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); + vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX); } static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) @@ -549,7 +556,7 @@ static void vfio_update_irq(PCIDevice *pdev) vfio_enable_intx_kvm(vdev); /* Re-enable the interrupt in cased we missed an EOI */ - vfio_eoi(vdev); + vfio_eoi(&vdev->vbasedev); } static int vfio_enable_intx(VFIOPCIDevice *vdev) @@ -1086,10 +1093,11 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) /* * IO Port/MMIO - Beware of the endians, VFIO is always little endian */ -static void vfio_bar_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) +static void vfio_region_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) { - VFIOBAR *bar = opaque; + VFIORegion *region = opaque; + VFIODevice *vbasedev = region->vbasedev; union { uint8_t byte; uint16_t word; @@ -1112,21 +1120,15 @@ static void vfio_bar_write(void *opaque, hwaddr addr, break; } - if (pwrite(bar->fd, &buf, size, bar->fd_offset + addr) != size) { - error_report("%s(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m", - __func__, addr, data, size); + if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 + ",%d) failed: %m", + __func__, vbasedev->name, region->nr, + addr, data, size); } -#ifdef DEBUG_VFIO - { - VFIOPCIDevice *vdev = container_of(bar, VFIOPCIDevice, bars[bar->nr]); - - DPRINTF("%s(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ", %d)\n", __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, bar->nr, addr, - data, size); - } -#endif + DPRINTF("%s(%s:BAR%d+0x%"HWADDR_PRIx", 0x%"PRIx64", %d)\n", __func__, + vbasedev->name, region->nr, addr, data, size); /* * A read or write to a BAR always signals an INTx EOI. This will @@ -1136,13 +1138,14 @@ static void vfio_bar_write(void *opaque, hwaddr addr, * which access will service the interrupt, so we're potentially * getting quite a few host interrupts per guest interrupt. */ - vfio_eoi(container_of(bar, VFIOPCIDevice, bars[bar->nr])); + vbasedev->ops->vfio_eoi(vbasedev); } -static uint64_t vfio_bar_read(void *opaque, - hwaddr addr, unsigned size) +static uint64_t vfio_region_read(void *opaque, + hwaddr addr, unsigned size) { - VFIOBAR *bar = opaque; + VFIORegion *region = opaque; + VFIODevice *vbasedev = region->vbasedev; union { uint8_t byte; uint16_t word; @@ -1151,9 +1154,10 @@ static uint64_t vfio_bar_read(void *opaque, } buf; uint64_t data = 0; - if (pread(bar->fd, &buf, size, bar->fd_offset + addr) != size) { - error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m", - __func__, addr, size); + if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", + __func__, vbasedev->name, region->nr, + addr, size); return (uint64_t)-1; } @@ -1172,26 +1176,18 @@ static uint64_t vfio_bar_read(void *opaque, break; } -#ifdef DEBUG_VFIO - { - VFIOPCIDevice *vdev = container_of(bar, VFIOPCIDevice, bars[bar->nr]); - - DPRINTF("%s(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx - ", %d) = 0x%"PRIx64"\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, - bar->nr, addr, size, data); - } -#endif + DPRINTF("%s(%s:BAR%d+0x%"HWADDR_PRIx", %d) = 0x%"PRIx64"\n", __func__, + vbasedev->name, region->nr, addr, size, data); /* Same as write above */ - vfio_eoi(container_of(bar, VFIOPCIDevice, bars[bar->nr])); + vbasedev->ops->vfio_eoi(vbasedev); return data; } -static const MemoryRegionOps vfio_bar_ops = { - .read = vfio_bar_read, - .write = vfio_bar_write, +static const MemoryRegionOps vfio_region_ops = { + .read = vfio_region_read, + .write = vfio_region_write, .endianness = DEVICE_LITTLE_ENDIAN, }; @@ -1505,8 +1501,8 @@ static uint64_t vfio_generic_window_quirk_read(void *opaque, vdev->host.bus, vdev->host.slot, vdev->host.function, quirk->data.bar, addr, size, data); } else { - data = vfio_bar_read(&vdev->bars[quirk->data.bar], - addr + quirk->data.base_offset, size); + data = vfio_region_read(&vdev->bars[quirk->data.bar].region, + addr + quirk->data.base_offset, size); } return data; @@ -1556,7 +1552,7 @@ static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, return; } - vfio_bar_write(&vdev->bars[quirk->data.bar], + vfio_region_write(&vdev->bars[quirk->data.bar].region, addr + quirk->data.base_offset, data, size); } @@ -1590,7 +1586,8 @@ static uint64_t vfio_generic_quirk_read(void *opaque, vdev->host.bus, vdev->host.slot, vdev->host.function, quirk->data.bar, addr + base, size, data); } else { - data = vfio_bar_read(&vdev->bars[quirk->data.bar], addr + base, size); + data = vfio_region_read(&vdev->bars[quirk->data.bar].region, + addr + base, size); } return data; @@ -1619,7 +1616,8 @@ static void vfio_generic_quirk_write(void *opaque, hwaddr addr, vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function, quirk->data.bar, addr + base, data, size); } else { - vfio_bar_write(&vdev->bars[quirk->data.bar], addr + base, data, size); + vfio_region_write(&vdev->bars[quirk->data.bar].region, + addr + base, data, size); } } @@ -1672,7 +1670,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) * As long as the BAR is >= 256 bytes it will be aligned such that the * lower byte is always zero. Filter out anything else, if it exists. */ - if (!vdev->bars[4].ioport || vdev->bars[4].size < 256) { + if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) { return; } @@ -1725,7 +1723,7 @@ static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, &vfio_generic_window_quirk, quirk, "vfio-ati-bar4-window-quirk", 8); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, quirk->data.base_offset, &quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -1759,7 +1757,7 @@ static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, &vfio_generic_quirk, quirk, "vfio-ati-bar2-4000-quirk", TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, quirk->data.address_match & TARGET_PAGE_MASK, &quirk->mem, 1); @@ -1878,7 +1876,7 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) VFIOQuirk *quirk; if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA || - !vdev->bars[1].size) { + !vdev->bars[1].region.size) { return; } @@ -1986,7 +1984,8 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, &vfio_nvidia_bar5_window_quirk, quirk, "vfio-nvidia-bar5-window-quirk", 16); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0, &quirk->mem, 1); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + 0, &quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -2024,7 +2023,7 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, &vfio_generic_quirk, quirk, "vfio-nvidia-bar0-88000-quirk", TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, quirk->data.address_match & TARGET_PAGE_MASK, &quirk->mem, 1); @@ -2050,7 +2049,8 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) /* Log the chipset ID */ DPRINTF("Nvidia NV%02x\n", - (unsigned int)(vfio_bar_read(&vdev->bars[0], 0, 4) >> 20) & 0xff); + (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20) + & 0xff); quirk = g_malloc0(sizeof(*quirk)); quirk->vdev = vdev; @@ -2062,7 +2062,7 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, &vfio_generic_quirk, quirk, "vfio-nvidia-bar0-1800-quirk", TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, quirk->data.address_match & TARGET_PAGE_MASK, &quirk->mem, 1); @@ -2118,7 +2118,7 @@ static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) while (!QLIST_EMPTY(&bar->quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); - memory_region_del_subregion(&bar->mem, &quirk->mem); + memory_region_del_subregion(&bar->region.mem, &quirk->mem); memory_region_destroy(&quirk->mem); QLIST_REMOVE(quirk, next); g_free(quirk); @@ -2495,7 +2495,7 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) * specific quirk if the device is known or we have a broken configuration. */ if (vdev->msix->pba_offset >= - vdev->bars[vdev->msix->pba_bar].size) { + vdev->bars[vdev->msix->pba_bar].region.size) { PCIDevice *pdev = &vdev->pdev; uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); @@ -2524,9 +2524,9 @@ static int vfio_setup_msix(VFIOPCIDevice *vdev, int pos) int ret; ret = msix_init(&vdev->pdev, vdev->msix->entries, - &vdev->bars[vdev->msix->table_bar].mem, + &vdev->bars[vdev->msix->table_bar].region.mem, vdev->msix->table_bar, vdev->msix->table_offset, - &vdev->bars[vdev->msix->pba_bar].mem, + &vdev->bars[vdev->msix->pba_bar].region.mem, vdev->msix->pba_bar, vdev->msix->pba_offset, pos); if (ret < 0) { if (ret == -ENOTSUP) { @@ -2544,8 +2544,9 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev) msi_uninit(&vdev->pdev); if (vdev->msix) { - msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem, - &vdev->bars[vdev->msix->pba_bar].mem); + msix_uninit(&vdev->pdev, + &vdev->bars[vdev->msix->table_bar].region.mem, + &vdev->bars[vdev->msix->pba_bar].region.mem); } } @@ -2559,11 +2560,11 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) for (i = 0; i < PCI_ROM_SLOT; i++) { VFIOBAR *bar = &vdev->bars[i]; - if (!bar->size) { + if (!bar->region.size) { continue; } - memory_region_set_enabled(&bar->mmap_mem, enabled); + memory_region_set_enabled(&bar->region.mmap_mem, enabled); if (vdev->msix && vdev->msix->table_bar == i) { memory_region_set_enabled(&vdev->msix->mmap_mem, enabled); } @@ -2574,45 +2575,47 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; - if (!bar->size) { + if (!bar->region.size) { return; } vfio_bar_quirk_teardown(vdev, nr); - memory_region_del_subregion(&bar->mem, &bar->mmap_mem); - munmap(bar->mmap, memory_region_size(&bar->mmap_mem)); - memory_region_destroy(&bar->mmap_mem); + memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem); + munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem)); + memory_region_destroy(&bar->region.mmap_mem); if (vdev->msix && vdev->msix->table_bar == nr) { - memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem); + memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem); munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); memory_region_destroy(&vdev->msix->mmap_mem); } - memory_region_destroy(&bar->mem); + memory_region_destroy(&bar->region.mem); } -static int vfio_mmap_bar(VFIOPCIDevice *vdev, VFIOBAR *bar, - MemoryRegion *mem, MemoryRegion *submem, - void **map, size_t size, off_t offset, - const char *name) +static int vfio_mmap_region(Object *obj, VFIORegion *region, + MemoryRegion *mem, MemoryRegion *submem, + void **map, size_t size, off_t offset, + const char *name) { int ret = 0; + VFIODevice *vbasedev = region->vbasedev; - if (VFIO_ALLOW_MMAP && size && bar->flags & VFIO_REGION_INFO_FLAG_MMAP) { + if (VFIO_ALLOW_MMAP && size && region->flags & + VFIO_REGION_INFO_FLAG_MMAP) { int prot = 0; - if (bar->flags & VFIO_REGION_INFO_FLAG_READ) { + if (region->flags & VFIO_REGION_INFO_FLAG_READ) { prot |= PROT_READ; } - if (bar->flags & VFIO_REGION_INFO_FLAG_WRITE) { + if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { prot |= PROT_WRITE; } *map = mmap(NULL, size, prot, MAP_SHARED, - bar->fd, bar->fd_offset + offset); + vbasedev->fd, region->fd_offset + offset); if (*map == MAP_FAILED) { *map = NULL; ret = -errno; @@ -2635,7 +2638,7 @@ empty_region: static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; - uint64_t size = bar->size; + uint64_t size = bar->region.size; char name[64]; uint32_t pci_bar; uint8_t type; @@ -2665,9 +2668,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) ~PCI_BASE_ADDRESS_MEM_MASK); /* A "slow" read/write mapping underlies all BARs */ - memory_region_init_io(&bar->mem, &vfio_bar_ops, + memory_region_init_io(&bar->region.mem, &vfio_region_ops, bar, name, size); - pci_register_bar(&vdev->pdev, nr, type, &bar->mem); + pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem); /* * We can't mmap areas overlapping the MSIX vector table, so we @@ -2678,8 +2681,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) } strncat(name, " mmap", sizeof(name) - strlen(name) - 1); - if (vfio_mmap_bar(vdev, bar, &bar->mem, - &bar->mmap_mem, &bar->mmap, size, 0, name)) { + if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, + &bar->region.mmap_mem, &bar->region.mmap, + size, 0, name)) { error_report("%s unsupported. Performance may be slow", name); } @@ -2689,10 +2693,11 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) start = TARGET_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); - size = start < bar->size ? bar->size - start : 0; + size = start < bar->region.size ? bar->region.size - start : 0; strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); /* VFIOMSIXInfo contains another MemoryRegion for this mapping */ - if (vfio_mmap_bar(vdev, bar, &bar->mem, &vdev->msix->mmap_mem, + if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, + &vdev->msix->mmap_mem, &vdev->msix->mmap, size, start, name)) { error_report("%s unsupported. Performance may be slow", name); } @@ -3281,6 +3286,7 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev) static VFIODeviceOps vfio_pci_ops = { .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, + .vfio_eoi = vfio_eoi, }; static void vfio_reset_handler(void *opaque) @@ -3607,11 +3613,11 @@ static int vfio_get_device(VFIOGroup *group, const char *name, (unsigned long)reg_info.size, (unsigned long)reg_info.offset, (unsigned long)reg_info.flags); - vdev->bars[i].flags = reg_info.flags; - vdev->bars[i].size = reg_info.size; - vdev->bars[i].fd_offset = reg_info.offset; - vdev->bars[i].fd = vdev->vbasedev.fd; - vdev->bars[i].nr = i; + vdev->bars[i].region.vbasedev = &vdev->vbasedev; + vdev->bars[i].region.flags = reg_info.flags; + vdev->bars[i].region.size = reg_info.size; + vdev->bars[i].region.fd_offset = reg_info.offset; + vdev->bars[i].region.nr = i; QLIST_INIT(&vdev->bars[i].quirks); } -- 1.8.3.1