218e99
From f3b05560b20866cadb604f0a5a6f4a7698d2e07b Mon Sep 17 00:00:00 2001
218e99
From: Alex Williamson <alex.williamson@redhat.com>
218e99
Date: Tue, 5 Nov 2013 15:37:35 +0100
218e99
Subject: [PATCH 14/25] vfio-pci: Lazy PCI option ROM loading
218e99
218e99
RH-Author: Alex Williamson <alex.williamson@redhat.com>
218e99
Message-id: <20131105153734.16057.77668.stgit@bling.home>
218e99
Patchwork-id: 55423
218e99
O-Subject: [RHEL7 qemu-kvm PATCH 2/5] vfio-pci: Lazy PCI option ROM loading
218e99
Bugzilla: 1026550
218e99
RH-Acked-by: Bandan Das <bsd@redhat.com>
218e99
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
218e99
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
218e99
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
218e99
218e99
Bugzilla: 1026550
218e99
Upstream commit: 6f864e6ec8812d5a5525a7861ca599c6bcabdebe
218e99
218e99
During vfio-pci initfn, the device is not always in a state where the
218e99
option ROM can be read.  In the case of graphics cards, there's often
218e99
no per-function reset, which means we have host driver state affecting
218e99
whether the option ROM is usable.  Ideally we want to move reading the
218e99
option ROM past any co-assigned device resets to the point where the
218e99
guest first tries to read the ROM itself.
218e99
218e99
To accomplish this, we switch the memory region for the option ROM to
218e99
an I/O region rather than a memory mapped region.  This has the side
218e99
benefit that we don't waste KVM memory slots for a BAR where we don't
218e99
care about performance.  This also allows us to delay loading the ROM
218e99
from the device until the first read by the guest.  We then use the
218e99
PCI config space size of the ROM BAR when setting up the BAR through
218e99
QEMU PCI.
218e99
218e99
Another benefit of this approach is that previously when a user set
218e99
the ROM to a file using the romfile= option, we still probed VFIO for
218e99
the parameters of the ROM, which can result in dmesg errors about an
218e99
invalid ROM.  We now only probe VFIO to get the ROM contents if the
218e99
guest actually tries to read the ROM.
218e99
218e99
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
218e99
---
218e99
 hw/misc/vfio.c |  184 +++++++++++++++++++++++++++++++++++++-------------------
218e99
 1 file changed, 122 insertions(+), 62 deletions(-)
218e99
218e99
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
218e99
---
218e99
 hw/misc/vfio.c |  184 +++++++++++++++++++++++++++++++++++++-------------------
218e99
 1 files changed, 122 insertions(+), 62 deletions(-)
218e99
218e99
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
218e99
index 8e69182..8d84891 100644
218e99
--- a/hw/misc/vfio.c
218e99
+++ b/hw/misc/vfio.c
218e99
@@ -166,6 +166,7 @@ typedef struct VFIODevice {
218e99
     off_t config_offset; /* Offset of config space region within device fd */
218e99
     unsigned int rom_size;
218e99
     off_t rom_offset; /* Offset of ROM region within device fd */
218e99
+    void *rom;
218e99
     int msi_cap_size;
218e99
     VFIOMSIVector *msi_vectors;
218e99
     VFIOMSIXInfo *msix;
218e99
@@ -1058,6 +1059,125 @@ static const MemoryRegionOps vfio_bar_ops = {
218e99
     .endianness = DEVICE_LITTLE_ENDIAN,
218e99
 };
218e99
 
218e99
+static void vfio_pci_load_rom(VFIODevice *vdev)
218e99
+{
218e99
+    struct vfio_region_info reg_info = {
218e99
+        .argsz = sizeof(reg_info),
218e99
+        .index = VFIO_PCI_ROM_REGION_INDEX
218e99
+    };
218e99
+    uint64_t size;
218e99
+    off_t off = 0;
218e99
+    ssize_t bytes; /* signed: pread() returns -1 on error */
218e99
+
218e99
+    if (ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
218e99
+        error_report("vfio: Error getting ROM info: %m");
218e99
+        return;
218e99
+    }
218e99
+
218e99
+    DPRINTF("Device %04x:%02x:%02x.%x ROM:\n", vdev->host.domain,
218e99
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
218e99
+    DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
218e99
+            (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
218e99
+            (unsigned long)reg_info.flags);
218e99
+
218e99
+    vdev->rom_size = size = reg_info.size;
218e99
+    vdev->rom_offset = reg_info.offset;
218e99
+
218e99
+    if (!vdev->rom_size) {
218e99
+        return; /* no ROM region: vdev->rom is left unset */
218e99
+    }
218e99
+
218e99
+    vdev->rom = g_malloc(size);
218e99
+    memset(vdev->rom, 0xff, size); /* bytes never read stay 0xff */
218e99
+
218e99
+    while (size) {
218e99
+        bytes = pread(vdev->fd, vdev->rom + off, size, vdev->rom_offset + off);
218e99
+        if (bytes == 0) {
218e99
+            break; /* short read: keep what was read so far */
218e99
+        } else if (bytes > 0) {
218e99
+            off += bytes;
218e99
+            size -= bytes;
218e99
+        } else {
218e99
+            if (errno == EINTR || errno == EAGAIN) {
218e99
+                continue; /* transient failure: retry same offset */
218e99
+            }
218e99
+            error_report("vfio: Error reading device ROM: %m");
218e99
+            break;
218e99
+        }
218e99
+    }
218e99
+}
218e99
+
218e99
+static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
218e99
+{
218e99
+    VFIODevice *vdev = opaque;
218e99
+    uint64_t val = ((uint64_t)1 << (size * 8)) - 1; /* default: all ones */
218e99
+
218e99
+    /* Load the ROM lazily when the guest tries to read it */
218e99
+    if (unlikely(!vdev->rom)) {
218e99
+        vfio_pci_load_rom(vdev); /* may leave vdev->rom unset on failure */
218e99
+    }
218e99
+
218e99
+    memcpy(&val, vdev->rom + addr, /* copy length is clamped to rom_size */
218e99
+           (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
218e99
+
218e99
+    DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n",
218e99
+            __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
218e99
+            vdev->host.function, addr, size, val);
218e99
+
218e99
+    return val; /* bytes not covered by the copy keep the default */
218e99
+}
218e99
+
218e99
+static const MemoryRegionOps vfio_rom_ops = {
218e99
+    .read = vfio_rom_read, /* only a read handler is provided */
218e99
+    .endianness = DEVICE_LITTLE_ENDIAN,
218e99
+};
218e99
+
218e99
+static void vfio_pci_size_rom(VFIODevice *vdev)
218e99
+{
218e99
+    uint32_t orig, size = (uint32_t)PCI_ROM_ADDRESS_MASK;
218e99
+    off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
218e99
+    char name[32];
218e99
+
218e99
+    if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
218e99
+        return; /* romfile given or rom_bar disabled: nothing to size */
218e99
+    }
218e99
+
218e99
+    /*
218e99
+     * Use the same size ROM BAR as the physical device.  The contents
218e99
+     * will get filled in later when the guest tries to read it.
218e99
+     */
218e99
+    if (pread(vdev->fd, &orig, 4, offset) != 4 || /* save register */
218e99
+        pwrite(vdev->fd, &size, 4, offset) != 4 || /* write address mask */
218e99
+        pread(vdev->fd, &size, 4, offset) != 4 || /* read back result */
218e99
+        pwrite(vdev->fd, &orig, 4, offset) != 4) { /* restore saved value */
218e99
+        error_report("%s(%04x:%02x:%02x.%x) failed: %m",
218e99
+                     __func__, vdev->host.domain, vdev->host.bus,
218e99
+                     vdev->host.slot, vdev->host.function);
218e99
+        return;
218e99
+    }
218e99
+
218e99
+    size = ~(size & PCI_ROM_ADDRESS_MASK) + 1; /* readback -> BAR size */
218e99
+
218e99
+    if (!size) {
218e99
+        return; /* computed size is zero: no ROM BAR is registered */
218e99
+    }
218e99
+
218e99
+    DPRINTF("%04x:%02x:%02x.%x ROM size 0x%x\n", vdev->host.domain,
218e99
+            vdev->host.bus, vdev->host.slot, vdev->host.function, size);
218e99
+
218e99
+    snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
218e99
+             vdev->host.domain, vdev->host.bus, vdev->host.slot,
218e99
+             vdev->host.function);
218e99
+
218e99
+    memory_region_init_io(&vdev->pdev.rom, /* I/O region, not RAM-backed */
218e99
+                          &vfio_rom_ops, vdev, name, size);
218e99
+
218e99
+    pci_register_bar(&vdev->pdev, PCI_ROM_SLOT,
218e99
+                     PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom);
218e99
+
218e99
+    vdev->pdev.has_rom = true;
218e99
+}
218e99
+
218e99
 static void vfio_vga_write(void *opaque, hwaddr addr,
218e99
                            uint64_t data, unsigned size)
218e99
 {
218e99
@@ -2633,51 +2753,6 @@ static int vfio_add_capabilities(VFIODevice *vdev)
218e99
     return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
218e99
 }
218e99
 
218e99
-static int vfio_load_rom(VFIODevice *vdev)
218e99
-{
218e99
-    uint64_t size = vdev->rom_size;
218e99
-    char name[32];
218e99
-    off_t off = 0, voff = vdev->rom_offset;
218e99
-    ssize_t bytes;
218e99
-    void *ptr;
218e99
-
218e99
-    /* If loading ROM from file, pci handles it */
218e99
-    if (vdev->pdev.romfile || !vdev->pdev.rom_bar || !size) {
218e99
-        return 0;
218e99
-    }
218e99
-
218e99
-    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
218e99
-            vdev->host.bus, vdev->host.slot, vdev->host.function);
218e99
-
218e99
-    snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
218e99
-             vdev->host.domain, vdev->host.bus, vdev->host.slot,
218e99
-             vdev->host.function);
218e99
-    memory_region_init_ram(&vdev->pdev.rom, name, size);
218e99
-    ptr = memory_region_get_ram_ptr(&vdev->pdev.rom);
218e99
-    memset(ptr, 0xff, size);
218e99
-
218e99
-    while (size) {
218e99
-        bytes = pread(vdev->fd, ptr + off, size, voff + off);
218e99
-        if (bytes == 0) {
218e99
-            break; /* expect that we could get back less than the ROM BAR */
218e99
-        } else if (bytes > 0) {
218e99
-            off += bytes;
218e99
-            size -= bytes;
218e99
-        } else {
218e99
-            if (errno == EINTR || errno == EAGAIN) {
218e99
-                continue;
218e99
-            }
218e99
-            error_report("vfio: Error reading device ROM: %m");
218e99
-            memory_region_destroy(&vdev->pdev.rom);
218e99
-            return -errno;
218e99
-        }
218e99
-    }
218e99
-
218e99
-    pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, 0, &vdev->pdev.rom);
218e99
-    vdev->pdev.has_rom = true;
218e99
-    return 0;
218e99
-}
218e99
-
218e99
 static int vfio_connect_container(VFIOGroup *group)
218e99
 {
218e99
     VFIOContainer *container;
218e99
@@ -2911,22 +2986,6 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
218e99
         QLIST_INIT(&vdev->bars[i].quirks);
218e99
     }
218e99
 
218e99
-    reg_info.index = VFIO_PCI_ROM_REGION_INDEX;
218e99
-
218e99
-    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
218e99
-    if (ret) {
218e99
-        error_report("vfio: Error getting ROM info: %m");
218e99
-        goto error;
218e99
-    }
218e99
-
218e99
-    DPRINTF("Device %s ROM:\n", name);
218e99
-    DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
218e99
-            (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
218e99
-            (unsigned long)reg_info.flags);
218e99
-
218e99
-    vdev->rom_size = reg_info.size;
218e99
-    vdev->rom_offset = reg_info.offset;
218e99
-
218e99
     reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;
218e99
 
218e99
     ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
218e99
@@ -3224,7 +3283,7 @@ static int vfio_initfn(PCIDevice *pdev)
218e99
     memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
218e99
     memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
218e99
 
218e99
-    vfio_load_rom(vdev);
218e99
+    vfio_pci_size_rom(vdev);
218e99
 
218e99
     ret = vfio_early_setup_msix(vdev);
218e99
     if (ret) {
218e99
@@ -3289,6 +3348,7 @@ static void vfio_exitfn(PCIDevice *pdev)
218e99
     vfio_teardown_msi(vdev);
218e99
     vfio_unmap_bars(vdev);
218e99
     g_free(vdev->emulated_config_bits);
218e99
+    g_free(vdev->rom);
218e99
     vfio_put_device(vdev);
218e99
     vfio_put_group(group);
218e99
 }
218e99
-- 
218e99
1.7.1
218e99