Blob Blame History Raw
From acd0e88a7222dac83caf4d507a1bfce7cd0ea734 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 29 Sep 2017 21:45:14 +0200
Subject: [PATCH 09/27] vfio: Add sysfsdev property for pci & platform

RH-Author: Alex Williamson <alex.williamson@redhat.com>
Message-id: <20170929214514.16765.36252.stgit@gimli.home>
Patchwork-id: 76768
O-Subject: [RHEL-7.5 qemu-kvm PATCH 09/16] vfio: Add sysfsdev property for pci & platform
Bugzilla: 1494181
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>

Upstream: 7df9381b7aa56c897e344f3bfe43bf5848bbd3e0
RHEL: Dropped platform

vfio-pci currently requires a host= parameter, which comes in the
form of a PCI address in [domain:]<bus:slot.function> notation.  We
expect to find a matching entry in sysfs for that under
/sys/bus/pci/devices/.  vfio-platform takes a similar approach, but
defines the host= parameter to be a string, which can be matched
directly under /sys/bus/platform/devices/.  On the PCI side, we have
some interest in using vfio to expose vGPU devices.  These are not
actual discrete PCI devices, so they don't have a compatible host PCI
bus address or a device link where QEMU wants to look for it.  There's
also really no requirement that vfio can only be used to expose
physical devices, a new vfio bus and iommu driver could expose a
completely emulated device.  To fit within the vfio framework, it
would need a kernel struct device and associated IOMMU group, but
those are easy constraints to manage.

To support such devices, which would include vGPUs, that honor the
VFIO PCI programming API, but are not necessarily backed by a unique
PCI address, add support for specifying any device in sysfs.  The
vfio API already has support for probing the device type to ensure
compatibility with either vfio-pci or vfio-platform.

With this, a vfio-pci device could either be specified as:

-device vfio-pci,host=02:00.0

or

-device vfio-pci,sysfsdev=/sys/devices/pci0000:00/0000:00:1c.0/0000:02:00.0

or even

-device vfio-pci,sysfsdev=/sys/bus/pci/devices/0000:02:00.0

When vGPU support comes along, this might look something more like:

-device vfio-pci,sysfsdev=/sys/devices/virtual/intel-vgpu/vgpu0@0000:00:02.0

NB - This is only a made up example path

The same change is made for vfio-platform, specifying sysfsdev has
precedence over the old host option.

Tested-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 hw/misc/vfio.c | 131 ++++++++++++++++++++++++---------------------------------
 1 file changed, 54 insertions(+), 77 deletions(-)

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 0d88313..64d4dc7 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -187,6 +187,7 @@ typedef struct VFIODeviceOps VFIODeviceOps;
 typedef struct VFIODevice {
     QLIST_ENTRY(VFIODevice) next;
     struct VFIOGroup *group;
+    char *sysfsdev;
     char *name;
     int fd;
     int type;
@@ -1288,12 +1289,8 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
     if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
         /* Since pci handles romfile, just print a message and return */
         if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
-            error_printf("Warning : Device at %04x:%02x:%02x.%x "
-                         "is known to cause system instability issues during "
-                         "option rom execution. "
-                         "Proceeding anyway since user specified romfile\n",
-                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
-                         vdev->host.function);
+            error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n",
+                         vdev->vbasedev.name);
         }
         return;
     }
@@ -1306,9 +1303,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
         pwrite(fd, &size, 4, offset) != 4 ||
         pread(fd, &size, 4, offset) != 4 ||
         pwrite(fd, &orig, 4, offset) != 4) {
-        error_report("%s(%04x:%02x:%02x.%x) failed: %m",
-                     __func__, vdev->host.domain, vdev->host.bus,
-                     vdev->host.slot, vdev->host.function);
+        error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
         return;
     }
 
@@ -1320,29 +1315,18 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
 
     if (vfio_blacklist_opt_rom(vdev)) {
         if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
-            error_printf("Warning : Device at %04x:%02x:%02x.%x "
-                         "is known to cause system instability issues during "
-                         "option rom execution. "
-                         "Proceeding anyway since user specified non zero value for "
-                         "rombar\n",
-                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
-                         vdev->host.function);
+            error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n",
+                         vdev->vbasedev.name);
         } else {
-            error_printf("Warning : Rom loading for device at "
-                         "%04x:%02x:%02x.%x has been disabled due to "
-                         "system instability issues. "
-                         "Specify rombar=1 or romfile to force\n",
-                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
-                         vdev->host.function);
+            error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n",
+                         vdev->vbasedev.name);
             return;
         }
     }
 
     DPRINTF("%s ROM size 0x%x\n", vdev->vbasedev.name, size);
 
-    snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
-             vdev->host.domain, vdev->host.bus, vdev->host.slot,
-             vdev->host.function);
+    snprintf(name, sizeof(name), "vfio[%s].rom", vdev->vbasedev.name);
 
     memory_region_init_io(&vdev->pdev.rom,
                           &vfio_rom_ops, vdev, name, size);
@@ -2112,9 +2096,8 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
         ret = pread(vdev->vbasedev.fd, &phys_val, len,
                     vdev->config_offset + addr);
         if (ret != len) {
-            error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m",
-                         __func__, vdev->host.domain, vdev->host.bus,
-                         vdev->host.slot, vdev->host.function, addr, len);
+            error_report("%s(%s, 0x%x, 0x%x) failed: %m",
+                         __func__, vdev->vbasedev.name, addr, len);
             return -errno;
         }
         phys_val = le32_to_cpu(phys_val);
@@ -2140,9 +2123,8 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
     /* Write everything to VFIO, let it filter out what we can't write */
     if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr)
                 != len) {
-        error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m",
-                     __func__, vdev->host.domain, vdev->host.bus,
-                     vdev->host.slot, vdev->host.function, addr, val, len);
+        error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m",
+                     __func__, vdev->vbasedev.name, addr, val, len);
     }
 
     /* MSI/MSI-X Enabling/Disabling */
@@ -2610,9 +2592,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
         return;
     }
 
-    snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
-             vdev->host.domain, vdev->host.bus, vdev->host.slot,
-             vdev->host.function, nr);
+    snprintf(name, sizeof(name), "VFIO %s BAR %d", vdev->vbasedev.name, nr);
 
     /* Determine what type of BAR this is for registration */
     ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
@@ -2946,9 +2926,8 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     }
 
     if (ret < 0) {
-        error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability "
-                     "0x%x[0x%x]@0x%x: %d", vdev->host.domain,
-                     vdev->host.bus, vdev->host.slot, vdev->host.function,
+        error_report("vfio: %s Error adding PCI capability "
+                     "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name,
                      cap_id, size, pos, ret);
         return ret;
     }
@@ -3010,11 +2989,14 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
     vfio_enable_intx(vdev);
 }
 
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
-                                PCIHostDeviceAddress *host2)
+static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
 {
-    return (host1->domain == host2->domain && host1->bus == host2->bus &&
-            host1->slot == host2->slot && host1->function == host2->function);
+    char tmp[13];
+
+    sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain,
+            addr->bus, addr->slot, addr->function);
+
+    return (strcmp(tmp, name) == 0);
 }
 
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
@@ -3040,9 +3022,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     if (ret && errno != ENOSPC) {
         ret = -errno;
         if (!vdev->has_pm_reset) {
-            error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
-                         "no available reset mechanism.", vdev->host.domain,
-                         vdev->host.bus, vdev->host.slot, vdev->host.function);
+            error_report("vfio: Cannot reset device %s, "
+                         "no available reset mechanism.", vdev->vbasedev.name);
         }
         goto out_single;
     }
@@ -3075,7 +3056,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         DPRINTF("\t%04x:%02x:%02x.%x group %d\n", host.domain,
                 host.bus, host.slot, host.function, devices[i].group_id);
 
-        if (vfio_pci_host_match(&host, &vdev->host)) {
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
             continue;
         }
 
@@ -3101,7 +3082,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
                 continue;
             }
             tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
-            if (vfio_pci_host_match(&host, &tmp->host)) {
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
                 if (single) {
                     DPRINTF("vfio: found another in-use device "
                             "%s\n", tmp->vbasedev.name);
@@ -3165,7 +3146,7 @@ out:
         host.slot = PCI_SLOT(devices[i].devfn);
         host.function = PCI_FUNC(devices[i].devfn);
 
-        if (vfio_pci_host_match(&host, &vdev->host)) {
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
             continue;
         }
 
@@ -3184,7 +3165,7 @@ out:
                 continue;
             }
             tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
-            if (vfio_pci_host_match(&host, &tmp->host)) {
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
                 vfio_pci_post_reset(tmp);
                 break;
             }
@@ -3683,10 +3664,7 @@ static void vfio_err_notifier_handler(void *opaque)
      * guest to contain the error.
      */
 
-    error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
-                 "Please collect any data possible and then kill the guest",
-                 __func__, vdev->host.domain, vdev->host.bus,
-                 vdev->host.slot, vdev->host.function);
+    error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name);
 
     vm_stop(RUN_STATE_INTERNAL_ERROR);
 }
@@ -3867,7 +3845,7 @@ static int vfio_initfn(PCIDevice *pdev)
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
     VFIODevice *vbasedev_iter;
     VFIOGroup *group;
-    char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+    char *tmp, group_path[PATH_MAX], *group_name;
     ssize_t len;
     struct stat st;
     int groupid;
@@ -3885,36 +3863,37 @@ static int vfio_initfn(PCIDevice *pdev)
         return -1;
     }
 
-    /* Check that the host device exists */
-    snprintf(path, sizeof(path),
-             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
-             vdev->host.domain, vdev->host.bus, vdev->host.slot,
-             vdev->host.function);
-    if (stat(path, &st) < 0) {
-        error_report("vfio: error: no such host device: %s", path);
+    if (!vdev->vbasedev.sysfsdev) {
+        vdev->vbasedev.sysfsdev =
+            g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
+                            vdev->host.domain, vdev->host.bus,
+                            vdev->host.slot, vdev->host.function);
+    }
+
+    if (stat(vdev->vbasedev.sysfsdev, &st) < 0) {
+        error_report("vfio: error: no such host device: %s",
+                     vdev->vbasedev.sysfsdev);
         return -errno;
     }
 
+    vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev));
     vdev->vbasedev.ops = &vfio_pci_ops;
-
     vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
-    vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x",
-                                          vdev->host.domain, vdev->host.bus,
-                                          vdev->host.slot, vdev->host.function);
 
-    strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+    tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
+    len = readlink(tmp, group_path, sizeof(group_path));
+    g_free(tmp);
 
-    len = readlink(path, iommu_group_path, sizeof(path));
-    if (len <= 0 || len >= sizeof(path)) {
+    if (len <= 0 || len >= sizeof(group_path)) {
         error_report("vfio: error no iommu_group for device");
         return len < 0 ? -errno : -ENAMETOOLONG;
     }
 
-    iommu_group_path[len] = 0;
-    group_name = basename(iommu_group_path);
+    group_path[len] = 0;
 
+    group_name = basename(group_path);
     if (sscanf(group_name, "%d", &groupid) != 1) {
-        error_report("vfio: error reading %s: %m", path);
+        error_report("vfio: error reading %s: %m", group_path);
         return -errno;
     }
 
@@ -3926,21 +3905,18 @@ static int vfio_initfn(PCIDevice *pdev)
         return -ENOENT;
     }
 
-    snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
-            vdev->host.domain, vdev->host.bus, vdev->host.slot,
-            vdev->host.function);
-
     QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
         if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
-            error_report("vfio: error: device %s is already attached", path);
+            error_report("vfio: error: device %s is already attached",
+                         vdev->vbasedev.name);
             vfio_put_group(group);
             return -EBUSY;
         }
     }
 
-    ret = vfio_get_device(group, path, vdev);
+    ret = vfio_get_device(group, vdev->vbasedev.name, vdev);
     if (ret) {
-        error_report("vfio: failed to get device %s", path);
+        error_report("vfio: failed to get device %s", vdev->vbasedev.name);
         vfio_put_group(group);
         return ret;
     }
@@ -4086,6 +4062,7 @@ post_reset:
 
 static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
+    DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
     DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
                        intx.mmap_timeout, 1100),
     DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
-- 
1.8.3.1