5d360b
From acd0e88a7222dac83caf4d507a1bfce7cd0ea734 Mon Sep 17 00:00:00 2001
5d360b
From: Alex Williamson <alex.williamson@redhat.com>
5d360b
Date: Fri, 29 Sep 2017 21:45:14 +0200
5d360b
Subject: [PATCH 09/27] vfio: Add sysfsdev property for pci & platform
5d360b
5d360b
RH-Author: Alex Williamson <alex.williamson@redhat.com>
5d360b
Message-id: <20170929214514.16765.36252.stgit@gimli.home>
5d360b
Patchwork-id: 76768
5d360b
O-Subject: [RHEL-7.5 qemu-kvm PATCH 09/16] vfio: Add sysfsdev property for pci & platform
5d360b
Bugzilla: 1494181
5d360b
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
5d360b
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
5d360b
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
5d360b
5d360b
Upstream: 7df9381b7aa56c897e344f3bfe43bf5848bbd3e0
5d360b
RHEL: Dropped platform
5d360b
5d360b
vfio-pci currently requires a host= parameter, which comes in the
5d360b
form of a PCI address in [domain:]<bus:slot.function> notation.  We
5d360b
expect to find a matching entry in sysfs for that under
5d360b
/sys/bus/pci/devices/.  vfio-platform takes a similar approach, but
5d360b
defines the host= parameter to be a string, which can be matched
5d360b
directly under /sys/bus/platform/devices/.  On the PCI side, we have
5d360b
some interest in using vfio to expose vGPU devices.  These are not
5d360b
actual discrete PCI devices, so they don't have a compatible host PCI
5d360b
bus address or a device link where QEMU wants to look for it.  There's
5d360b
also really no requirement that vfio can only be used to expose
5d360b
physical devices; a new vfio bus and iommu driver could expose a
5d360b
completely emulated device.  To fit within the vfio framework, it
5d360b
would need a kernel struct device and associated IOMMU group, but
5d360b
those are easy constraints to manage.
5d360b
5d360b
To support such devices, which would include vGPUs, that honor the
5d360b
VFIO PCI programming API, but are not necessarily backed by a unique
5d360b
PCI address, add support for specifying any device in sysfs.  The
5d360b
vfio API already has support for probing the device type to ensure
5d360b
compatibility with either vfio-pci or vfio-platform.
5d360b
5d360b
With this, a vfio-pci device could either be specified as:
5d360b
5d360b
-device vfio-pci,host=02:00.0
5d360b
5d360b
or
5d360b
5d360b
-device vfio-pci,sysfsdev=/sys/devices/pci0000:00/0000:00:1c.0/0000:02:00.0
5d360b
5d360b
or even
5d360b
5d360b
-device vfio-pci,sysfsdev=/sys/bus/pci/devices/0000:02:00.0
5d360b
5d360b
When vGPU support comes along, this might look something more like:
5d360b
5d360b
-device vfio-pci,sysfsdev=/sys/devices/virtual/intel-vgpu/vgpu0@0000:00:02.0
5d360b
5d360b
NB - This is only a made-up example path
5d360b
5d360b
The same change is made for vfio-platform; specifying sysfsdev has
5d360b
precedence over the old host option.
5d360b
5d360b
Tested-by: Eric Auger <eric.auger@linaro.org>
5d360b
Reviewed-by: Eric Auger <eric.auger@linaro.org>
5d360b
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
5d360b
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
5d360b
---
5d360b
 hw/misc/vfio.c | 131 ++++++++++++++++++++++++---------------------------------
5d360b
 1 file changed, 54 insertions(+), 77 deletions(-)
5d360b
5d360b
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
5d360b
index 0d88313..64d4dc7 100644
5d360b
--- a/hw/misc/vfio.c
5d360b
+++ b/hw/misc/vfio.c
5d360b
@@ -187,6 +187,7 @@ typedef struct VFIODeviceOps VFIODeviceOps;
5d360b
 typedef struct VFIODevice {
5d360b
     QLIST_ENTRY(VFIODevice) next;
5d360b
     struct VFIOGroup *group;
5d360b
+    char *sysfsdev;
5d360b
     char *name;
5d360b
     int fd;
5d360b
     int type;
5d360b
@@ -1288,12 +1289,8 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
5d360b
     if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
5d360b
         /* Since pci handles romfile, just print a message and return */
5d360b
         if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
5d360b
-            error_printf("Warning : Device at %04x:%02x:%02x.%x "
5d360b
-                         "is known to cause system instability issues during "
5d360b
-                         "option rom execution. "
5d360b
-                         "Proceeding anyway since user specified romfile\n",
5d360b
-                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
5d360b
-                         vdev->host.function);
5d360b
+            error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n",
5d360b
+                         vdev->vbasedev.name);
5d360b
         }
5d360b
         return;
5d360b
     }
5d360b
@@ -1306,9 +1303,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
5d360b
         pwrite(fd, &size, 4, offset) != 4 ||
5d360b
         pread(fd, &size, 4, offset) != 4 ||
5d360b
         pwrite(fd, &orig, 4, offset) != 4) {
5d360b
-        error_report("%s(%04x:%02x:%02x.%x) failed: %m",
5d360b
-                     __func__, vdev->host.domain, vdev->host.bus,
5d360b
-                     vdev->host.slot, vdev->host.function);
5d360b
+        error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
5d360b
         return;
5d360b
     }
5d360b
 
5d360b
@@ -1320,29 +1315,18 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
5d360b
 
5d360b
     if (vfio_blacklist_opt_rom(vdev)) {
5d360b
         if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
5d360b
-            error_printf("Warning : Device at %04x:%02x:%02x.%x "
5d360b
-                         "is known to cause system instability issues during "
5d360b
-                         "option rom execution. "
5d360b
-                         "Proceeding anyway since user specified non zero value for "
5d360b
-                         "rombar\n",
5d360b
-                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
5d360b
-                         vdev->host.function);
5d360b
+            error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n",
5d360b
+                         vdev->vbasedev.name);
5d360b
         } else {
5d360b
-            error_printf("Warning : Rom loading for device at "
5d360b
-                         "%04x:%02x:%02x.%x has been disabled due to "
5d360b
-                         "system instability issues. "
5d360b
-                         "Specify rombar=1 or romfile to force\n",
5d360b
-                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
5d360b
-                         vdev->host.function);
5d360b
+            error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n",
5d360b
+                         vdev->vbasedev.name);
5d360b
             return;
5d360b
         }
5d360b
     }
5d360b
 
5d360b
     DPRINTF("%s ROM size 0x%x\n", vdev->vbasedev.name, size);
5d360b
 
5d360b
-    snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
5d360b
-             vdev->host.domain, vdev->host.bus, vdev->host.slot,
5d360b
-             vdev->host.function);
5d360b
+    snprintf(name, sizeof(name), "vfio[%s].rom", vdev->vbasedev.name);
5d360b
 
5d360b
     memory_region_init_io(&vdev->pdev.rom,
5d360b
                           &vfio_rom_ops, vdev, name, size);
5d360b
@@ -2112,9 +2096,8 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
5d360b
         ret = pread(vdev->vbasedev.fd, &phys_val, len,
5d360b
                     vdev->config_offset + addr);
5d360b
         if (ret != len) {
5d360b
-            error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m",
5d360b
-                         __func__, vdev->host.domain, vdev->host.bus,
5d360b
-                         vdev->host.slot, vdev->host.function, addr, len);
5d360b
+            error_report("%s(%s, 0x%x, 0x%x) failed: %m",
5d360b
+                         __func__, vdev->vbasedev.name, addr, len);
5d360b
             return -errno;
5d360b
         }
5d360b
         phys_val = le32_to_cpu(phys_val);
5d360b
@@ -2140,9 +2123,8 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
5d360b
     /* Write everything to VFIO, let it filter out what we can't write */
5d360b
     if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr)
5d360b
                 != len) {
5d360b
-        error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m",
5d360b
-                     __func__, vdev->host.domain, vdev->host.bus,
5d360b
-                     vdev->host.slot, vdev->host.function, addr, val, len);
5d360b
+        error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m",
5d360b
+                     __func__, vdev->vbasedev.name, addr, val, len);
5d360b
     }
5d360b
 
5d360b
     /* MSI/MSI-X Enabling/Disabling */
5d360b
@@ -2610,9 +2592,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
5d360b
         return;
5d360b
     }
5d360b
 
5d360b
-    snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
5d360b
-             vdev->host.domain, vdev->host.bus, vdev->host.slot,
5d360b
-             vdev->host.function, nr);
5d360b
+    snprintf(name, sizeof(name), "VFIO %s BAR %d", vdev->vbasedev.name, nr);
5d360b
 
5d360b
     /* Determine what type of BAR this is for registration */
5d360b
     ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
5d360b
@@ -2946,9 +2926,8 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
5d360b
     }
5d360b
 
5d360b
     if (ret < 0) {
5d360b
-        error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability "
5d360b
-                     "0x%x[0x%x]@0x%x: %d", vdev->host.domain,
5d360b
-                     vdev->host.bus, vdev->host.slot, vdev->host.function,
5d360b
+        error_report("vfio: %s Error adding PCI capability "
5d360b
+                     "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name,
5d360b
                      cap_id, size, pos, ret);
5d360b
         return ret;
5d360b
     }
5d360b
@@ -3010,11 +2989,14 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
5d360b
     vfio_enable_intx(vdev);
5d360b
 }
5d360b
 
5d360b
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
5d360b
-                                PCIHostDeviceAddress *host2)
5d360b
+static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
5d360b
 {
5d360b
-    return (host1->domain == host2->domain && host1->bus == host2->bus &&
5d360b
-            host1->slot == host2->slot && host1->function == host2->function);
5d360b
+    char tmp[13];
5d360b
+
5d360b
+    sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain,
5d360b
+            addr->bus, addr->slot, addr->function);
5d360b
+
5d360b
+    return (strcmp(tmp, name) == 0);
5d360b
 }
5d360b
 
5d360b
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
5d360b
@@ -3040,9 +3022,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
5d360b
     if (ret && errno != ENOSPC) {
5d360b
         ret = -errno;
5d360b
         if (!vdev->has_pm_reset) {
5d360b
-            error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
5d360b
-                         "no available reset mechanism.", vdev->host.domain,
5d360b
-                         vdev->host.bus, vdev->host.slot, vdev->host.function);
5d360b
+            error_report("vfio: Cannot reset device %s, "
5d360b
+                         "no available reset mechanism.", vdev->vbasedev.name);
5d360b
         }
5d360b
         goto out_single;
5d360b
     }
5d360b
@@ -3075,7 +3056,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
5d360b
         DPRINTF("\t%04x:%02x:%02x.%x group %d\n", host.domain,
5d360b
                 host.bus, host.slot, host.function, devices[i].group_id);
5d360b
 
5d360b
-        if (vfio_pci_host_match(&host, &vdev->host)) {
5d360b
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
5d360b
             continue;
5d360b
         }
5d360b
 
5d360b
@@ -3101,7 +3082,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
5d360b
                 continue;
5d360b
             }
5d360b
             tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
5d360b
-            if (vfio_pci_host_match(&host, &tmp->host)) {
5d360b
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
5d360b
                 if (single) {
5d360b
                     DPRINTF("vfio: found another in-use device "
5d360b
                             "%s\n", tmp->vbasedev.name);
5d360b
@@ -3165,7 +3146,7 @@ out:
5d360b
         host.slot = PCI_SLOT(devices[i].devfn);
5d360b
         host.function = PCI_FUNC(devices[i].devfn);
5d360b
 
5d360b
-        if (vfio_pci_host_match(&host, &vdev->host)) {
5d360b
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
5d360b
             continue;
5d360b
         }
5d360b
 
5d360b
@@ -3184,7 +3165,7 @@ out:
5d360b
                 continue;
5d360b
             }
5d360b
             tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
5d360b
-            if (vfio_pci_host_match(&host, &tmp->host)) {
5d360b
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
5d360b
                 vfio_pci_post_reset(tmp);
5d360b
                 break;
5d360b
             }
5d360b
@@ -3683,10 +3664,7 @@ static void vfio_err_notifier_handler(void *opaque)
5d360b
      * guest to contain the error.
5d360b
      */
5d360b
 
5d360b
-    error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
5d360b
-                 "Please collect any data possible and then kill the guest",
5d360b
-                 __func__, vdev->host.domain, vdev->host.bus,
5d360b
-                 vdev->host.slot, vdev->host.function);
5d360b
+    error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name);
5d360b
 
5d360b
     vm_stop(RUN_STATE_INTERNAL_ERROR);
5d360b
 }
5d360b
@@ -3867,7 +3845,7 @@ static int vfio_initfn(PCIDevice *pdev)
5d360b
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
5d360b
     VFIODevice *vbasedev_iter;
5d360b
     VFIOGroup *group;
5d360b
-    char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
5d360b
+    char *tmp, group_path[PATH_MAX], *group_name;
5d360b
     ssize_t len;
5d360b
     struct stat st;
5d360b
     int groupid;
5d360b
@@ -3885,36 +3863,37 @@ static int vfio_initfn(PCIDevice *pdev)
5d360b
         return -1;
5d360b
     }
5d360b
 
5d360b
-    /* Check that the host device exists */
5d360b
-    snprintf(path, sizeof(path),
5d360b
-             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
5d360b
-             vdev->host.domain, vdev->host.bus, vdev->host.slot,
5d360b
-             vdev->host.function);
5d360b
-    if (stat(path, &st) < 0) {
5d360b
-        error_report("vfio: error: no such host device: %s", path);
5d360b
+    if (!vdev->vbasedev.sysfsdev) {
5d360b
+        vdev->vbasedev.sysfsdev =
5d360b
+            g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
5d360b
+                            vdev->host.domain, vdev->host.bus,
5d360b
+                            vdev->host.slot, vdev->host.function);
5d360b
+    }
5d360b
+
5d360b
+    if (stat(vdev->vbasedev.sysfsdev, &st) < 0) {
5d360b
+        error_report("vfio: error: no such host device: %s",
5d360b
+                     vdev->vbasedev.sysfsdev);
5d360b
         return -errno;
5d360b
     }
5d360b
 
5d360b
+    vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev));
5d360b
     vdev->vbasedev.ops = &vfio_pci_ops;
5d360b
-
5d360b
     vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
5d360b
-    vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x",
5d360b
-                                          vdev->host.domain, vdev->host.bus,
5d360b
-                                          vdev->host.slot, vdev->host.function);
5d360b
 
5d360b
-    strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
5d360b
+    tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
5d360b
+    len = readlink(tmp, group_path, sizeof(group_path));
5d360b
+    g_free(tmp);
5d360b
 
5d360b
-    len = readlink(path, iommu_group_path, sizeof(path));
5d360b
-    if (len <= 0 || len >= sizeof(path)) {
5d360b
+    if (len <= 0 || len >= sizeof(group_path)) {
5d360b
         error_report("vfio: error no iommu_group for device");
5d360b
         return len < 0 ? -errno : -ENAMETOOLONG;
5d360b
     }
5d360b
 
5d360b
-    iommu_group_path[len] = 0;
5d360b
-    group_name = basename(iommu_group_path);
5d360b
+    group_path[len] = 0;
5d360b
 
5d360b
+    group_name = basename(group_path);
5d360b
     if (sscanf(group_name, "%d", &groupid) != 1) {
5d360b
-        error_report("vfio: error reading %s: %m", path);
5d360b
+        error_report("vfio: error reading %s: %m", group_path);
5d360b
         return -errno;
5d360b
     }
5d360b
 
5d360b
@@ -3926,21 +3905,18 @@ static int vfio_initfn(PCIDevice *pdev)
5d360b
         return -ENOENT;
5d360b
     }
5d360b
 
5d360b
-    snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
5d360b
-            vdev->host.domain, vdev->host.bus, vdev->host.slot,
5d360b
-            vdev->host.function);
5d360b
-
5d360b
     QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
5d360b
         if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
5d360b
-            error_report("vfio: error: device %s is already attached", path);
5d360b
+            error_report("vfio: error: device %s is already attached",
5d360b
+                         vdev->vbasedev.name);
5d360b
             vfio_put_group(group);
5d360b
             return -EBUSY;
5d360b
         }
5d360b
     }
5d360b
 
5d360b
-    ret = vfio_get_device(group, path, vdev);
5d360b
+    ret = vfio_get_device(group, vdev->vbasedev.name, vdev);
5d360b
     if (ret) {
5d360b
-        error_report("vfio: failed to get device %s", path);
5d360b
+        error_report("vfio: failed to get device %s", vdev->vbasedev.name);
5d360b
         vfio_put_group(group);
5d360b
         return ret;
5d360b
     }
5d360b
@@ -4086,6 +4062,7 @@ post_reset:
5d360b
 
5d360b
 static Property vfio_pci_dev_properties[] = {
5d360b
     DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
5d360b
+    DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
5d360b
     DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
5d360b
                        intx.mmap_timeout, 1100),
5d360b
     DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
5d360b
-- 
5d360b
1.8.3.1
5d360b