Blob Blame History Raw
From 909d3ff4c0e4ab7486c60dade9900462c896d3ba Mon Sep 17 00:00:00 2001
Message-Id: <909d3ff4c0e4ab7486c60dade9900462c896d3ba@dist-git>
From: Erik Skultety <eskultet@redhat.com>
Date: Thu, 18 May 2017 14:02:53 +0200
Subject: [PATCH] nodedev: Introduce the mdev capability to a PCI parent device

The parent device needs to report the generic stuff about the supported
mediated devices types, like device API, available instances, type name,
etc. Therefore this patch introduces a new nested capability element of
type 'mdev_types' with the resulting XML of the following format:

<device>
  ...
  <capability type='pci'>
    ...
    <capability type='mdev_types'>
      <type id='vendor_supplied_id'>
        <name>optional_vendor_supplied_codename</name>
        <deviceAPI>vfio-pci</deviceAPI>
        <availableInstances>NUM</availableInstances>
      </type>
        ...
      <type>
        ...
      </type>
    </capability>
  </capability>
  ...
</device>

https://bugzilla.redhat.com/show_bug.cgi?id=1452072

Signed-off-by: Erik Skultety <eskultet@redhat.com>
(cherry picked from commit 500cbc066a5362834462c4eefb260b7c96a8554f)
Signed-off-by: Erik Skultety <eskultet@redhat.com>
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
---
 docs/schemas/nodedev.rng                           |  26 +++++
 src/conf/node_device_conf.c                        | 101 +++++++++++++++++
 src/conf/node_device_conf.h                        |  15 +++
 src/conf/virnodedeviceobj.c                        |  20 +++-
 src/libvirt_private.syms                           |   1 +
 src/node_device/node_device_udev.c                 | 119 +++++++++++++++++++++
 .../pci_0000_02_10_7_mdev_types.xml                |  32 ++++++
 tests/nodedevxml2xmltest.c                         |   1 +
 8 files changed, 313 insertions(+), 2 deletions(-)
 create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml

diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng
index 0f90a73c8..e0a2c5032 100644
--- a/docs/schemas/nodedev.rng
+++ b/docs/schemas/nodedev.rng
@@ -205,6 +205,32 @@
     </optional>
 
     <optional>
+      <element name='capability'>
+        <attribute name='type'>
+          <value>mdev_types</value>
+        </attribute>
+        <oneOrMore>
+          <element name='type'>
+            <attribute name='id'>
+              <data type='string'/>
+            </attribute>
+            <optional>
+              <element name='name'><text/></element>
+            </optional>
+            <element name='deviceAPI'>
+              <choice>
+                <value>vfio-pci</value>
+              </choice>
+            </element>
+            <element name='availableInstances'>
+              <ref name='unsignedInt'/>
+            </element>
+          </element>
+        </oneOrMore>
+      </element>
+   </optional>
+
+    <optional>
       <element name='iommuGroup'>
         <attribute name='number'>
           <ref name='unsignedInt'/>
diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
index 90a087a37..de8ba8f9d 100644
--- a/src/conf/node_device_conf.c
+++ b/src/conf/node_device_conf.c
@@ -89,6 +89,19 @@ virNodeDevCapsDefParseString(const char *xpath,
 
 
 void
+virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type)
+{
+    if (!type)
+        return;
+
+    VIR_FREE(type->id);
+    VIR_FREE(type->name);
+    VIR_FREE(type->device_api);
+    VIR_FREE(type);
+}
+
+
+void
 virNodeDeviceDefFree(virNodeDeviceDefPtr def)
 {
     virNodeDevCapsDefPtr caps;
@@ -265,6 +278,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf,
         virBufferAsprintf(buf, "<capability type='%s'/>\n",
                           virPCIHeaderTypeToString(data->pci_dev.hdrType));
     }
+    if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) {
+        virBufferAddLit(buf, "<capability type='mdev_types'>\n");
+        virBufferAdjustIndent(buf, 2);
+        for (i = 0; i < data->pci_dev.nmdev_types; i++) {
+            virNodeDevCapMdevTypePtr type = data->pci_dev.mdev_types[i];
+            virBufferEscapeString(buf, "<type id='%s'>\n", type->id);
+            virBufferAdjustIndent(buf, 2);
+            if (type->name)
+                virBufferEscapeString(buf, "<name>%s</name>\n",
+                                      type->name);
+            virBufferEscapeString(buf, "<deviceAPI>%s</deviceAPI>\n",
+                                  type->device_api);
+            virBufferAsprintf(buf,
+                              "<availableInstances>%u</availableInstances>\n",
+                              type->available_instances);
+            virBufferAdjustIndent(buf, -2);
+            virBufferAddLit(buf, "</type>\n");
+        }
+        virBufferAdjustIndent(buf, -2);
+        virBufferAddLit(buf, "</capability>\n");
+    }
     if (data->pci_dev.nIommuGroupDevices) {
         virBufferAsprintf(buf, "<iommuGroup number='%d'>\n",
                           data->pci_dev.iommuGroupNumber);
@@ -1365,6 +1399,67 @@ virNodeDevPCICapSRIOVVirtualParseXML(xmlXPathContextPtr ctxt,
 
 
 static int
+virNodeDevPCICapMdevTypesParseXML(xmlXPathContextPtr ctxt,
+                                  virNodeDevCapPCIDevPtr pci_dev)
+{
+    int ret = -1;
+    xmlNodePtr orignode = NULL;
+    xmlNodePtr *nodes = NULL;
+    int nmdev_types = -1;
+    virNodeDevCapMdevTypePtr type = NULL;
+    size_t i;
+
+    if ((nmdev_types = virXPathNodeSet("./type", ctxt, &nodes)) < 0)
+        goto cleanup;
+
+    orignode = ctxt->node;
+    for (i = 0; i < nmdev_types; i++) {
+        ctxt->node = nodes[i];
+
+        if (VIR_ALLOC(type) < 0)
+            goto cleanup;
+
+        if (!(type->id = virXPathString("string(./@id[1])", ctxt))) {
+            virReportError(VIR_ERR_XML_ERROR, "%s",
+                           _("missing 'id' attribute for mediated device's "
+                             "<type> element"));
+            goto cleanup;
+        }
+
+        if (!(type->device_api = virXPathString("string(./deviceAPI[1])", ctxt))) {
+            virReportError(VIR_ERR_XML_ERROR,
+                           _("missing device API for mediated device type '%s'"),
+                           type->id);
+            goto cleanup;
+        }
+
+        if (virXPathUInt("number(./availableInstances)", ctxt,
+                         &type->available_instances) < 0) {
+            virReportError(VIR_ERR_XML_ERROR,
+                           _("missing number of available instances for "
+                             "mediated device type '%s'"),
+                           type->id);
+            goto cleanup;
+        }
+
+        type->name = virXPathString("string(./name)", ctxt);
+
+        if (VIR_APPEND_ELEMENT(pci_dev->mdev_types,
+                               pci_dev->nmdev_types, type) < 0)
+            goto cleanup;
+    }
+
+    pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
+    ret = 0;
+ cleanup:
+    VIR_FREE(nodes);
+    virNodeDevCapMdevTypeFree(type);
+    ctxt->node = orignode;
+    return ret;
+}
+
+
+static int
 virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
                                 xmlNodePtr node,
                                 virNodeDevCapPCIDevPtr pci_dev)
@@ -1386,6 +1481,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
     } else if (STREQ(type, "virt_functions") &&
                virNodeDevPCICapSRIOVVirtualParseXML(ctxt, pci_dev) < 0) {
         goto cleanup;
+    } else if (STREQ(type, "mdev_types") &&
+        virNodeDevPCICapMdevTypesParseXML(ctxt, pci_dev) < 0) {
+        goto cleanup;
     } else {
         int hdrType = virPCIHeaderTypeFromString(type);
 
@@ -1898,6 +1996,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
             VIR_FREE(data->pci_dev.iommuGroupDevices[i]);
         VIR_FREE(data->pci_dev.iommuGroupDevices);
         virPCIEDeviceInfoFree(data->pci_dev.pci_express);
+        for (i = 0; i < data->pci_dev.nmdev_types; i++)
+            virNodeDevCapMdevTypeFree(data->pci_dev.mdev_types[i]);
+        VIR_FREE(data->pci_dev.mdev_types);
         break;
     case VIR_NODE_DEV_CAP_USB_DEV:
         VIR_FREE(data->usb_dev.product_name);
diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
index e168f2e27..18aaff8b5 100644
--- a/src/conf/node_device_conf.h
+++ b/src/conf/node_device_conf.h
@@ -95,6 +95,7 @@ typedef enum {
     VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION     = (1 << 0),
     VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION      = (1 << 1),
     VIR_NODE_DEV_CAP_FLAG_PCIE                      = (1 << 2),
+    VIR_NODE_DEV_CAP_FLAG_PCI_MDEV                  = (1 << 3),
 } virNodeDevPCICapFlags;
 
 typedef enum {
@@ -133,6 +134,15 @@ struct _virNodeDevCapSystem {
     virNodeDevCapSystemFirmware firmware;
 };
 
+typedef struct _virNodeDevCapMdevType virNodeDevCapMdevType;
+typedef virNodeDevCapMdevType *virNodeDevCapMdevTypePtr;
+struct _virNodeDevCapMdevType {
+    char *id;
+    char *name;
+    char *device_api;
+    unsigned int available_instances;
+};
+
 typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev;
 typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr;
 struct _virNodeDevCapPCIDev {
@@ -156,6 +166,8 @@ struct _virNodeDevCapPCIDev {
     int numa_node;
     virPCIEDeviceInfoPtr pci_express;
     int hdrType; /* enum virPCIHeaderType or -1 */
+    virNodeDevCapMdevTypePtr *mdev_types;
+    size_t nmdev_types;
 };
 
 typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev;
@@ -340,6 +352,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def);
 void
 virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps);
 
+void
+virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type);
+
 # define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \
                 (VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM        | \
                  VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV       | \
diff --git a/src/conf/virnodedeviceobj.c b/src/conf/virnodedeviceobj.c
index 21d5d3f75..ac25fb598 100644
--- a/src/conf/virnodedeviceobj.c
+++ b/src/conf/virnodedeviceobj.c
@@ -42,11 +42,13 @@ virNodeDeviceObjHasCap(const virNodeDeviceObj *dev,
         virNodeDevCapTypeToString(VIR_NODE_DEV_CAP_FC_HOST);
     const char *vports_cap =
         virNodeDevCapTypeToString(VIR_NODE_DEV_CAP_VPORTS);
+    const char *mdev_types =
+        virNodeDevCapTypeToString(VIR_NODE_DEV_CAP_MDEV_TYPES);
 
     while (caps) {
-        if (STREQ(cap, virNodeDevCapTypeToString(caps->data.type)))
+        if (STREQ(cap, virNodeDevCapTypeToString(caps->data.type))) {
             return 1;
-        else if (caps->data.type == VIR_NODE_DEV_CAP_SCSI_HOST)
+        } else if (caps->data.type == VIR_NODE_DEV_CAP_SCSI_HOST) {
             if ((STREQ(cap, fc_host_cap) &&
                 (caps->data.scsi_host.flags &
                  VIR_NODE_DEV_CAP_FLAG_HBA_FC_HOST)) ||
@@ -54,6 +56,13 @@ virNodeDeviceObjHasCap(const virNodeDeviceObj *dev,
                 (caps->data.scsi_host.flags &
                  VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS)))
                 return 1;
+        } else if (caps->data.type == VIR_NODE_DEV_CAP_PCI_DEV) {
+            if ((STREQ(cap, mdev_types)) &&
+                (caps->data.pci_dev.flags &
+                 VIR_NODE_DEV_CAP_FLAG_PCI_MDEV))
+                return 1;
+        }
+
         caps = caps->next;
     }
     return 0;
@@ -468,6 +477,13 @@ virNodeDeviceCapMatch(virNodeDeviceObjPtr devobj,
                  VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS))
                 return true;
         }
+
+        if (cap->data.type == VIR_NODE_DEV_CAP_PCI_DEV) {
+            if (type == VIR_NODE_DEV_CAP_MDEV_TYPES &&
+                (cap->data.pci_dev.flags &
+                 VIR_NODE_DEV_CAP_FLAG_PCI_MDEV))
+                return true;
+        }
     }
 
     return false;
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 7e1a06db3..343966cd0 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -692,6 +692,7 @@ virNetDevIPRouteParseXML;
 
 
 # conf/node_device_conf.h
+virNodeDevCapMdevTypeFree;
 virNodeDevCapsDefFree;
 virNodeDevCapTypeFromString;
 virNodeDevCapTypeToString;
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index d4489e2a5..b89099c82 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -314,6 +314,119 @@ static int udevTranslatePCIIds(unsigned int vendor,
 }
 
 
+static int
+udevFillMdevType(struct udev_device *device,
+                 const char *dir,
+                 virNodeDevCapMdevTypePtr type)
+{
+    int ret = -1;
+    char *attrpath = NULL;
+
+#define MDEV_GET_SYSFS_ATTR(attr_name, cb, ...)                             \
+    do {                                                                    \
+        if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0)           \
+            goto cleanup;                                                   \
+                                                                            \
+        if (cb(device, attrpath, __VA_ARGS__) < 0)                          \
+            goto cleanup;                                                   \
+                                                                            \
+        VIR_FREE(attrpath);                                                 \
+    } while (0)                                                             \
+
+    if (VIR_STRDUP(type->id, last_component(dir)) < 0)
+        goto cleanup;
+
+    /* query udev for the attributes under subdirectories using the relative
+     * path stored in @dir, i.e. 'mdev_supported_types/<type_id>'
+     */
+    MDEV_GET_SYSFS_ATTR(name, udevGetStringSysfsAttr, &type->name);
+    MDEV_GET_SYSFS_ATTR(device_api, udevGetStringSysfsAttr, &type->device_api);
+    MDEV_GET_SYSFS_ATTR(available_instances, udevGetUintSysfsAttr,
+                        &type->available_instances, 10);
+
+#undef MDEV_GET_SYSFS_ATTR
+
+    ret = 0;
+ cleanup:
+    VIR_FREE(attrpath);
+    return ret;
+}
+
+
+static int
+udevPCIGetMdevTypesCap(struct udev_device *device,
+                       virNodeDevCapPCIDevPtr pcidata)
+{
+    int ret = -1;
+    int dirret = -1;
+    DIR *dir = NULL;
+    struct dirent *entry;
+    char *path = NULL;
+    char *tmppath = NULL;
+    virNodeDevCapMdevTypePtr type = NULL;
+    virNodeDevCapMdevTypePtr *types = NULL;
+    size_t ntypes = 0;
+    size_t i;
+
+    if (virAsprintf(&path, "%s/mdev_supported_types",
+                    udev_device_get_syspath(device)) < 0)
+        return -1;
+
+    if ((dirret = virDirOpenIfExists(&dir, path)) < 0)
+        goto cleanup;
+
+    if (dirret == 0) {
+        ret = 0;
+        goto cleanup;
+    }
+
+    if (VIR_ALLOC(types) < 0)
+        goto cleanup;
+
+    /* UDEV doesn't report attributes under subdirectories by default but is
+     * able to query them if the path to the attribute is relative to the
+     * device's base path, e.g. /sys/devices/../0000:00:01.0/ is the device's
+     * base path as udev reports it, but we're interested in attributes under
+     * /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/. So, we need to
+     * scan the subdirectories ourselves.
+     */
+    while ((dirret = virDirRead(dir, &entry, path)) > 0) {
+        if (VIR_ALLOC(type) < 0)
+            goto cleanup;
+
+        /* construct the relative mdev type path bit for udev */
+        if (virAsprintf(&tmppath, "mdev_supported_types/%s", entry->d_name) < 0)
+            goto cleanup;
+
+        if (udevFillMdevType(device, tmppath, type) < 0)
+            goto cleanup;
+
+        if (VIR_APPEND_ELEMENT(types, ntypes, type) < 0)
+            goto cleanup;
+
+        VIR_FREE(tmppath);
+    }
+
+    if (dirret < 0)
+        goto cleanup;
+
+    VIR_STEAL_PTR(pcidata->mdev_types, types);
+    pcidata->nmdev_types = ntypes;
+    pcidata->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
+    ntypes = 0;
+    ret = 0;
+ cleanup:
+    virNodeDevCapMdevTypeFree(type);
+    for (i = 0; i < ntypes; i++)
+        virNodeDevCapMdevTypeFree(types[i]);
+    VIR_FREE(types);
+    VIR_FREE(path);
+    VIR_FREE(tmppath);
+    VIR_DIR_CLOSE(dir);
+    return ret;
+}
+
+
 static int udevProcessPCI(struct udev_device *device,
                           virNodeDeviceDefPtr def)
 {
@@ -404,6 +517,12 @@ static int udevProcessPCI(struct udev_device *device,
         }
     }
 
+    /* check whether the device is mediated devices framework capable, if so,
+     * process it
+     */
+    if (udevPCIGetMdevTypesCap(device, pci_dev) < 0)
+        goto cleanup;
+
     ret = 0;
 
  cleanup:
diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
new file mode 100644
index 000000000..a2d57569a
--- /dev/null
+++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
@@ -0,0 +1,32 @@
+<device>
+  <name>pci_0000_02_10_7</name>
+  <parent>pci_0000_00_04_0</parent>
+  <capability type='pci'>
+    <domain>0</domain>
+    <bus>2</bus>
+    <slot>16</slot>
+    <function>7</function>
+    <product id='0x10ca'>82576 Virtual Function</product>
+    <vendor id='0x8086'>Intel Corporation</vendor>
+    <capability type='mdev_types'>
+      <type id='foo1'>
+        <name>bar1</name>
+        <deviceAPI>vfio-pci</deviceAPI>
+        <availableInstances>1</availableInstances>
+      </type>
+      <type id='foo2'>
+        <name>bar2</name>
+        <deviceAPI>vfio-pci</deviceAPI>
+        <availableInstances>2</availableInstances>
+      </type>
+    </capability>
+    <iommuGroup number='31'>
+      <address domain='0x0000' bus='0x02' slot='0x10' function='0x7'/>
+    </iommuGroup>
+    <numa node='0'/>
+    <pci-express>
+      <link validity='cap' port='0' speed='2.5' width='4'/>
+      <link validity='sta' width='0'/>
+    </pci-express>
+  </capability>
+</device>
diff --git a/tests/nodedevxml2xmltest.c b/tests/nodedevxml2xmltest.c
index 5e1ae170c..eb5c50b86 100644
--- a/tests/nodedevxml2xmltest.c
+++ b/tests/nodedevxml2xmltest.c
@@ -101,6 +101,7 @@ mymain(void)
     DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all");
     DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all_header_type");
     DO_TEST("drm_renderD129");
+    DO_TEST("pci_0000_02_10_7_mdev_types");
 
     return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
 }
-- 
2.13.0