2d1356
From 00d879937512b24db576a39fc94c78b427f8916d Mon Sep 17 00:00:00 2001
2d1356
Message-Id: <00d879937512b24db576a39fc94c78b427f8916d@dist-git>
2d1356
From: Martin Kletzander <mkletzan@redhat.com>
2d1356
Date: Mon, 23 May 2016 18:10:04 +0200
2d1356
Subject: [PATCH] nodedev: Expose PCI header type
2d1356
2d1356
RHEL-7.2.z: https://bugzilla.redhat.com/show_bug.cgi?id=1331328
2d1356
Upstream:   https://bugzilla.redhat.com/show_bug.cgi?id=1317531
2d1356
2d1356
If we expose this information, which is one byte in every PCI config
2d1356
file, we let all mgmt apps know whether the device itself is an endpoint
2d1356
or not so it's easier for them to decide whether such device can be
2d1356
passed through into a VM (endpoint) or not (*-bridge).
2d1356
2d1356
Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
2d1356
(cherry picked from commit d77ffb6876e87a5c6f4c74c49cf0d89ade4f8326)
2d1356
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
2d1356
---
2d1356
 docs/formatnode.html.in                            | 36 ++++++++++++++--------
2d1356
 docs/schemas/nodedev.rng                           | 11 +++++++
2d1356
 src/conf/node_device_conf.c                        | 20 ++++++++++++
2d1356
 src/conf/node_device_conf.h                        |  1 +
2d1356
 src/libvirt_private.syms                           |  3 ++
2d1356
 src/node_device/node_device_udev.c                 |  3 ++
2d1356
 src/util/virpci.c                                  | 33 ++++++++++++++++++++
2d1356
 src/util/virpci.h                                  | 12 ++++++++
2d1356
 .../pci_0000_00_02_0_header_type.xml               | 15 +++++++++
2d1356
 .../pci_0000_00_1c_0_header_type.xml               | 20 ++++++++++++
2d1356
 tests/nodedevxml2xmltest.c                         |  2 ++
2d1356
 11 files changed, 144 insertions(+), 12 deletions(-)
2d1356
 create mode 100644 tests/nodedevschemadata/pci_0000_00_02_0_header_type.xml
2d1356
 create mode 100644 tests/nodedevschemadata/pci_0000_00_1c_0_header_type.xml
2d1356
2d1356
diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in
2d1356
index 3ff1bef..a14bcfd 100644
2d1356
--- a/docs/formatnode.html.in
2d1356
+++ b/docs/formatnode.html.in
2d1356
@@ -97,18 +97,30 @@
2d1356
               
2d1356
                 This optional element can occur multiple times. If it
2d1356
                 exists, it has a mandatory type attribute
2d1356
-                which will be set to
2d1356
-                either physical_function
2d1356
-                or virtual_functions. If the type
2d1356
-                is physical_function, there will be a
2d1356
-                single address subelement which contains
2d1356
-                the PCI address of the SRIOV Physical Function (PF)
2d1356
-                that is the parent of this device (and this device is,
2d1356
-                by implication, an SRIOV Virtual Function (VF)). If
2d1356
-                the type is virtual_functions, then this
2d1356
-                device is an SRIOV PF, and the capability element will
2d1356
-                have a list of address subelements, one
2d1356
-                for each VF on this PF.
2d1356
+                which will be set to:
2d1356
+                
2d1356
+                  
physical_function
2d1356
+                  
2d1356
+                    That means there will be a single address
2d1356
+                    subelement which contains the PCI address of the SRIOV
2d1356
+                    Physical Function (PF) that is the parent of this device
2d1356
+                    (and this device is, by implication, an SRIOV Virtual
2d1356
+                    Function (VF)).
2d1356
+                  
2d1356
+                  
virtual_functions
2d1356
+                  
2d1356
+                    In this case this device is an SRIOV PF, and the capability
2d1356
+                    element will have a list of address
2d1356
+                    subelements, one for each VF on this PF.
2d1356
+                  
2d1356
+                  
pci-bridge or cardbus-bridge
2d1356
+                  
2d1356
+                    This merely shows that the lower 7 bits of the PCI header type
2d1356
+                    have a value of 1 or 2, respectively.  Usually this
2d1356
+                    means such a device cannot be used for PCI passthrough.
2d1356
+                    Since 1.3.3
2d1356
+                  
2d1356
+                
2d1356
               
2d1356
               
numa
2d1356
               
2d1356
diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng
2d1356
index 744dccd..949811c 100644
2d1356
--- a/docs/schemas/nodedev.rng
2d1356
+++ b/docs/schemas/nodedev.rng
2d1356
@@ -169,6 +169,17 @@
2d1356
     </optional>
2d1356
 
2d1356
     <optional>
2d1356
+      <element name='capability'>
2d1356
+        <attribute name='type'>
2d1356
+          <choice>
2d1356
+            <value>pci-bridge</value>
2d1356
+            <value>cardbus-bridge</value>
2d1356
+          </choice>
2d1356
+        </attribute>
2d1356
+      </element>
2d1356
+    </optional>
2d1356
+
2d1356
+    <optional>
2d1356
       <element name='pci-express'>
2d1356
         <zeroOrMore>
2d1356
           <element name='link'>
2d1356
diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
2d1356
index e6f3f27..feefb9a 100644
2d1356
--- a/src/conf/node_device_conf.c
2d1356
+++ b/src/conf/node_device_conf.c
2d1356
@@ -394,6 +394,12 @@ char *virNodeDeviceDefFormat(const virNodeDeviceDef *def)
2d1356
             if (data->pci_dev.numa_node >= 0)
2d1356
                 virBufferAsprintf(&buf, "<numa node='%d'/>\n",
2d1356
                                   data->pci_dev.numa_node);
2d1356
+
2d1356
+            if (data->pci_dev.hdrType) {
2d1356
+                virBufferAsprintf(&buf, "<capability type='%s'/>\n",
2d1356
+                                  virPCIHeaderTypeToString(data->pci_dev.hdrType));
2d1356
+            }
2d1356
+
2d1356
             if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCIE)
2d1356
                 virPCIEDeviceInfoFormat(&buf, data->pci_dev.pci_express);
2d1356
             break;
2d1356
@@ -1264,6 +1270,7 @@ virNodeDevCapPCIDevParseXML(xmlXPathContextPtr ctxt,
2d1356
     xmlNodePtr orignode, iommuGroupNode, pciExpress;
2d1356
     int ret = -1;
2d1356
     virPCIEDeviceInfoPtr pci_express = NULL;
2d1356
+    char *tmp = NULL;
2d1356
 
2d1356
     orignode = ctxt->node;
2d1356
     ctxt->node = node;
2d1356
@@ -1321,6 +1328,18 @@ virNodeDevCapPCIDevParseXML(xmlXPathContextPtr ctxt,
2d1356
                                           _("invalid NUMA node ID supplied for '%s'")) < 0)
2d1356
         goto out;
2d1356
 
2d1356
+    if ((tmp = virXPathString("string(./capability[1]/@type)", ctxt))) {
2d1356
+        int hdrType = virPCIHeaderTypeFromString(tmp);
2d1356
+
2d1356
+        if (hdrType <= 0) {
2d1356
+            virReportError(VIR_ERR_INTERNAL_ERROR,
2d1356
+                           _("Unknown PCI header type '%s'"), tmp);
2d1356
+            goto out;
2d1356
+        }
2d1356
+
2d1356
+        data->pci_dev.hdrType = hdrType;
2d1356
+    }
2d1356
+
2d1356
     if ((pciExpress = virXPathNode("./pci-express[1]", ctxt))) {
2d1356
         if (VIR_ALLOC(pci_express) < 0)
2d1356
             goto out;
2d1356
@@ -1335,6 +1354,7 @@ virNodeDevCapPCIDevParseXML(xmlXPathContextPtr ctxt,
2d1356
 
2d1356
     ret = 0;
2d1356
  out:
2d1356
+    VIR_FREE(tmp);
2d1356
     virPCIEDeviceInfoFree(pci_express);
2d1356
     ctxt->node = orignode;
2d1356
     return ret;
2d1356
diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
2d1356
index 7dd39ca..73d2a7f 100644
2d1356
--- a/src/conf/node_device_conf.h
2d1356
+++ b/src/conf/node_device_conf.h
2d1356
@@ -118,6 +118,7 @@ typedef struct _virNodeDevCapData {
2d1356
             unsigned int iommuGroupNumber;
2d1356
             int numa_node;
2d1356
             virPCIEDeviceInfoPtr pci_express;
2d1356
+            int hdrType; /* enum virPCIHeaderType or -1 */
2d1356
         } pci_dev;
2d1356
         struct {
2d1356
             unsigned int bus;
2d1356
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
2d1356
index 86909c1..0db30be 100644
2d1356
--- a/src/libvirt_private.syms
2d1356
+++ b/src/libvirt_private.syms
2d1356
@@ -1976,11 +1976,14 @@ virPCIDeviceSetUsedBy;
2d1356
 virPCIDeviceUnbind;
2d1356
 virPCIDeviceWaitForCleanup;
2d1356
 virPCIEDeviceInfoFree;
2d1356
+virPCIGetHeaderType;
2d1356
 virPCIGetNetName;
2d1356
 virPCIGetPhysicalFunction;
2d1356
 virPCIGetVirtualFunctionIndex;
2d1356
 virPCIGetVirtualFunctionInfo;
2d1356
 virPCIGetVirtualFunctions;
2d1356
+virPCIHeaderTypeFromString;
2d1356
+virPCIHeaderTypeToString;
2d1356
 virPCIIsVirtualFunction;
2d1356
 
2d1356
 
2d1356
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
2d1356
index aaee0e5..6bff5ba 100644
2d1356
--- a/src/node_device/node_device_udev.c
2d1356
+++ b/src/node_device/node_device_udev.c
2d1356
@@ -506,6 +506,9 @@ static int udevProcessPCI(struct udev_device *device,
2d1356
 
2d1356
     /* We need to be root to read PCI device configs */
2d1356
     if (priv->privileged) {
2d1356
+        if (virPCIGetHeaderType(pciDev, &data->pci_dev.hdrType) < 0)
2d1356
+            goto out;
2d1356
+
2d1356
         if (virPCIDeviceIsPCIExpress(pciDev) > 0) {
2d1356
             if (VIR_ALLOC(pci_express) < 0)
2d1356
                 goto out;
2d1356
diff --git a/src/util/virpci.c b/src/util/virpci.c
2d1356
index 35b1459..d98db3f 100644
2d1356
--- a/src/util/virpci.c
2d1356
+++ b/src/util/virpci.c
2d1356
@@ -55,6 +55,12 @@ VIR_LOG_INIT("util.pci");
2d1356
 VIR_ENUM_IMPL(virPCIELinkSpeed, VIR_PCIE_LINK_SPEED_LAST,
2d1356
               "", "2.5", "5", "8")
2d1356
 
2d1356
+VIR_ENUM_IMPL(virPCIHeader, VIR_PCI_HEADER_LAST,
2d1356
+              "endpoint",
2d1356
+              "pci-bridge",
2d1356
+              "cardbus-bridge",
2d1356
+);
2d1356
+
2d1356
 struct _virPCIDevice {
2d1356
     unsigned int  domain;
2d1356
     unsigned int  bus;
2d1356
@@ -2872,6 +2878,33 @@ virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
2d1356
 }
2d1356
 
2d1356
 
2d1356
+int virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType)
2d1356
+{
2d1356
+    int fd;
2d1356
+    uint8_t type;
2d1356
+
2d1356
+    *hdrType = -1;
2d1356
+
2d1356
+    if ((fd = virPCIDeviceConfigOpen(dev, true)) < 0)
2d1356
+        return -1;
2d1356
+
2d1356
+    type = virPCIDeviceRead8(dev, fd, PCI_HEADER_TYPE);
2d1356
+
2d1356
+    virPCIDeviceConfigClose(dev, fd);
2d1356
+
2d1356
+    type &= PCI_HEADER_TYPE_MASK;
2d1356
+    if (type >= VIR_PCI_HEADER_LAST) {
2d1356
+        virReportError(VIR_ERR_INTERNAL_ERROR,
2d1356
+                       _("Unknown PCI header type '%d'"), type);
2d1356
+        return -1;
2d1356
+    }
2d1356
+
2d1356
+    *hdrType = type;
2d1356
+
2d1356
+    return 0;
2d1356
+}
2d1356
+
2d1356
+
2d1356
 void
2d1356
 virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev)
2d1356
 {
2d1356
diff --git a/src/util/virpci.h b/src/util/virpci.h
2d1356
index 64b9e96..1ed59dd 100644
2d1356
--- a/src/util/virpci.h
2d1356
+++ b/src/util/virpci.h
2d1356
@@ -52,6 +52,16 @@ typedef enum {
2d1356
 
2d1356
 VIR_ENUM_DECL(virPCIELinkSpeed)
2d1356
 
2d1356
+typedef enum {
2d1356
+    VIR_PCI_HEADER_ENDPOINT = 0,
2d1356
+    VIR_PCI_HEADER_PCI_BRIDGE,
2d1356
+    VIR_PCI_HEADER_CARDBUS_BRIDGE,
2d1356
+
2d1356
+    VIR_PCI_HEADER_LAST
2d1356
+} virPCIHeaderType;
2d1356
+
2d1356
+VIR_ENUM_DECL(virPCIHeader)
2d1356
+
2d1356
 typedef struct _virPCIELink virPCIELink;
2d1356
 typedef virPCIELink *virPCIELinkPtr;
2d1356
 struct _virPCIELink {
2d1356
@@ -214,6 +224,8 @@ int virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
2d1356
                               unsigned int *sta_speed,
2d1356
                               unsigned int *sta_width);
2d1356
 
2d1356
+int virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType);
2d1356
+
2d1356
 void virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev);
2d1356
 
2d1356
 #endif /* __VIR_PCI_H__ */
2d1356
diff --git a/tests/nodedevschemadata/pci_0000_00_02_0_header_type.xml b/tests/nodedevschemadata/pci_0000_00_02_0_header_type.xml
2d1356
new file mode 100644
2d1356
index 0000000..5150fd1
2d1356
--- /dev/null
2d1356
+++ b/tests/nodedevschemadata/pci_0000_00_02_0_header_type.xml
2d1356
@@ -0,0 +1,15 @@
2d1356
+<device>
2d1356
+  <name>pci_0000_00_02_0</name>
2d1356
+  <parent>computer</parent>
2d1356
+  <capability type='pci'>
2d1356
+    <domain>0</domain>
2d1356
+    <bus>0</bus>
2d1356
+    <slot>2</slot>
2d1356
+    <function>0</function>
2d1356
+    <product id='0x0416'>4th Gen Core Processor Integrated Graphics Controller</product>
2d1356
+    <vendor id='0x8086'>Intel Corporation</vendor>
2d1356
+    <iommuGroup number='1'>
2d1356
+      <address domain='0x0000' bus='0x00' slot='0x02' function='0x0'/>
2d1356
+    </iommuGroup>
2d1356
+  </capability>
2d1356
+</device>
2d1356
diff --git a/tests/nodedevschemadata/pci_0000_00_1c_0_header_type.xml b/tests/nodedevschemadata/pci_0000_00_1c_0_header_type.xml
2d1356
new file mode 100644
2d1356
index 0000000..dea5f05
2d1356
--- /dev/null
2d1356
+++ b/tests/nodedevschemadata/pci_0000_00_1c_0_header_type.xml
2d1356
@@ -0,0 +1,20 @@
2d1356
+<device>
2d1356
+  <name>pci_0000_00_1c_0</name>
2d1356
+  <parent>computer</parent>
2d1356
+  <capability type='pci'>
2d1356
+    <domain>0</domain>
2d1356
+    <bus>0</bus>
2d1356
+    <slot>28</slot>
2d1356
+    <function>0</function>
2d1356
+    <product id='0x8c10'>8 Series/C220 Series Chipset Family PCI Express Root Port #1</product>
2d1356
+    <vendor id='0x8086'>Intel Corporation</vendor>
2d1356
+    <iommuGroup number='8'>
2d1356
+      <address domain='0x0000' bus='0x00' slot='0x1c' function='0x0'/>
2d1356
+    </iommuGroup>
2d1356
+    <capability type='pci-bridge'/>
2d1356
+    <pci-express>
2d1356
+      <link validity='cap' port='1' speed='5' width='1'/>
2d1356
+      <link validity='sta' speed='2.5' width='1'/>
2d1356
+    </pci-express>
2d1356
+  </capability>
2d1356
+</device>
2d1356
diff --git a/tests/nodedevxml2xmltest.c b/tests/nodedevxml2xmltest.c
2d1356
index a37d729..d2cd444 100644
2d1356
--- a/tests/nodedevxml2xmltest.c
2d1356
+++ b/tests/nodedevxml2xmltest.c
2d1356
@@ -91,6 +91,8 @@ mymain(void)
2d1356
     DO_TEST("usb_device_1d6b_1_0000_00_1d_0");
2d1356
     DO_TEST("pci_8086_4238_pcie_wireless");
2d1356
     DO_TEST("pci_8086_0c0c_snd_hda_intel");
2d1356
+    DO_TEST("pci_0000_00_02_0_header_type");
2d1356
+    DO_TEST("pci_0000_00_1c_0_header_type");
2d1356
 
2d1356
     return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
2d1356
 }
2d1356
-- 
2d1356
2.8.3
2d1356