|
|
397dc2 |
From cfe170216accf60938ff4ea9440a4ac78b0bd83f Mon Sep 17 00:00:00 2001
|
|
|
397dc2 |
Message-Id: <cfe170216accf60938ff4ea9440a4ac78b0bd83f@dist-git>
|
|
|
397dc2 |
From: Dmytro Linkin <dlinkin@nvidia.com>
|
|
|
397dc2 |
Date: Thu, 28 Jan 2021 23:17:29 -0500
|
|
|
397dc2 |
Subject: [PATCH] util: Add phys_port_name support on virPCIGetNetName
|
|
|
397dc2 |
|
|
|
397dc2 |
virPCIGetNetName is used to get the name of the netdev associated with
|
|
|
397dc2 |
a particular PCI device. This is used when we have a VF name, but need
|
|
|
397dc2 |
the PF name in order to send a netlink command (e.g. in order to
|
|
|
397dc2 |
get/set the MAC address of the VF).
|
|
|
397dc2 |
|
|
|
397dc2 |
In simple cases there is a single netdev associated with any PCI
|
|
|
397dc2 |
device, so it is easy to figure out the PF netdev for a VF - just look
|
|
|
397dc2 |
for the PCI device that has the VF listed in its "virtfns" directory;
|
|
|
397dc2 |
the only name in the "net" subdirectory of that PCI device's sysfs
|
|
|
397dc2 |
directory is the PF netdev that is upstream of the VF in question.
|
|
|
397dc2 |
|
|
|
397dc2 |
In some cases there can be more than one netdev in a PCI device's net
|
|
|
397dc2 |
directory though. In the past, the only case of this was for SR-IOV
|
|
|
397dc2 |
NICs that could have multiple PF's per PCI device. In this case, all
|
|
|
397dc2 |
PF netdevs associated with a PCI address would be listed in the "net"
|
|
|
397dc2 |
subdirectory of the PCI device's directory in sysfs. At the same time,
|
|
|
397dc2 |
all VF netdevs and all PF netdevs have a phys_port_id in their sysfs,
|
|
|
397dc2 |
so the way to learn the correct PF netdev for a particular VF netdev
|
|
|
397dc2 |
is to search through the list of devices in the net subdirectory of
|
|
|
397dc2 |
the PF's PCI device, looking for the one netdev with a "phys_port_id"
|
|
|
397dc2 |
matching that of the VF netdev.
|
|
|
397dc2 |
|
|
|
397dc2 |
But starting in kernel 5.8, the NVIDIA Mellanox driver began linking
|
|
|
397dc2 |
the VFs' representor netdevs to the PF PCI address [1], and so the VF
|
|
|
397dc2 |
representor netdevs would also show up in the net
|
|
|
397dc2 |
subdirectory. However, all of the devices that do so also only have a
|
|
|
397dc2 |
single PF netdev for any given PCI address.
|
|
|
397dc2 |
|
|
|
397dc2 |
This means that the net directory of the PCI device can still hold
|
|
|
397dc2 |
multiple net devices, but only one of them will be the PF netdev (the
|
|
|
397dc2 |
others are VF representors):
|
|
|
397dc2 |
|
|
|
397dc2 |
$ ls '/sys/bus/pci/devices/0000:82:00.0/net'
|
|
|
397dc2 |
ens1f0 eth0 eth1
|
|
|
397dc2 |
|
|
|
397dc2 |
In this case the way to find the PF device is to look at the
|
|
|
397dc2 |
"phys_port_name" attribute of each netdev in sysfs. All PF devices
|
|
|
397dc2 |
have a phys_port_name matching a particular regex
|
|
|
397dc2 |
|
|
|
397dc2 |
(p[0-9]+$)|(p[0-9]+s[0-9]+$)
|
|
|
397dc2 |
|
|
|
397dc2 |
Since there can only be one PF in the entire list of devices, once we
|
|
|
397dc2 |
match that regex, we've found the PF netdev.
|
|
|
397dc2 |
|
|
|
397dc2 |
[1] - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/
|
|
|
397dc2 |
commit/?id=123f0f53dd64b67e34142485fe866a8a581f12f1
|
|
|
397dc2 |
|
|
|
397dc2 |
Resolves: https://bugzilla.redhat.com/1918708
|
|
|
397dc2 |
Co-Authored-by: Moshe Levi <moshele@nvidia.com>
|
|
|
397dc2 |
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
|
|
|
397dc2 |
Reviewed-by: Adrian Chiris <adrianc@nvidia.com>
|
|
|
397dc2 |
Reviewed-by: Laine Stump <laine@redhat.com>
|
|
|
397dc2 |
(cherry picked from commit 5b1c525b1f3608156884aed0dc5e925306c1e260)
|
|
|
397dc2 |
|
|
|
397dc2 |
Conflicts: src/util/virpci.c - upstream all DIR* were converted to use
|
|
|
397dc2 |
g_autoptr, which permitted virPCIGetNetName() to be
|
|
|
397dc2 |
simplified. Unfortunately, backporting this refactor would require
|
|
|
397dc2 |
backporting an ever-ballooning set of patches, making the
|
|
|
397dc2 |
possibility of causing a regression a very real danger. Instead,
|
|
|
397dc2 |
one small refactor of virPCIGetName() that didn't affect any other
|
|
|
397dc2 |
functions was backported, and this patch (adding phys_port_name
|
|
|
397dc2 |
support) resolved the remaining conflicts by mimicking the current
|
|
|
397dc2 |
upstream version of the function, but with all "return 0" replaced
|
|
|
397dc2 |
by "ret = 0; goto cleanup;" and all "return -1" replaced by "goto
|
|
|
397dc2 |
cleanup;" (the code at cleanup: just closes the DIR* and returns
|
|
|
397dc2 |
the current value of ret). This will assure identical behavior to
|
|
|
397dc2 |
upstream.
|
|
|
397dc2 |
Signed-off-by: Laine Stump <laine@redhat.com>
|
|
|
397dc2 |
Message-Id: <20210129041729.1076345-4-laine@redhat.com>
|
|
|
397dc2 |
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>
|
|
|
397dc2 |
---
|
|
|
397dc2 |
src/util/virpci.c | 93 ++++++++++++++++++++++++++++-------------------
|
|
|
397dc2 |
src/util/virpci.h | 5 +++
|
|
|
397dc2 |
2 files changed, 61 insertions(+), 37 deletions(-)
|
|
|
397dc2 |
|
|
|
397dc2 |
diff --git a/src/util/virpci.c b/src/util/virpci.c
|
|
|
397dc2 |
index 00377eed31..d5c038b7fe 100644
|
|
|
397dc2 |
--- a/src/util/virpci.c
|
|
|
397dc2 |
+++ b/src/util/virpci.c
|
|
|
397dc2 |
@@ -2424,9 +2424,9 @@ virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
|
|
|
397dc2 |
* virPCIGetNetName:
|
|
|
397dc2 |
* @device_link_sysfs_path: sysfs path to the PCI device
|
|
|
397dc2 |
* @idx: used to choose which netdev when there are several
|
|
|
397dc2 |
- * (ignored if physPortID is set)
|
|
|
397dc2 |
+ * (ignored if physPortID is set or physPortName is available)
|
|
|
397dc2 |
* @physPortID: match this string in the netdev's phys_port_id
|
|
|
397dc2 |
- * (or NULL to ignore and use idx instead)
|
|
|
397dc2 |
+ * (or NULL to ignore and use phys_port_name or idx instead)
|
|
|
397dc2 |
* @netname: used to return the name of the netdev
|
|
|
397dc2 |
* (set to NULL (but returns success) if there is no netdev)
|
|
|
397dc2 |
*
|
|
|
397dc2 |
@@ -2460,6 +2460,14 @@ virPCIGetNetName(const char *device_link_sysfs_path,
|
|
|
397dc2 |
}
|
|
|
397dc2 |
|
|
|
397dc2 |
while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) {
|
|
|
397dc2 |
+ /* save the first entry we find to use as a failsafe
|
|
|
397dc2 |
+ * in case we don't match the phys_port_id. This is
|
|
|
397dc2 |
+ * needed because some NIC drivers (e.g. i40e)
|
|
|
397dc2 |
+ * implement phys_port_id for PFs, but not for VFs
|
|
|
397dc2 |
+ */
|
|
|
397dc2 |
+ if (!firstEntryName)
|
|
|
397dc2 |
+ firstEntryName = g_strdup(entry->d_name);
|
|
|
397dc2 |
+
|
|
|
397dc2 |
/* if the caller sent a physPortID, compare it to the
|
|
|
397dc2 |
* physportID of this netdev. If not, look for entry[idx].
|
|
|
397dc2 |
*/
|
|
|
397dc2 |
@@ -2470,50 +2478,61 @@ virPCIGetNetName(const char *device_link_sysfs_path,
|
|
|
397dc2 |
goto cleanup;
|
|
|
397dc2 |
|
|
|
397dc2 |
/* if this one doesn't match, keep looking */
|
|
|
397dc2 |
- if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) {
|
|
|
397dc2 |
- /* save the first entry we find to use as a failsafe
|
|
|
397dc2 |
- * in case we don't match the phys_port_id. This is
|
|
|
397dc2 |
- * needed because some NIC drivers (e.g. i40e)
|
|
|
397dc2 |
- * implement phys_port_id for PFs, but not for VFs
|
|
|
397dc2 |
- */
|
|
|
397dc2 |
- if (!firstEntryName)
|
|
|
397dc2 |
- firstEntryName = g_strdup(entry->d_name);
|
|
|
397dc2 |
-
|
|
|
397dc2 |
+ if (STRNEQ_NULLABLE(physPortID, thisPhysPortID))
|
|
|
397dc2 |
continue;
|
|
|
397dc2 |
- }
|
|
|
397dc2 |
+
|
|
|
397dc2 |
} else {
|
|
|
397dc2 |
- if (i++ < idx)
|
|
|
397dc2 |
- continue;
|
|
|
397dc2 |
- }
|
|
|
397dc2 |
+ /* Most switch devices use phys_port_name instead of
|
|
|
397dc2 |
+ * phys_port_id.
|
|
|
397dc2 |
+ * NOTE: VFs' representors net devices can be linked to PF's PCI
|
|
|
397dc2 |
+ * device, which mean that there'll be multiple net devices
|
|
|
397dc2 |
+ * instances and to get a proper net device need to match on
|
|
|
397dc2 |
+ * specific regex.
|
|
|
397dc2 |
+ * To get PF netdev, for ex., used following regex:
|
|
|
397dc2 |
+ * "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
|
|
|
397dc2 |
+ * or to get exact VF's netdev next regex is used:
|
|
|
397dc2 |
+ * "pf0vf1$"
|
|
|
397dc2 |
+ */
|
|
|
397dc2 |
+ g_autofree char *thisPhysPortName = NULL;
|
|
|
397dc2 |
|
|
|
397dc2 |
- *netname = g_strdup(entry->d_name);
|
|
|
397dc2 |
+ if (virNetDevGetPhysPortName(entry->d_name, &thisPhysPortName) < 0)
|
|
|
397dc2 |
+ goto cleanup;
|
|
|
397dc2 |
|
|
|
397dc2 |
- ret = 0;
|
|
|
397dc2 |
- break;
|
|
|
397dc2 |
- }
|
|
|
397dc2 |
+ if (thisPhysPortName) {
|
|
|
397dc2 |
+
|
|
|
397dc2 |
+ /* if this one doesn't match, keep looking */
|
|
|
397dc2 |
+ if (!virStringMatch(thisPhysPortName, VIR_PF_PHYS_PORT_NAME_REGEX))
|
|
|
397dc2 |
+ continue;
|
|
|
397dc2 |
|
|
|
397dc2 |
- if (ret < 0) {
|
|
|
397dc2 |
- if (physPortID) {
|
|
|
397dc2 |
- if (firstEntryName) {
|
|
|
397dc2 |
- /* we didn't match the provided phys_port_id, but this
|
|
|
397dc2 |
- * is probably because phys_port_id isn't implemented
|
|
|
397dc2 |
- * for this NIC driver, so just return the first
|
|
|
397dc2 |
- * (probably only) netname we found.
|
|
|
397dc2 |
- */
|
|
|
397dc2 |
- *netname = firstEntryName;
|
|
|
397dc2 |
- firstEntryName = NULL;
|
|
|
397dc2 |
- ret = 0;
|
|
|
397dc2 |
} else {
|
|
|
397dc2 |
- virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
397dc2 |
- _("Could not find network device with "
|
|
|
397dc2 |
- "phys_port_id '%s' under PCI device at %s"),
|
|
|
397dc2 |
- physPortID, device_link_sysfs_path);
|
|
|
397dc2 |
+
|
|
|
397dc2 |
+ if (i++ < idx)
|
|
|
397dc2 |
+ continue;
|
|
|
397dc2 |
}
|
|
|
397dc2 |
- } else {
|
|
|
397dc2 |
- ret = 0; /* no netdev at the given index is *not* an error */
|
|
|
397dc2 |
}
|
|
|
397dc2 |
+
|
|
|
397dc2 |
+ *netname = g_strdup(entry->d_name);
|
|
|
397dc2 |
+ ret = 0;
|
|
|
397dc2 |
+ goto cleanup;
|
|
|
397dc2 |
}
|
|
|
397dc2 |
- cleanup:
|
|
|
397dc2 |
+
|
|
|
397dc2 |
+ if (firstEntryName) {
|
|
|
397dc2 |
+ /* we didn't match the provided phys_port_id / find a
|
|
|
397dc2 |
+ * phys_port_name matching VIR_PF_PHYS_PORT_NAME_REGEX / find
|
|
|
397dc2 |
+ * as many net devices as the value of idx, but this is
|
|
|
397dc2 |
+ * probably because phys_port_id / phys_port_name isn't
|
|
|
397dc2 |
+ * implemented for this NIC driver, so just return the first
|
|
|
397dc2 |
+ * (probably only) netname we found.
|
|
|
397dc2 |
+ */
|
|
|
397dc2 |
+ *netname = g_steal_pointer(&firstEntryName);
|
|
|
397dc2 |
+ ret = 0;
|
|
|
397dc2 |
+ goto cleanup;
|
|
|
397dc2 |
+ }
|
|
|
397dc2 |
+
|
|
|
397dc2 |
+ virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
397dc2 |
+ _("Could not find any network device under PCI device at %s"),
|
|
|
397dc2 |
+ device_link_sysfs_path);
|
|
|
397dc2 |
+cleanup:
|
|
|
397dc2 |
VIR_DIR_CLOSE(dir);
|
|
|
397dc2 |
return ret;
|
|
|
397dc2 |
}
|
|
|
397dc2 |
diff --git a/src/util/virpci.h b/src/util/virpci.h
|
|
|
397dc2 |
index f6796fc422..e47c766918 100644
|
|
|
397dc2 |
--- a/src/util/virpci.h
|
|
|
397dc2 |
+++ b/src/util/virpci.h
|
|
|
397dc2 |
@@ -49,6 +49,11 @@ struct _virZPCIDeviceAddress {
|
|
|
397dc2 |
|
|
|
397dc2 |
#define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d"
|
|
|
397dc2 |
|
|
|
397dc2 |
+/* Represents format of PF's phys_port_name in switchdev mode:
|
|
|
397dc2 |
+ * 'p%u' or 'p%us%u'. New line checked since value is readed from sysfs file.
|
|
|
397dc2 |
+ */
|
|
|
397dc2 |
+#define VIR_PF_PHYS_PORT_NAME_REGEX "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
|
|
|
397dc2 |
+
|
|
|
397dc2 |
struct _virPCIDeviceAddress {
|
|
|
397dc2 |
unsigned int domain;
|
|
|
397dc2 |
unsigned int bus;
|
|
|
397dc2 |
--
|
|
|
397dc2 |
2.30.0
|
|
|
397dc2 |
|