21255d
From 08ac9f7f55c138678c6415139e7510a05a75b81d Mon Sep 17 00:00:00 2001
21255d
From: =?UTF-8?q?Michal=20Sekleta=CC=81r?= <msekleta@redhat.com>
21255d
Date: Wed, 14 Oct 2020 16:57:44 +0200
21255d
Subject: [PATCH] udev: introduce udev net_id "naming schemes"
21255d
MIME-Version: 1.0
21255d
Content-Type: text/plain; charset=UTF-8
21255d
Content-Transfer-Encoding: 8bit
21255d
21255d
With this we can stabilize how naming works for network interfaces. A
21255d
user can request through a kernel cmdline option or an env var which
21255d
scheme to follow. The idea is that installers use this to set in stone
21255d
(a very soft stone though) the scheme used during installation so that
21255d
interface naming doesn't change afterwards anymore.
21255d
21255d
Why use env vars and kernel cmdline options, and not a config file of
21255d
its own?
21255d
21255d
Well, first of all there's no obvious existing one to use. But more
21255d
importantly: I have the feeling that this logic is kind of an incomplete
21255d
hack, and I simply don't want to advertise this as a perfectly
21255d
working solution. So far we used env vars for the not-so-official
21255d
options and proper config files for the official stuff. Given how
21255d
incomplete this logic is (i.e. the big variable for naming remains the
21255d
kernel, which might expose sysfs attributes in newer versions that we
21255d
check for and didn't exist in older versions — and other problems like
21255d
this), I am simply not confident in giving this first-class exposure in
21255d
a primary configuration file.
21255d
21255d
Fixes: #10448
21255d
21255d
(cherry-picked from commit f7e81fd96fdfe0ac6dcdb72de43f7cb4720e363a)
21255d
21255d
Related: #1827462
21255d
21255d
[msekleta: note that we are introducing our own naming schemes based on
21255d
RHEL-8 minor versions. Also we are not backporting all naming scheme
21255d
features that appeared in the original commit. We are backporting only
21255d
features relevant for v239 while original commit also converted
21255d
changes introduced in v240 into naming scheme flags.]
21255d
---
21255d
 doc/ENVIRONMENT.md             |   9 +++
21255d
 man/kernel-command-line.xml    |   1 +
21255d
 man/systemd-udevd.service.xml  |  16 +++++
21255d
 src/udev/udev-builtin-net_id.c | 106 ++++++++++++++++++++++++++++++++-
21255d
 4 files changed, 130 insertions(+), 2 deletions(-)
21255d
21255d
diff --git a/doc/ENVIRONMENT.md b/doc/ENVIRONMENT.md
21255d
index 39a36a52cc..1a4aa01ef4 100644
21255d
--- a/doc/ENVIRONMENT.md
21255d
+++ b/doc/ENVIRONMENT.md
21255d
@@ -76,6 +76,15 @@ systemd-logind:
21255d
   hibernation is available even if the swap devices do not provide enough room
21255d
   for it.
21255d
 
21255d
+* `$NET_NAMING_SCHEME=` – if set, takes a network naming scheme (i.e. one of
21255d
+  v238, v239, rhel-8.0 …) as parameter. If specified, udev's net_id builtin will
21255d
+  follow the specified naming scheme when determining stable network interface
21255d
+  names. This may be used to revert to naming schemes of older udev versions,
21255d
+  in order to provide more stable naming across updates. This environment
21255d
+  variable takes precedence over the kernel command line option
21255d
+  `net.naming-scheme=`, except if the value is prefixed with `:` in which case
21255d
+  the kernel command line option takes precedence, if it is specified as well.
21255d
+
21255d
 installed systemd tests:
21255d
 
21255d
 * `$SYSTEMD_TEST_DATA` — override the location of test data. This is useful if
21255d
diff --git a/man/kernel-command-line.xml b/man/kernel-command-line.xml
21255d
index 4d8cb4e50e..b753d0592c 100644
21255d
--- a/man/kernel-command-line.xml
21255d
+++ b/man/kernel-command-line.xml
21255d
@@ -246,6 +246,7 @@
21255d
         <term><varname>udev.event_timeout=</varname></term>
21255d
         <term><varname>rd.udev.event_timeout=</varname></term>
21255d
         <term><varname>net.ifnames=</varname></term>
21255d
+        <term><varname>net.naming-scheme=</varname></term>
21255d
 
21255d
         <listitem>
21255d
           <para>Parameters understood by the device event managing
21255d
diff --git a/man/systemd-udevd.service.xml b/man/systemd-udevd.service.xml
21255d
index 73c77ea690..6449103441 100644
21255d
--- a/man/systemd-udevd.service.xml
21255d
+++ b/man/systemd-udevd.service.xml
21255d
@@ -170,6 +170,22 @@
21255d
           when possible. It is enabled by default; specifying 0 disables it.</para>
21255d
         </listitem>
21255d
       </varlistentry>
21255d
+      <varlistentry>
21255d
+        <term><varname>net.naming-scheme=</varname></term>
21255d
+        <listitem>
21255d
+          <para>Network interfaces are renamed to give them predictable names when possible (unless
21255d
+          <varname>net.ifnames=0</varname> is specified, see above). The names are derived from various device metadata
21255d
+          fields. Newer versions of <filename>systemd-udevd.service</filename> take more of these fields into account,
21255d
+          improving (and thus possibly changing) the names used for the same devices. With this kernel command line
21255d
+          option it is possible to pick a specific version of this algorithm. It expects a naming scheme identifier as
21255d
+          argument. Currently the following identifiers are known: <literal>v238</literal>, <literal>v239</literal>,
21255d
+          <literal>v240</literal> which each implement the naming scheme that was the default in the indicated systemd
21255d
+          version. Note that selecting a specific scheme is not sufficient to fully stabilize interface naming: the
21255d
+          naming is generally derived from driver attributes exposed by the kernel. As the kernel is updated,
21255d
+          previously missing attributes <filename>systemd-udevd.service</filename> is checking might appear, which
21255d
+          affects older name derivation algorithms, too.</para>
21255d
+        </listitem>
21255d
+      </varlistentry>
21255d
     </variablelist>
21255d
     
21255d
          in kernel-command-line.xml -->
21255d
diff --git a/src/udev/udev-builtin-net_id.c b/src/udev/udev-builtin-net_id.c
21255d
index 147e04ab8c..148696183e 100644
21255d
--- a/src/udev/udev-builtin-net_id.c
21255d
+++ b/src/udev/udev-builtin-net_id.c
21255d
@@ -96,6 +96,7 @@
21255d
 #include "fileio.h"
21255d
 #include "fs-util.h"
21255d
 #include "parse-util.h"
21255d
+#include "proc-cmdline.h"
21255d
 #include "stdio-util.h"
21255d
 #include "string-util.h"
21255d
 #include "udev.h"
21255d
@@ -103,6 +104,52 @@
21255d
 
21255d
 #define ONBOARD_INDEX_MAX (16*1024-1)
21255d
 
21255d
+/* So here's the deal: net_id is supposed to be an exercise in providing stable names for network devices. However, we
21255d
+ * also want to keep updating the naming scheme used in future versions of net_id. These two goals of course are
21255d
+ * contradictory: on one hand we want things to not change and on the other hand we want them to improve. Our way out
21255d
+ * of this dilemma is to introduce the "naming scheme" concept: each time we improve the naming logic we define a new
21255d
+ * flag for it. Then, we keep a list of schemes, each identified by a name associated with the flags it implements. Via
21255d
+ * a kernel command line and environment variable we then allow the user to pick the scheme they want us to follow:
21255d
+ * installers could "freeze" the used scheme at the moment of installation this way.
21255d
+ *
21255d
+ * Developers: each time you tweak the naming logic here, define a new flag below, and condition the tweak with
21255d
+ * it. Each time we do a release we'll then add a new scheme entry and include all newly defined flags.
21255d
+ *
21255d
+ * Note that this is only half a solution to the problem though: not only udev/net_id gets updated all the time, the
21255d
+ * kernel gets too. And thus a kernel that previously didn't expose some sysfs attribute we look for might eventually
21255d
+ * do, and thus affect our naming scheme too. Thus, enforcing a naming scheme will make interface names more stable across
21255d
+ * OS versions, but not fully stabilize them. */
21255d
+typedef enum NamingSchemeFlags {
21255d
+        /* First, the individual features */
21255d
+        NAMING_SR_IOV_V        = 1 << 0, /* Use "v" suffix for SR-IOV, see 609948c7043a40008b8299529c978ed8e11de8f6*/
21255d
+        NAMING_NPAR_ARI        = 1 << 1, /* Use NPAR "ARI", see 6bc04997b6eab35d1cb9fa73889892702c27be09 */
21255d
+
21255d
+        /* And now the masks that combine the features above */
21255d
+        NAMING_V238 = 0,
21255d
+        NAMING_V239 = NAMING_V238|NAMING_SR_IOV_V|NAMING_NPAR_ARI,
21255d
+        NAMING_RHEL_8_0 = NAMING_V239,
21255d
+        NAMING_RHEL_8_1 = NAMING_V239,
21255d
+        NAMING_RHEL_8_2 = NAMING_V239,
21255d
+        NAMING_RHEL_8_3 = NAMING_V239,
21255d
+
21255d
+        _NAMING_SCHEME_FLAGS_INVALID = -1,
21255d
+} NamingSchemeFlags;
21255d
+
21255d
+typedef struct NamingScheme {
21255d
+        const char *name;
21255d
+        NamingSchemeFlags flags;
21255d
+} NamingScheme;
21255d
+
21255d
+static const NamingScheme naming_schemes[] = {
21255d
+        { "v238", NAMING_V238 },
21255d
+        { "v239", NAMING_V239 },
21255d
+        { "rhel-8.0", NAMING_RHEL_8_0 },
21255d
+        { "rhel-8.1", NAMING_RHEL_8_1 },
21255d
+        { "rhel-8.2", NAMING_RHEL_8_2 },
21255d
+        { "rhel-8.3", NAMING_RHEL_8_3 },
21255d
+        /* … add more schemes here, as the logic to name devices is updated … */
21255d
+};
21255d
+
21255d
 enum netname_type{
21255d
         NET_UNDEF,
21255d
         NET_PCI,
21255d
@@ -138,6 +185,56 @@ struct virtfn_info {
21255d
         char suffix[IFNAMSIZ];
21255d
 };
21255d
 
21255d
+static const NamingScheme* naming_scheme(void) {
21255d
+        static const NamingScheme *cache = NULL;
21255d
+        _cleanup_free_ char *buffer = NULL;
21255d
+        const char *e, *k;
21255d
+
21255d
+        if (cache)
21255d
+                return cache;
21255d
+
21255d
+        /* Acquire setting from the kernel command line */
21255d
+        (void) proc_cmdline_get_key("net.naming-scheme", 0, &buffer);
21255d
+
21255d
+        /* Also acquire it from an env var */
21255d
+        e = getenv("NET_NAMING_SCHEME");
21255d
+        if (e) {
21255d
+                if (*e == ':') {
21255d
+                        /* If prefixed with ':' the kernel cmdline takes precedence */
21255d
+                        k = buffer ?: e + 1;
21255d
+                } else
21255d
+                        k = e; /* Otherwise the env var takes precedence */
21255d
+        } else
21255d
+                k = buffer;
21255d
+
21255d
+        if (k) {
21255d
+                size_t i;
21255d
+
21255d
+                for (i = 0; i < ELEMENTSOF(naming_schemes); i++)
21255d
+                        if (streq(naming_schemes[i].name, k)) {
21255d
+                                cache = naming_schemes + i;
21255d
+                                break;
21255d
+                        }
21255d
+
21255d
+                if (!cache)
21255d
+                        log_warning("Unknown interface naming scheme '%s' requested, ignoring.", k);
21255d
+        }
21255d
+
21255d
+        if (cache)
21255d
+                log_info("Using interface naming scheme '%s'.", cache->name);
21255d
+        else {
21255d
+                /* RHEL-only: here we differ from the upstream and if no naming scheme was selected we default to naming from systemd-239 */
21255d
+                cache = &naming_schemes[2];
21255d
+                log_info("Using default interface naming scheme '%s'.", cache->name);
21255d
+        }
21255d
+
21255d
+        return cache;
21255d
+}
21255d
+
21255d
+static bool naming_scheme_has(NamingSchemeFlags flags) {
21255d
+        return FLAGS_SET(naming_scheme()->flags, flags);
21255d
+}
21255d
+
21255d
 /* skip intermediate virtio devices */
21255d
 static struct udev_device *skip_virtio(struct udev_device *dev) {
21255d
         struct udev_device *parent = dev;
21255d
@@ -299,7 +396,9 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
21255d
 
21255d
         if (sscanf(udev_device_get_sysname(names->pcidev), "%x:%x:%x.%u", &domain, &bus, &slot, &func) != 4)
21255d
                 return -ENOENT;
21255d
-        if (is_pci_ari_enabled(names->pcidev))
21255d
+
21255d
+        if (naming_scheme_has(NAMING_NPAR_ARI) &&
21255d
+            is_pci_ari_enabled(names->pcidev))
21255d
                 /* ARI devices support up to 256 functions on a single device ("slot"), and interpret the
21255d
                  * traditional 5-bit slot and 3-bit function number as a single 8-bit function number,
21255d
                  * where the slot makes up the upper 5 bits. */
21255d
@@ -494,7 +593,8 @@ static int names_pci(struct udev_device *dev, struct netnames *names) {
21255d
                         return -ENOENT;
21255d
         }
21255d
 
21255d
-        if (get_virtfn_info(dev, names, &vf_info) >= 0) {
21255d
+        if (naming_scheme_has(NAMING_SR_IOV_V) &&
21255d
+            get_virtfn_info(dev, names, &vf_info) >= 0) {
21255d
                 /* If this is an SR-IOV virtual device, get base name using physical device and add virtfn suffix. */
21255d
                 vf_names.pcidev = vf_info.physfn_pcidev;
21255d
                 dev_pci_onboard(dev, &vf_names);
21255d
@@ -741,6 +841,8 @@ static int builtin_net_id(struct udev_device *dev, int argc, char *argv[], bool
21255d
                         prefix = "ww";
21255d
         }
21255d
 
21255d
+        udev_builtin_add_property(dev, test, "ID_NET_NAMING_SCHEME", naming_scheme()->name);
21255d
+
21255d
         err = names_mac(dev, &names);
21255d
         if (err >= 0 && names.mac_valid) {
21255d
                 char str[IFNAMSIZ];