6136c1
From 1d5f966c1758eb620755fcae54abd07a1ac36d3d Mon Sep 17 00:00:00 2001
6136c1
From: Michal Sekletar <msekleta@redhat.com>
6136c1
Date: Wed, 6 Jan 2021 11:43:50 +0100
6136c1
Subject: [PATCH] udev: make algorithm that selects highest priority devlink
6136c1
 less susceptible to race conditions
6136c1
6136c1
Previously it was very likely, when multiple contenders for the symlink
6136c1
appear in parallel, that algorithm would select wrong symlink (i.e. one
6136c1
with lower-priority).
6136c1
6136c1
Now the algorithm is much more defensive and when we detect change in
6136c1
set of contenders for the symlink we reevaluate the selection. Same
6136c1
happens when new symlink replaces already existing symlink that points
6136c1
to different device node.
6136c1
6136c1
Resolves: #1642728
6136c1
---
6136c1
 src/udev/udev-event.c |  71 +++++++-----
6136c1
 src/udev/udev-node.c  | 244 ++++++++++++++++++++++++++++++------------
6136c1
 2 files changed, 216 insertions(+), 99 deletions(-)
6136c1
6136c1
diff --git a/src/udev/udev-event.c b/src/udev/udev-event.c
6136c1
index fd8406d959..9004634f65 100644
6136c1
--- a/src/udev/udev-event.c
6136c1
+++ b/src/udev/udev-event.c
6136c1
@@ -833,6 +833,41 @@ static int rename_netif(struct udev_event *event) {
6136c1
         return 0;
6136c1
 }
6136c1
 
6136c1
+static void update_devnode(struct udev_event *event) {
6136c1
+        struct udev_device *dev = event->dev;
6136c1
+
6136c1
+        if (major(udev_device_get_devnum(dev)) > 0) {
6136c1
+                bool apply;
6136c1
+
6136c1
+                /* remove/update possible left-over symlinks from old database entry */
6136c1
+                if (event->dev_db != NULL)
6136c1
+                        udev_node_update_old_links(dev, event->dev_db);
6136c1
+
6136c1
+                if (!event->owner_set)
6136c1
+                        event->uid = udev_device_get_devnode_uid(dev);
6136c1
+
6136c1
+                if (!event->group_set)
6136c1
+                        event->gid = udev_device_get_devnode_gid(dev);
6136c1
+
6136c1
+                if (!event->mode_set) {
6136c1
+                        if (udev_device_get_devnode_mode(dev) > 0) {
6136c1
+                                /* kernel supplied value */
6136c1
+                                event->mode = udev_device_get_devnode_mode(dev);
6136c1
+                        } else if (event->gid > 0) {
6136c1
+                                /* default 0660 if a group is assigned */
6136c1
+                                event->mode = 0660;
6136c1
+                        }
6136c1
+                        else {
6136c1
+                                /* default 0600 */
6136c1
+                                event->mode = 0600;
6136c1
+                        }
6136c1
+                }
6136c1
+
6136c1
+                apply = streq(udev_device_get_action(dev), "add") || event->owner_set || event->group_set || event->mode_set;
6136c1
+                udev_node_add(dev, apply, event->mode, event->uid, event->gid, &event->seclabel_list);
6136c1
+        }
6136c1
+}
6136c1
+
6136c1
 void udev_event_execute_rules(struct udev_event *event,
6136c1
                               usec_t timeout_usec, usec_t timeout_warn_usec,
6136c1
                               struct udev_list *properties_list,
6136c1
@@ -891,35 +926,7 @@ void udev_event_execute_rules(struct udev_event *event,
6136c1
                         }
6136c1
                 }
6136c1
 
6136c1
-                if (major(udev_device_get_devnum(dev)) > 0) {
6136c1
-                        bool apply;
6136c1
-
6136c1
-                        /* remove/update possible left-over symlinks from old database entry */
6136c1
-                        if (event->dev_db != NULL)
6136c1
-                                udev_node_update_old_links(dev, event->dev_db);
6136c1
-
6136c1
-                        if (!event->owner_set)
6136c1
-                                event->uid = udev_device_get_devnode_uid(dev);
6136c1
-
6136c1
-                        if (!event->group_set)
6136c1
-                                event->gid = udev_device_get_devnode_gid(dev);
6136c1
-
6136c1
-                        if (!event->mode_set) {
6136c1
-                                if (udev_device_get_devnode_mode(dev) > 0) {
6136c1
-                                        /* kernel supplied value */
6136c1
-                                        event->mode = udev_device_get_devnode_mode(dev);
6136c1
-                                } else if (event->gid > 0) {
6136c1
-                                        /* default 0660 if a group is assigned */
6136c1
-                                        event->mode = 0660;
6136c1
-                                } else {
6136c1
-                                        /* default 0600 */
6136c1
-                                        event->mode = 0600;
6136c1
-                                }
6136c1
-                        }
6136c1
-
6136c1
-                        apply = streq(udev_device_get_action(dev), "add") || event->owner_set || event->group_set || event->mode_set;
6136c1
-                        udev_node_add(dev, apply, event->mode, event->uid, event->gid, &event->seclabel_list);
6136c1
-                }
6136c1
+                update_devnode(event);
6136c1
 
6136c1
                 /* preserve old, or get new initialization timestamp */
6136c1
                 udev_device_ensure_usec_initialized(event->dev, event->dev_db);
6136c1
@@ -927,6 +934,12 @@ void udev_event_execute_rules(struct udev_event *event,
6136c1
                 /* (re)write database file */
6136c1
                 udev_device_tag_index(dev, event->dev_db, true);
6136c1
                 udev_device_update_db(dev);
6136c1
+
6136c1
+                /* Yes, we run update_devnode() twice, because in the first invocation, that is before update of udev database,
6136c1
+                 * it could happen that two contenders are replacing each other's symlink. Hence we run it again to make sure
6136c1
+                 * symlinks point to devices that claim them with the highest priority. */
6136c1
+                update_devnode(event);
6136c1
+
6136c1
                 udev_device_set_is_initialized(dev);
6136c1
 
6136c1
                 event->dev_db = udev_device_unref(event->dev_db);
6136c1
diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c
6136c1
index 333dcae6b9..2eeeccdd3a 100644
6136c1
--- a/src/udev/udev-node.c
6136c1
+++ b/src/udev/udev-node.c
6136c1
@@ -13,19 +13,27 @@
6136c1
 #include <unistd.h>
6136c1
 
6136c1
 #include "device-nodes.h"
6136c1
+#include "device-private.h"
6136c1
 #include "dirent-util.h"
6136c1
+#include "fd-util.h"
6136c1
 #include "format-util.h"
6136c1
 #include "fs-util.h"
6136c1
+#include "sd-device.h"
6136c1
 #include "selinux-util.h"
6136c1
 #include "smack-util.h"
6136c1
+#include "stat-util.h"
6136c1
 #include "stdio-util.h"
6136c1
 #include "string-util.h"
6136c1
 #include "udev.h"
6136c1
+#include "libudev-device-internal.h"
6136c1
 
6136c1
-static int node_symlink(struct udev_device *dev, const char *node, const char *slink) {
6136c1
+#define LINK_UPDATE_MAX_RETRIES 128
6136c1
+
6136c1
+static int node_symlink(sd_device *dev, const char *node, const char *slink) {
6136c1
         struct stat stats;
6136c1
         char target[UTIL_PATH_SIZE];
6136c1
         char *s;
6136c1
+        const char *id_filename;
6136c1
         size_t l;
6136c1
         char slink_tmp[UTIL_PATH_SIZE + 32];
6136c1
         int i = 0;
6136c1
@@ -89,7 +97,10 @@ static int node_symlink(struct udev_device *dev, const char *node, const char *s
6136c1
         }
6136c1
 
6136c1
         log_debug("atomically replace '%s'", slink);
6136c1
-        strscpyl(slink_tmp, sizeof(slink_tmp), slink, ".tmp-", udev_device_get_id_filename(dev), NULL);
6136c1
+        err = device_get_id_filename(dev, &id_filename);
6136c1
+        if (err < 0)
6136c1
+                return log_error_errno(err, "Failed to get id_filename: %m");
6136c1
+        strscpyl(slink_tmp, sizeof(slink_tmp), slink, ".tmp-", id_filename, NULL);
6136c1
         unlink(slink_tmp);
6136c1
         do {
6136c1
                 err = mkdir_parents_label(slink_tmp, 0755);
6136c1
@@ -109,104 +120,187 @@ static int node_symlink(struct udev_device *dev, const char *node, const char *s
6136c1
         if (err != 0) {
6136c1
                 log_error_errno(errno, "rename '%s' '%s' failed: %m", slink_tmp, slink);
6136c1
                 unlink(slink_tmp);
6136c1
-        }
6136c1
+        } else
6136c1
+                /* Tell caller that we replaced already existing symlink. */
6136c1
+                return 1;
6136c1
 exit:
6136c1
         return err;
6136c1
 }
6136c1
 
6136c1
 /* find device node of device with highest priority */
6136c1
-static const char *link_find_prioritized(struct udev_device *dev, bool add, const char *stackdir, char *buf, size_t bufsize) {
6136c1
-        struct udev *udev = udev_device_get_udev(dev);
6136c1
-        DIR *dir;
6136c1
+static int link_find_prioritized(sd_device *dev, bool add, const char *stackdir, char **ret) {
6136c1
+        _cleanup_closedir_ DIR *dir = NULL;
6136c1
+        _cleanup_free_ char *target = NULL;
6136c1
         struct dirent *dent;
6136c1
-        int priority = 0;
6136c1
-        const char *target = NULL;
6136c1
+        int r, priority = 0;
6136c1
+
6136c1
+        assert(!add || dev);
6136c1
+        assert(stackdir);
6136c1
+        assert(ret);
6136c1
 
6136c1
         if (add) {
6136c1
-                priority = udev_device_get_devlink_priority(dev);
6136c1
-                strscpy(buf, bufsize, udev_device_get_devnode(dev));
6136c1
-                target = buf;
6136c1
+                const char *devnode;
6136c1
+
6136c1
+                r = device_get_devlink_priority(dev, &priority);
6136c1
+                if (r < 0)
6136c1
+                        return r;
6136c1
+
6136c1
+                r = sd_device_get_devname(dev, &devnode);
6136c1
+                if (r < 0)
6136c1
+                        return r;
6136c1
+
6136c1
+                target = strdup(devnode);
6136c1
+                if (!target)
6136c1
+                        return -ENOMEM;
6136c1
         }
6136c1
 
6136c1
         dir = opendir(stackdir);
6136c1
-        if (dir == NULL)
6136c1
-                return target;
6136c1
+        if (!dir) {
6136c1
+                if (target) {
6136c1
+                        *ret = TAKE_PTR(target);
6136c1
+                        return 0;
6136c1
+                }
6136c1
+
6136c1
+                return -errno;
6136c1
+        }
6136c1
+
6136c1
         FOREACH_DIRENT_ALL(dent, dir, break) {
6136c1
-                struct udev_device *dev_db;
6136c1
+                _cleanup_(sd_device_unrefp) sd_device *dev_db = NULL;
6136c1
+                const char *devnode, *id_filename;
6136c1
+                int db_prio = 0;
6136c1
 
6136c1
                 if (dent->d_name[0] == '\0')
6136c1
                         break;
6136c1
                 if (dent->d_name[0] == '.')
6136c1
                         continue;
6136c1
 
6136c1
-                log_debug("found '%s' claiming '%s'", dent->d_name, stackdir);
6136c1
+                log_debug("Found '%s' claiming '%s'", dent->d_name, stackdir);
6136c1
+
6136c1
+                if (device_get_id_filename(dev, &id_filename) < 0)
6136c1
+                        continue;
6136c1
 
6136c1
                 /* did we find ourself? */
6136c1
-                if (streq(dent->d_name, udev_device_get_id_filename(dev)))
6136c1
+                if (streq(dent->d_name, id_filename))
6136c1
                         continue;
6136c1
 
6136c1
-                dev_db = udev_device_new_from_device_id(udev, dent->d_name);
6136c1
-                if (dev_db != NULL) {
6136c1
-                        const char *devnode;
6136c1
-
6136c1
-                        devnode = udev_device_get_devnode(dev_db);
6136c1
-                        if (devnode != NULL) {
6136c1
-                                if (target == NULL || udev_device_get_devlink_priority(dev_db) > priority) {
6136c1
-                                        log_debug("'%s' claims priority %i for '%s'",
6136c1
-                                                  udev_device_get_syspath(dev_db), udev_device_get_devlink_priority(dev_db), stackdir);
6136c1
-                                        priority = udev_device_get_devlink_priority(dev_db);
6136c1
-                                        strscpy(buf, bufsize, devnode);
6136c1
-                                        target = buf;
6136c1
-                                }
6136c1
-                        }
6136c1
-                        udev_device_unref(dev_db);
6136c1
+                if (sd_device_new_from_device_id(&dev_db, dent->d_name) < 0)
6136c1
+                        continue;
6136c1
+
6136c1
+                if (sd_device_get_devname(dev_db, &devnode) < 0)
6136c1
+                        continue;
6136c1
+
6136c1
+                if (device_get_devlink_priority(dev_db, &db_prio) < 0)
6136c1
+                        continue;
6136c1
+
6136c1
+                if (target && db_prio <= priority)
6136c1
+                        continue;
6136c1
+
6136c1
+                if (DEBUG_LOGGING) {
6136c1
+                        const char *syspath = NULL;
6136c1
+
6136c1
+                        (void) sd_device_get_syspath(dev_db, &syspath);
6136c1
+                        log_debug("Device '%s' claims priority %i for '%s'", strnull(syspath), db_prio, stackdir);
6136c1
                 }
6136c1
+
6136c1
+                r = free_and_strdup(&target, devnode);
6136c1
+                if (r < 0)
6136c1
+                        return r;
6136c1
+                priority = db_prio;
6136c1
         }
6136c1
-        closedir(dir);
6136c1
-        return target;
6136c1
+
6136c1
+        if (!target)
6136c1
+                return -ENOENT;
6136c1
+
6136c1
+        *ret = TAKE_PTR(target);
6136c1
+        return 0;
6136c1
 }
6136c1
 
6136c1
+
6136c1
 /* manage "stack of names" with possibly specified device priorities */
6136c1
-static void link_update(struct udev_device *dev, const char *slink, bool add) {
6136c1
-        char name_enc[UTIL_PATH_SIZE];
6136c1
-        char filename[UTIL_PATH_SIZE * 2];
6136c1
-        char dirname[UTIL_PATH_SIZE];
6136c1
-        const char *target;
6136c1
-        char buf[UTIL_PATH_SIZE];
6136c1
+static int link_update(sd_device *dev, const char *slink, bool add) {
6136c1
+        _cleanup_free_ char *filename = NULL, *dirname = NULL;
6136c1
+        char name_enc[PATH_MAX];
6136c1
+        const char *id_filename;
6136c1
+        int i, r, retries;
6136c1
+
6136c1
+        assert(dev);
6136c1
+        assert(slink);
6136c1
+
6136c1
+        r = device_get_id_filename(dev, &id_filename);
6136c1
+        if (r < 0)
6136c1
+                return log_debug_errno(r, "Failed to get id_filename: %m");
6136c1
 
6136c1
         util_path_encode(slink + STRLEN("/dev"), name_enc, sizeof(name_enc));
6136c1
-        strscpyl(dirname, sizeof(dirname), "/run/udev/links/", name_enc, NULL);
6136c1
-        strscpyl(filename, sizeof(filename), dirname, "/", udev_device_get_id_filename(dev), NULL);
6136c1
+        dirname = path_join(NULL, "/run/udev/links/", name_enc);
6136c1
+        if (!dirname)
6136c1
+                return log_oom();
6136c1
+        filename = path_join(NULL, dirname, id_filename);
6136c1
+        if (!filename)
6136c1
+                return log_oom();
6136c1
+
6136c1
+        if (!add) {
6136c1
+                if (unlink(filename) == 0)
6136c1
+                        (void) rmdir(dirname);
6136c1
+        } else
6136c1
+                for (;;) {
6136c1
+                        _cleanup_close_ int fd = -1;
6136c1
+
6136c1
+                        r = mkdir_parents(filename, 0755);
6136c1
+                        if (!IN_SET(r, 0, -ENOENT))
6136c1
+                                return r;
6136c1
 
6136c1
-        if (!add && unlink(filename) == 0)
6136c1
-                rmdir(dirname);
6136c1
+                        fd = open(filename, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC|O_NOFOLLOW, 0444);
6136c1
+                        if (fd >= 0)
6136c1
+                                break;
6136c1
+                        if (errno != ENOENT)
6136c1
+                                return -errno;
6136c1
+                }
6136c1
 
6136c1
-        target = link_find_prioritized(dev, add, dirname, buf, sizeof(buf));
6136c1
-        if (target == NULL) {
6136c1
-                log_debug("no reference left, remove '%s'", slink);
6136c1
-                if (unlink(slink) == 0)
6136c1
-                        rmdir_parents(slink, "/");
6136c1
-        } else {
6136c1
-                log_debug("creating link '%s' to '%s'", slink, target);
6136c1
-                node_symlink(dev, target, slink);
6136c1
-        }
6136c1
+        /* If the database entry is not written yet we will just do one iteration and possibly wrong symlink
6136c1
+         * will be fixed in the second invocation. */
6136c1
+        (void) sd_device_get_is_initialized(dev, &r);
6136c1
+        retries = r > 0 ? LINK_UPDATE_MAX_RETRIES : 1;
6136c1
 
6136c1
-        if (add) {
6136c1
-                int err;
6136c1
+        for (i = 0; i < retries; i++) {
6136c1
+                _cleanup_free_ char *target = NULL;
6136c1
+                struct stat st1 = {}, st2 = {};
6136c1
 
6136c1
-                do {
6136c1
-                        int fd;
6136c1
+                r = stat(dirname, &st1;;
6136c1
+                if (r < 0 && errno != ENOENT)
6136c1
+                        return -errno;
6136c1
 
6136c1
-                        err = mkdir_parents(filename, 0755);
6136c1
-                        if (!IN_SET(err, 0, -ENOENT))
6136c1
+                r = link_find_prioritized(dev, add, dirname, &target);
6136c1
+                if (r == -ENOENT) {
6136c1
+                        log_debug("No reference left, removing '%s'", slink);
6136c1
+                        if (unlink(slink) == 0)
6136c1
+                                (void) rmdir_parents(slink, "/");
6136c1
+
6136c1
+                        break;
6136c1
+                } else if (r < 0)
6136c1
+                        return log_error_errno(r, "Failed to determine highest priority symlink: %m");
6136c1
+
6136c1
+                r = node_symlink(dev, target, slink);
6136c1
+                if (r < 0) {
6136c1
+                        (void) unlink(filename);
6136c1
+                        break;
6136c1
+                } else if (r == 1)
6136c1
+                        /* We have replaced already existing symlink, possibly there is some other device trying
6136c1
+                         * to claim the same symlink. Let's do one more iteration to give us a chance to fix
6136c1
+                         * the error if other device actually claims the symlink with higher priority. */
6136c1
+                        continue;
6136c1
+
6136c1
+               /* Skip the second stat() if the first failed, stat_inode_unmodified() would return false regardless. */
6136c1
+                if ((st1.st_mode & S_IFMT) != 0) {
6136c1
+                        r = stat(dirname, &st2;;
6136c1
+                        if (r < 0 && errno != ENOENT)
6136c1
+                                return -errno;
6136c1
+
6136c1
+                        if (stat_inode_unmodified(&st1, &st2))
6136c1
                                 break;
6136c1
-                        fd = open(filename, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC|O_NOFOLLOW, 0444);
6136c1
-                        if (fd >= 0)
6136c1
-                                close(fd);
6136c1
-                        else
6136c1
-                                err = -errno;
6136c1
-                } while (err == -ENOENT);
6136c1
+                }
6136c1
         }
6136c1
+
6136c1
+        return i < LINK_UPDATE_MAX_RETRIES ? 0 : -ELOOP;
6136c1
 }
6136c1
 
6136c1
 void udev_node_update_old_links(struct udev_device *dev, struct udev_device *dev_old) {
6136c1
@@ -233,7 +327,7 @@ void udev_node_update_old_links(struct udev_device *dev, struct udev_device *dev
6136c1
 
6136c1
                 log_debug("update old name, '%s' no longer belonging to '%s'",
6136c1
                      name, udev_device_get_devpath(dev));
6136c1
-                link_update(dev, name, false);
6136c1
+                link_update(dev->device, name, false);
6136c1
         }
6136c1
 }
6136c1
 
6136c1
@@ -338,11 +432,16 @@ void udev_node_add(struct udev_device *dev, bool apply,
6136c1
         xsprintf_dev_num_path(filename,
6136c1
                               streq(udev_device_get_subsystem(dev), "block") ? "block" : "char",
6136c1
                               udev_device_get_devnum(dev));
6136c1
-        node_symlink(dev, udev_device_get_devnode(dev), filename);
6136c1
+        node_symlink(dev->device, udev_device_get_devnode(dev), filename);
6136c1
 
6136c1
         /* create/update symlinks, add symlinks to name index */
6136c1
-        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev))
6136c1
-                        link_update(dev, udev_list_entry_get_name(list_entry), true);
6136c1
+        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev)) {
6136c1
+                int r;
6136c1
+
6136c1
+                r = link_update(dev->device, udev_list_entry_get_name(list_entry), true);
6136c1
+                if (r < 0)
6136c1
+                        log_info_errno(r, "Failed to update device symlinks: %m");
6136c1
+        }
6136c1
 }
6136c1
 
6136c1
 void udev_node_remove(struct udev_device *dev) {
6136c1
@@ -350,8 +449,13 @@ void udev_node_remove(struct udev_device *dev) {
6136c1
         char filename[DEV_NUM_PATH_MAX];
6136c1
 
6136c1
         /* remove/update symlinks, remove symlinks from name index */
6136c1
-        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev))
6136c1
-                link_update(dev, udev_list_entry_get_name(list_entry), false);
6136c1
+        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev)) {
6136c1
+                int r;
6136c1
+
6136c1
+                r = link_update(dev->device, udev_list_entry_get_name(list_entry), false);
6136c1
+                if (r < 0)
6136c1
+                        log_info_errno(r, "Failed to update device symlinks: %m");
6136c1
+        }
6136c1
 
6136c1
         /* remove /dev/{block,char}/$major:$minor */
6136c1
         xsprintf_dev_num_path(filename,