Blob Blame History Raw
From 1d5f966c1758eb620755fcae54abd07a1ac36d3d Mon Sep 17 00:00:00 2001
From: Michal Sekletar <msekleta@redhat.com>
Date: Wed, 6 Jan 2021 11:43:50 +0100
Subject: [PATCH] udev: make algorithm that selects highest priority devlink
 less susceptible to race conditions

Previously it was very likely, when multiple contenders for the symlink
appear in parallel, that algorithm would select wrong symlink (i.e. one
with lower-priority).

Now the algorithm is much more defensive and when we detect change in
set of contenders for the symlink we reevaluate the selection. Same
happens when new symlink replaces already existing symlink that points
to different device node.

Resolves: #1642728
---
 src/udev/udev-event.c |  71 +++++++-----
 src/udev/udev-node.c  | 244 ++++++++++++++++++++++++++++++------------
 2 files changed, 216 insertions(+), 99 deletions(-)

diff --git a/src/udev/udev-event.c b/src/udev/udev-event.c
index fd8406d959..9004634f65 100644
--- a/src/udev/udev-event.c
+++ b/src/udev/udev-event.c
@@ -833,6 +833,41 @@ static int rename_netif(struct udev_event *event) {
         return 0;
 }
 
+static void update_devnode(struct udev_event *event) {
+        struct udev_device *dev = event->dev;
+
+        if (major(udev_device_get_devnum(dev)) > 0) {
+                bool apply;
+
+                /* remove/update possible left-over symlinks from old database entry */
+                if (event->dev_db != NULL)
+                        udev_node_update_old_links(dev, event->dev_db);
+
+                if (!event->owner_set)
+                        event->uid = udev_device_get_devnode_uid(dev);
+
+                if (!event->group_set)
+                        event->gid = udev_device_get_devnode_gid(dev);
+
+                if (!event->mode_set) {
+                        if (udev_device_get_devnode_mode(dev) > 0) {
+                                /* kernel supplied value */
+                                event->mode = udev_device_get_devnode_mode(dev);
+                        } else if (event->gid > 0) {
+                                /* default 0660 if a group is assigned */
+                                event->mode = 0660;
+                        }
+                        else {
+                                /* default 0600 */
+                                event->mode = 0600;
+                        }
+                }
+
+                apply = streq(udev_device_get_action(dev), "add") || event->owner_set || event->group_set || event->mode_set;
+                udev_node_add(dev, apply, event->mode, event->uid, event->gid, &event->seclabel_list);
+        }
+}
+
 void udev_event_execute_rules(struct udev_event *event,
                               usec_t timeout_usec, usec_t timeout_warn_usec,
                               struct udev_list *properties_list,
@@ -891,35 +926,7 @@ void udev_event_execute_rules(struct udev_event *event,
                         }
                 }
 
-                if (major(udev_device_get_devnum(dev)) > 0) {
-                        bool apply;
-
-                        /* remove/update possible left-over symlinks from old database entry */
-                        if (event->dev_db != NULL)
-                                udev_node_update_old_links(dev, event->dev_db);
-
-                        if (!event->owner_set)
-                                event->uid = udev_device_get_devnode_uid(dev);
-
-                        if (!event->group_set)
-                                event->gid = udev_device_get_devnode_gid(dev);
-
-                        if (!event->mode_set) {
-                                if (udev_device_get_devnode_mode(dev) > 0) {
-                                        /* kernel supplied value */
-                                        event->mode = udev_device_get_devnode_mode(dev);
-                                } else if (event->gid > 0) {
-                                        /* default 0660 if a group is assigned */
-                                        event->mode = 0660;
-                                } else {
-                                        /* default 0600 */
-                                        event->mode = 0600;
-                                }
-                        }
-
-                        apply = streq(udev_device_get_action(dev), "add") || event->owner_set || event->group_set || event->mode_set;
-                        udev_node_add(dev, apply, event->mode, event->uid, event->gid, &event->seclabel_list);
-                }
+                update_devnode(event);
 
                 /* preserve old, or get new initialization timestamp */
                 udev_device_ensure_usec_initialized(event->dev, event->dev_db);
@@ -927,6 +934,12 @@ void udev_event_execute_rules(struct udev_event *event,
                 /* (re)write database file */
                 udev_device_tag_index(dev, event->dev_db, true);
                 udev_device_update_db(dev);
+
+                /* Yes, we run update_devnode() twice, because in the first invocation, that is before update of udev database,
+                 * it could happen that two contenders are replacing each other's symlink. Hence we run it again to make sure
+                 * symlinks point to devices that claim them with the highest priority. */
+                update_devnode(event);
+
                 udev_device_set_is_initialized(dev);
 
                 event->dev_db = udev_device_unref(event->dev_db);
diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c
index 333dcae6b9..2eeeccdd3a 100644
--- a/src/udev/udev-node.c
+++ b/src/udev/udev-node.c
@@ -13,19 +13,27 @@
 #include <unistd.h>
 
 #include "device-nodes.h"
+#include "device-private.h"
 #include "dirent-util.h"
+#include "fd-util.h"
 #include "format-util.h"
 #include "fs-util.h"
+#include "sd-device.h"
 #include "selinux-util.h"
 #include "smack-util.h"
+#include "stat-util.h"
 #include "stdio-util.h"
 #include "string-util.h"
 #include "udev.h"
+#include "libudev-device-internal.h"
 
-static int node_symlink(struct udev_device *dev, const char *node, const char *slink) {
+#define LINK_UPDATE_MAX_RETRIES 128
+
+static int node_symlink(sd_device *dev, const char *node, const char *slink) {
         struct stat stats;
         char target[UTIL_PATH_SIZE];
         char *s;
+        const char *id_filename;
         size_t l;
         char slink_tmp[UTIL_PATH_SIZE + 32];
         int i = 0;
@@ -89,7 +97,10 @@ static int node_symlink(struct udev_device *dev, const char *node, const char *s
         }
 
         log_debug("atomically replace '%s'", slink);
-        strscpyl(slink_tmp, sizeof(slink_tmp), slink, ".tmp-", udev_device_get_id_filename(dev), NULL);
+        err = device_get_id_filename(dev, &id_filename);
+        if (err < 0)
+                return log_error_errno(err, "Failed to get id_filename: %m");
+        strscpyl(slink_tmp, sizeof(slink_tmp), slink, ".tmp-", id_filename, NULL);
         unlink(slink_tmp);
         do {
                 err = mkdir_parents_label(slink_tmp, 0755);
@@ -109,104 +120,187 @@ static int node_symlink(struct udev_device *dev, const char *node, const char *s
         if (err != 0) {
                 log_error_errno(errno, "rename '%s' '%s' failed: %m", slink_tmp, slink);
                 unlink(slink_tmp);
-        }
+        } else
+                /* Tell caller that we replaced already existing symlink. */
+                return 1;
 exit:
         return err;
 }
 
 /* find device node of device with highest priority */
-static const char *link_find_prioritized(struct udev_device *dev, bool add, const char *stackdir, char *buf, size_t bufsize) {
-        struct udev *udev = udev_device_get_udev(dev);
-        DIR *dir;
+static int link_find_prioritized(sd_device *dev, bool add, const char *stackdir, char **ret) {
+        _cleanup_closedir_ DIR *dir = NULL;
+        _cleanup_free_ char *target = NULL;
         struct dirent *dent;
-        int priority = 0;
-        const char *target = NULL;
+        int r, priority = 0;
+
+        assert(!add || dev);
+        assert(stackdir);
+        assert(ret);
 
         if (add) {
-                priority = udev_device_get_devlink_priority(dev);
-                strscpy(buf, bufsize, udev_device_get_devnode(dev));
-                target = buf;
+                const char *devnode;
+
+                r = device_get_devlink_priority(dev, &priority);
+                if (r < 0)
+                        return r;
+
+                r = sd_device_get_devname(dev, &devnode);
+                if (r < 0)
+                        return r;
+
+                target = strdup(devnode);
+                if (!target)
+                        return -ENOMEM;
         }
 
         dir = opendir(stackdir);
-        if (dir == NULL)
-                return target;
+        if (!dir) {
+                if (target) {
+                        *ret = TAKE_PTR(target);
+                        return 0;
+                }
+
+                return -errno;
+        }
+
         FOREACH_DIRENT_ALL(dent, dir, break) {
-                struct udev_device *dev_db;
+                _cleanup_(sd_device_unrefp) sd_device *dev_db = NULL;
+                const char *devnode, *id_filename;
+                int db_prio = 0;
 
                 if (dent->d_name[0] == '\0')
                         break;
                 if (dent->d_name[0] == '.')
                         continue;
 
-                log_debug("found '%s' claiming '%s'", dent->d_name, stackdir);
+                log_debug("Found '%s' claiming '%s'", dent->d_name, stackdir);
+
+                if (device_get_id_filename(dev, &id_filename) < 0)
+                        continue;
 
                 /* did we find ourself? */
-                if (streq(dent->d_name, udev_device_get_id_filename(dev)))
+                if (streq(dent->d_name, id_filename))
                         continue;
 
-                dev_db = udev_device_new_from_device_id(udev, dent->d_name);
-                if (dev_db != NULL) {
-                        const char *devnode;
-
-                        devnode = udev_device_get_devnode(dev_db);
-                        if (devnode != NULL) {
-                                if (target == NULL || udev_device_get_devlink_priority(dev_db) > priority) {
-                                        log_debug("'%s' claims priority %i for '%s'",
-                                                  udev_device_get_syspath(dev_db), udev_device_get_devlink_priority(dev_db), stackdir);
-                                        priority = udev_device_get_devlink_priority(dev_db);
-                                        strscpy(buf, bufsize, devnode);
-                                        target = buf;
-                                }
-                        }
-                        udev_device_unref(dev_db);
+                if (sd_device_new_from_device_id(&dev_db, dent->d_name) < 0)
+                        continue;
+
+                if (sd_device_get_devname(dev_db, &devnode) < 0)
+                        continue;
+
+                if (device_get_devlink_priority(dev_db, &db_prio) < 0)
+                        continue;
+
+                if (target && db_prio <= priority)
+                        continue;
+
+                if (DEBUG_LOGGING) {
+                        const char *syspath = NULL;
+
+                        (void) sd_device_get_syspath(dev_db, &syspath);
+                        log_debug("Device '%s' claims priority %i for '%s'", strnull(syspath), db_prio, stackdir);
                 }
+
+                r = free_and_strdup(&target, devnode);
+                if (r < 0)
+                        return r;
+                priority = db_prio;
         }
-        closedir(dir);
-        return target;
+
+        if (!target)
+                return -ENOENT;
+
+        *ret = TAKE_PTR(target);
+        return 0;
 }
 
+
 /* manage "stack of names" with possibly specified device priorities */
-static void link_update(struct udev_device *dev, const char *slink, bool add) {
-        char name_enc[UTIL_PATH_SIZE];
-        char filename[UTIL_PATH_SIZE * 2];
-        char dirname[UTIL_PATH_SIZE];
-        const char *target;
-        char buf[UTIL_PATH_SIZE];
+static int link_update(sd_device *dev, const char *slink, bool add) {
+        _cleanup_free_ char *filename = NULL, *dirname = NULL;
+        char name_enc[PATH_MAX];
+        const char *id_filename;
+        int i, r, retries;
+
+        assert(dev);
+        assert(slink);
+
+        r = device_get_id_filename(dev, &id_filename);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to get id_filename: %m");
 
         util_path_encode(slink + STRLEN("/dev"), name_enc, sizeof(name_enc));
-        strscpyl(dirname, sizeof(dirname), "/run/udev/links/", name_enc, NULL);
-        strscpyl(filename, sizeof(filename), dirname, "/", udev_device_get_id_filename(dev), NULL);
+        dirname = path_join(NULL, "/run/udev/links/", name_enc);
+        if (!dirname)
+                return log_oom();
+        filename = path_join(NULL, dirname, id_filename);
+        if (!filename)
+                return log_oom();
+
+        if (!add) {
+                if (unlink(filename) == 0)
+                        (void) rmdir(dirname);
+        } else
+                for (;;) {
+                        _cleanup_close_ int fd = -1;
+
+                        r = mkdir_parents(filename, 0755);
+                        if (!IN_SET(r, 0, -ENOENT))
+                                return r;
 
-        if (!add && unlink(filename) == 0)
-                rmdir(dirname);
+                        fd = open(filename, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC|O_NOFOLLOW, 0444);
+                        if (fd >= 0)
+                                break;
+                        if (errno != ENOENT)
+                                return -errno;
+                }
 
-        target = link_find_prioritized(dev, add, dirname, buf, sizeof(buf));
-        if (target == NULL) {
-                log_debug("no reference left, remove '%s'", slink);
-                if (unlink(slink) == 0)
-                        rmdir_parents(slink, "/");
-        } else {
-                log_debug("creating link '%s' to '%s'", slink, target);
-                node_symlink(dev, target, slink);
-        }
+        /* If the database entry is not written yet we will just do one iteration and possibly wrong symlink
+         * will be fixed in the second invocation. */
+        (void) sd_device_get_is_initialized(dev, &r);
+        retries = r > 0 ? LINK_UPDATE_MAX_RETRIES : 1;
 
-        if (add) {
-                int err;
+        for (i = 0; i < retries; i++) {
+                _cleanup_free_ char *target = NULL;
+                struct stat st1 = {}, st2 = {};
 
-                do {
-                        int fd;
+                r = stat(dirname, &st1);
+                if (r < 0 && errno != ENOENT)
+                        return -errno;
 
-                        err = mkdir_parents(filename, 0755);
-                        if (!IN_SET(err, 0, -ENOENT))
+                r = link_find_prioritized(dev, add, dirname, &target);
+                if (r == -ENOENT) {
+                        log_debug("No reference left, removing '%s'", slink);
+                        if (unlink(slink) == 0)
+                                (void) rmdir_parents(slink, "/");
+
+                        break;
+                } else if (r < 0)
+                        return log_error_errno(r, "Failed to determine highest priority symlink: %m");
+
+                r = node_symlink(dev, target, slink);
+                if (r < 0) {
+                        (void) unlink(filename);
+                        break;
+                } else if (r == 1)
+                        /* We have replaced already existing symlink, possibly there is some other device trying
+                         * to claim the same symlink. Let's do one more iteration to give us a chance to fix
+                         * the error if other device actually claims the symlink with higher priority. */
+                        continue;
+
+               /* Skip the second stat() if the first failed, stat_inode_unmodified() would return false regardless. */
+                if ((st1.st_mode & S_IFMT) != 0) {
+                        r = stat(dirname, &st2);
+                        if (r < 0 && errno != ENOENT)
+                                return -errno;
+
+                        if (stat_inode_unmodified(&st1, &st2))
                                 break;
-                        fd = open(filename, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC|O_NOFOLLOW, 0444);
-                        if (fd >= 0)
-                                close(fd);
-                        else
-                                err = -errno;
-                } while (err == -ENOENT);
+                }
         }
+
+        return i < LINK_UPDATE_MAX_RETRIES ? 0 : -ELOOP;
 }
 
 void udev_node_update_old_links(struct udev_device *dev, struct udev_device *dev_old) {
@@ -233,7 +327,7 @@ void udev_node_update_old_links(struct udev_device *dev, struct udev_device *dev
 
                 log_debug("update old name, '%s' no longer belonging to '%s'",
                      name, udev_device_get_devpath(dev));
-                link_update(dev, name, false);
+                link_update(dev->device, name, false);
         }
 }
 
@@ -338,11 +432,16 @@ void udev_node_add(struct udev_device *dev, bool apply,
         xsprintf_dev_num_path(filename,
                               streq(udev_device_get_subsystem(dev), "block") ? "block" : "char",
                               udev_device_get_devnum(dev));
-        node_symlink(dev, udev_device_get_devnode(dev), filename);
+        node_symlink(dev->device, udev_device_get_devnode(dev), filename);
 
         /* create/update symlinks, add symlinks to name index */
-        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev))
-                        link_update(dev, udev_list_entry_get_name(list_entry), true);
+        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev)) {
+                int r;
+
+                r = link_update(dev->device, udev_list_entry_get_name(list_entry), true);
+                if (r < 0)
+                        log_info_errno(r, "Failed to update device symlinks: %m");
+        }
 }
 
 void udev_node_remove(struct udev_device *dev) {
@@ -350,8 +449,13 @@ void udev_node_remove(struct udev_device *dev) {
         char filename[DEV_NUM_PATH_MAX];
 
         /* remove/update symlinks, remove symlinks from name index */
-        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev))
-                link_update(dev, udev_list_entry_get_name(list_entry), false);
+        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev)) {
+                int r;
+
+                r = link_update(dev->device, udev_list_entry_get_name(list_entry), false);
+                if (r < 0)
+                        log_info_errno(r, "Failed to update device symlinks: %m");
+        }
 
         /* remove /dev/{block,char}/$major:$minor */
         xsprintf_dev_num_path(filename,