bd1529
From 1d5f966c1758eb620755fcae54abd07a1ac36d3d Mon Sep 17 00:00:00 2001
bd1529
From: Michal Sekletar <msekleta@redhat.com>
bd1529
Date: Wed, 6 Jan 2021 11:43:50 +0100
bd1529
Subject: [PATCH] udev: make algorithm that selects highest priority devlink
bd1529
 less susceptible to race conditions
bd1529
bd1529
Previously it was very likely, when multiple contenders for the symlink
bd1529
appear in parallel, that algorithm would select wrong symlink (i.e. one
bd1529
with lower-priority).
bd1529
bd1529
Now the algorithm is much more defensive and when we detect change in
bd1529
set of contenders for the symlink we reevaluate the selection. Same
bd1529
happens when new symlink replaces already existing symlink that points
bd1529
to different device node.
bd1529
bd1529
Resolves: #1642728
bd1529
---
bd1529
 src/udev/udev-event.c |  71 +++++++-----
bd1529
 src/udev/udev-node.c  | 244 ++++++++++++++++++++++++++++++------------
bd1529
 2 files changed, 216 insertions(+), 99 deletions(-)
bd1529
bd1529
diff --git a/src/udev/udev-event.c b/src/udev/udev-event.c
bd1529
index fd8406d959..9004634f65 100644
bd1529
--- a/src/udev/udev-event.c
bd1529
+++ b/src/udev/udev-event.c
bd1529
@@ -833,6 +833,41 @@ static int rename_netif(struct udev_event *event) {
bd1529
         return 0;
bd1529
 }
bd1529
 
bd1529
+static void update_devnode(struct udev_event *event) {
bd1529
+        struct udev_device *dev = event->dev;
bd1529
+
bd1529
+        if (major(udev_device_get_devnum(dev)) > 0) {
bd1529
+                bool apply;
bd1529
+
bd1529
+                /* remove/update possible left-over symlinks from old database entry */
bd1529
+                if (event->dev_db != NULL)
bd1529
+                        udev_node_update_old_links(dev, event->dev_db);
bd1529
+
bd1529
+                if (!event->owner_set)
bd1529
+                        event->uid = udev_device_get_devnode_uid(dev);
bd1529
+
bd1529
+                if (!event->group_set)
bd1529
+                        event->gid = udev_device_get_devnode_gid(dev);
bd1529
+
bd1529
+                if (!event->mode_set) {
bd1529
+                        if (udev_device_get_devnode_mode(dev) > 0) {
bd1529
+                                /* kernel supplied value */
bd1529
+                                event->mode = udev_device_get_devnode_mode(dev);
bd1529
+                        } else if (event->gid > 0) {
bd1529
+                                /* default 0660 if a group is assigned */
bd1529
+                                event->mode = 0660;
bd1529
+                        }
bd1529
+                        else {
bd1529
+                                /* default 0600 */
bd1529
+                                event->mode = 0600;
bd1529
+                        }
bd1529
+                }
bd1529
+
bd1529
+                apply = streq(udev_device_get_action(dev), "add") || event->owner_set || event->group_set || event->mode_set;
bd1529
+                udev_node_add(dev, apply, event->mode, event->uid, event->gid, &event->seclabel_list);
bd1529
+        }
bd1529
+}
bd1529
+
bd1529
 void udev_event_execute_rules(struct udev_event *event,
bd1529
                               usec_t timeout_usec, usec_t timeout_warn_usec,
bd1529
                               struct udev_list *properties_list,
bd1529
@@ -891,35 +926,7 @@ void udev_event_execute_rules(struct udev_event *event,
bd1529
                         }
bd1529
                 }
bd1529
 
bd1529
-                if (major(udev_device_get_devnum(dev)) > 0) {
bd1529
-                        bool apply;
bd1529
-
bd1529
-                        /* remove/update possible left-over symlinks from old database entry */
bd1529
-                        if (event->dev_db != NULL)
bd1529
-                                udev_node_update_old_links(dev, event->dev_db);
bd1529
-
bd1529
-                        if (!event->owner_set)
bd1529
-                                event->uid = udev_device_get_devnode_uid(dev);
bd1529
-
bd1529
-                        if (!event->group_set)
bd1529
-                                event->gid = udev_device_get_devnode_gid(dev);
bd1529
-
bd1529
-                        if (!event->mode_set) {
bd1529
-                                if (udev_device_get_devnode_mode(dev) > 0) {
bd1529
-                                        /* kernel supplied value */
bd1529
-                                        event->mode = udev_device_get_devnode_mode(dev);
bd1529
-                                } else if (event->gid > 0) {
bd1529
-                                        /* default 0660 if a group is assigned */
bd1529
-                                        event->mode = 0660;
bd1529
-                                } else {
bd1529
-                                        /* default 0600 */
bd1529
-                                        event->mode = 0600;
bd1529
-                                }
bd1529
-                        }
bd1529
-
bd1529
-                        apply = streq(udev_device_get_action(dev), "add") || event->owner_set || event->group_set || event->mode_set;
bd1529
-                        udev_node_add(dev, apply, event->mode, event->uid, event->gid, &event->seclabel_list);
bd1529
-                }
bd1529
+                update_devnode(event);
bd1529
 
bd1529
                 /* preserve old, or get new initialization timestamp */
bd1529
                 udev_device_ensure_usec_initialized(event->dev, event->dev_db);
bd1529
@@ -927,6 +934,12 @@ void udev_event_execute_rules(struct udev_event *event,
bd1529
                 /* (re)write database file */
bd1529
                 udev_device_tag_index(dev, event->dev_db, true);
bd1529
                 udev_device_update_db(dev);
bd1529
+
bd1529
+                /* Yes, we run update_devnode() twice, because in the first invocation, that is before update of udev database,
bd1529
+                 * it could happen that two contenders are replacing each other's symlink. Hence we run it again to make sure
bd1529
+                 * symlinks point to devices that claim them with the highest priority. */
bd1529
+                update_devnode(event);
bd1529
+
bd1529
                 udev_device_set_is_initialized(dev);
bd1529
 
bd1529
                 event->dev_db = udev_device_unref(event->dev_db);
bd1529
diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c
bd1529
index 333dcae6b9..2eeeccdd3a 100644
bd1529
--- a/src/udev/udev-node.c
bd1529
+++ b/src/udev/udev-node.c
bd1529
@@ -13,19 +13,27 @@
bd1529
 #include <unistd.h>
bd1529
 
bd1529
 #include "device-nodes.h"
bd1529
+#include "device-private.h"
bd1529
 #include "dirent-util.h"
bd1529
+#include "fd-util.h"
bd1529
 #include "format-util.h"
bd1529
 #include "fs-util.h"
bd1529
+#include "sd-device.h"
bd1529
 #include "selinux-util.h"
bd1529
 #include "smack-util.h"
bd1529
+#include "stat-util.h"
bd1529
 #include "stdio-util.h"
bd1529
 #include "string-util.h"
bd1529
 #include "udev.h"
bd1529
+#include "libudev-device-internal.h"
bd1529
 
bd1529
-static int node_symlink(struct udev_device *dev, const char *node, const char *slink) {
bd1529
+#define LINK_UPDATE_MAX_RETRIES 128
bd1529
+
bd1529
+static int node_symlink(sd_device *dev, const char *node, const char *slink) {
bd1529
         struct stat stats;
bd1529
         char target[UTIL_PATH_SIZE];
bd1529
         char *s;
bd1529
+        const char *id_filename;
bd1529
         size_t l;
bd1529
         char slink_tmp[UTIL_PATH_SIZE + 32];
bd1529
         int i = 0;
bd1529
@@ -89,7 +97,10 @@ static int node_symlink(struct udev_device *dev, const char *node, const char *s
bd1529
         }
bd1529
 
bd1529
         log_debug("atomically replace '%s'", slink);
bd1529
-        strscpyl(slink_tmp, sizeof(slink_tmp), slink, ".tmp-", udev_device_get_id_filename(dev), NULL);
bd1529
+        err = device_get_id_filename(dev, &id_filename);
bd1529
+        if (err < 0)
bd1529
+                return log_error_errno(err, "Failed to get id_filename: %m");
bd1529
+        strscpyl(slink_tmp, sizeof(slink_tmp), slink, ".tmp-", id_filename, NULL);
bd1529
         unlink(slink_tmp);
bd1529
         do {
bd1529
                 err = mkdir_parents_label(slink_tmp, 0755);
bd1529
@@ -109,104 +120,187 @@ static int node_symlink(struct udev_device *dev, const char *node, const char *s
bd1529
         if (err != 0) {
bd1529
                 log_error_errno(errno, "rename '%s' '%s' failed: %m", slink_tmp, slink);
bd1529
                 unlink(slink_tmp);
bd1529
-        }
bd1529
+        } else
bd1529
+                /* Tell caller that we replaced already existing symlink. */
bd1529
+                return 1;
bd1529
 exit:
bd1529
         return err;
bd1529
 }
bd1529
 
bd1529
 /* find device node of device with highest priority */
bd1529
-static const char *link_find_prioritized(struct udev_device *dev, bool add, const char *stackdir, char *buf, size_t bufsize) {
bd1529
-        struct udev *udev = udev_device_get_udev(dev);
bd1529
-        DIR *dir;
bd1529
+static int link_find_prioritized(sd_device *dev, bool add, const char *stackdir, char **ret) {
bd1529
+        _cleanup_closedir_ DIR *dir = NULL;
bd1529
+        _cleanup_free_ char *target = NULL;
bd1529
         struct dirent *dent;
bd1529
-        int priority = 0;
bd1529
-        const char *target = NULL;
bd1529
+        int r, priority = 0;
bd1529
+
bd1529
+        assert(!add || dev);
bd1529
+        assert(stackdir);
bd1529
+        assert(ret);
bd1529
 
bd1529
         if (add) {
bd1529
-                priority = udev_device_get_devlink_priority(dev);
bd1529
-                strscpy(buf, bufsize, udev_device_get_devnode(dev));
bd1529
-                target = buf;
bd1529
+                const char *devnode;
bd1529
+
bd1529
+                r = device_get_devlink_priority(dev, &priority);
bd1529
+                if (r < 0)
bd1529
+                        return r;
bd1529
+
bd1529
+                r = sd_device_get_devname(dev, &devnode);
bd1529
+                if (r < 0)
bd1529
+                        return r;
bd1529
+
bd1529
+                target = strdup(devnode);
bd1529
+                if (!target)
bd1529
+                        return -ENOMEM;
bd1529
         }
bd1529
 
bd1529
         dir = opendir(stackdir);
bd1529
-        if (dir == NULL)
bd1529
-                return target;
bd1529
+        if (!dir) {
bd1529
+                if (target) {
bd1529
+                        *ret = TAKE_PTR(target);
bd1529
+                        return 0;
bd1529
+                }
bd1529
+
bd1529
+                return -errno;
bd1529
+        }
bd1529
+
bd1529
         FOREACH_DIRENT_ALL(dent, dir, break) {
bd1529
-                struct udev_device *dev_db;
bd1529
+                _cleanup_(sd_device_unrefp) sd_device *dev_db = NULL;
bd1529
+                const char *devnode, *id_filename;
bd1529
+                int db_prio = 0;
bd1529
 
bd1529
                 if (dent->d_name[0] == '\0')
bd1529
                         break;
bd1529
                 if (dent->d_name[0] == '.')
bd1529
                         continue;
bd1529
 
bd1529
-                log_debug("found '%s' claiming '%s'", dent->d_name, stackdir);
bd1529
+                log_debug("Found '%s' claiming '%s'", dent->d_name, stackdir);
bd1529
+
bd1529
+                if (device_get_id_filename(dev, &id_filename) < 0)
bd1529
+                        continue;
bd1529
 
bd1529
                 /* did we find ourself? */
bd1529
-                if (streq(dent->d_name, udev_device_get_id_filename(dev)))
bd1529
+                if (streq(dent->d_name, id_filename))
bd1529
                         continue;
bd1529
 
bd1529
-                dev_db = udev_device_new_from_device_id(udev, dent->d_name);
bd1529
-                if (dev_db != NULL) {
bd1529
-                        const char *devnode;
bd1529
-
bd1529
-                        devnode = udev_device_get_devnode(dev_db);
bd1529
-                        if (devnode != NULL) {
bd1529
-                                if (target == NULL || udev_device_get_devlink_priority(dev_db) > priority) {
bd1529
-                                        log_debug("'%s' claims priority %i for '%s'",
bd1529
-                                                  udev_device_get_syspath(dev_db), udev_device_get_devlink_priority(dev_db), stackdir);
bd1529
-                                        priority = udev_device_get_devlink_priority(dev_db);
bd1529
-                                        strscpy(buf, bufsize, devnode);
bd1529
-                                        target = buf;
bd1529
-                                }
bd1529
-                        }
bd1529
-                        udev_device_unref(dev_db);
bd1529
+                if (sd_device_new_from_device_id(&dev_db, dent->d_name) < 0)
bd1529
+                        continue;
bd1529
+
bd1529
+                if (sd_device_get_devname(dev_db, &devnode) < 0)
bd1529
+                        continue;
bd1529
+
bd1529
+                if (device_get_devlink_priority(dev_db, &db_prio) < 0)
bd1529
+                        continue;
bd1529
+
bd1529
+                if (target && db_prio <= priority)
bd1529
+                        continue;
bd1529
+
bd1529
+                if (DEBUG_LOGGING) {
bd1529
+                        const char *syspath = NULL;
bd1529
+
bd1529
+                        (void) sd_device_get_syspath(dev_db, &syspath);
bd1529
+                        log_debug("Device '%s' claims priority %i for '%s'", strnull(syspath), db_prio, stackdir);
bd1529
                 }
bd1529
+
bd1529
+                r = free_and_strdup(&target, devnode);
bd1529
+                if (r < 0)
bd1529
+                        return r;
bd1529
+                priority = db_prio;
bd1529
         }
bd1529
-        closedir(dir);
bd1529
-        return target;
bd1529
+
bd1529
+        if (!target)
bd1529
+                return -ENOENT;
bd1529
+
bd1529
+        *ret = TAKE_PTR(target);
bd1529
+        return 0;
bd1529
 }
bd1529
 
bd1529
+
bd1529
 /* manage "stack of names" with possibly specified device priorities */
bd1529
-static void link_update(struct udev_device *dev, const char *slink, bool add) {
bd1529
-        char name_enc[UTIL_PATH_SIZE];
bd1529
-        char filename[UTIL_PATH_SIZE * 2];
bd1529
-        char dirname[UTIL_PATH_SIZE];
bd1529
-        const char *target;
bd1529
-        char buf[UTIL_PATH_SIZE];
bd1529
+static int link_update(sd_device *dev, const char *slink, bool add) {
bd1529
+        _cleanup_free_ char *filename = NULL, *dirname = NULL;
bd1529
+        char name_enc[PATH_MAX];
bd1529
+        const char *id_filename;
bd1529
+        int i, r, retries;
bd1529
+
bd1529
+        assert(dev);
bd1529
+        assert(slink);
bd1529
+
bd1529
+        r = device_get_id_filename(dev, &id_filename);
bd1529
+        if (r < 0)
bd1529
+                return log_debug_errno(r, "Failed to get id_filename: %m");
bd1529
 
bd1529
         util_path_encode(slink + STRLEN("/dev"), name_enc, sizeof(name_enc));
bd1529
-        strscpyl(dirname, sizeof(dirname), "/run/udev/links/", name_enc, NULL);
bd1529
-        strscpyl(filename, sizeof(filename), dirname, "/", udev_device_get_id_filename(dev), NULL);
bd1529
+        dirname = path_join(NULL, "/run/udev/links/", name_enc);
bd1529
+        if (!dirname)
bd1529
+                return log_oom();
bd1529
+        filename = path_join(NULL, dirname, id_filename);
bd1529
+        if (!filename)
bd1529
+                return log_oom();
bd1529
+
bd1529
+        if (!add) {
bd1529
+                if (unlink(filename) == 0)
bd1529
+                        (void) rmdir(dirname);
bd1529
+        } else
bd1529
+                for (;;) {
bd1529
+                        _cleanup_close_ int fd = -1;
bd1529
+
bd1529
+                        r = mkdir_parents(filename, 0755);
bd1529
+                        if (!IN_SET(r, 0, -ENOENT))
bd1529
+                                return r;
bd1529
 
bd1529
-        if (!add && unlink(filename) == 0)
bd1529
-                rmdir(dirname);
bd1529
+                        fd = open(filename, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC|O_NOFOLLOW, 0444);
bd1529
+                        if (fd >= 0)
bd1529
+                                break;
bd1529
+                        if (errno != ENOENT)
bd1529
+                                return -errno;
bd1529
+                }
bd1529
 
bd1529
-        target = link_find_prioritized(dev, add, dirname, buf, sizeof(buf));
bd1529
-        if (target == NULL) {
bd1529
-                log_debug("no reference left, remove '%s'", slink);
bd1529
-                if (unlink(slink) == 0)
bd1529
-                        rmdir_parents(slink, "/");
bd1529
-        } else {
bd1529
-                log_debug("creating link '%s' to '%s'", slink, target);
bd1529
-                node_symlink(dev, target, slink);
bd1529
-        }
bd1529
+        /* If the database entry is not written yet we will just do one iteration and possibly wrong symlink
bd1529
+         * will be fixed in the second invocation. */
bd1529
+        (void) sd_device_get_is_initialized(dev, &r);
bd1529
+        retries = r > 0 ? LINK_UPDATE_MAX_RETRIES : 1;
bd1529
 
bd1529
-        if (add) {
bd1529
-                int err;
bd1529
+        for (i = 0; i < retries; i++) {
bd1529
+                _cleanup_free_ char *target = NULL;
bd1529
+                struct stat st1 = {}, st2 = {};
bd1529
 
bd1529
-                do {
bd1529
-                        int fd;
bd1529
+                r = stat(dirname, &st1;;
bd1529
+                if (r < 0 && errno != ENOENT)
bd1529
+                        return -errno;
bd1529
 
bd1529
-                        err = mkdir_parents(filename, 0755);
bd1529
-                        if (!IN_SET(err, 0, -ENOENT))
bd1529
+                r = link_find_prioritized(dev, add, dirname, &target);
bd1529
+                if (r == -ENOENT) {
bd1529
+                        log_debug("No reference left, removing '%s'", slink);
bd1529
+                        if (unlink(slink) == 0)
bd1529
+                                (void) rmdir_parents(slink, "/");
bd1529
+
bd1529
+                        break;
bd1529
+                } else if (r < 0)
bd1529
+                        return log_error_errno(r, "Failed to determine highest priority symlink: %m");
bd1529
+
bd1529
+                r = node_symlink(dev, target, slink);
bd1529
+                if (r < 0) {
bd1529
+                        (void) unlink(filename);
bd1529
+                        break;
bd1529
+                } else if (r == 1)
bd1529
+                        /* We have replaced already existing symlink, possibly there is some other device trying
bd1529
+                         * to claim the same symlink. Let's do one more iteration to give us a chance to fix
bd1529
+                         * the error if other device actually claims the symlink with higher priority. */
bd1529
+                        continue;
bd1529
+
bd1529
+               /* Skip the second stat() if the first failed, stat_inode_unmodified() would return false regardless. */
bd1529
+                if ((st1.st_mode & S_IFMT) != 0) {
bd1529
+                        r = stat(dirname, &st2;;
bd1529
+                        if (r < 0 && errno != ENOENT)
bd1529
+                                return -errno;
bd1529
+
bd1529
+                        if (stat_inode_unmodified(&st1, &st2))
bd1529
                                 break;
bd1529
-                        fd = open(filename, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC|O_NOFOLLOW, 0444);
bd1529
-                        if (fd >= 0)
bd1529
-                                close(fd);
bd1529
-                        else
bd1529
-                                err = -errno;
bd1529
-                } while (err == -ENOENT);
bd1529
+                }
bd1529
         }
bd1529
+
bd1529
+        return i < LINK_UPDATE_MAX_RETRIES ? 0 : -ELOOP;
bd1529
 }
bd1529
 
bd1529
 void udev_node_update_old_links(struct udev_device *dev, struct udev_device *dev_old) {
bd1529
@@ -233,7 +327,7 @@ void udev_node_update_old_links(struct udev_device *dev, struct udev_device *dev
bd1529
 
bd1529
                 log_debug("update old name, '%s' no longer belonging to '%s'",
bd1529
                      name, udev_device_get_devpath(dev));
bd1529
-                link_update(dev, name, false);
bd1529
+                link_update(dev->device, name, false);
bd1529
         }
bd1529
 }
bd1529
 
bd1529
@@ -338,11 +432,16 @@ void udev_node_add(struct udev_device *dev, bool apply,
bd1529
         xsprintf_dev_num_path(filename,
bd1529
                               streq(udev_device_get_subsystem(dev), "block") ? "block" : "char",
bd1529
                               udev_device_get_devnum(dev));
bd1529
-        node_symlink(dev, udev_device_get_devnode(dev), filename);
bd1529
+        node_symlink(dev->device, udev_device_get_devnode(dev), filename);
bd1529
 
bd1529
         /* create/update symlinks, add symlinks to name index */
bd1529
-        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev))
bd1529
-                        link_update(dev, udev_list_entry_get_name(list_entry), true);
bd1529
+        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev)) {
bd1529
+                int r;
bd1529
+
bd1529
+                r = link_update(dev->device, udev_list_entry_get_name(list_entry), true);
bd1529
+                if (r < 0)
bd1529
+                        log_info_errno(r, "Failed to update device symlinks: %m");
bd1529
+        }
bd1529
 }
bd1529
 
bd1529
 void udev_node_remove(struct udev_device *dev) {
bd1529
@@ -350,8 +449,13 @@ void udev_node_remove(struct udev_device *dev) {
bd1529
         char filename[DEV_NUM_PATH_MAX];
bd1529
 
bd1529
         /* remove/update symlinks, remove symlinks from name index */
bd1529
-        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev))
bd1529
-                link_update(dev, udev_list_entry_get_name(list_entry), false);
bd1529
+        udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev)) {
bd1529
+                int r;
bd1529
+
bd1529
+                r = link_update(dev->device, udev_list_entry_get_name(list_entry), false);
bd1529
+                if (r < 0)
bd1529
+                        log_info_errno(r, "Failed to update device symlinks: %m");
bd1529
+        }
bd1529
 
bd1529
         /* remove /dev/{block,char}/$major:$minor */
bd1529
         xsprintf_dev_num_path(filename,