ac3a84
From 4d081a18d324e2010b6c46d468f693b1186c4275 Mon Sep 17 00:00:00 2001
ac3a84
From: Lennart Poettering <lennart@poettering.net>
ac3a84
Date: Wed, 30 Nov 2022 17:17:20 +0100
ac3a84
Subject: [PATCH] dissect: rework DISSECT_IMAGE_ADD_PARTITION_DEVICES +
ac3a84
 DISSECT_IMAGE_OPEN_PARTITION_DEVICES
ac3a84
ac3a84
Curently, these two flags were implied by dissect_loop_device(), but
ac3a84
that's not right, because this means systemd-gpt-auto-generator will
ac3a84
dissect the root block device with these flags set and that's not
ac3a84
desirable: the generator should not cause the partition devices to be
ac3a84
created (we don't intend to use them right-away after all, but expect
ac3a84
udev to find/probe them first, and then mount them though .mount units).
ac3a84
And there's no point in opening the partition devices, since we do not
ac3a84
intend to mount them via fds either.
ac3a84
ac3a84
Hence, rework this: instead of implying the flags, specify them
ac3a84
explicitly.
ac3a84
ac3a84
While we are at it, let's also rename the flags to make them more
ac3a84
descriptive:
ac3a84
ac3a84
DISSECT_IMAGE_MANAGE_PARTITION_DEVICES becomes
ac3a84
DISSECT_IMAGE_ADD_PARTITION_DEVICES, since that's really all this does:
ac3a84
add the partition devices via BLKPG.
ac3a84
ac3a84
DISSECT_IMAGE_OPEN_PARTITION_DEVICES becomes
ac3a84
DISSECT_IMAGE_PIN_PARTITION_DEVICES, since we not only open the devices,
ac3a84
but keep the devices open continously (i.e. we "pin" them).
ac3a84
ac3a84
Also, drop the DISSECT_IMAGE_BLOCK_DEVICE combination flag, since it is
ac3a84
misleading, i.e. it suggests it was appropriate to specify on all
ac3a84
dissected blocking devices, but that's precisely not the case, see the
ac3a84
systemd-gpt-auto-generator case. My guess is that the confusion around
ac3a84
this was actually the cause for this bug we are addressing here.
ac3a84
ac3a84
Fixes: #25528
ac3a84
(cherry picked from commit 73d88b806b92efa0738bb6bcccbf105441f6d8cb)
ac3a84
ac3a84
Related: #2138081
ac3a84
---
ac3a84
 src/core/namespace.c                        |  4 +++-
ac3a84
 src/dissect/dissect.c                       |  4 +++-
ac3a84
 src/gpt-auto-generator/gpt-auto-generator.c |  5 +++++
ac3a84
 src/nspawn/nspawn.c                         |  4 +++-
ac3a84
 src/portable/portable.c                     |  4 +++-
ac3a84
 src/shared/discover-image.c                 |  4 +++-
ac3a84
 src/shared/dissect-image.c                  | 23 +++++++++++++--------
ac3a84
 src/shared/dissect-image.h                  |  6 ++----
ac3a84
 src/sysext/sysext.c                         |  4 +++-
ac3a84
 src/test/test-loop-block.c                  |  6 +++---
ac3a84
 10 files changed, 42 insertions(+), 22 deletions(-)
ac3a84
ac3a84
diff --git a/src/core/namespace.c b/src/core/namespace.c
ac3a84
index 852be3bdde..96b05303eb 100644
ac3a84
--- a/src/core/namespace.c
ac3a84
+++ b/src/core/namespace.c
ac3a84
@@ -2051,7 +2051,9 @@ int setup_namespace(
ac3a84
                 DISSECT_IMAGE_RELAX_VAR_CHECK |
ac3a84
                 DISSECT_IMAGE_FSCK |
ac3a84
                 DISSECT_IMAGE_USR_NO_ROOT |
ac3a84
-                DISSECT_IMAGE_GROWFS;
ac3a84
+                DISSECT_IMAGE_GROWFS |
ac3a84
+                DISSECT_IMAGE_ADD_PARTITION_DEVICES |
ac3a84
+                DISSECT_IMAGE_PIN_PARTITION_DEVICES;
ac3a84
         size_t n_mounts;
ac3a84
         int r;
ac3a84
 
ac3a84
diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c
ac3a84
index c465115fc7..c1d731dc82 100644
ac3a84
--- a/src/dissect/dissect.c
ac3a84
+++ b/src/dissect/dissect.c
ac3a84
@@ -60,7 +60,9 @@ static DissectImageFlags arg_flags =
ac3a84
         DISSECT_IMAGE_RELAX_VAR_CHECK |
ac3a84
         DISSECT_IMAGE_FSCK |
ac3a84
         DISSECT_IMAGE_USR_NO_ROOT |
ac3a84
-        DISSECT_IMAGE_GROWFS;
ac3a84
+        DISSECT_IMAGE_GROWFS |
ac3a84
+        DISSECT_IMAGE_PIN_PARTITION_DEVICES |
ac3a84
+        DISSECT_IMAGE_ADD_PARTITION_DEVICES;
ac3a84
 static VeritySettings arg_verity_settings = VERITY_SETTINGS_DEFAULT;
ac3a84
 static JsonFormatFlags arg_json_format_flags = JSON_FORMAT_OFF;
ac3a84
 static PagerFlags arg_pager_flags = 0;
ac3a84
diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c
ac3a84
index 0fb53bb9ea..143faa0c39 100644
ac3a84
--- a/src/gpt-auto-generator/gpt-auto-generator.c
ac3a84
+++ b/src/gpt-auto-generator/gpt-auto-generator.c
ac3a84
@@ -665,6 +665,11 @@ static int enumerate_partitions(dev_t devnum) {
ac3a84
                         NULL, NULL,
ac3a84
                         DISSECT_IMAGE_GPT_ONLY|
ac3a84
                         DISSECT_IMAGE_USR_NO_ROOT,
ac3a84
+                        /* NB! Unlike most other places where we dissect block devices we do not use
ac3a84
+                         * DISSECT_IMAGE_ADD_PARTITION_DEVICES here: we want that the kernel finds the
ac3a84
+                         * devices, and udev probes them before we mount them via .mount units much later
ac3a84
+                         * on. And thus we also don't set DISSECT_IMAGE_PIN_PARTITION_DEVICES here, because
ac3a84
+                         * we don't actually mount anything immediately. */
ac3a84
                         &m);
ac3a84
         if (r == -ENOPKG) {
ac3a84
                 log_debug_errno(r, "No suitable partition table found, ignoring.");
ac3a84
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
ac3a84
index 93d646ed56..57723aa3cf 100644
ac3a84
--- a/src/nspawn/nspawn.c
ac3a84
+++ b/src/nspawn/nspawn.c
ac3a84
@@ -5657,7 +5657,9 @@ static int run(int argc, char *argv[]) {
ac3a84
                         DISSECT_IMAGE_GENERIC_ROOT |
ac3a84
                         DISSECT_IMAGE_REQUIRE_ROOT |
ac3a84
                         DISSECT_IMAGE_RELAX_VAR_CHECK |
ac3a84
-                        DISSECT_IMAGE_USR_NO_ROOT;
ac3a84
+                        DISSECT_IMAGE_USR_NO_ROOT |
ac3a84
+                        DISSECT_IMAGE_ADD_PARTITION_DEVICES |
ac3a84
+                        DISSECT_IMAGE_PIN_PARTITION_DEVICES;
ac3a84
                 assert(arg_image);
ac3a84
                 assert(!arg_template);
ac3a84
 
ac3a84
diff --git a/src/portable/portable.c b/src/portable/portable.c
ac3a84
index fbc4497014..570751f05b 100644
ac3a84
--- a/src/portable/portable.c
ac3a84
+++ b/src/portable/portable.c
ac3a84
@@ -375,7 +375,9 @@ static int portable_extract_by_path(
ac3a84
                                 DISSECT_IMAGE_REQUIRE_ROOT |
ac3a84
                                 DISSECT_IMAGE_DISCARD_ON_LOOP |
ac3a84
                                 DISSECT_IMAGE_RELAX_VAR_CHECK |
ac3a84
-                                DISSECT_IMAGE_USR_NO_ROOT,
ac3a84
+                                DISSECT_IMAGE_USR_NO_ROOT |
ac3a84
+                                DISSECT_IMAGE_ADD_PARTITION_DEVICES |
ac3a84
+                                DISSECT_IMAGE_PIN_PARTITION_DEVICES,
ac3a84
                                 &m);
ac3a84
                 if (r == -ENOPKG)
ac3a84
                         sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Couldn't identify a suitable partition table or file system in '%s'.", path);
ac3a84
diff --git a/src/shared/discover-image.c b/src/shared/discover-image.c
ac3a84
index fad95f7f43..5d740de266 100644
ac3a84
--- a/src/shared/discover-image.c
ac3a84
+++ b/src/shared/discover-image.c
ac3a84
@@ -1203,7 +1203,9 @@ int image_read_metadata(Image *i) {
ac3a84
                                 DISSECT_IMAGE_REQUIRE_ROOT |
ac3a84
                                 DISSECT_IMAGE_RELAX_VAR_CHECK |
ac3a84
                                 DISSECT_IMAGE_READ_ONLY |
ac3a84
-                                DISSECT_IMAGE_USR_NO_ROOT,
ac3a84
+                                DISSECT_IMAGE_USR_NO_ROOT |
ac3a84
+                                DISSECT_IMAGE_ADD_PARTITION_DEVICES |
ac3a84
+                                DISSECT_IMAGE_PIN_PARTITION_DEVICES,
ac3a84
                                 &m);
ac3a84
                 if (r < 0)
ac3a84
                         return r;
ac3a84
diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c
ac3a84
index 7676636723..462ee4b3e8 100644
ac3a84
--- a/src/shared/dissect-image.c
ac3a84
+++ b/src/shared/dissect-image.c
ac3a84
@@ -436,7 +436,7 @@ static int dissect_image(
ac3a84
                         const char *fstype = NULL, *options = NULL;
ac3a84
                         _cleanup_close_ int mount_node_fd = -1;
ac3a84
 
ac3a84
-                        if (FLAGS_SET(flags, DISSECT_IMAGE_OPEN_PARTITION_DEVICES)) {
ac3a84
+                        if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) {
ac3a84
                                 mount_node_fd = open_partition(devname, /* is_partition = */ false, m->loop);
ac3a84
                                 if (mount_node_fd < 0)
ac3a84
                                         return mount_node_fd;
ac3a84
@@ -505,7 +505,7 @@ static int dissect_image(
ac3a84
         if (verity && verity->data_path)
ac3a84
                 return -EBADR;
ac3a84
 
ac3a84
-        if (FLAGS_SET(flags, DISSECT_IMAGE_MANAGE_PARTITION_DEVICES)) {
ac3a84
+        if (FLAGS_SET(flags, DISSECT_IMAGE_ADD_PARTITION_DEVICES)) {
ac3a84
                 /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't
ac3a84
                  * do partition scanning. */
ac3a84
                 r = blockdev_partscan_enabled(fd);
ac3a84
@@ -574,7 +574,7 @@ static int dissect_image(
ac3a84
                  * Kernel returns EBUSY if there's already a partition by that number or an overlapping
ac3a84
                  * partition already existent. */
ac3a84
 
ac3a84
-                if (FLAGS_SET(flags, DISSECT_IMAGE_MANAGE_PARTITION_DEVICES)) {
ac3a84
+                if (FLAGS_SET(flags, DISSECT_IMAGE_ADD_PARTITION_DEVICES)) {
ac3a84
                         r = block_device_add_partition(fd, node, nr, (uint64_t) start * 512, (uint64_t) size * 512);
ac3a84
                         if (r < 0) {
ac3a84
                                 if (r != -EBUSY)
ac3a84
@@ -871,7 +871,7 @@ static int dissect_image(
ac3a84
                                         dissected_partition_done(m->partitions + designator);
ac3a84
                                 }
ac3a84
 
ac3a84
-                                if (FLAGS_SET(flags, DISSECT_IMAGE_OPEN_PARTITION_DEVICES)) {
ac3a84
+                                if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) {
ac3a84
                                         mount_node_fd = open_partition(node, /* is_partition = */ true, m->loop);
ac3a84
                                         if (mount_node_fd < 0)
ac3a84
                                                 return mount_node_fd;
ac3a84
@@ -945,7 +945,7 @@ static int dissect_image(
ac3a84
                                 if (m->partitions[PARTITION_XBOOTLDR].found)
ac3a84
                                         continue;
ac3a84
 
ac3a84
-                                if (FLAGS_SET(flags, DISSECT_IMAGE_OPEN_PARTITION_DEVICES)) {
ac3a84
+                                if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) {
ac3a84
                                         mount_node_fd = open_partition(node, /* is_partition = */ true, m->loop);
ac3a84
                                         if (mount_node_fd < 0)
ac3a84
                                                 return mount_node_fd;
ac3a84
@@ -1127,7 +1127,7 @@ static int dissect_image(
ac3a84
                         _cleanup_free_ char *o = NULL;
ac3a84
                         const char *options;
ac3a84
 
ac3a84
-                        if (FLAGS_SET(flags, DISSECT_IMAGE_OPEN_PARTITION_DEVICES)) {
ac3a84
+                        if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) {
ac3a84
                                 mount_node_fd = open_partition(generic_node, /* is_partition = */ true, m->loop);
ac3a84
                                 if (mount_node_fd < 0)
ac3a84
                                         return mount_node_fd;
ac3a84
@@ -1232,7 +1232,6 @@ int dissect_image_file(
ac3a84
         int r;
ac3a84
 
ac3a84
         assert(path);
ac3a84
-        assert((flags & DISSECT_IMAGE_BLOCK_DEVICE) == 0);
ac3a84
         assert(ret);
ac3a84
 
ac3a84
         fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
ac3a84
@@ -3036,7 +3035,7 @@ int dissect_loop_device(
ac3a84
 
ac3a84
         m->loop = loop_device_ref(loop);
ac3a84
 
ac3a84
-        r = dissect_image(m, loop->fd, loop->node, verity, mount_options, flags | DISSECT_IMAGE_BLOCK_DEVICE);
ac3a84
+        r = dissect_image(m, loop->fd, loop->node, verity, mount_options, flags);
ac3a84
         if (r < 0)
ac3a84
                 return r;
ac3a84
 
ac3a84
@@ -3199,6 +3198,10 @@ int mount_image_privately_interactively(
ac3a84
         assert(ret_directory);
ac3a84
         assert(ret_loop_device);
ac3a84
 
ac3a84
+        /* We intend to mount this right-away, hence add the partitions if needed and pin them*/
ac3a84
+        flags |= DISSECT_IMAGE_ADD_PARTITION_DEVICES |
ac3a84
+                DISSECT_IMAGE_PIN_PARTITION_DEVICES;
ac3a84
+
ac3a84
         r = verity_settings_load(&verity, image, NULL, NULL);
ac3a84
         if (r < 0)
ac3a84
                 return log_error_errno(r, "Failed to load root hash data: %m");
ac3a84
@@ -3321,7 +3324,9 @@ int verity_dissect_and_mount(
ac3a84
                 return log_debug_errno(r, "Failed to load root hash: %m");
ac3a84
 
ac3a84
         dissect_image_flags = (verity.data_path ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0) |
ac3a84
-                (relax_extension_release_check ? DISSECT_IMAGE_RELAX_SYSEXT_CHECK : 0);
ac3a84
+                (relax_extension_release_check ? DISSECT_IMAGE_RELAX_SYSEXT_CHECK : 0) |
ac3a84
+                DISSECT_IMAGE_ADD_PARTITION_DEVICES |
ac3a84
+                DISSECT_IMAGE_PIN_PARTITION_DEVICES;
ac3a84
 
ac3a84
         /* Note that we don't use loop_device_make here, as the FD is most likely O_PATH which would not be
ac3a84
          * accepted by LOOP_CONFIGURE, so just let loop_device_make_by_path reopen it as a regular FD. */
ac3a84
diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h
ac3a84
index f2278c4dfa..631d4c7a04 100644
ac3a84
--- a/src/shared/dissect-image.h
ac3a84
+++ b/src/shared/dissect-image.h
ac3a84
@@ -214,10 +214,8 @@ typedef enum DissectImageFlags {
ac3a84
                                                  DISSECT_IMAGE_MOUNT_READ_ONLY,
ac3a84
         DISSECT_IMAGE_GROWFS                   = 1 << 18, /* Grow file systems in partitions marked for that to the size of the partitions after mount */
ac3a84
         DISSECT_IMAGE_MOUNT_IDMAPPED           = 1 << 19, /* Mount mounts with kernel 5.12-style userns ID mapping, if file system type doesn't support uid=/gid= */
ac3a84
-        DISSECT_IMAGE_MANAGE_PARTITION_DEVICES = 1 << 20, /* Manage partition devices, e.g. probe each partition in more detail */
ac3a84
-        DISSECT_IMAGE_OPEN_PARTITION_DEVICES   = 1 << 21, /* Open dissected partitions and decrypted partitions */
ac3a84
-        DISSECT_IMAGE_BLOCK_DEVICE             = DISSECT_IMAGE_MANAGE_PARTITION_DEVICES |
ac3a84
-                                                 DISSECT_IMAGE_OPEN_PARTITION_DEVICES,
ac3a84
+        DISSECT_IMAGE_ADD_PARTITION_DEVICES    = 1 << 20, /* Create partition devices via BLKPG_ADD_PARTITION */
ac3a84
+        DISSECT_IMAGE_PIN_PARTITION_DEVICES    = 1 << 21, /* Open dissected partitions and decrypted partitions and pin them by fd */
ac3a84
         DISSECT_IMAGE_RELAX_SYSEXT_CHECK       = 1 << 22, /* Don't insist that the extension-release file name matches the image name */
ac3a84
 } DissectImageFlags;
ac3a84
 
ac3a84
diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c
ac3a84
index 0875099d5f..c57293b0e5 100644
ac3a84
--- a/src/sysext/sysext.c
ac3a84
+++ b/src/sysext/sysext.c
ac3a84
@@ -520,7 +520,9 @@ static int merge_subprocess(Hashmap *images, const char *workspace) {
ac3a84
                                 DISSECT_IMAGE_GENERIC_ROOT |
ac3a84
                                 DISSECT_IMAGE_REQUIRE_ROOT |
ac3a84
                                 DISSECT_IMAGE_MOUNT_ROOT_ONLY |
ac3a84
-                                DISSECT_IMAGE_USR_NO_ROOT;
ac3a84
+                                DISSECT_IMAGE_USR_NO_ROOT |
ac3a84
+                                DISSECT_IMAGE_ADD_PARTITION_DEVICES |
ac3a84
+                                DISSECT_IMAGE_PIN_PARTITION_DEVICES;
ac3a84
 
ac3a84
                         r = verity_settings_load(&verity_settings, img->path, NULL, NULL);
ac3a84
                         if (r < 0)
ac3a84
diff --git a/src/test/test-loop-block.c b/src/test/test-loop-block.c
ac3a84
index e2b97dd56f..af2a9683a4 100644
ac3a84
--- a/src/test/test-loop-block.c
ac3a84
+++ b/src/test/test-loop-block.c
ac3a84
@@ -71,7 +71,7 @@ static void* thread_func(void *ptr) {
ac3a84
 
ac3a84
                 log_notice("Acquired loop device %s, will mount on %s", loop->node, mounted);
ac3a84
 
ac3a84
-                r = dissect_loop_device(loop, NULL, NULL, DISSECT_IMAGE_READ_ONLY, &dissected);
ac3a84
+                r = dissect_loop_device(loop, NULL, NULL, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_ADD_PARTITION_DEVICES|DISSECT_IMAGE_PIN_PARTITION_DEVICES, &dissected);
ac3a84
                 if (r < 0)
ac3a84
                         log_error_errno(r, "Failed dissect loopback device %s: %m", loop->node);
ac3a84
                 assert_se(r >= 0);
ac3a84
@@ -220,7 +220,7 @@ static int run(int argc, char *argv[]) {
ac3a84
         assert_se(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, LO_FLAGS_PARTSCAN, LOCK_EX, &loop) >= 0);
ac3a84
 
ac3a84
 #if HAVE_BLKID
ac3a84
-        assert_se(dissect_loop_device(loop, NULL, NULL, 0, &dissected) >= 0);
ac3a84
+        assert_se(dissect_loop_device(loop, NULL, NULL, DISSECT_IMAGE_ADD_PARTITION_DEVICES|DISSECT_IMAGE_PIN_PARTITION_DEVICES, &dissected) >= 0);
ac3a84
         verify_dissected_image(dissected);
ac3a84
 
ac3a84
         FOREACH_STRING(fs, "vfat", "ext4") {
ac3a84
@@ -246,7 +246,7 @@ static int run(int argc, char *argv[]) {
ac3a84
         assert_se(make_filesystem(dissected->partitions[PARTITION_HOME].node, "ext4", "home", NULL, id, true) >= 0);
ac3a84
 
ac3a84
         dissected = dissected_image_unref(dissected);
ac3a84
-        assert_se(dissect_loop_device(loop, NULL, NULL, 0, &dissected) >= 0);
ac3a84
+        assert_se(dissect_loop_device(loop, NULL, NULL, DISSECT_IMAGE_ADD_PARTITION_DEVICES|DISSECT_IMAGE_PIN_PARTITION_DEVICES, &dissected) >= 0);
ac3a84
         verify_dissected_image(dissected);
ac3a84
 
ac3a84
         assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);