ac3a84
From 7b6a09c47f1fee035c4b42840fabf65edce12aa8 Mon Sep 17 00:00:00 2001
ac3a84
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
ac3a84
Date: Mon, 7 Nov 2022 12:40:20 +0100
ac3a84
Subject: [PATCH] pid1: skip cleanup if root is not tmpfs/ramfs
ac3a84
ac3a84
in_initrd() was really doing two things: checking if we're in the initrd, and
ac3a84
also verifying that the initrd is set up correctly. But this second check is
ac3a84
complicated, in particular it would return false for overlayfs, even with an
ac3a84
upper tmpfs layer. It also doesn't support the use case of having an initial
ac3a84
initrd with tmpfs, and then transitioning into an intermediate initrd that is
ac3a84
e.g. a DDI, i.e. a filesystem possibly with verity arranged as a disk image.
ac3a84
ac3a84
We don't need to check if we're in initrd in every program. Instead, concerns
ac3a84
are separated:
ac3a84
- in_initrd() just does a simple check for /etc/initrd-release.
ac3a84
- When doing cleanup, pid1 checks if it's on a tmpfs before starting to wipe
ac3a84
  the old root. The only case where we want to remove the old root is when
ac3a84
  we're on a plain temporary filesystem. With an overlay, we'd be creating
ac3a84
  whiteout files, which is not very useful. (*)
ac3a84
ac3a84
This should resolve https://bugzilla.redhat.com/show_bug.cgi?id=2137631
ac3a84
which is caused by systemd refusing to treat the system as an initrd because
ac3a84
overlayfs is used.
ac3a84
ac3a84
(*) I think the idea of keeping the initrd fs around for shutdown is outdated.
ac3a84
We should just have a completely separate exitrd that is unpacked when we want
ac3a84
to shut down. This way, we don't waste memory at runtime, and we also don't
ac3a84
transition to a potentially older version of systemd. But we don't have support
ac3a84
for this yet.
ac3a84
ac3a84
This replaces 0fef5b0f0bd9ded1ae7bcb3e4e4b2893e36c51a6.
ac3a84
ac3a84
(cherry picked from commit a940f507fbe1c81d6787dc0b7ce232c39818eec9)
ac3a84
ac3a84
Related: #2138081
ac3a84
---
ac3a84
 src/basic/util.c         | 19 ++++++++-----------
ac3a84
 src/shared/switch-root.c | 22 ++++++++++++----------
ac3a84
 2 files changed, 20 insertions(+), 21 deletions(-)
ac3a84
ac3a84
diff --git a/src/basic/util.c b/src/basic/util.c
ac3a84
index 981f917fab..e6aaa2dc9b 100644
ac3a84
--- a/src/basic/util.c
ac3a84
+++ b/src/basic/util.c
ac3a84
@@ -56,14 +56,8 @@ bool in_initrd(void) {
ac3a84
         if (saved_in_initrd >= 0)
ac3a84
                 return saved_in_initrd;
ac3a84
 
ac3a84
-        /* We make two checks here:
ac3a84
-         *
ac3a84
-         * 1. the flag file /etc/initrd-release must exist
ac3a84
-         * 2. the root file system must be a memory file system
ac3a84
-         *
ac3a84
-         * The second check is extra paranoia, since misdetecting an
ac3a84
-         * initrd can have bad consequences due the initrd
ac3a84
-         * emptying when transititioning to the main systemd.
ac3a84
+        /* If /etc/initrd-release exists, we're in an initrd.
ac3a84
+         * This can be overridden by setting SYSTEMD_IN_INITRD=0|1.
ac3a84
          */
ac3a84
 
ac3a84
         r = getenv_bool_secure("SYSTEMD_IN_INITRD");
ac3a84
@@ -72,9 +66,12 @@ bool in_initrd(void) {
ac3a84
 
ac3a84
         if (r >= 0)
ac3a84
                 saved_in_initrd = r > 0;
ac3a84
-        else
ac3a84
-                saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
ac3a84
-                                  path_is_temporary_fs("/") > 0;
ac3a84
+        else {
ac3a84
+                r = access("/etc/initrd-release", F_OK);
ac3a84
+                if (r < 0 && errno != ENOENT)
ac3a84
+                        log_debug_errno(r, "Failed to check if /etc/initrd-release exists, assuming it does not: %m");
ac3a84
+                saved_in_initrd = r >= 0;
ac3a84
+        }
ac3a84
 
ac3a84
         return saved_in_initrd;
ac3a84
 }
ac3a84
diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c
ac3a84
index 1a444841fa..4cad3551a6 100644
ac3a84
--- a/src/shared/switch-root.c
ac3a84
+++ b/src/shared/switch-root.c
ac3a84
@@ -32,7 +32,6 @@ int switch_root(const char *new_root,
ac3a84
 
ac3a84
         _cleanup_free_ char *resolved_old_root_after = NULL;
ac3a84
         _cleanup_close_ int old_root_fd = -1;
ac3a84
-        bool old_root_remove;
ac3a84
         int r;
ac3a84
 
ac3a84
         assert(new_root);
ac3a84
@@ -42,12 +41,16 @@ int switch_root(const char *new_root,
ac3a84
                 return 0;
ac3a84
 
ac3a84
         /* Check if we shall remove the contents of the old root */
ac3a84
-        old_root_remove = in_initrd();
ac3a84
-        if (old_root_remove) {
ac3a84
-                old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
ac3a84
-                if (old_root_fd < 0)
ac3a84
-                        return log_error_errno(errno, "Failed to open root directory: %m");
ac3a84
-        }
ac3a84
+        old_root_fd = open("/", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
ac3a84
+        if (old_root_fd < 0)
ac3a84
+                return log_error_errno(errno, "Failed to open root directory: %m");
ac3a84
+        r = fd_is_temporary_fs(old_root_fd);
ac3a84
+        if (r < 0)
ac3a84
+                return log_error_errno(r, "Failed to stat root directory: %m");
ac3a84
+        if (r > 0)
ac3a84
+                log_debug("Root directory is on tmpfs, will do cleanup later.");
ac3a84
+        else
ac3a84
+                old_root_fd = safe_close(old_root_fd);
ac3a84
 
ac3a84
         /* Determine where we shall place the old root after the transition */
ac3a84
         r = chase_symlinks(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL);
ac3a84
@@ -117,9 +120,8 @@ int switch_root(const char *new_root,
ac3a84
                 struct stat rb;
ac3a84
 
ac3a84
                 if (fstat(old_root_fd, &rb) < 0)
ac3a84
-                        log_warning_errno(errno, "Failed to stat old root directory, leaving: %m");
ac3a84
-                else
ac3a84
-                        (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */
ac3a84
+                        return log_error_errno(errno, "Failed to stat old root directory: %m");
ac3a84
+                (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */
ac3a84
         }
ac3a84
 
ac3a84
         return 0;