dd65c9
From 41e91ccdf9fa3097d7b90718cc83e743f4dc8d6b Mon Sep 17 00:00:00 2001
dd65c9
From: Jan Rybar <jrybar@redhat.com>
dd65c9
Date: Thu, 17 Aug 2017 18:01:42 +0200
dd65c9
Subject: [PATCH] nspawn: new option to start as PID2
dd65c9
dd65c9
Cherry-picked from: 7732f92
dd65c9
Resolves: #1417387
dd65c9
---
dd65c9
 Makefile.am                   |   2 +
23b3cf
 man/systemd-nspawn.xml        |  65 +++++++++--
23b3cf
 src/nspawn/nspawn-stub-pid1.c | 196 ++++++++++++++++++++++++++++++++++
23b3cf
 src/nspawn/nspawn-stub-pid1.h |  22 ++++
23b3cf
 src/nspawn/nspawn.c           |  56 ++++++++--
dd65c9
 5 files changed, 328 insertions(+), 13 deletions(-)
dd65c9
 create mode 100644 src/nspawn/nspawn-stub-pid1.c
dd65c9
 create mode 100644 src/nspawn/nspawn-stub-pid1.h
dd65c9
dd65c9
diff --git a/Makefile.am b/Makefile.am
c62b8e
index 7c58fd0504..0e2f8d561c 100644
dd65c9
--- a/Makefile.am
dd65c9
+++ b/Makefile.am
dd65c9
@@ -2658,6 +2658,8 @@ systemd_cgtop_LDADD = \
dd65c9
 # ------------------------------------------------------------------------------
dd65c9
 systemd_nspawn_SOURCES = \
dd65c9
 	src/nspawn/nspawn.c \
dd65c9
+	src/nspawn/nspawn-stub-pid1.c \
dd65c9
+	src/nspawn/nspawn-stub-pid1.h \
dd65c9
 	src/core/mount-setup.c \
dd65c9
 	src/core/mount-setup.h \
dd65c9
 	src/core/loopback-setup.c \
dd65c9
diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml
c62b8e
index cbd44d4aba..d0eddaacc3 100644
dd65c9
--- a/man/systemd-nspawn.xml
dd65c9
+++ b/man/systemd-nspawn.xml
dd65c9
@@ -241,16 +241,69 @@
dd65c9
         <option>--ephemeral</option>.</para></listitem>
dd65c9
       </varlistentry>
dd65c9
 
dd65c9
+      <varlistentry>
dd65c9
+      <term><option>-a</option></term>
dd65c9
+        <term><option>--as-pid2</option></term>
dd65c9
+
dd65c9
+        <listitem><para>Invoke the shell or specified program as process ID (PID) 2 instead of PID 1 (init). By
dd65c9
+        default, if neither this option nor <option>--boot</option> is used, the selected binary is run as process with
dd65c9
+        PID 1, a mode only suitable for programs that are aware of the special semantics that the process with PID 1
dd65c9
+        has on UNIX. For example, it needs to reap all processes reparented to it, and should implement
dd65c9
+        <command>sysvinit</command> compatible signal handling (specifically: it needs to reboot on SIGINT, reexecute
dd65c9
+        on SIGTERM, reload configuration on SIGHUP, and so on). With <option>--as-pid2</option> a minimal stub init
dd65c9
+        process is run as PID 1 and the selected binary is executed as PID 2 (and hence does not need to implement any
dd65c9
+        special semantics). The stub init process will reap processes as necessary and react appropriately to
dd65c9
+        signals. It is recommended to use this mode to invoke arbitrary commands in containers, unless they have been
dd65c9
+        modified to run correctly as PID 1. Or in other words: this switch should be used for pretty much all commands,
dd65c9
+        except when the command refers to an init or shell implementation, as these are generally capable of running
dd65c9
+        correctly as PID 1. This option may not be combined with <option>--boot</option> or
dd65c9
+        <option>--share-system</option>.</para>
dd65c9
+        </listitem>
dd65c9
+      </varlistentry>
dd65c9
+
dd65c9
       <varlistentry>
dd65c9
         <term><option>-b</option></term>
dd65c9
         <term><option>--boot</option></term>
dd65c9
 
dd65c9
-        <listitem><para>Automatically search for an init binary and
dd65c9
-        invoke it instead of a shell or a user supplied program. If
dd65c9
-        this option is used, arguments specified on the command line
dd65c9
-        are used as arguments for the init binary. This option may not
dd65c9
-        be combined with <option>--share-system</option>.
dd65c9
-        </para></listitem>
dd65c9
+        <listitem><para>Automatically search for an init binary and invoke it as PID 1, instead of a shell or a user
dd65c9
+        supplied program. If this option is used, arguments specified on the command line are used as arguments for the
dd65c9
+        init binary. This option may not be combined with <option>--as-pid2</option> or
dd65c9
+        <option>--share-system</option>.</para>
dd65c9
+
dd65c9
+        <para>The following table explains the different modes of invocation and their relationship to
dd65c9
+        <option>--as-pid2</option> (see above):</para>
dd65c9
+
dd65c9
+        <table>
dd65c9
+          <title>Invocation Mode</title>
dd65c9
+          <tgroup cols='2' align='left' colsep='1' rowsep='1'>
dd65c9
+            <colspec colname="switch" />
dd65c9
+            <colspec colname="explanation" />
dd65c9
+            <thead>
dd65c9
+              <row>
dd65c9
+                <entry>Switch</entry>
dd65c9
+                <entry>Explanation</entry>
dd65c9
+              </row>
dd65c9
+            </thead>
dd65c9
+            <tbody>
dd65c9
+              <row>
dd65c9
+                <entry>Neither <option>--as-pid2</option> nor <option>--boot</option> specified</entry>
dd65c9
+                <entry>The passed parameters are interpreted as command line, which is executed as PID 1 in the container.</entry>
dd65c9
+              </row>
dd65c9
+
dd65c9
+              <row>
dd65c9
+                <entry><option>--as-pid2</option> specified</entry>
dd65c9
+                <entry>The passed parameters are interpreted as command line, which is executed as PID 2 in the container. A stub init process is run as PID 1.</entry>
dd65c9
+              </row>
dd65c9
+
dd65c9
+              <row>
dd65c9
+                <entry><option>--boot</option> specified</entry>
dd65c9
+                <entry>An init binary is automatically searched for and run as PID 1 in the container. The passed parameters are used as invocation parameters for this process.</entry>
dd65c9
+              </row>
dd65c9
+
dd65c9
+            </tbody>
dd65c9
+          </tgroup>
dd65c9
+        </table>
dd65c9
+        </listitem>
dd65c9
       </varlistentry>
dd65c9
 
dd65c9
       <varlistentry>
dd65c9
diff --git a/src/nspawn/nspawn-stub-pid1.c b/src/nspawn/nspawn-stub-pid1.c
dd65c9
new file mode 100644
c62b8e
index 0000000000..11c11560c4
dd65c9
--- /dev/null
dd65c9
+++ b/src/nspawn/nspawn-stub-pid1.c
dd65c9
@@ -0,0 +1,196 @@
dd65c9
+/***
dd65c9
+  This file is part of systemd.
dd65c9
+
dd65c9
+  Copyright 2016 Lennart Poettering
dd65c9
+
dd65c9
+  systemd is free software; you can redistribute it and/or modify it
dd65c9
+  under the terms of the GNU Lesser General Public License as published by
dd65c9
+  the Free Software Foundation; either version 2.1 of the License, or
dd65c9
+  (at your option) any later version.
dd65c9
+
dd65c9
+  systemd is distributed in the hope that it will be useful, but
dd65c9
+  WITHOUT ANY WARRANTY; without even the implied warranty of
dd65c9
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
dd65c9
+  Lesser General Public License for more details.
dd65c9
+
dd65c9
+  You should have received a copy of the GNU Lesser General Public License
dd65c9
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
dd65c9
+***/
dd65c9
+
dd65c9
+#include <sys/reboot.h>
dd65c9
+#include <sys/unistd.h>
dd65c9
+#include <sys/wait.h>
dd65c9
+#include <sys/prctl.h>
dd65c9
+
dd65c9
+#include "log.h"
dd65c9
+#include "nspawn-stub-pid1.h"
dd65c9
+#include "util.h"
dd65c9
+#include "time-util.h"
dd65c9
+#include "def.h"
dd65c9
+
dd65c9
+static int reset_environ(const char *new_environment, size_t length) {
dd65c9
+        unsigned long start, end;
dd65c9
+
dd65c9
+        start = (unsigned long) new_environment;
dd65c9
+        end = start + length;
dd65c9
+
dd65c9
+        if (prctl(PR_SET_MM, PR_SET_MM_ENV_START, start, 0, 0) < 0)
dd65c9
+                return -errno;
dd65c9
+
dd65c9
+        if (prctl(PR_SET_MM, PR_SET_MM_ENV_END, end, 0, 0) < 0)
dd65c9
+                return -errno;
dd65c9
+
dd65c9
+        return 0;
dd65c9
+}
dd65c9
+
dd65c9
+int stub_pid1(sd_id128_t uuid) {
dd65c9
+        enum {
dd65c9
+                STATE_RUNNING,
dd65c9
+                STATE_REBOOT,
dd65c9
+                STATE_POWEROFF,
dd65c9
+        } state = STATE_RUNNING;
dd65c9
+
dd65c9
+        sigset_t fullmask, oldmask, waitmask;
dd65c9
+        usec_t quit_usec = USEC_INFINITY;
dd65c9
+        pid_t pid;
dd65c9
+        int r;
dd65c9
+
dd65c9
+        /* The new environment we set up, on the stack. */
dd65c9
+        char new_environment[] =
dd65c9
+                "container=systemd-nspawn\0"
dd65c9
+                "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
dd65c9
+
dd65c9
+        /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful
dd65c9
+         * for allowing arbitrary processes run in a container, and still have all zombies reaped. */
dd65c9
+
dd65c9
+        assert_se(sigfillset(&fullmask) >= 0);
dd65c9
+        assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0);
dd65c9
+
dd65c9
+        pid = fork();
dd65c9
+        if (pid < 0)
dd65c9
+                return log_error_errno(errno, "Failed to fork child pid: %m");
dd65c9
+
dd65c9
+        if (pid == 0) {
dd65c9
+                /* Return in the child */
dd65c9
+                assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0);
dd65c9
+                setsid();
dd65c9
+                return 0;
dd65c9
+        }
dd65c9
+
dd65c9
+        reset_all_signal_handlers();
dd65c9
+
dd65c9
+        log_close();
dd65c9
+        close_all_fds(NULL, 0);
dd65c9
+        log_open();
dd65c9
+
dd65c9
+        /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also,
dd65c9
+         * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ
dd65c9
+         * find them set. */
dd65c9
+        sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX);
dd65c9
+        reset_environ(new_environment, sizeof(new_environment));
dd65c9
+
dd65c9
+        rename_process("STUBINIT");
dd65c9
+
dd65c9
+        assert_se(sigemptyset(&waitmask) >= 0);
dd65c9
+
dd65c9
+        sigset_add_many(&waitmask,
dd65c9
+                        SIGCHLD,          /* posix: process died */
dd65c9
+                        SIGINT,           /* sysv: ctrl-alt-del */
dd65c9
+                        SIGRTMIN+3,       /* systemd: halt */
dd65c9
+                        SIGRTMIN+4,       /* systemd: poweroff */
dd65c9
+                        SIGRTMIN+5,       /* systemd: reboot */
dd65c9
+                        SIGRTMIN+6,       /* systemd: kexec */
dd65c9
+                        SIGRTMIN+13,      /* systemd: halt */
dd65c9
+                        SIGRTMIN+14,      /* systemd: poweroff */
dd65c9
+                        SIGRTMIN+15,      /* systemd: reboot */
dd65c9
+                        SIGRTMIN+16,      /* systemd: kexec */
dd65c9
+                        -1);
dd65c9
+
dd65c9
+        /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't
dd65c9
+         * support reexec/reloading in this stub process. */
dd65c9
+
dd65c9
+        for (;;) {
dd65c9
+                siginfo_t si;
dd65c9
+                usec_t current_usec;
dd65c9
+
dd65c9
+                si.si_pid = 0;
dd65c9
+                r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG);
dd65c9
+                if (r < 0) {
dd65c9
+                        r = log_error_errno(errno, "Failed to reap children: %m");
dd65c9
+                        goto finish;
dd65c9
+                }
dd65c9
+
dd65c9
+                current_usec = now(CLOCK_MONOTONIC);
dd65c9
+
dd65c9
+                if (si.si_pid == pid || current_usec >= quit_usec) {
dd65c9
+
dd65c9
+                        /* The child we started ourselves died or we reached a timeout. */
dd65c9
+
dd65c9
+                        if (state == STATE_REBOOT) { /* dispatch a queued reboot */
dd65c9
+                                (void) reboot(RB_AUTOBOOT);
dd65c9
+                                r = log_error_errno(errno, "Failed to reboot: %m");
dd65c9
+                                goto finish;
dd65c9
+
dd65c9
+                        } else if (state == STATE_POWEROFF)
dd65c9
+                                (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */
dd65c9
+
dd65c9
+                        if (si.si_pid == pid && si.si_code == CLD_EXITED)
dd65c9
+                                r = si.si_status; /* pass on exit code */
dd65c9
+                        else
dd65c9
+                                r = 255; /* signal, coredump, timeout, … */
dd65c9
+
dd65c9
+                        goto finish;
dd65c9
+                }
dd65c9
+                if (si.si_pid != 0)
dd65c9
+                        /* We reaped something. Retry until there's nothing more to reap. */
dd65c9
+                        continue;
dd65c9
+
dd65c9
+                if (quit_usec == USEC_INFINITY)
dd65c9
+                        r = sigwaitinfo(&waitmask, &si);
dd65c9
+                else {
dd65c9
+                        struct timespec ts;
dd65c9
+                        r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec));
dd65c9
+                }
dd65c9
+                if (r < 0) {
dd65c9
+                        if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */
dd65c9
+                                continue;
dd65c9
+                        if (errno == EAGAIN) /* timeout reached */
dd65c9
+                                continue;
dd65c9
+
dd65c9
+                        r = log_error_errno(errno, "Failed to wait for signal: %m");
dd65c9
+                        goto finish;
dd65c9
+                }
dd65c9
+
dd65c9
+                if (si.si_signo == SIGCHLD)
dd65c9
+                        continue; /* Let's reap this */
dd65c9
+
dd65c9
+                if (state != STATE_RUNNING)
dd65c9
+                        continue;
dd65c9
+
dd65c9
+                /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a
dd65c9
+                 * constant… */
dd65c9
+
dd65c9
+                if (si.si_signo == SIGRTMIN+3 ||
dd65c9
+                    si.si_signo == SIGRTMIN+4 ||
dd65c9
+                    si.si_signo == SIGRTMIN+13 ||
dd65c9
+                    si.si_signo == SIGRTMIN+14)
dd65c9
+
dd65c9
+                        state = STATE_POWEROFF;
dd65c9
+
dd65c9
+                else if (si.si_signo == SIGINT ||
dd65c9
+                         si.si_signo == SIGRTMIN+5 ||
dd65c9
+                         si.si_signo == SIGRTMIN+6 ||
dd65c9
+                         si.si_signo == SIGRTMIN+15 ||
dd65c9
+                         si.si_signo == SIGRTMIN+16)
dd65c9
+
dd65c9
+                        state = STATE_REBOOT;
dd65c9
+                else
dd65c9
+                        assert_not_reached("Got unexpected signal");
dd65c9
+
dd65c9
+                /* (void) kill_and_sigcont(pid, SIGTERM); */
dd65c9
+                quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
dd65c9
+        }
dd65c9
+
dd65c9
+finish:
dd65c9
+        _exit(r < 0 ? EXIT_FAILURE : r);
dd65c9
+}
dd65c9
diff --git a/src/nspawn/nspawn-stub-pid1.h b/src/nspawn/nspawn-stub-pid1.h
dd65c9
new file mode 100644
c62b8e
index 0000000000..be0f1af4cb
dd65c9
--- /dev/null
dd65c9
+++ b/src/nspawn/nspawn-stub-pid1.h
dd65c9
@@ -0,0 +1,22 @@
dd65c9
+#pragma once
dd65c9
+
dd65c9
+/***
dd65c9
+  This file is part of systemd.
dd65c9
+
dd65c9
+  Copyright 2016 Lennart Poettering
dd65c9
+
dd65c9
+  systemd is free software; you can redistribute it and/or modify it
dd65c9
+  under the terms of the GNU Lesser General Public License as published by
dd65c9
+  the Free Software Foundation; either version 2.1 of the License, or
dd65c9
+  (at your option) any later version.
dd65c9
+
dd65c9
+  systemd is distributed in the hope that it will be useful, but
dd65c9
+  WITHOUT ANY WARRANTY; without even the implied warranty of
dd65c9
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
dd65c9
+  Lesser General Public License for more details.
dd65c9
+
dd65c9
+  You should have received a copy of the GNU Lesser General Public License
dd65c9
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
dd65c9
+***/
dd65c9
+
dd65c9
+int stub_pid1(sd_id128_t uuid);
dd65c9
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
c62b8e
index d0003d3790..ea365b3f9b 100644
dd65c9
--- a/src/nspawn/nspawn.c
dd65c9
+++ b/src/nspawn/nspawn.c
dd65c9
@@ -99,6 +99,7 @@
dd65c9
 #include "in-addr-util.h"
dd65c9
 #include "fw-util.h"
dd65c9
 #include "local-addresses.h"
dd65c9
+#include "nspawn-stub-pid1.h"
dd65c9
 
dd65c9
 #ifdef HAVE_SECCOMP
dd65c9
 #include "seccomp-util.h"
dd65c9
@@ -129,6 +130,14 @@ typedef enum Volatile {
dd65c9
         VOLATILE_STATE,
dd65c9
 } Volatile;
dd65c9
 
dd65c9
+typedef enum StartMode {
dd65c9
+        START_PID1, /* Run parameters as command line as process 1 */
dd65c9
+        START_PID2, /* Use stub init process as PID 1, run parameters as command line as process 2 */
dd65c9
+        START_BOOT, /* Search for init system, pass arguments as parameters */
dd65c9
+        _START_MODE_MAX,
dd65c9
+        _START_MODE_INVALID = -1
dd65c9
+} StartMode;
dd65c9
+
dd65c9
 static char *arg_directory = NULL;
dd65c9
 static char *arg_template = NULL;
dd65c9
 static char *arg_user = NULL;
dd65c9
@@ -139,7 +148,7 @@ static const char *arg_selinux_apifs_context = NULL;
dd65c9
 static const char *arg_slice = NULL;
dd65c9
 static bool arg_private_network = false;
dd65c9
 static bool arg_read_only = false;
dd65c9
-static bool arg_boot = false;
dd65c9
+static StartMode arg_start_mode = START_PID1;
dd65c9
 static bool arg_ephemeral = false;
dd65c9
 static LinkJournal arg_link_journal = LINK_AUTO;
dd65c9
 static bool arg_link_journal_try = false;
dd65c9
@@ -200,6 +209,7 @@ static void help(void) {
dd65c9
                "  -x --ephemeral            Run container with snapshot of root directory, and\n"
dd65c9
                "                            remove it after exit\n"
dd65c9
                "  -i --image=PATH           File system device or disk image for the container\n"
dd65c9
+               "  -a --as-pid2              Maintain a stub init as PID1, invoke binary as PID2\n"
dd65c9
                "  -b --boot                 Boot up full system (i.e. invoke init)\n"
dd65c9
                "  -u --user=USER            Run the command under specified user or uid\n"
dd65c9
                "  -M --machine=NAME         Set the machine name for the container\n"
dd65c9
@@ -304,6 +314,7 @@ static int parse_argv(int argc, char *argv[]) {
dd65c9
                 { "ephemeral",             no_argument,       NULL, 'x'                   },
dd65c9
                 { "user",                  required_argument, NULL, 'u'                   },
dd65c9
                 { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK   },
dd65c9
+                { "as-pid2",               no_argument,       NULL, 'a'                   },
dd65c9
                 { "boot",                  no_argument,       NULL, 'b'                   },
dd65c9
                 { "uuid",                  required_argument, NULL, ARG_UUID              },
dd65c9
                 { "read-only",             no_argument,       NULL, ARG_READ_ONLY         },
dd65c9
@@ -340,7 +351,7 @@ static int parse_argv(int argc, char *argv[]) {
dd65c9
         assert(argc >= 0);
dd65c9
         assert(argv);
dd65c9
 
dd65c9
-        while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:xp:n", options, NULL)) >= 0)
dd65c9
+        while ((c = getopt_long(argc, argv, "+hD:u:abL:M:jS:Z:qi:xp:n", options, NULL)) >= 0)
dd65c9
 
dd65c9
                 switch (c) {
dd65c9
 
dd65c9
@@ -421,7 +432,21 @@ static int parse_argv(int argc, char *argv[]) {
dd65c9
                         break;
dd65c9
 
dd65c9
                 case 'b':
dd65c9
-                        arg_boot = true;
dd65c9
+                        if (arg_start_mode == START_PID2) {
dd65c9
+                                log_error("--boot and --as-pid2 may not be combined.");
dd65c9
+                                return -EINVAL;
dd65c9
+                        }
dd65c9
+
dd65c9
+                        arg_start_mode = START_BOOT;
dd65c9
+                        break;
dd65c9
+
dd65c9
+                case 'a':
dd65c9
+                        if (arg_start_mode == START_BOOT) {
dd65c9
+                                log_error("--boot and --as-pid2 may not be combined.");
dd65c9
+                                return -EINVAL;
dd65c9
+                        }
dd65c9
+
dd65c9
+                        arg_start_mode = START_PID2;
dd65c9
                         break;
dd65c9
 
dd65c9
                 case ARG_UUID:
dd65c9
@@ -741,7 +766,7 @@ static int parse_argv(int argc, char *argv[]) {
dd65c9
         if (arg_share_system)
dd65c9
                 arg_register = false;
dd65c9
 
dd65c9
-        if (arg_boot && arg_share_system) {
dd65c9
+        if (arg_start_mode != START_PID1 && arg_share_system) {
dd65c9
                 log_error("--boot and --share-system may not be combined.");
dd65c9
                 return -EINVAL;
dd65c9
         }
dd65c9
@@ -3586,6 +3611,10 @@ int main(int argc, char *argv[]) {
dd65c9
         log_parse_environment();
dd65c9
         log_open();
dd65c9
 
dd65c9
+        /* Make sure rename_process() in the stub init process can work */
dd65c9
+        saved_argv = argv;
dd65c9
+        saved_argc = argc;
dd65c9
+
dd65c9
         r = parse_argv(argc, argv);
dd65c9
         if (r <= 0)
dd65c9
                 goto finish;
dd65c9
@@ -3694,7 +3723,7 @@ int main(int argc, char *argv[]) {
dd65c9
                         }
dd65c9
                 }
dd65c9
 
dd65c9
-                if (arg_boot) {
dd65c9
+                if (arg_start_mode == START_BOOT) {
dd65c9
                         if (path_is_os_tree(arg_directory) <= 0) {
dd65c9
                                 log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory);
dd65c9
                                 r = -EINVAL;
dd65c9
@@ -4109,7 +4138,19 @@ int main(int argc, char *argv[]) {
dd65c9
                         if (!barrier_place_and_sync(&barrier))
dd65c9
                                 _exit(EXIT_FAILURE);
dd65c9
 
dd65c9
-                        if (arg_boot) {
dd65c9
+                        if (arg_start_mode == START_PID2) {
dd65c9
+                                r = stub_pid1(arg_uuid);
dd65c9
+                                if (r < 0)
dd65c9
+                                {
dd65c9
+                                        log_error_errno(r, "Failed to start as PID2: %m");
dd65c9
+                                        _exit(EXIT_FAILURE);
dd65c9
+                                }
dd65c9
+                        }
dd65c9
+
dd65c9
+                        log_close();
dd65c9
+                        (void) fdset_close_others(fds);
dd65c9
+
dd65c9
+                        if (arg_start_mode == START_BOOT) {
dd65c9
                                 char **a;
dd65c9
                                 size_t l;
dd65c9
 
dd65c9
@@ -4135,6 +4176,7 @@ int main(int argc, char *argv[]) {
dd65c9
                                 execle("/bin/sh", "-sh", NULL, env_use);
dd65c9
                         }
dd65c9
 
dd65c9
+                        log_open();
dd65c9
                         log_error_errno(errno, "execv() failed: %m");
dd65c9
                         _exit(EXIT_FAILURE);
dd65c9
                 }
dd65c9
@@ -4210,7 +4252,7 @@ int main(int argc, char *argv[]) {
dd65c9
                                         goto finish;
dd65c9
                                 }
dd65c9
 
dd65c9
-                                if (arg_boot) {
dd65c9
+                                if (arg_start_mode == START_BOOT) {
dd65c9
                                         /* Try to kill the init system on SIGINT or SIGTERM */
dd65c9
                                         sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, UINT32_TO_PTR(pid));
dd65c9
                                         sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, UINT32_TO_PTR(pid));