|
|
4bff0a |
From c7861c541e49e0bf3678d9f3c9093ee819ed436a Mon Sep 17 00:00:00 2001
|
|
|
4bff0a |
From: Lennart Poettering <lennart@poettering.net>
|
|
|
4bff0a |
Date: Tue, 17 Jul 2018 11:47:14 +0200
|
|
|
4bff0a |
Subject: [PATCH] core: introduce new Type=exec service type
|
|
|
4bff0a |
|
|
|
4bff0a |
Users are often surprised that "systemd-run" command lines like
|
|
|
4bff0a |
"systemd-run -p User=idontexist /bin/true" will return successfully,
|
|
|
4bff0a |
even though the logs show that the process couldn't be invoked, as the
|
|
|
4bff0a |
user "idontexist" doesn't exist. This is because Type=simple will only
|
|
|
4bff0a |
wait until fork() succeeded before returning start-up success.
|
|
|
4bff0a |
|
|
|
4bff0a |
This patch adds a new service type Type=exec, which is very similar to
|
|
|
4bff0a |
Type=simple, but waits until the child process has completed the execve()
|
|
|
4bff0a |
before returning success. It uses a pipe that has O_CLOEXEC set for this
|
|
|
4bff0a |
logic, so that the kernel automatically sends POLLHUP on it when the
|
|
|
4bff0a |
execve() succeeded but leaves the pipe open if not. This means PID 1
|
|
|
4bff0a |
waits exactly until the execve() succeeded in the child, and not longer
|
|
|
4bff0a |
and not shorter, which is the desired functionality.
|
|
|
4bff0a |
|
|
|
4bff0a |
Making use of this new functionality, the command line
|
|
|
4bff0a |
"systemd-run -p User=idontexist -p Type=exec /bin/true" will now fail,
|
|
|
4bff0a |
as expected.
|
|
|
4bff0a |
|
|
|
4bff0a |
(cherry picked from commit 5686391b006ee82d8a4559067ad9818e3e631247)
|
|
|
4bff0a |
|
|
|
4bff0a |
Resolves: #1683334
|
|
|
4bff0a |
---
|
|
|
4bff0a |
src/core/execute.c | 89 +++++++++++++++++++++---
|
|
|
4bff0a |
src/core/execute.h | 3 +
|
|
|
4bff0a |
src/core/mount.c | 9 +--
|
|
|
4bff0a |
src/core/service.c | 167 ++++++++++++++++++++++++++++++++++++++++++---
|
|
|
4bff0a |
src/core/service.h | 4 ++
|
|
|
4bff0a |
src/core/socket.c | 9 +--
|
|
|
4bff0a |
src/core/swap.c | 1 +
|
|
|
4bff0a |
7 files changed, 254 insertions(+), 28 deletions(-)
|
|
|
4bff0a |
|
|
|
4bff0a |
diff --git a/src/core/execute.c b/src/core/execute.c
|
|
|
4bff0a |
index 7476ac51da..c62f3cf849 100644
|
|
|
4bff0a |
--- a/src/core/execute.c
|
|
|
4bff0a |
+++ b/src/core/execute.c
|
|
|
4bff0a |
@@ -2566,6 +2566,7 @@ static int close_remaining_fds(
|
|
|
4bff0a |
const DynamicCreds *dcreds,
|
|
|
4bff0a |
int user_lookup_fd,
|
|
|
4bff0a |
int socket_fd,
|
|
|
4bff0a |
+ int exec_fd,
|
|
|
4bff0a |
int *fds, size_t n_fds) {
|
|
|
4bff0a |
|
|
|
4bff0a |
size_t n_dont_close = 0;
|
|
|
4bff0a |
@@ -2582,6 +2583,8 @@ static int close_remaining_fds(
|
|
|
4bff0a |
|
|
|
4bff0a |
if (socket_fd >= 0)
|
|
|
4bff0a |
dont_close[n_dont_close++] = socket_fd;
|
|
|
4bff0a |
+ if (exec_fd >= 0)
|
|
|
4bff0a |
+ dont_close[n_dont_close++] = exec_fd;
|
|
|
4bff0a |
if (n_fds > 0) {
|
|
|
4bff0a |
memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
|
|
|
4bff0a |
n_dont_close += n_fds;
|
|
|
4bff0a |
@@ -2725,9 +2728,10 @@ static int exec_child(
|
|
|
4bff0a |
int *exit_status) {
|
|
|
4bff0a |
|
|
|
4bff0a |
_cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
|
|
|
4bff0a |
- _cleanup_free_ char *home_buffer = NULL;
|
|
|
4bff0a |
+ int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
|
|
|
4bff0a |
_cleanup_free_ gid_t *supplementary_gids = NULL;
|
|
|
4bff0a |
const char *username = NULL, *groupname = NULL;
|
|
|
4bff0a |
+ _cleanup_free_ char *home_buffer = NULL;
|
|
|
4bff0a |
const char *home = NULL, *shell = NULL;
|
|
|
4bff0a |
dev_t journal_stream_dev = 0;
|
|
|
4bff0a |
ino_t journal_stream_ino = 0;
|
|
|
4bff0a |
@@ -2747,7 +2751,6 @@ static int exec_child(
|
|
|
4bff0a |
#endif
|
|
|
4bff0a |
uid_t uid = UID_INVALID;
|
|
|
4bff0a |
gid_t gid = GID_INVALID;
|
|
|
4bff0a |
- int r, ngids = 0;
|
|
|
4bff0a |
size_t n_fds;
|
|
|
4bff0a |
ExecDirectoryType dt;
|
|
|
4bff0a |
int secure_bits;
|
|
|
4bff0a |
@@ -2791,8 +2794,8 @@ static int exec_child(
|
|
|
4bff0a |
/* In case anything used libc syslog(), close this here, too */
|
|
|
4bff0a |
closelog();
|
|
|
4bff0a |
|
|
|
4bff0a |
- n_fds = n_storage_fds + n_socket_fds;
|
|
|
4bff0a |
- r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
|
|
|
4bff0a |
+ n_fds = n_socket_fds + n_storage_fds;
|
|
|
4bff0a |
+ r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
|
|
|
4bff0a |
if (r < 0) {
|
|
|
4bff0a |
*exit_status = EXIT_FDS;
|
|
|
4bff0a |
return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
|
|
|
4bff0a |
@@ -3165,9 +3168,45 @@ static int exec_child(
|
|
|
4bff0a |
}
|
|
|
4bff0a |
|
|
|
4bff0a |
/* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
|
|
|
4bff0a |
- * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
|
|
|
4bff0a |
- * was needed to upload the policy and can now be closed as well. */
|
|
|
4bff0a |
- r = close_all_fds(fds, n_fds);
|
|
|
4bff0a |
+ * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
|
|
|
4bff0a |
+ * however if we have it as we want to keep it open until the final execve(). */
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ if (params->exec_fd >= 0) {
|
|
|
4bff0a |
+ exec_fd = params->exec_fd;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ if (exec_fd < 3 + (int) n_fds) {
|
|
|
4bff0a |
+ int moved_fd;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
|
|
|
4bff0a |
+ * process we are about to execute. */
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
|
|
|
4bff0a |
+ if (moved_fd < 0) {
|
|
|
4bff0a |
+ *exit_status = EXIT_FDS;
|
|
|
4bff0a |
+ return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ safe_close(exec_fd);
|
|
|
4bff0a |
+ exec_fd = moved_fd;
|
|
|
4bff0a |
+ } else {
|
|
|
4bff0a |
+ /* This fd should be FD_CLOEXEC already, but let's make sure. */
|
|
|
4bff0a |
+ r = fd_cloexec(exec_fd, true);
|
|
|
4bff0a |
+ if (r < 0) {
|
|
|
4bff0a |
+ *exit_status = EXIT_FDS;
|
|
|
4bff0a |
+ return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ fds_with_exec_fd = newa(int, n_fds + 1);
|
|
|
4bff0a |
+ memcpy(fds_with_exec_fd, fds, n_fds * sizeof(int));
|
|
|
4bff0a |
+ fds_with_exec_fd[n_fds] = exec_fd;
|
|
|
4bff0a |
+ n_fds_with_exec_fd = n_fds + 1;
|
|
|
4bff0a |
+ } else {
|
|
|
4bff0a |
+ fds_with_exec_fd = fds;
|
|
|
4bff0a |
+ n_fds_with_exec_fd = n_fds;
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
|
|
|
4bff0a |
if (r >= 0)
|
|
|
4bff0a |
r = shift_fds(fds, n_fds);
|
|
|
4bff0a |
if (r >= 0)
|
|
|
4bff0a |
@@ -3177,6 +3216,11 @@ static int exec_child(
|
|
|
4bff0a |
return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
|
|
|
4bff0a |
}
|
|
|
4bff0a |
|
|
|
4bff0a |
+ /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
|
|
|
4bff0a |
+ * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
|
|
|
4bff0a |
+ * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
|
|
|
4bff0a |
+ * came this far. */
|
|
|
4bff0a |
+
|
|
|
4bff0a |
secure_bits = context->secure_bits;
|
|
|
4bff0a |
|
|
|
4bff0a |
if (needs_sandboxing) {
|
|
|
4bff0a |
@@ -3407,10 +3451,35 @@ static int exec_child(
|
|
|
4bff0a |
LOG_UNIT_INVOCATION_ID(unit));
|
|
|
4bff0a |
}
|
|
|
4bff0a |
|
|
|
4bff0a |
+ if (exec_fd >= 0) {
|
|
|
4bff0a |
+ uint8_t hot = 1;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ /* We have finished with all our initializations. Let's now let the manager know that. From this point
|
|
|
4bff0a |
+ * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ if (write(exec_fd, &hot, sizeof(hot)) < 0) {
|
|
|
4bff0a |
+ *exit_status = EXIT_EXEC;
|
|
|
4bff0a |
+ return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+
|
|
|
4bff0a |
execve(command->path, final_argv, accum_env);
|
|
|
4bff0a |
+ r = -errno;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ if (exec_fd >= 0) {
|
|
|
4bff0a |
+ uint8_t hot = 0;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
|
|
|
4bff0a |
+ * that POLLHUP on it no longer means execve() succeeded. */
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ if (write(exec_fd, &hot, sizeof(hot)) < 0) {
|
|
|
4bff0a |
+ *exit_status = EXIT_EXEC;
|
|
|
4bff0a |
+ return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
|
|
|
4bff0a |
- if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
|
|
|
4bff0a |
- log_struct_errno(LOG_INFO, errno,
|
|
|
4bff0a |
+ if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
|
|
|
4bff0a |
+ log_struct_errno(LOG_INFO, r,
|
|
|
4bff0a |
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
|
|
|
4bff0a |
LOG_UNIT_ID(unit),
|
|
|
4bff0a |
LOG_UNIT_INVOCATION_ID(unit),
|
|
|
4bff0a |
@@ -3421,7 +3490,7 @@ static int exec_child(
|
|
|
4bff0a |
}
|
|
|
4bff0a |
|
|
|
4bff0a |
*exit_status = EXIT_EXEC;
|
|
|
4bff0a |
- return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
|
|
|
4bff0a |
+ return log_unit_error_errno(unit, r, "Failed to execute command: %m");
|
|
|
4bff0a |
}
|
|
|
4bff0a |
|
|
|
4bff0a |
static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
|
|
|
4bff0a |
diff --git a/src/core/execute.h b/src/core/execute.h
|
|
|
4bff0a |
index f24dbf581a..bff1634b88 100644
|
|
|
4bff0a |
--- a/src/core/execute.h
|
|
|
4bff0a |
+++ b/src/core/execute.h
|
|
|
4bff0a |
@@ -316,6 +316,9 @@ struct ExecParameters {
|
|
|
4bff0a |
int stdin_fd;
|
|
|
4bff0a |
int stdout_fd;
|
|
|
4bff0a |
int stderr_fd;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */
|
|
|
4bff0a |
+ int exec_fd;
|
|
|
4bff0a |
};
|
|
|
4bff0a |
|
|
|
4bff0a |
#include "unit.h"
|
|
|
4bff0a |
diff --git a/src/core/mount.c b/src/core/mount.c
|
|
|
4bff0a |
index 21437dad08..16229d4af1 100644
|
|
|
4bff0a |
--- a/src/core/mount.c
|
|
|
4bff0a |
+++ b/src/core/mount.c
|
|
|
4bff0a |
@@ -747,10 +747,11 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
|
|
|
4bff0a |
static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
|
|
|
4bff0a |
|
|
|
4bff0a |
ExecParameters exec_params = {
|
|
|
4bff0a |
- .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
|
|
|
4bff0a |
- .stdin_fd = -1,
|
|
|
4bff0a |
- .stdout_fd = -1,
|
|
|
4bff0a |
- .stderr_fd = -1,
|
|
|
4bff0a |
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
|
|
|
4bff0a |
+ .stdin_fd = -1,
|
|
|
4bff0a |
+ .stdout_fd = -1,
|
|
|
4bff0a |
+ .stderr_fd = -1,
|
|
|
4bff0a |
+ .exec_fd = -1,
|
|
|
4bff0a |
};
|
|
|
4bff0a |
pid_t pid;
|
|
|
4bff0a |
int r;
|
|
|
4bff0a |
diff --git a/src/core/service.c b/src/core/service.c
|
|
|
4bff0a |
index 7f8ce1b998..3eab749362 100644
|
|
|
4bff0a |
--- a/src/core/service.c
|
|
|
4bff0a |
+++ b/src/core/service.c
|
|
|
4bff0a |
@@ -79,9 +79,10 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
|
|
|
4bff0a |
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING
|
|
|
4bff0a |
};
|
|
|
4bff0a |
|
|
|
4bff0a |
-static int service_dispatch_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
|
|
|
4bff0a |
+static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
|
|
|
4bff0a |
static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
|
|
|
4bff0a |
static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata);
|
|
|
4bff0a |
+static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
|
|
|
4bff0a |
|
|
|
4bff0a |
static void service_enter_signal(Service *s, ServiceState state, ServiceResult f);
|
|
|
4bff0a |
static void service_enter_reload_by_notify(Service *s);
|
|
|
4bff0a |
@@ -389,6 +390,7 @@ static void service_done(Unit *u) {
|
|
|
4bff0a |
service_stop_watchdog(s);
|
|
|
4bff0a |
|
|
|
4bff0a |
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
|
|
|
4bff0a |
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
|
|
|
4bff0a |
|
|
|
4bff0a |
service_release_resources(u);
|
|
|
4bff0a |
}
|
|
|
4bff0a |
@@ -1066,6 +1068,9 @@ static void service_set_state(Service *s, ServiceState state) {
|
|
|
4bff0a |
!(state == SERVICE_DEAD && UNIT(s)->job))
|
|
|
4bff0a |
service_close_socket_fd(s);
|
|
|
4bff0a |
|
|
|
4bff0a |
+ if (state != SERVICE_START)
|
|
|
4bff0a |
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
|
|
|
4bff0a |
+
|
|
|
4bff0a |
if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
|
|
|
4bff0a |
service_stop_watchdog(s);
|
|
|
4bff0a |
|
|
|
4bff0a |
@@ -1296,6 +1301,63 @@ static int service_collect_fds(
|
|
|
4bff0a |
return 0;
|
|
|
4bff0a |
}
|
|
|
4bff0a |
|
|
|
4bff0a |
+static int service_allocate_exec_fd_event_source(
|
|
|
4bff0a |
+ Service *s,
|
|
|
4bff0a |
+ int fd,
|
|
|
4bff0a |
+ sd_event_source **ret_event_source) {
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
|
|
|
4bff0a |
+ int r;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ assert(s);
|
|
|
4bff0a |
+ assert(fd >= 0);
|
|
|
4bff0a |
+ assert(ret_event_source);
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ r = sd_event_add_io(UNIT(s)->manager->event, &source, fd, 0, service_dispatch_exec_io, s);
|
|
|
4bff0a |
+ if (r < 0)
|
|
|
4bff0a |
+ return log_unit_error_errno(UNIT(s), r, "Failed to allocate exec_fd event source: %m");
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ /* This is a bit lower priority than SIGCHLD, as that carries a lot more interesting failure information */
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_NORMAL-3);
|
|
|
4bff0a |
+ if (r < 0)
|
|
|
4bff0a |
+ return log_unit_error_errno(UNIT(s), r, "Failed to adjust priority of exec_fd event source: %m");
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ (void) sd_event_source_set_description(source, "service event_fd");
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ r = sd_event_source_set_io_fd_own(source, true);
|
|
|
4bff0a |
+ if (r < 0)
|
|
|
4bff0a |
+ return log_unit_error_errno(UNIT(s), r, "Failed to pass ownership of fd to event source: %m");
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ *ret_event_source = TAKE_PTR(source);
|
|
|
4bff0a |
+ return 0;
|
|
|
4bff0a |
+}
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+static int service_allocate_exec_fd(
|
|
|
4bff0a |
+ Service *s,
|
|
|
4bff0a |
+ sd_event_source **ret_event_source,
|
|
|
4bff0a |
+ int* ret_exec_fd) {
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ _cleanup_close_pair_ int p[2] = { -1, -1 };
|
|
|
4bff0a |
+ int r;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ assert(s);
|
|
|
4bff0a |
+ assert(ret_event_source);
|
|
|
4bff0a |
+ assert(ret_exec_fd);
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ if (pipe2(p, O_CLOEXEC|O_NONBLOCK) < 0)
|
|
|
4bff0a |
+ return log_unit_error_errno(UNIT(s), errno, "Failed to allocate exec_fd pipe: %m");
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ r = service_allocate_exec_fd_event_source(s, p[0], ret_event_source);
|
|
|
4bff0a |
+ if (r < 0)
|
|
|
4bff0a |
+ return r;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ p[0] = -1;
|
|
|
4bff0a |
+ *ret_exec_fd = TAKE_FD(p[1]);
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ return 0;
|
|
|
4bff0a |
+}
|
|
|
4bff0a |
+
|
|
|
4bff0a |
static bool service_exec_needs_notify_socket(Service *s, ExecFlags flags) {
|
|
|
4bff0a |
assert(s);
|
|
|
4bff0a |
|
|
|
4bff0a |
@@ -1330,7 +1392,9 @@ static int service_spawn(
|
|
|
4bff0a |
.exec_fd = -1,
|
|
|
4bff0a |
};
|
|
|
4bff0a |
_cleanup_strv_free_ char **final_env = NULL, **our_env = NULL, **fd_names = NULL;
|
|
|
4bff0a |
+ _cleanup_(sd_event_source_unrefp) sd_event_source *exec_fd_source = NULL;
|
|
|
4bff0a |
size_t n_socket_fds = 0, n_storage_fds = 0, n_env = 0;
|
|
|
4bff0a |
+ _cleanup_close_ int exec_fd = -1;
|
|
|
4bff0a |
_cleanup_free_ int *fds = NULL;
|
|
|
4bff0a |
pid_t pid;
|
|
|
4bff0a |
int r;
|
|
|
4bff0a |
@@ -1363,6 +1427,14 @@ static int service_spawn(
|
|
|
4bff0a |
log_unit_debug(UNIT(s), "Passing %zu fds to service", n_socket_fds + n_storage_fds);
|
|
|
4bff0a |
}
|
|
|
4bff0a |
|
|
|
4bff0a |
+ if (!FLAGS_SET(flags, EXEC_IS_CONTROL) && s->type == SERVICE_EXEC) {
|
|
|
4bff0a |
+ assert(!s->exec_fd_event_source);
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ r = service_allocate_exec_fd(s, &exec_fd_source, &exec_fd);
|
|
|
4bff0a |
+ if (r < 0)
|
|
|
4bff0a |
+ return r;
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+
|
|
|
4bff0a |
r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
|
|
|
4bff0a |
if (r < 0)
|
|
|
4bff0a |
return r;
|
|
|
4bff0a |
@@ -1462,6 +1534,7 @@ static int service_spawn(
|
|
|
4bff0a |
exec_params.stdin_fd = s->stdin_fd;
|
|
|
4bff0a |
exec_params.stdout_fd = s->stdout_fd;
|
|
|
4bff0a |
exec_params.stderr_fd = s->stderr_fd;
|
|
|
4bff0a |
+ exec_params.exec_fd = exec_fd;
|
|
|
4bff0a |
|
|
|
4bff0a |
r = exec_spawn(UNIT(s),
|
|
|
4bff0a |
c,
|
|
|
4bff0a |
@@ -1473,6 +1546,9 @@ static int service_spawn(
|
|
|
4bff0a |
if (r < 0)
|
|
|
4bff0a |
return r;
|
|
|
4bff0a |
|
|
|
4bff0a |
+ s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
|
|
|
4bff0a |
+ s->exec_fd_hot = false;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
r = unit_watch_pid(UNIT(s), pid);
|
|
|
4bff0a |
if (r < 0) /* FIXME: we need to do something here */
|
|
|
4bff0a |
return r;
|
|
|
4bff0a |
@@ -1984,14 +2060,12 @@ static void service_enter_start(Service *s) {
|
|
|
4bff0a |
s->control_pid = pid;
|
|
|
4bff0a |
service_set_state(s, SERVICE_START);
|
|
|
4bff0a |
|
|
|
4bff0a |
- } else if (IN_SET(s->type, SERVICE_ONESHOT, SERVICE_DBUS, SERVICE_NOTIFY)) {
|
|
|
4bff0a |
+ } else if (IN_SET(s->type, SERVICE_ONESHOT, SERVICE_DBUS, SERVICE_NOTIFY, SERVICE_EXEC)) {
|
|
|
4bff0a |
|
|
|
4bff0a |
- /* For oneshot services we wait until the start
|
|
|
4bff0a |
- * process exited, too, but it is our main process. */
|
|
|
4bff0a |
+ /* For oneshot services we wait until the start process exited, too, but it is our main process. */
|
|
|
4bff0a |
|
|
|
4bff0a |
- /* For D-Bus services we know the main pid right away,
|
|
|
4bff0a |
- * but wait for the bus name to appear on the
|
|
|
4bff0a |
- * bus. Notify services are similar. */
|
|
|
4bff0a |
+ /* For D-Bus services we know the main pid right away, but wait for the bus name to appear on the
|
|
|
4bff0a |
+ * bus. 'notify' and 'exec' services are similar. */
|
|
|
4bff0a |
|
|
|
4bff0a |
service_set_main_pid(s, pid);
|
|
|
4bff0a |
service_set_state(s, SERVICE_START);
|
|
|
4bff0a |
@@ -2444,6 +2518,13 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
|
|
|
4bff0a |
if (r < 0)
|
|
|
4bff0a |
return r;
|
|
|
4bff0a |
|
|
|
4bff0a |
+ if (s->exec_fd_event_source) {
|
|
|
4bff0a |
+ r = unit_serialize_item_fd(u, f, fds, "exec-fd", sd_event_source_get_io_fd(s->exec_fd_event_source));
|
|
|
4bff0a |
+ if (r < 0)
|
|
|
4bff0a |
+ return r;
|
|
|
4bff0a |
+ unit_serialize_item(u, f, "exec-fd-hot", yes_no(s->exec_fd_hot));
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+
|
|
|
4bff0a |
if (UNIT_ISSET(s->accept_socket)) {
|
|
|
4bff0a |
r = unit_serialize_item(u, f, "accept-socket", UNIT_DEREF(s->accept_socket)->id);
|
|
|
4bff0a |
if (r < 0)
|
|
|
4bff0a |
@@ -2777,6 +2858,18 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
|
|
|
4bff0a |
s->stderr_fd = fdset_remove(fds, fd);
|
|
|
4bff0a |
s->exec_context.stdio_as_fds = true;
|
|
|
4bff0a |
}
|
|
|
4bff0a |
+ } else if (streq(key, "exec-fd")) {
|
|
|
4bff0a |
+ int fd;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
|
|
|
4bff0a |
+ log_unit_debug(u, "Failed to parse exec-fd value: %s", value);
|
|
|
4bff0a |
+ else {
|
|
|
4bff0a |
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ fd = fdset_remove(fds, fd);
|
|
|
4bff0a |
+ if (service_allocate_exec_fd_event_source(s, fd, &s->exec_fd_event_source) < 0)
|
|
|
4bff0a |
+ safe_close(fd);
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
} else if (streq(key, "watchdog-override-usec")) {
|
|
|
4bff0a |
usec_t watchdog_override_usec;
|
|
|
4bff0a |
if (timestamp_deserialize(value, &watchdog_override_usec) < 0)
|
|
|
4bff0a |
@@ -2860,7 +2953,7 @@ static int service_watch_pid_file(Service *s) {
|
|
|
4bff0a |
|
|
|
4bff0a |
log_unit_debug(UNIT(s), "Setting watch for PID file %s", s->pid_file_pathspec->path);
|
|
|
4bff0a |
|
|
|
4bff0a |
- r = path_spec_watch(s->pid_file_pathspec, service_dispatch_io);
|
|
|
4bff0a |
+ r = path_spec_watch(s->pid_file_pathspec, service_dispatch_inotify_io);
|
|
|
4bff0a |
if (r < 0)
|
|
|
4bff0a |
goto fail;
|
|
|
4bff0a |
|
|
|
4bff0a |
@@ -2904,7 +2997,7 @@ static int service_demand_pid_file(Service *s) {
|
|
|
4bff0a |
return service_watch_pid_file(s);
|
|
|
4bff0a |
}
|
|
|
4bff0a |
|
|
|
4bff0a |
-static int service_dispatch_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
|
|
|
4bff0a |
+static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
|
|
|
4bff0a |
PathSpec *p = userdata;
|
|
|
4bff0a |
Service *s;
|
|
|
4bff0a |
|
|
|
4bff0a |
@@ -2937,6 +3030,59 @@ fail:
|
|
|
4bff0a |
return 0;
|
|
|
4bff0a |
}
|
|
|
4bff0a |
|
|
|
4bff0a |
+static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
|
|
|
4bff0a |
+ Service *s = SERVICE(userdata);
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ assert(s);
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ log_unit_debug(UNIT(s), "got exec-fd event");
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ /* If Type=exec is set, we'll consider a service started successfully the instant we invoked execve()
|
|
|
4bff0a |
+ * successfully for it. We implement this through a pipe() towards the child, which the kernel automatically
|
|
|
4bff0a |
+ * closes for us due to O_CLOEXEC on execve() in the child, which then triggers EOF on the pipe in the
|
|
|
4bff0a |
+ * parent. We need to be careful however, as there are other reasons that we might cause the child's side of
|
|
|
4bff0a |
+ * the pipe to be closed (for example, a simple exit()). To deal with that we'll ignore EOFs on the pipe unless
|
|
|
4bff0a |
+ * the child signalled us first that it is about to call the execve(). It does so by sending us a simple
|
|
|
4bff0a |
+ * non-zero byte via the pipe. We also provide the child with a way to inform us in case execve() failed: if it
|
|
|
4bff0a |
+ * sends a zero byte we'll ignore POLLHUP on the fd again. */
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ for (;;) {
|
|
|
4bff0a |
+ uint8_t x;
|
|
|
4bff0a |
+ ssize_t n;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ n = read(fd, &x, sizeof(x));
|
|
|
4bff0a |
+ if (n < 0) {
|
|
|
4bff0a |
+ if (errno == EAGAIN) /* O_NONBLOCK in effect → everything queued has now been processed. */
|
|
|
4bff0a |
+ return 0;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ return log_unit_error_errno(UNIT(s), errno, "Failed to read from exec_fd: %m");
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+ if (n == 0) { /* EOF → the event we are waiting for */
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ if (s->exec_fd_hot) { /* Did the child tell us to expect EOF now? */
|
|
|
4bff0a |
+ log_unit_debug(UNIT(s), "Got EOF on exec-fd");
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ s->exec_fd_hot = false;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ /* Nice! This is what we have been waiting for. Transition to next state. */
|
|
|
4bff0a |
+ if (s->type == SERVICE_EXEC && s->state == SERVICE_START)
|
|
|
4bff0a |
+ service_enter_start_post(s);
|
|
|
4bff0a |
+ } else
|
|
|
4bff0a |
+ log_unit_debug(UNIT(s), "Got EOF on exec-fd while it was disabled, ignoring.");
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ return 0;
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ /* A byte was read → this turns on/off the exec fd logic */
|
|
|
4bff0a |
+ assert(n == sizeof(x));
|
|
|
4bff0a |
+ s->exec_fd_hot = x;
|
|
|
4bff0a |
+ }
|
|
|
4bff0a |
+
|
|
|
4bff0a |
+ return 0;
|
|
|
4bff0a |
+}
|
|
|
4bff0a |
+
|
|
|
4bff0a |
static void service_notify_cgroup_empty_event(Unit *u) {
|
|
|
4bff0a |
Service *s = SERVICE(u);
|
|
|
4bff0a |
|
|
|
4bff0a |
@@ -3850,7 +3996,8 @@ static const char* const service_type_table[_SERVICE_TYPE_MAX] = {
|
|
|
4bff0a |
[SERVICE_ONESHOT] = "oneshot",
|
|
|
4bff0a |
[SERVICE_DBUS] = "dbus",
|
|
|
4bff0a |
[SERVICE_NOTIFY] = "notify",
|
|
|
4bff0a |
- [SERVICE_IDLE] = "idle"
|
|
|
4bff0a |
+ [SERVICE_IDLE] = "idle",
|
|
|
4bff0a |
+ [SERVICE_EXEC] = "exec",
|
|
|
4bff0a |
};
|
|
|
4bff0a |
|
|
|
4bff0a |
DEFINE_STRING_TABLE_LOOKUP(service_type, ServiceType);
|
|
|
4bff0a |
diff --git a/src/core/service.h b/src/core/service.h
|
|
|
4bff0a |
index a142b09f0d..1206e3cdda 100644
|
|
|
4bff0a |
--- a/src/core/service.h
|
|
|
4bff0a |
+++ b/src/core/service.h
|
|
|
4bff0a |
@@ -30,6 +30,7 @@ typedef enum ServiceType {
|
|
|
4bff0a |
SERVICE_DBUS, /* we fork and wait until a specific D-Bus name appears on the bus */
|
|
|
4bff0a |
SERVICE_NOTIFY, /* we fork and wait until a daemon sends us a ready message with sd_notify() */
|
|
|
4bff0a |
SERVICE_IDLE, /* much like simple, but delay exec() until all jobs are dispatched. */
|
|
|
4bff0a |
+ SERVICE_EXEC, /* we fork and wait until we execute exec() (this means our own setup is waited for) */
|
|
|
4bff0a |
_SERVICE_TYPE_MAX,
|
|
|
4bff0a |
_SERVICE_TYPE_INVALID = -1
|
|
|
4bff0a |
} ServiceType;
|
|
|
4bff0a |
@@ -165,6 +166,8 @@ struct Service {
|
|
|
4bff0a |
NotifyAccess notify_access;
|
|
|
4bff0a |
NotifyState notify_state;
|
|
|
4bff0a |
|
|
|
4bff0a |
+ sd_event_source *exec_fd_event_source;
|
|
|
4bff0a |
+
|
|
|
4bff0a |
ServiceFDStore *fd_store;
|
|
|
4bff0a |
size_t n_fd_store;
|
|
|
4bff0a |
unsigned n_fd_store_max;
|
|
|
4bff0a |
@@ -179,6 +182,7 @@ struct Service {
|
|
|
4bff0a |
|
|
|
4bff0a |
unsigned n_restarts;
|
|
|
4bff0a |
bool flush_n_restarts;
|
|
|
4bff0a |
+ bool exec_fd_hot;
|
|
|
4bff0a |
};
|
|
|
4bff0a |
|
|
|
4bff0a |
extern const UnitVTable service_vtable;
|
|
|
4bff0a |
diff --git a/src/core/socket.c b/src/core/socket.c
|
|
|
4bff0a |
index 56d32225c4..d488c64e91 100644
|
|
|
4bff0a |
--- a/src/core/socket.c
|
|
|
4bff0a |
+++ b/src/core/socket.c
|
|
|
4bff0a |
@@ -1867,10 +1867,11 @@ static int socket_coldplug(Unit *u) {
|
|
|
4bff0a |
static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
|
|
|
4bff0a |
|
|
|
4bff0a |
ExecParameters exec_params = {
|
|
|
4bff0a |
- .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
|
|
|
4bff0a |
- .stdin_fd = -1,
|
|
|
4bff0a |
- .stdout_fd = -1,
|
|
|
4bff0a |
- .stderr_fd = -1,
|
|
|
4bff0a |
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
|
|
|
4bff0a |
+ .stdin_fd = -1,
|
|
|
4bff0a |
+ .stdout_fd = -1,
|
|
|
4bff0a |
+ .stderr_fd = -1,
|
|
|
4bff0a |
+ .exec_fd = -1,
|
|
|
4bff0a |
};
|
|
|
4bff0a |
pid_t pid;
|
|
|
4bff0a |
int r;
|
|
|
4bff0a |
diff --git a/src/core/swap.c b/src/core/swap.c
|
|
|
4bff0a |
index b78b1aa266..e01e61e56d 100644
|
|
|
4bff0a |
--- a/src/core/swap.c
|
|
|
4bff0a |
+++ b/src/core/swap.c
|
|
|
4bff0a |
@@ -606,6 +606,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
|
|
|
4bff0a |
.stdin_fd = -1,
|
|
|
4bff0a |
.stdout_fd = -1,
|
|
|
4bff0a |
.stderr_fd = -1,
|
|
|
4bff0a |
+ .exec_fd = -1,
|
|
|
4bff0a |
};
|
|
|
4bff0a |
pid_t pid;
|
|
|
4bff0a |
int r;
|