From c0f32feb77768aa76d8c813471b3484c93bc2651 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 5 Jan 2018 12:20:22 +0100 Subject: [PATCH] core: be stricter when handling PID files and MAINPID sd_notify() messages Let's be more restrictive when validating PID files and MAINPID= messages: don't accept PIDs that make no sense, and if the configuration source is not trusted, don't accept out-of-cgroup PIDs. A configuratin source is considered trusted when the PID file is owned by root, or the message was received from root. This should lock things down a bit, in case service authors write out PID files from unprivileged code or use NotifyAccess=all with unprivileged code. Note that doing so was always problematic, just now it's a bit less problematic. When we open the PID file we'll now use the CHASE_SAFE chase_symlinks() logic, to ensure that we won't follow an unpriviled-owned symlink to a privileged-owned file thinking this was a valid privileged PID file, even though it really isn't. Fixes: #6632 (cherry picked from commit db256aab13d8a89d583ecd2bacf0aca87c66effc) Resolves: #1663143 --- man/systemd.service.xml | 18 ++- src/core/manager.c | 17 ++- src/core/service.c | 166 ++++++++++++++++------ src/core/unit.h | 2 +- test/TEST-20-MAINPIDGAMES/Makefile | 1 + test/TEST-20-MAINPIDGAMES/test.sh | 81 +++++++++++ test/TEST-20-MAINPIDGAMES/testsuite.sh | 189 +++++++++++++++++++++++++ test/test-functions | 2 +- 8 files changed, 418 insertions(+), 58 deletions(-) create mode 120000 test/TEST-20-MAINPIDGAMES/Makefile create mode 100755 test/TEST-20-MAINPIDGAMES/test.sh create mode 100755 test/TEST-20-MAINPIDGAMES/testsuite.sh diff --git a/man/systemd.service.xml b/man/systemd.service.xml index d147e449a6..565a783f72 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -221,16 +221,14 @@ PIDFile= - Takes an absolute file name pointing to the - PID file of this daemon. Use of this option is recommended for - services where Type= is set to - . systemd will read the PID of the - main process of the daemon after start-up of the service. - systemd will not write to the file configured here, although - it will remove the file after the service has shut down if it - still exists. - - + Takes an absolute path referring to the PID file of the service. Usage of this option is + recommended for services where Type= is set to . The service manager + will read the PID of the main process of the service from this file after start-up of the service. The service + manager will not write to the file configured here, although it will remove the file after the service has shut + down if it still exists. The PID file does not need to be owned by a privileged user, but if it is owned by an + unprivileged user additional safety restrictions are enforced: the file may not be a symlink to a file owned by + a different user (neither directly nor indirectly), and the PID file must refer to a process already belonging + to the service. diff --git a/src/core/manager.c b/src/core/manager.c index 73d6c81fdb..3bca61d0b1 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1658,11 +1658,18 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui return 0; } -static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, FDSet *fds) { +static void manager_invoke_notify_message( + Manager *m, + Unit *u, + const struct ucred *ucred, + const char *buf, + FDSet *fds) { + _cleanup_strv_free_ char **tags = NULL; assert(m); assert(u); + assert(ucred); assert(buf); tags = strv_split(buf, "\n\r"); @@ -1674,7 +1681,7 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const log_unit_debug(u->id, "Got notification message for unit %s", u->id); if (UNIT_VTABLE(u)->notify_message) - UNIT_VTABLE(u)->notify_message(u, pid, tags, fds); + UNIT_VTABLE(u)->notify_message(u, ucred, tags, fds); else if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) { _cleanup_free_ char *x = NULL, *y = NULL; @@ -1777,19 +1784,19 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t * to avoid notifying the same one multiple times. */ u1 = manager_get_unit_by_pid(m, ucred->pid); if (u1) { - manager_invoke_notify_message(m, u1, ucred->pid, buf, fds); + manager_invoke_notify_message(m, u1, ucred, buf, fds); found = true; } u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(ucred->pid)); if (u2 && u2 != u1) { - manager_invoke_notify_message(m, u2, ucred->pid, buf, fds); + manager_invoke_notify_message(m, u2, ucred, buf, fds); found = true; } u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(ucred->pid)); if (u3 && u3 != u2 && u3 != u1) { - manager_invoke_notify_message(m, u3, ucred->pid, buf, fds); + manager_invoke_notify_message(m, u3, ucred, buf, fds); found = true; } diff --git a/src/core/service.c b/src/core/service.c index fe6e2ff17c..06b39e3a5a 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -700,9 +700,45 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { } } +static int service_is_suitable_main_pid(Service *s, pid_t pid, int prio) { + Unit *owner; + + assert(s); + assert(pid > 0); + + /* Checks whether the specified PID is suitable as main PID for this service. returns negative if not, 0 if the + * PID is questionnable but should be accepted if the source of configuration is trusted. > 0 if the PID is + * good */ + + if (pid == getpid() || pid == 1) { + log_unit_full(UNIT(s)->id, prio, "New main PID "PID_FMT" is the manager, refusing.", pid); + return -EPERM; + } + + if (pid == s->control_pid) { + log_unit_full(UNIT(s)->id, prio, "New main PID "PID_FMT" is the control process, refusing.", pid); + return -EPERM; + } + + if (!pid_is_alive(pid)) { + log_unit_full(UNIT(s)->id, prio, "New main PID "PID_FMT" does not exist or is a zombie.", pid); + return -ESRCH; + } + + owner = manager_get_unit_by_pid(UNIT(s)->manager, pid); + if (owner == UNIT(s)) { + log_unit_debug(UNIT(s)->id, "New main PID "PID_FMT" belongs to service, we are happy.", pid); + return 1; /* Yay, it's definitely a good PID */ + } + + return 0; /* Hmm it's a suspicious PID, let's accept it if configuration source is trusted */ +} + static int service_load_pid_file(Service *s, bool may_warn) { + char procfs[sizeof("/proc/self/fd/") - 1 + DECIMAL_STR_MAX(int)]; _cleanup_free_ char *k = NULL; - int r; + _cleanup_close_ int fd = -1; + int r, prio; pid_t pid; assert(s); @@ -710,30 +746,47 @@ static int service_load_pid_file(Service *s, bool may_warn) { if (!s->pid_file) return -ENOENT; - r = read_one_line_file(s->pid_file, &k); - if (r < 0) { - if (may_warn) - log_unit_info(UNIT(s)->id, "PID file %s not readable (yet?) after %s.", s->pid_file, service_state_to_string(s->state)); - return r; - } + prio = may_warn ? LOG_INFO : LOG_DEBUG; + + fd = chase_symlinks(s->pid_file, NULL, CHASE_OPEN|CHASE_SAFE, NULL); + if (fd == -EPERM) + return log_unit_full(UNIT(s)->id, prio, "Permission denied while opening PID file or unsafe symlink chain: %s", s->pid_file); + if (fd < 0) + return log_unit_full(UNIT(s)->id, prio, "Can't open PID file %s (yet?) after %s: %m", s->pid_file, service_state_to_string(s->state)); + + /* Let's read the PID file now that we chased it down. But we need to convert the O_PATH fd chase_symlinks() returned us into a proper fd first. */ + xsprintf(procfs, "/proc/self/fd/%i", fd); + r = read_one_line_file(procfs, &k); + if (r < 0) + return log_unit_error_errno(UNIT(s)->id, r, "Can't convert PID files %s O_PATH file descriptor to proper file descriptor: %m", s->pid_file); r = parse_pid(k, &pid); - if (r < 0) { - if (may_warn) - log_unit_info_errno(UNIT(s)->id, r, "Failed to read PID from file %s: %m", s->pid_file); + if (r < 0) + return log_unit_full(UNIT(s)->id, prio, "Failed to parse PID from file %s: %m", s->pid_file); + + if (s->main_pid_known && pid == s->main_pid) + return 0; + + r = service_is_suitable_main_pid(s, pid, prio); + if (r < 0) return r; - } + if (r == 0) { + struct stat st; - if (!pid_is_alive(pid)) { - if (may_warn) - log_unit_info(UNIT(s)->id, "PID "PID_FMT" read from file %s does not exist or is a zombie.", pid, s->pid_file); - return -ESRCH; + /* Hmm, it's not clear if the new main PID is safe. Let's allow this if the PID file is owned by root */ + + if (fstat(fd, &st) < 0) + return log_unit_error_errno(UNIT(s)->id, errno, "Failed to fstat() PID file O_PATH fd: %m"); + + if (st.st_uid != 0) { + log_unit_error(UNIT(s)->id, "New main PID "PID_FMT" does not belong to service, and PID file is not owned by root. Refusing.", pid); + return -EPERM; + } + + log_unit_debug(UNIT(s)->id, "New main PID "PID_FMT" does not belong to service, but we'll accept it since PID file is owned by root.", pid); } if (s->main_pid_known) { - if (pid == s->main_pid) - return 0; - log_unit_debug(UNIT(s)->id, "Main PID changing: "PID_FMT" -> "PID_FMT, s->main_pid, pid); service_unwatch_main_pid(s); @@ -752,7 +805,7 @@ static int service_load_pid_file(Service *s, bool may_warn) { return r; } - return 0; + return 1; } static int service_search_main_pid(Service *s) { @@ -2584,7 +2637,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* Forking services may occasionally move to a new PID. * As long as they update the PID file before exiting the old * PID, they're fine. */ - if (service_load_pid_file(s, false) == 0) + if (service_load_pid_file(s, false) > 0) return; s->main_pid = 0; @@ -2957,42 +3010,73 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void return 0; } -static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds) { +static bool service_notify_message_authorized(Service *s, pid_t pid, char **tags, FDSet *fds) { + assert(s); + + if (s->notify_access == NOTIFY_NONE) { + log_unit_warning(UNIT(s)->id, "Got notification message from PID "PID_FMT", but reception is disabled.", pid); + return false; + } + + if (s->notify_access == NOTIFY_MAIN && pid != s->main_pid) { + if (s->main_pid != 0) + log_unit_warning(UNIT(s)->id, "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, pid, s->main_pid); + else + log_unit_warning(UNIT(s)->id, "Got notification message from PID "PID_FMT", but reception only permitted for main PID which is currently not known", pid); + + return false; + } + + return true; +} + +static void service_notify_message( + Unit *u, + const struct ucred *ucred, + char **tags, + FDSet *fds) { Service *s = SERVICE(u); - _cleanup_free_ char *cc = NULL; bool notify_dbus = false; const char *e; + int r; assert(u); + assert(ucred); - cc = strv_join(tags, ", "); - log_unit_debug(u->id, "%s: Got notification message from PID "PID_FMT" (%s)", - u->id, pid, isempty(cc) ? "n/a" : cc); + if (!service_notify_message_authorized(SERVICE(u), ucred->pid, tags, fds)) + return; if (s->notify_access == NOTIFY_NONE) { - log_unit_warning(u->id, "%s: Got notification message from PID "PID_FMT", but reception is disabled.", u->id, pid); - return; - } + _cleanup_free_ char *cc = NULL; - if (s->notify_access == NOTIFY_MAIN && pid != s->main_pid) { - if (s->main_pid != 0) - log_unit_warning(u->id, "%s: Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, u->id, pid, s->main_pid); - else - log_unit_debug(u->id, "%s: Got notification message from PID "PID_FMT", but reception only permitted for main PID which is currently not known", u->id, pid); - return; + cc = strv_join(tags, ", "); + log_unit_debug(u->id, "Got notification message from PID "PID_FMT" (%s)", ucred->pid, isempty(cc) ? "n/a" : cc); } /* Interpret MAINPID= */ e = strv_find_startswith(tags, "MAINPID="); if (e && IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD)) { - if (parse_pid(e, &pid) < 0) - log_unit_warning(u->id, "Failed to parse MAINPID= field in notification message: %s", e); - else { - log_unit_debug(u->id, "%s: got MAINPID=%s", u->id, e); + pid_t new_main_pid; - service_set_main_pid(s, pid); - unit_watch_pid(UNIT(s), pid); - notify_dbus = true; + if (parse_pid(e, &new_main_pid) < 0) + log_unit_warning(u->id, "Failed to parse MAINPID= field in notification message, ignoring: %s", e); + else if (!s->main_pid_known || new_main_pid != s->main_pid) { + + r = service_is_suitable_main_pid(s, new_main_pid, LOG_WARNING); + if (r == 0) { + /* The new main PID is a bit suspicous, which is OK if the sender is privileged. */ + + if (ucred->uid == 0) { + log_unit_debug(u->id, "New main PID "PID_FMT" does not belong to service, but we'll accept it as the request to change it came from a privileged process.", new_main_pid); + r = 1; + } else + log_unit_debug(u->id, "New main PID "PID_FMT" does not belong to service, refusing.", new_main_pid); + } + if (r > 0) { + service_set_main_pid(s, new_main_pid); + unit_watch_pid(UNIT(s), new_main_pid); + notify_dbus = true; + } } } diff --git a/src/core/unit.h b/src/core/unit.h index dfec9cea01..091ef7596e 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -376,7 +376,7 @@ struct UnitVTable { void (*notify_cgroup_empty)(Unit *u); /* Called whenever a process of this unit sends us a message */ - void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds); + void (*notify_message)(Unit *u, const struct ucred *ucred, char **tags, FDSet *fds); /* Called whenever a name this Unit registered for comes or * goes away. */ diff --git a/test/TEST-20-MAINPIDGAMES/Makefile b/test/TEST-20-MAINPIDGAMES/Makefile new file mode 120000 index 0000000000..e9f93b1104 --- /dev/null +++ b/test/TEST-20-MAINPIDGAMES/Makefile @@ -0,0 +1 @@ +../TEST-01-BASIC/Makefile \ No newline at end of file diff --git a/test/TEST-20-MAINPIDGAMES/test.sh b/test/TEST-20-MAINPIDGAMES/test.sh new file mode 100755 index 0000000000..733532b718 --- /dev/null +++ b/test/TEST-20-MAINPIDGAMES/test.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- +# ex: ts=8 sw=4 sts=4 et filetype=sh +TEST_DESCRIPTION="test changing main PID" + +. $TEST_BASE_DIR/test-functions + +check_result_qemu() { + ret=1 + mkdir -p $TESTDIR/root + mount ${LOOPDEV}p1 $TESTDIR/root + [[ -e $TESTDIR/root/testok ]] && ret=0 + [[ -f $TESTDIR/root/failed ]] && cp -a $TESTDIR/root/failed $TESTDIR + [[ -f $TESTDIR/root/var/log/journal ]] && cp -a $TESTDIR/root/var/log/journal $TESTDIR + umount $TESTDIR/root + [[ -f $TESTDIR/failed ]] && cat $TESTDIR/failed + ls -l $TESTDIR/journal/*/*.journal + test -s $TESTDIR/failed && ret=$(($ret+1)) + return $ret +} + +test_run() { + if run_qemu; then + check_result_qemu || return 1 + else + dwarn "can't run QEMU, skipping" + fi + if check_nspawn; then + run_nspawn + check_result_nspawn || return 1 + else + dwarn "can't run systemd-nspawn, skipping" + fi + return 0 +} + +test_setup() { + create_empty_image + mkdir -p $TESTDIR/root + mount ${LOOPDEV}p1 $TESTDIR/root + + ( + LOG_LEVEL=5 + eval $(udevadm info --export --query=env --name=${LOOPDEV}p2) + + setup_basic_environment + inst_binary cut + inst_binary useradd + inst /etc/login.defs + + # setup the testsuite service + cat >$initdir/etc/systemd/system/testsuite.service </dev/null + [[ $LOOPDEV ]] && losetup -d $LOOPDEV + return 0 +} + +do_test "$@" diff --git a/test/TEST-20-MAINPIDGAMES/testsuite.sh b/test/TEST-20-MAINPIDGAMES/testsuite.sh new file mode 100755 index 0000000000..d4ad63865c --- /dev/null +++ b/test/TEST-20-MAINPIDGAMES/testsuite.sh @@ -0,0 +1,189 @@ +#!/bin/bash +# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- +# ex: ts=8 sw=4 sts=4 et filetype=sh +set -ex +set -o pipefail + +systemctl_show_value() { + systemctl show "$@" | cut -d = -f 2- +} + +systemd-analyze set-log-level debug + +test `systemctl_show_value -p MainPID testsuite.service` -eq $$ + +# Start a test process inside of our own cgroup +sleep infinity & +INTERNALPID=$! +disown + +# Start a test process outside of our own cgroup +systemd-run -p User=test --unit=sleep.service /bin/sleep infinity +EXTERNALPID=`systemctl_show_value -p MainPID sleep.service` + +# Update our own main PID to the external test PID, this should work +systemd-notify MAINPID=$EXTERNALPID +test `systemctl_show_value -p MainPID testsuite.service` -eq $EXTERNALPID + +# Update our own main PID to the internal test PID, this should work, too +systemd-notify MAINPID=$INTERNALPID +test `systemctl_show_value -p MainPID testsuite.service` -eq $INTERNALPID + +# Update it back to our own PID, this should also work +systemd-notify MAINPID=$$ +test `systemctl_show_value -p MainPID testsuite.service` -eq $$ + +# Try to set it to PID 1, which it should ignore, because that's the manager +systemd-notify MAINPID=1 +test `systemctl_show_value -p MainPID testsuite.service` -eq $$ + +# Try to set it to PID 0, which is invalid and should be ignored +systemd-notify MAINPID=0 +test `systemctl_show_value -p MainPID testsuite.service` -eq $$ + +# Try to set it to a valid but non-existing PID, which should be ignored. (Note +# that we set the PID to a value well above any known /proc/sys/kernel/pid_max, +# which means we can be pretty sure it doesn't exist by coincidence) +systemd-notify MAINPID=1073741824 +test `systemctl_show_value -p MainPID testsuite.service` -eq $$ + +# Change it again to the external PID, without priviliges this time. This should be ignored, because the PID is from outside of our cgroup and we lack privileges. +systemd-notify --uid=1000 MAINPID=$EXTERNALPID +test `systemctl_show_value -p MainPID testsuite.service` -eq $$ + +# Change it again to the internal PID, without priviliges this time. This should work, as the process is on our cgroup, and that's enough even if we lack privileges. +systemd-notify --uid=1000 MAINPID=$INTERNALPID +test `systemctl_show_value -p MainPID testsuite.service` -eq $INTERNALPID + +# Update it back to our own PID, this should also work +systemd-notify --uid=1000 MAINPID=$$ +test `systemctl_show_value -p MainPID testsuite.service` -eq $$ + +cat >/tmp/mainpid.sh < /run/mainpidsh/pid +EOF +chmod +x /tmp/mainpid.sh + +cat > /etc/systemd/system/mainpidsh.service </tmp/mainpid2.sh < /run/mainpidsh2/pid +chown 1001:1001 /run/mainpidsh2/pid +EOF +chmod +x /tmp/mainpid2.sh + +cat > /etc/systemd/system/mainpidsh2.service </dev/shm/mainpid3.sh < /etc/systemd/system/mainpidsh3.service < the pid file was ignored +test `systemctl_show_value -p MainPID mainpidsh3.service` -eq 0 + +systemd-analyze set-log-level info + +echo OK > /testok + +exit 0 diff --git a/test/test-functions b/test/test-functions index 78e725d5b9..e50ce556fd 100644 --- a/test/test-functions +++ b/test/test-functions @@ -12,7 +12,7 @@ if ! ROOTLIBDIR=$(pkg-config --variable=systemdutildir systemd); then ROOTLIBDIR=/usr/lib/systemd fi -BASICTOOLS="sh bash setsid loadkeys setfont login sulogin gzip sleep echo mount umount cryptsetup date dmsetup modprobe" +BASICTOOLS="test sh bash setsid loadkeys setfont login sulogin gzip sleep echo mount umount cryptsetup date dmsetup modprobe chmod chown ln" DEBUGTOOLS="df free ls stty cat ps ln ip route dmesg dhclient mkdir cp ping dhclient strace less grep id tty touch du sort hostname" function find_qemu_bin() {