be0c12
From a42cf9af339f48f633fa0b17a960e1e407b7450f Mon Sep 17 00:00:00 2001
be0c12
From: Lorenz Bauer <lmb@cloudflare.com>
be0c12
Date: Mon, 4 Nov 2019 16:35:46 +0000
be0c12
Subject: [PATCH] journal: refresh cached credentials of stdout streams
be0c12
be0c12
journald assumes that getsockopt(SO_PEERCRED) correctly identifies the
be0c12
process on the remote end of the socket. However, this is incorrect
be0c12
according to man 7 socket:
be0c12
be0c12
    The returned  credentials  are  those that were in effect at the
be0c12
    time of the call to connect(2) or socketpair(2).
be0c12
be0c12
This becomes a problem when a new process inherits the stdout stream
be0c12
from a parent. First, log messages from the child process will
be0c12
be attributed to the parent. Second, the struct ucred used by journald
be0c12
becomes invalid as soon as the parent exits. Further sendmsg calls then
be0c12
fail with ENOENT. Logs for the child process then vanish from the journal.
be0c12
be0c12
Fix this by using recvmsg on the stdout stream, and refreshing the cached
be0c12
struct ucred if SCM_CREDENTIALS indicate a new process.
be0c12
be0c12
Fixes #13708
be0c12
be0c12
(cherry picked from commit 09d0b46ab61bebafe5bdc1be95ee153dfb13d6bc)
be0c12
be0c12
Resolves: #1931806
be0c12
---
be0c12
 src/journal/journald-stream.c        | 49 ++++++++++++++++++++++++++--
be0c12
 test/TEST-04-JOURNAL/test-journal.sh | 13 ++++++++
be0c12
 2 files changed, 60 insertions(+), 2 deletions(-)
be0c12
be0c12
diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c
be0c12
index 6f8a4011ff..302a82d3d7 100644
be0c12
--- a/src/journal/journald-stream.c
be0c12
+++ b/src/journal/journald-stream.c
be0c12
@@ -484,11 +484,22 @@ static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
be0c12
 }
be0c12
 
be0c12
 static int stdout_stream_process(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
be0c12
+        uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
be0c12
         StdoutStream *s = userdata;
be0c12
+        struct ucred *ucred = NULL;
be0c12
+        struct cmsghdr *cmsg;
be0c12
+        struct iovec iovec;
be0c12
         size_t limit;
be0c12
         ssize_t l;
be0c12
         int r;
be0c12
 
be0c12
+        struct msghdr msghdr = {
be0c12
+                .msg_iov = &iovec,
be0c12
+                .msg_iovlen = 1,
be0c12
+                .msg_control = buf,
be0c12
+                .msg_controllen = sizeof(buf),
be0c12
+        };
be0c12
+
be0c12
         assert(s);
be0c12
 
be0c12
         if ((revents|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
be0c12
@@ -508,20 +519,50 @@ static int stdout_stream_process(sd_event_source *es, int fd, uint32_t revents,
be0c12
          * always leave room for a terminating NUL we might need to add. */
be0c12
         limit = MIN(s->allocated - 1, s->server->line_max);
be0c12
 
be0c12
-        l = read(s->fd, s->buffer + s->length, limit - s->length);
be0c12
+        iovec = IOVEC_MAKE(s->buffer + s->length, limit - s->length);
be0c12
+
be0c12
+        l = recvmsg(s->fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
be0c12
         if (l < 0) {
be0c12
-                if (errno == EAGAIN)
be0c12
+                if (IN_SET(errno, EINTR, EAGAIN))
be0c12
                         return 0;
be0c12
 
be0c12
                 log_warning_errno(errno, "Failed to read from stream: %m");
be0c12
                 goto terminate;
be0c12
         }
be0c12
+        cmsg_close_all(&msghdr);
be0c12
 
be0c12
         if (l == 0) {
be0c12
                 stdout_stream_scan(s, true);
be0c12
                 goto terminate;
be0c12
         }
be0c12
 
be0c12
+        CMSG_FOREACH(cmsg, &msghdr)
be0c12
+                if (cmsg->cmsg_level == SOL_SOCKET &&
be0c12
+                    cmsg->cmsg_type == SCM_CREDENTIALS &&
be0c12
+                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
be0c12
+                        ucred = (struct ucred *)CMSG_DATA(cmsg);
be0c12
+                        break;
be0c12
+                }
be0c12
+
be0c12
+        /* Invalidate the context if the pid of the sender changed.
be0c12
+         * This happens when a forked process inherits stdout / stderr
be0c12
+         * from a parent. In this case getpeercred returns the ucred
be0c12
+         * of the parent, which can be invalid if the parent has exited
be0c12
+         * in the meantime.
be0c12
+         */
be0c12
+        if (ucred && ucred->pid != s->ucred.pid) {
be0c12
+                /* force out any previously half-written lines from a
be0c12
+                 * different process, before we switch to the new ucred
be0c12
+                 * structure for everything we just added */
be0c12
+                r = stdout_stream_scan(s, true);
be0c12
+                if (r < 0)
be0c12
+                        goto terminate;
be0c12
+
be0c12
+                s->ucred = *ucred;
be0c12
+                client_context_release(s->server, s->context);
be0c12
+                s->context = NULL;
be0c12
+        }
be0c12
+
be0c12
         s->length += l;
be0c12
         r = stdout_stream_scan(s, false);
be0c12
         if (r < 0)
be0c12
@@ -559,6 +600,10 @@ int stdout_stream_install(Server *s, int fd, StdoutStream **ret) {
be0c12
         if (r < 0)
be0c12
                 return log_error_errno(r, "Failed to determine peer credentials: %m");
be0c12
 
be0c12
+        r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
be0c12
+        if (r < 0)
be0c12
+                return log_error_errno(r, "SO_PASSCRED failed: %m");
be0c12
+
be0c12
         if (mac_selinux_use()) {
be0c12
                 r = getpeersec(fd, &stream->label);
be0c12
                 if (r < 0 && r != -EOPNOTSUPP)
be0c12
diff --git a/test/TEST-04-JOURNAL/test-journal.sh b/test/TEST-04-JOURNAL/test-journal.sh
be0c12
index 260cae09ab..52a6ee84d1 100755
be0c12
--- a/test/TEST-04-JOURNAL/test-journal.sh
be0c12
+++ b/test/TEST-04-JOURNAL/test-journal.sh
be0c12
@@ -63,6 +63,19 @@ grep -q '^PRIORITY=6$' /output
be0c12
 ! grep -q '^FOO=' /output
be0c12
 ! grep -q '^SYSLOG_FACILITY=' /output
be0c12
 
be0c12
+# https://github.com/systemd/systemd/issues/13708
be0c12
+ID=$(journalctl --new-id128 | sed -n 2p)
be0c12
+systemd-cat -t "$ID" bash -c 'echo parent; (echo child) & wait' &
be0c12
+PID=$!
be0c12
+wait %%
be0c12
+journalctl --sync
be0c12
+# We can drop this grep when https://github.com/systemd/systemd/issues/13937
be0c12
+# has a fix.
be0c12
+journalctl -b -o export -t "$ID" --output-fields=_PID | grep '^_PID=' >/output
be0c12
+[[ `grep -c . /output` -eq 2 ]]
be0c12
+grep -q "^_PID=$PID" /output
be0c12
+grep -vq "^_PID=$PID" /output
be0c12
+
be0c12
 # Don't lose streams on restart
be0c12
 systemctl start forever-print-hola
be0c12
 sleep 3