1abbee
From 6d1ef1fb841a0b3b4c53b560892f3570b3379dc9 Mon Sep 17 00:00:00 2001
1abbee
From: Lennart Poettering <lennart@poettering.net>
1abbee
Date: Wed, 10 Jun 2015 19:24:58 +0200
1abbee
Subject: [PATCH] journald: don't employ inner loop for reading from incoming
1abbee
 sockets
1abbee
1abbee
Otherwise, if the socket is constantly busy we will never return to the
1abbee
event loop, but we really need to dispatch other (possibly more
1abbee
high-priority) events too. Hence, return after dispatching one message
1abbee
to the event handler, and rely on the event loop calling us back
1abbee
right away.
1abbee
1abbee
Fixes #125
1abbee
1abbee
Related: #1318994
1abbee
Cherry-picked from: a315ac4e076c4ce7ce3e5c95792cf916d5e918c5
1abbee
---
1abbee
 src/journal/journald-server.c | 204 +++++++++++++++++++++---------------------
1abbee
 1 file changed, 100 insertions(+), 104 deletions(-)
1abbee
1abbee
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
181b3f
index 1eb1394d1..275224dc9 100644
1abbee
--- a/src/journal/journald-server.c
1abbee
+++ b/src/journal/journald-server.c
1abbee
@@ -1103,6 +1103,42 @@ finish:
1abbee
 
1abbee
 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1abbee
         Server *s = userdata;
1abbee
+        struct ucred *ucred = NULL;
1abbee
+        struct timeval *tv = NULL;
1abbee
+        struct cmsghdr *cmsg;
1abbee
+        char *label = NULL;
1abbee
+        size_t label_len = 0, m;
1abbee
+        struct iovec iovec;
1abbee
+        ssize_t n;
1abbee
+        int *fds = NULL, v = 0;
1abbee
+        unsigned n_fds = 0;
1abbee
+
1abbee
+        union {
1abbee
+                struct cmsghdr cmsghdr;
1abbee
+
1abbee
+                /* We use NAME_MAX space for the SELinux label
1abbee
+                 * here. The kernel currently enforces no
1abbee
+                 * limit, but according to suggestions from
1abbee
+                 * the SELinux people this will change and it
1abbee
+                 * will probably be identical to NAME_MAX. For
1abbee
+                 * now we use that, but this should be updated
1abbee
+                 * one day when the final limit is known. */
1abbee
+                uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1abbee
+                            CMSG_SPACE(sizeof(struct timeval)) +
1abbee
+                            CMSG_SPACE(sizeof(int)) + /* fd */
1abbee
+                            CMSG_SPACE(NAME_MAX)]; /* selinux label */
1abbee
+        } control = {};
1abbee
+
1abbee
+        union sockaddr_union sa = {};
1abbee
+
1abbee
+        struct msghdr msghdr = {
1abbee
+                .msg_iov = &iovec,
1abbee
+                .msg_iovlen = 1,
1abbee
+                .msg_control = &control,
1abbee
+                .msg_controllen = sizeof(control),
1abbee
+                .msg_name = &sa,
1abbee
+                .msg_namelen = sizeof(sa),
1abbee
+        };
1abbee
 
1abbee
         assert(s);
1abbee
         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1abbee
@@ -1112,119 +1148,79 @@ int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void
1abbee
                 return -EIO;
1abbee
         }
1abbee
 
1abbee
-        for (;;) {
1abbee
-                struct ucred *ucred = NULL;
1abbee
-                struct timeval *tv = NULL;
1abbee
-                struct cmsghdr *cmsg;
1abbee
-                char *label = NULL;
1abbee
-                size_t label_len = 0;
1abbee
-                struct iovec iovec;
1abbee
-
1abbee
-                union {
1abbee
-                        struct cmsghdr cmsghdr;
1abbee
-
1abbee
-                        /* We use NAME_MAX space for the SELinux label
1abbee
-                         * here. The kernel currently enforces no
1abbee
-                         * limit, but according to suggestions from
1abbee
-                         * the SELinux people this will change and it
1abbee
-                         * will probably be identical to NAME_MAX. For
1abbee
-                         * now we use that, but this should be updated
1abbee
-                         * one day when the final limit is known. */
1abbee
-                        uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1abbee
-                                    CMSG_SPACE(sizeof(struct timeval)) +
1abbee
-                                    CMSG_SPACE(sizeof(int)) + /* fd */
1abbee
-                                    CMSG_SPACE(NAME_MAX)]; /* selinux label */
1abbee
-                } control = {};
1abbee
-                union sockaddr_union sa = {};
1abbee
-                struct msghdr msghdr = {
1abbee
-                        .msg_iov = &iovec,
1abbee
-                        .msg_iovlen = 1,
1abbee
-                        .msg_control = &control,
1abbee
-                        .msg_controllen = sizeof(control),
1abbee
-                        .msg_name = &sa,
1abbee
-                        .msg_namelen = sizeof(sa),
1abbee
-                };
1abbee
-
1abbee
-                ssize_t n;
1abbee
-                int *fds = NULL;
1abbee
-                unsigned n_fds = 0;
1abbee
-                int v = 0;
1abbee
-                size_t m;
1abbee
-
1abbee
-                /* Try to get the right size, if we can. (Not all
1abbee
-                 * sockets support SIOCINQ, hence we just try, but
1abbee
-                 * don't rely on it. */
1abbee
-                (void) ioctl(fd, SIOCINQ, &v);
1abbee
-
1abbee
-                /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1abbee
-                m = PAGE_ALIGN(MAX3((size_t) v + 1,
1abbee
-                                    (size_t) LINE_MAX,
1abbee
-                                    ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1abbee
-
1abbee
-                if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1abbee
-                        return log_oom();
1abbee
-
1abbee
-                iovec.iov_base = s->buffer;
1abbee
-                iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1abbee
-
1abbee
-                n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1abbee
-                if (n < 0) {
1abbee
-                        if (errno == EINTR || errno == EAGAIN)
1abbee
-                                return 0;
1abbee
-
1abbee
-                        log_error_errno(errno, "recvmsg() failed: %m");
1abbee
-                        return -errno;
1abbee
-                }
1abbee
+        /* Try to get the right size, if we can. (Not all
1abbee
+         * sockets support SIOCINQ, hence we just try, but
1abbee
+         * don't rely on it. */
1abbee
+        (void) ioctl(fd, SIOCINQ, &v);
1abbee
 
1abbee
-                CMSG_FOREACH(cmsg, &msghdr) {
1abbee
-
1abbee
-                        if (cmsg->cmsg_level == SOL_SOCKET &&
1abbee
-                            cmsg->cmsg_type == SCM_CREDENTIALS &&
1abbee
-                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1abbee
-                                ucred = (struct ucred*) CMSG_DATA(cmsg);
1abbee
-                        else if (cmsg->cmsg_level == SOL_SOCKET &&
1abbee
-                                 cmsg->cmsg_type == SCM_SECURITY) {
1abbee
-                                label = (char*) CMSG_DATA(cmsg);
1abbee
-                                label_len = cmsg->cmsg_len - CMSG_LEN(0);
1abbee
-                        } else if (cmsg->cmsg_level == SOL_SOCKET &&
1abbee
-                                   cmsg->cmsg_type == SO_TIMESTAMP &&
1abbee
-                                   cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1abbee
-                                tv = (struct timeval*) CMSG_DATA(cmsg);
1abbee
-                        else if (cmsg->cmsg_level == SOL_SOCKET &&
1abbee
-                                 cmsg->cmsg_type == SCM_RIGHTS) {
1abbee
-                                fds = (int*) CMSG_DATA(cmsg);
1abbee
-                                n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1abbee
-                        }
1abbee
-                }
1abbee
+        /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1abbee
+        m = PAGE_ALIGN(MAX3((size_t) v + 1,
1abbee
+                            (size_t) LINE_MAX,
1abbee
+                            ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1abbee
 
1abbee
-                /* And a trailing NUL, just in case */
1abbee
-                s->buffer[n] = 0;
1abbee
+        if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1abbee
+                return log_oom();
1abbee
 
1abbee
-                if (fd == s->syslog_fd) {
1abbee
-                        if (n > 0 && n_fds == 0)
1abbee
-                                server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1abbee
-                        else if (n_fds > 0)
1abbee
-                                log_warning("Got file descriptors via syslog socket. Ignoring.");
1abbee
+        iovec.iov_base = s->buffer;
1abbee
+        iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1abbee
 
1abbee
-                } else if (fd == s->native_fd) {
1abbee
-                        if (n > 0 && n_fds == 0)
1abbee
-                                server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1abbee
-                        else if (n == 0 && n_fds == 1)
1abbee
-                                server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1abbee
-                        else if (n_fds > 0)
1abbee
-                                log_warning("Got too many file descriptors via native socket. Ignoring.");
1abbee
+        n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1abbee
+        if (n < 0) {
1abbee
+                if (errno == EINTR || errno == EAGAIN)
1abbee
+                        return 0;
1abbee
 
1abbee
-                } else {
1abbee
-                        assert(fd == s->audit_fd);
1abbee
+                return log_error_errno(errno, "recvmsg() failed: %m");
1abbee
+        }
1abbee
 
1abbee
-                        if (n > 0 && n_fds == 0)
1abbee
-                                server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1abbee
-                        else if (n_fds > 0)
1abbee
-                                log_warning("Got file descriptors via audit socket. Ignoring.");
1abbee
+        CMSG_FOREACH(cmsg, &msghdr) {
1abbee
+
1abbee
+                if (cmsg->cmsg_level == SOL_SOCKET &&
1abbee
+                    cmsg->cmsg_type == SCM_CREDENTIALS &&
1abbee
+                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1abbee
+                        ucred = (struct ucred*) CMSG_DATA(cmsg);
1abbee
+                else if (cmsg->cmsg_level == SOL_SOCKET &&
1abbee
+                         cmsg->cmsg_type == SCM_SECURITY) {
1abbee
+                        label = (char*) CMSG_DATA(cmsg);
1abbee
+                        label_len = cmsg->cmsg_len - CMSG_LEN(0);
1abbee
+                } else if (cmsg->cmsg_level == SOL_SOCKET &&
1abbee
+                           cmsg->cmsg_type == SO_TIMESTAMP &&
1abbee
+                           cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1abbee
+                        tv = (struct timeval*) CMSG_DATA(cmsg);
1abbee
+                else if (cmsg->cmsg_level == SOL_SOCKET &&
1abbee
+                         cmsg->cmsg_type == SCM_RIGHTS) {
1abbee
+                        fds = (int*) CMSG_DATA(cmsg);
1abbee
+                        n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1abbee
                 }
1abbee
+        }
1abbee
+
1abbee
+        /* And a trailing NUL, just in case */
1abbee
+        s->buffer[n] = 0;
1abbee
+
1abbee
+        if (fd == s->syslog_fd) {
1abbee
+                if (n > 0 && n_fds == 0)
1abbee
+                        server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1abbee
+                else if (n_fds > 0)
1abbee
+                        log_warning("Got file descriptors via syslog socket. Ignoring.");
1abbee
+
1abbee
+        } else if (fd == s->native_fd) {
1abbee
+                if (n > 0 && n_fds == 0)
1abbee
+                        server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1abbee
+                else if (n == 0 && n_fds == 1)
1abbee
+                        server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1abbee
+                else if (n_fds > 0)
1abbee
+                        log_warning("Got too many file descriptors via native socket. Ignoring.");
1abbee
 
1abbee
-                close_many(fds, n_fds);
1abbee
+        } else {
1abbee
+                assert(fd == s->audit_fd);
1abbee
+
1abbee
+                if (n > 0 && n_fds == 0)
1abbee
+                        server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1abbee
+                else if (n_fds > 0)
1abbee
+                        log_warning("Got file descriptors via audit socket. Ignoring.");
1abbee
         }
1abbee
+
1abbee
+        close_many(fds, n_fds);
1abbee
+        return 0;
1abbee
 }
1abbee
 
1abbee
 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {