From 6d1ef1fb841a0b3b4c53b560892f3570b3379dc9 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 10 Jun 2015 19:24:58 +0200
Subject: [PATCH] journald: don't employ inner loop for reading from incoming
sockets
Otherwise, if the socket is constantly busy we will never return to the
event loop, but we really need to to dispatch other (possibly more
high-priority) events too. Hence, return after dispatching one message
to the event handler, and rely on the event loop calling us back
right-away.
Fixes #125
Related: #1318994
Cherry-picked from: a315ac4e076c4ce7ce3e5c95792cf916d5e918c5
---
src/journal/journald-server.c | 204 +++++++++++++++++++++---------------------
1 file changed, 100 insertions(+), 104 deletions(-)
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index 1eb1394..275224d 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -1103,6 +1103,42 @@ finish:
int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
Server *s = userdata;
+ struct ucred *ucred = NULL;
+ struct timeval *tv = NULL;
+ struct cmsghdr *cmsg;
+ char *label = NULL;
+ size_t label_len = 0, m;
+ struct iovec iovec;
+ ssize_t n;
+ int *fds = NULL, v = 0;
+ unsigned n_fds = 0;
+
+ union {
+ struct cmsghdr cmsghdr;
+
+ /* We use NAME_MAX space for the SELinux label
+ * here. The kernel currently enforces no
+ * limit, but according to suggestions from
+ * the SELinux people this will change and it
+ * will probably be identical to NAME_MAX. For
+ * now we use that, but this should be updated
+ * one day when the final limit is known. */
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(struct timeval)) +
+ CMSG_SPACE(sizeof(int)) + /* fd */
+ CMSG_SPACE(NAME_MAX)]; /* selinux label */
+ } control = {};
+
+ union sockaddr_union sa = {};
+
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_name = &sa,
+ .msg_namelen = sizeof(sa),
+ };
assert(s);
assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
@@ -1112,119 +1148,79 @@ int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void
return -EIO;
}
- for (;;) {
- struct ucred *ucred = NULL;
- struct timeval *tv = NULL;
- struct cmsghdr *cmsg;
- char *label = NULL;
- size_t label_len = 0;
- struct iovec iovec;
-
- union {
- struct cmsghdr cmsghdr;
-
- /* We use NAME_MAX space for the SELinux label
- * here. The kernel currently enforces no
- * limit, but according to suggestions from
- * the SELinux people this will change and it
- * will probably be identical to NAME_MAX. For
- * now we use that, but this should be updated
- * one day when the final limit is known. */
- uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
- CMSG_SPACE(sizeof(struct timeval)) +
- CMSG_SPACE(sizeof(int)) + /* fd */
- CMSG_SPACE(NAME_MAX)]; /* selinux label */
- } control = {};
- union sockaddr_union sa = {};
- struct msghdr msghdr = {
- .msg_iov = &iovec,
- .msg_iovlen = 1,
- .msg_control = &control,
- .msg_controllen = sizeof(control),
- .msg_name = &sa,
- .msg_namelen = sizeof(sa),
- };
-
- ssize_t n;
- int *fds = NULL;
- unsigned n_fds = 0;
- int v = 0;
- size_t m;
-
- /* Try to get the right size, if we can. (Not all
- * sockets support SIOCINQ, hence we just try, but
- * don't rely on it. */
- (void) ioctl(fd, SIOCINQ, &v);
-
- /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
- m = PAGE_ALIGN(MAX3((size_t) v + 1,
- (size_t) LINE_MAX,
- ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
-
- if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
- return log_oom();
-
- iovec.iov_base = s->buffer;
- iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
-
- n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
- if (n < 0) {
- if (errno == EINTR || errno == EAGAIN)
- return 0;
-
- log_error_errno(errno, "recvmsg() failed: %m");
- return -errno;
- }
+ /* Try to get the right size, if we can. (Not all
+ * sockets support SIOCINQ, hence we just try, but
+ * don't rely on it. */
+ (void) ioctl(fd, SIOCINQ, &v);
- CMSG_FOREACH(cmsg, &msghdr) {
-
- if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_CREDENTIALS &&
- cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
- ucred = (struct ucred*) CMSG_DATA(cmsg);
- else if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_SECURITY) {
- label = (char*) CMSG_DATA(cmsg);
- label_len = cmsg->cmsg_len - CMSG_LEN(0);
- } else if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SO_TIMESTAMP &&
- cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
- tv = (struct timeval*) CMSG_DATA(cmsg);
- else if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_RIGHTS) {
- fds = (int*) CMSG_DATA(cmsg);
- n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
- }
- }
+ /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
+ m = PAGE_ALIGN(MAX3((size_t) v + 1,
+ (size_t) LINE_MAX,
+ ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
- /* And a trailing NUL, just in case */
- s->buffer[n] = 0;
+ if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
+ return log_oom();
- if (fd == s->syslog_fd) {
- if (n > 0 && n_fds == 0)
- server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
- else if (n_fds > 0)
- log_warning("Got file descriptors via syslog socket. Ignoring.");
+ iovec.iov_base = s->buffer;
+ iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
- } else if (fd == s->native_fd) {
- if (n > 0 && n_fds == 0)
- server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
- else if (n == 0 && n_fds == 1)
- server_process_native_file(s, fds[0], ucred, tv, label, label_len);
- else if (n_fds > 0)
- log_warning("Got too many file descriptors via native socket. Ignoring.");
+ n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (n < 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ return 0;
- } else {
- assert(fd == s->audit_fd);
+ return log_error_errno(errno, "recvmsg() failed: %m");
+ }
- if (n > 0 && n_fds == 0)
- server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
- else if (n_fds > 0)
- log_warning("Got file descriptors via audit socket. Ignoring.");
+ CMSG_FOREACH(cmsg, &msghdr) {
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_SECURITY) {
+ label = (char*) CMSG_DATA(cmsg);
+ label_len = cmsg->cmsg_len - CMSG_LEN(0);
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMP &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
+ tv = (struct timeval*) CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ fds = (int*) CMSG_DATA(cmsg);
+ n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
}
+ }
+
+ /* And a trailing NUL, just in case */
+ s->buffer[n] = 0;
+
+ if (fd == s->syslog_fd) {
+ if (n > 0 && n_fds == 0)
+ server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
+ else if (n_fds > 0)
+ log_warning("Got file descriptors via syslog socket. Ignoring.");
+
+ } else if (fd == s->native_fd) {
+ if (n > 0 && n_fds == 0)
+ server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
+ else if (n == 0 && n_fds == 1)
+ server_process_native_file(s, fds[0], ucred, tv, label, label_len);
+ else if (n_fds > 0)
+ log_warning("Got too many file descriptors via native socket. Ignoring.");
- close_many(fds, n_fds);
+ } else {
+ assert(fd == s->audit_fd);
+
+ if (n > 0 && n_fds == 0)
+ server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
+ else if (n_fds > 0)
+ log_warning("Got file descriptors via audit socket. Ignoring.");
}
+
+ close_many(fds, n_fds);
+ return 0;
}
static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {