From 581edd240f8dd68b1dbb4070353ddb2059eb8a67 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 27 Oct 2017 10:56:42 +0200
Subject: [PATCH] fd-util: add new acquire_data_fd() API helper
All this function does is place some data in an in-memory read-only fd,
that may be read back to get the original data back.
Doing this in a way that works everywhere, given the different kernels
we support as well as different privilege levels is surprisingly
complex.
(cherry picked from commit a548e14d690133dd8cca2d5ab8082bb23259fd5f)
Related: #1446095
---
src/shared/util.c | 156 +++++++++++++++++++++++++++++++++++++++++++
src/shared/util.h | 10 +++
src/test/test-util.c | 49 ++++++++++++++
3 files changed, 215 insertions(+)
diff --git a/src/shared/util.c b/src/shared/util.c
index af09532733..982f5e044f 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -95,6 +95,7 @@
#include "sparse-endian.h"
#include "conf-parser.h"
#include "cgroup-util.h"
+#include "memfd-util.h"
int saved_argc = 0;
char **saved_argv = NULL;
@@ -8893,3 +8894,158 @@ uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
return m / max;
}
+
+int acquire_data_fd(const void *data, size_t size, unsigned flags) {
+
+ char procfs_path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
+ char pattern[] = "/dev/shm/data-fd-XXXXXX";
+ _cleanup_close_ int fd = -1;
+ int isz = 0, r;
+ ssize_t n;
+ off_t f;
+
+ assert(data || size == 0);
+
+ /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
+ * complex than I wish it was. But here's why:
+ *
+ * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
+ * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
+ *
+ * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
+ * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
+ * clients can only bump their size to a system-wide limit, which might be quite low.
+ *
+ * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
+ * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
+ * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
+ *
+ * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
+ *
+ * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
+ * figure. */
+
+ if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
+ /* As a special case, return /dev/null if we have been called for an empty data block */
+ r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (r < 0)
+ return -errno;
+
+ return r;
+ }
+
+ if ((flags & ACQUIRE_NO_MEMFD) == 0) {
+ fd = memfd_new("data-fd");
+ if (fd < 0)
+ goto try_pipe;
+
+ n = write(fd, data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ f = lseek(fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ r = memfd_set_sealed(fd);
+ if (r < 0)
+ return r;
+
+ r = fd;
+ fd = -1;
+
+ return r;
+ }
+
+try_pipe:
+ if ((flags & ACQUIRE_NO_PIPE) == 0) {
+ if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
+ return -errno;
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ if ((size_t) isz < size) {
+ isz = (int) size;
+ if (isz < 0 || (size_t) isz != size)
+ return -E2BIG;
+
+ /* Try to bump the pipe size */
+ (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
+
+ /* See if that worked */
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ if ((size_t) isz < size)
+ goto try_dev_shm;
+ }
+
+ n = write(pipefds[1], data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ (void) fd_nonblock(pipefds[0], false);
+
+ r = pipefds[0];
+ pipefds[0] = -1;
+
+ return r;
+ }
+
+try_dev_shm:
+ if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
+ fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
+ if (fd < 0)
+ goto try_dev_shm_without_o_tmpfile;
+
+ n = write(fd, data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ r = open(procfs_path, O_RDONLY|O_CLOEXEC);
+ if (r < 0)
+ return -errno;
+
+ return r;
+ }
+
+try_dev_shm_without_o_tmpfile:
+ if ((flags & ACQUIRE_NO_REGULAR) == 0) {
+ fd = mkostemp_safe(pattern, O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ n = write(fd, data, size);
+ if (n < 0) {
+ r = -errno;
+ goto unlink_and_return;
+ }
+ if ((size_t) n != size) {
+ r = -EIO;
+ goto unlink_and_return;
+ }
+
+ /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
+ r = open(pattern, O_RDONLY|O_CLOEXEC);
+ if (r < 0)
+ r = -errno;
+
+ unlink_and_return:
+ (void) unlink(pattern);
+ return r;
+ }
+
+ return -EOPNOTSUPP;
+}
diff --git a/src/shared/util.h b/src/shared/util.h
index 526a6fe848..9c4be02566 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -1112,3 +1112,13 @@ int parse_percent(const char *p);
uint64_t system_tasks_max(void);
uint64_t system_tasks_max_scale(uint64_t v, uint64_t max);
+
+enum {
+ ACQUIRE_NO_DEV_NULL = 1 << 0,
+ ACQUIRE_NO_MEMFD = 1 << 1,
+ ACQUIRE_NO_PIPE = 1 << 2,
+ ACQUIRE_NO_TMPFILE = 1 << 3,
+ ACQUIRE_NO_REGULAR = 1 << 4,
+};
+
+int acquire_data_fd(const void *data, size_t size, unsigned flags);
diff --git a/src/test/test-util.c b/src/test/test-util.c
index f2c52edcee..efb02ff530 100644
--- a/src/test/test-util.c
+++ b/src/test/test-util.c
@@ -1861,6 +1861,54 @@ static void test_system_tasks_max_scale(void) {
assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
}
+static void test_acquire_data_fd_one(unsigned flags) {
+ char wbuffer[196*1024 - 7];
+ char rbuffer[sizeof(wbuffer)];
+ int fd;
+
+ fd = acquire_data_fd("foo", 3, flags);
+ assert_se(fd >= 0);
+
+ zero(rbuffer);
+ assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 3);
+ assert_se(streq(rbuffer, "foo"));
+
+ fd = safe_close(fd);
+
+ fd = acquire_data_fd("", 0, flags);
+ assert_se(fd >= 0);
+
+ zero(rbuffer);
+ assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 0);
+ assert_se(streq(rbuffer, ""));
+
+ fd = safe_close(fd);
+
+ random_bytes(wbuffer, sizeof(wbuffer));
+
+ fd = acquire_data_fd(wbuffer, sizeof(wbuffer), flags);
+ assert_se(fd >= 0);
+
+ zero(rbuffer);
+ assert_se(read(fd, rbuffer, sizeof(rbuffer)) == sizeof(rbuffer));
+ assert_se(memcmp(rbuffer, wbuffer, sizeof(rbuffer)) == 0);
+
+ fd = safe_close(fd);
+}
+
+static void test_acquire_data_fd(void) {
+
+ test_acquire_data_fd_one(0);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL);
+ test_acquire_data_fd_one(ACQUIRE_NO_MEMFD);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD);
+ test_acquire_data_fd_one(ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE|ACQUIRE_NO_TMPFILE);
+}
+
int main(int argc, char *argv[]) {
log_parse_environment();
log_open();
@@ -1943,6 +1991,7 @@ int main(int argc, char *argv[]) {
test_shell_maybe_quote();
test_system_tasks_max();
test_system_tasks_max_scale();
+ test_acquire_data_fd();
return 0;
}