From 581edd240f8dd68b1dbb4070353ddb2059eb8a67 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 27 Oct 2017 10:56:42 +0200 Subject: [PATCH] fd-util: add new acquire_data_fd() API helper All this function does is place some data in an in-memory read-only fd, that may be read back to get the original data back. Doing this in a way that works everywhere, given the different kernels we support as well as different privilege levels is surprisingly complex. (cherry picked from commit a548e14d690133dd8cca2d5ab8082bb23259fd5f) Related: #1446095 --- src/shared/util.c | 156 +++++++++++++++++++++++++++++++++++++++++++ src/shared/util.h | 10 +++ src/test/test-util.c | 49 ++++++++++++++ 3 files changed, 215 insertions(+) diff --git a/src/shared/util.c b/src/shared/util.c index af0953273..982f5e044 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -95,6 +95,7 @@ #include "sparse-endian.h" #include "conf-parser.h" #include "cgroup-util.h" +#include "memfd-util.h" int saved_argc = 0; char **saved_argv = NULL; @@ -8893,3 +8894,158 @@ uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) { return m / max; } + +int acquire_data_fd(const void *data, size_t size, unsigned flags) { + + char procfs_path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + _cleanup_close_pair_ int pipefds[2] = { -1, -1 }; + char pattern[] = "/dev/shm/data-fd-XXXXXX"; + _cleanup_close_ int fd = -1; + int isz = 0, r; + ssize_t n; + off_t f; + + assert(data || size == 0); + + /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more + * complex than I wish it was. But here's why: + * + * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them + * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14. + * + * b) Then, we try classic pipes. 
They are the second best option, as we can close the writing side, retaining + * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged + * clients can only bump their size to a system-wide limit, which might be quite low. + * + * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from + * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via + * /proc/self/fd/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs. + * + * d) Finally, we try creating a regular file in /dev/shm, which we then delete. + * + * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I + * figure. On success the new fd is returned, on failure a negative error value (e.g. -errno, -EIO, -E2BIG or -EOPNOTSUPP). */ + + if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) { + /* As a special case, return /dev/null if we have been called for an empty data block */ + r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (r < 0) + return -errno; + + return r; + } + + if ((flags & ACQUIRE_NO_MEMFD) == 0) { + fd = memfd_new("data-fd"); + if (fd < 0) + goto try_pipe; + + n = write(fd, data, size); + if (n < 0) + return -errno; + if ((size_t) n != size) + return -EIO; + + f = lseek(fd, 0, SEEK_SET); + if (f != 0) + return -errno; + + r = memfd_set_sealed(fd); + if (r < 0) + return r; + + r = fd; + fd = -1; + + return r; + } + +try_pipe: + if ((flags & ACQUIRE_NO_PIPE) == 0) { + if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0) + return -errno; + + isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0); + if (isz < 0) + return -errno; + + if ((size_t) isz < size) { + isz = (int) size; + if (isz < 0 || (size_t) isz != size) + return -E2BIG; + + /* Try to bump the pipe size */ + (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz); + + /* See if that worked */ + isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0); + if (isz < 0) + return -errno; + + if ((size_t) isz < size) + goto try_dev_shm; + } + + n = write(pipefds[1], data, size); + if (n < 0) + return 
-errno; + if ((size_t) n != size) + return -EIO; + + (void) fd_nonblock(pipefds[0], false); + + r = pipefds[0]; + pipefds[0] = -1; + + return r; + } + +try_dev_shm: + if ((flags & ACQUIRE_NO_TMPFILE) == 0) { + fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500); + if (fd < 0) + goto try_dev_shm_without_o_tmpfile; + + n = write(fd, data, size); + if (n < 0) + return -errno; + if ((size_t) n != size) + return -EIO; + + /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */ + xsprintf(procfs_path, "/proc/self/fd/%i", fd); + r = open(procfs_path, O_RDONLY|O_CLOEXEC); + if (r < 0) + return -errno; + + return r; + } + +try_dev_shm_without_o_tmpfile: + if ((flags & ACQUIRE_NO_REGULAR) == 0) { + fd = mkostemp_safe(pattern, O_CLOEXEC); + if (fd < 0) + return fd; + + n = write(fd, data, size); + if (n < 0) { + r = -errno; + goto unlink_and_return; + } + if ((size_t) n != size) { + r = -EIO; + goto unlink_and_return; + } + + /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */ + r = open(pattern, O_RDONLY|O_CLOEXEC); + if (r < 0) + r = -errno; + + unlink_and_return: + (void) unlink(pattern); + return r; + } + + return -EOPNOTSUPP; +} diff --git a/src/shared/util.h b/src/shared/util.h index 526a6fe84..9c4be0256 100644 --- a/src/shared/util.h +++ b/src/shared/util.h @@ -1112,3 +1112,13 @@ int parse_percent(const char *p); uint64_t system_tasks_max(void); uint64_t system_tasks_max_scale(uint64_t v, uint64_t max); + +enum { + ACQUIRE_NO_DEV_NULL = 1 << 0, + ACQUIRE_NO_MEMFD = 1 << 1, + ACQUIRE_NO_PIPE = 1 << 2, + ACQUIRE_NO_TMPFILE = 1 << 3, + ACQUIRE_NO_REGULAR = 1 << 4, +}; + +int acquire_data_fd(const void *data, size_t size, unsigned flags); diff --git a/src/test/test-util.c b/src/test/test-util.c index f2c52edce..efb02ff53 100644 --- a/src/test/test-util.c +++ b/src/test/test-util.c @@ -1861,6 +1861,54 @@ static void test_system_tasks_max_scale(void) { assert_se(system_tasks_max_scale(UINT64_MAX/4, 
UINT64_MAX) == UINT64_MAX); } +static void test_acquire_data_fd_one(unsigned flags) { + char wbuffer[196*1024 - 7]; + char rbuffer[sizeof(wbuffer)]; + int fd; + + fd = acquire_data_fd("foo", 3, flags); + assert_se(fd >= 0); + + zero(rbuffer); + assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 3); + assert_se(streq(rbuffer, "foo")); + + fd = safe_close(fd); + + fd = acquire_data_fd("", 0, flags); + assert_se(fd >= 0); + + zero(rbuffer); + assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 0); + assert_se(streq(rbuffer, "")); + + fd = safe_close(fd); + + random_bytes(wbuffer, sizeof(wbuffer)); + + fd = acquire_data_fd(wbuffer, sizeof(wbuffer), flags); + assert_se(fd >= 0); + + zero(rbuffer); + assert_se(read(fd, rbuffer, sizeof(rbuffer)) == sizeof(rbuffer)); + assert_se(memcmp(rbuffer, wbuffer, sizeof(rbuffer)) == 0); + + fd = safe_close(fd); +} + +static void test_acquire_data_fd(void) { + + test_acquire_data_fd_one(0); + test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL); + test_acquire_data_fd_one(ACQUIRE_NO_MEMFD); + test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD); + test_acquire_data_fd_one(ACQUIRE_NO_PIPE); + test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_PIPE); + test_acquire_data_fd_one(ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE); + test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE); + test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE|ACQUIRE_NO_TMPFILE); +} + int main(int argc, char *argv[]) { log_parse_environment(); log_open(); @@ -1943,6 +1991,7 @@ int main(int argc, char *argv[]) { test_shell_maybe_quote(); test_system_tasks_max(); test_system_tasks_max_scale(); + test_acquire_data_fd(); return 0; }